fix: correct region barrier size calculation

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2024-06-21 11:16:36 +00:00
committed by Compute-Runtime-Automation
parent 1854bc4a60
commit 8698e7fb43
4 changed files with 36 additions and 2 deletions

View File

@@ -219,6 +219,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
using BaseClass::partitionCount;
using BaseClass::pipeControlMultiKernelEventSync;
using BaseClass::pipelineSelectStateTracking;
using BaseClass::programRegionGroupBarrier;
using BaseClass::requiredStreamState;
using BaseClass::requiresQueueUncachedMocs;
using BaseClass::signalAllEventPackets;

View File

@@ -13,6 +13,7 @@
#include "shared/source/kernel/kernel_descriptor.h"
#include "shared/source/memory_manager/internal_allocation_storage.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/program/sync_buffer_handler.h"
#include "shared/test/common/helpers/engine_descriptor_helper.h"
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/libult/ult_command_stream_receiver.h"
@@ -46,6 +47,27 @@ HWTEST2_F(MultiTileImmediateCommandListTest, GivenMultiTileDeviceWhenCreatingImm
EXPECT_EQ(2u, commandList->partitionCount);
}
// Verifies the amount of sync-buffer space consumed by programRegionGroupBarrier() when the
// tile count in hardware info (3) differs from the number of bits set in the device bitfield (2).
// The offset handed out by the next sync-buffer request is compared against
// alignUp(3 * MaxSubSlicesSupported * sizeof(uint64_t), cacheLineSize).
HWTEST2_F(MultiTileImmediateCommandListTest, givenMultipleTilesWhenAllocatingBarrierSyncBufferThenEnsureCorrectSize, IsAtLeastXeHpCore) {
    // Precondition: the fixture's device bitfield has exactly two bits set.
    EXPECT_EQ(2u, device->getNEODevice()->getDeviceBitfield().count());

    // Override the tile count in the mutable hardware info so it no longer matches the bitfield count.
    auto *mutableHwInfo = neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo();
    mutableHwInfo->gtSystemInfo.MultiTileArchInfo.TileCount = 3;

    Mock<KernelImp> mockKernel;

    // Walk the cast chain one step at a time to reach the WhiteBox wrapper, which exposes
    // programRegionGroupBarrier() to the test.
    auto *cmdListImp = static_cast<L0::CommandListImp *>(commandList.get());
    auto *immediateCmdList = static_cast<CommandListCoreFamilyImmediate<gfxCoreFamily> *>(cmdListImp);
    auto *whiteBox = static_cast<WhiteBox<::L0::CommandListCoreFamilyImmediate<gfxCoreFamily>> *>(immediateCmdList);

    whiteBox->programRegionGroupBarrier(mockKernel);

    // Request one more byte from the sync buffer; the returned offset is what gets checked.
    // NOTE(review): presumably this offset equals the size reserved by the barrier call above - confirm
    // against SyncBufferHandler::obtainAllocationAndOffset.
    auto nextPatchData = neoDevice->syncBufferHandler->obtainAllocationAndOffset(1);

    auto &hardwareInfo = device->getNEODevice()->getHardwareInfo();
    // The literal 3 mirrors the TileCount value written above.
    const auto unalignedBarrierSize = 3 * hardwareInfo.gtSystemInfo.MaxSubSlicesSupported * sizeof(uint64_t);
    size_t expectedOffset = alignUp(unalignedBarrierSize, MemoryConstants::cacheLineSize);

    EXPECT_EQ(nextPatchData.second, expectedOffset);
}
// Test fixture alias for the internal immediate command list tests that follow.
// NOTE(review): the meaning of the <true, true, false, -1> arguments is defined by
// MultiTileCommandListFixture, which is not visible here - confirm against its declaration.
using MultiTileImmediateInternalCommandListTest = Test<MultiTileCommandListFixture<true, true, false, -1>>;
HWTEST2_F(MultiTileImmediateInternalCommandListTest, GivenMultiTileDeviceWhenCreatingInternalImmediateCommandListThenExpectPartitionCountEqualOne, IsWithinXeGfxFamily) {

View File

@@ -532,7 +532,9 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingRegionGroupBarrierWhenA
auto patchPtr2 = *reinterpret_cast<uint64_t *>(ptrOffset(kernel.crossThreadData.get(), regionGroupBarrier.stateless));
EXPECT_EQ(patchPtr2, patchPtr + MemoryConstants::cacheLineSize);
auto offset = alignUp(device->getHwInfo().gtSystemInfo.MaxSubSlicesSupported * sizeof(uint64_t), MemoryConstants::cacheLineSize);
EXPECT_EQ(patchPtr2, patchPtr + offset);
}
HWTEST2_F(CommandListAppendLaunchKernel, whenAppendLaunchCooperativeKernelAndQueryKernelTimestampsToTheSameCmdlistThenFronEndStateIsNotChanged, IsAtLeastSkl) {