mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
fix: correct region barrier size calculation
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
1854bc4a60
commit
8698e7fb43
@ -2768,7 +2768,16 @@ void CommandListCoreFamily<gfxCoreFamily>::programRegionGroupBarrier(Kernel &ker
|
||||
auto neoDevice = device->getNEODevice();
|
||||
|
||||
neoDevice->allocateSyncBufferHandler();
|
||||
auto patchData = neoDevice->syncBufferHandler->obtainAllocationAndOffset(MemoryConstants::cacheLineSize);
|
||||
|
||||
auto &gtSysInfo = device->getNEODevice()->getHardwareInfo().gtSystemInfo;
|
||||
|
||||
auto tileCount = std::max(gtSysInfo.MultiTileArchInfo.TileCount, uint8_t(1)); // Use physical count
|
||||
|
||||
constexpr size_t barrierSizePerSubslice = sizeof(uint64_t);
|
||||
|
||||
size_t size = alignUp(tileCount * gtSysInfo.MaxSubSlicesSupported * barrierSizePerSubslice, MemoryConstants::cacheLineSize);
|
||||
|
||||
auto patchData = neoDevice->syncBufferHandler->obtainAllocationAndOffset(size);
|
||||
|
||||
kernel.patchRegionGroupBarrier(patchData.first, patchData.second);
|
||||
}
|
||||
|
@ -219,6 +219,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
|
||||
using BaseClass::partitionCount;
|
||||
using BaseClass::pipeControlMultiKernelEventSync;
|
||||
using BaseClass::pipelineSelectStateTracking;
|
||||
using BaseClass::programRegionGroupBarrier;
|
||||
using BaseClass::requiredStreamState;
|
||||
using BaseClass::requiresQueueUncachedMocs;
|
||||
using BaseClass::signalAllEventPackets;
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include "shared/source/kernel/kernel_descriptor.h"
|
||||
#include "shared/source/memory_manager/internal_allocation_storage.h"
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
#include "shared/source/program/sync_buffer_handler.h"
|
||||
#include "shared/test/common/helpers/engine_descriptor_helper.h"
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
#include "shared/test/common/libult/ult_command_stream_receiver.h"
|
||||
@ -46,6 +47,27 @@ HWTEST2_F(MultiTileImmediateCommandListTest, GivenMultiTileDeviceWhenCreatingImm
|
||||
EXPECT_EQ(2u, commandList->partitionCount);
|
||||
}
|
||||
|
||||
// Checks the size of the sync-buffer region reserved by programRegionGroupBarrier:
// the offset returned by the next obtainAllocationAndOffset() call is expected to equal
// TileCount * MaxSubSlicesSupported * sizeof(uint64_t), aligned up to a cache line.
HWTEST2_F(MultiTileImmediateCommandListTest, givenMultipleTilesWhenAllocatingBarrierSyncBufferThenEnsureCorrectSize, IsAtLeastXeHpCore) {
|
||||
// Precondition: the device bitfield has two bits set.
EXPECT_EQ(2u, device->getNEODevice()->getDeviceBitfield().count());
|
||||
|
||||
// Set TileCount in the hardware info to 3, deliberately different from the bitfield count (2),
// so the expectation below only matches if the size is derived from TileCount.
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo()->gtSystemInfo.MultiTileArchInfo.TileCount = 3;
|
||||
|
||||
Mock<KernelImp> mockKernel;
|
||||
|
||||
// Cast down to the WhiteBox wrapper to reach programRegionGroupBarrier.
auto cmdListImmediate = static_cast<CommandListCoreFamilyImmediate<gfxCoreFamily> *>(static_cast<L0::CommandListImp *>(commandList.get()));
|
||||
auto whiteBoxCmdList = static_cast<WhiteBox<::L0::CommandListCoreFamilyImmediate<gfxCoreFamily>> *>(cmdListImmediate);
|
||||
|
||||
whiteBoxCmdList->programRegionGroupBarrier(mockKernel);
|
||||
|
||||
// Request one more byte; the returned offset is compared with the expected barrier size below.
// NOTE(review): presumably the handler hands out consecutive offsets starting at 0, so this
// offset equals the size of the previous reservation - confirm against SyncBufferHandler.
auto patchData = neoDevice->syncBufferHandler->obtainAllocationAndOffset(1);
|
||||
|
||||
auto &hwInfo = device->getNEODevice()->getHardwareInfo();
|
||||
|
||||
// 3 is the TileCount written above; sizeof(uint64_t) is the per-subslice barrier size used in this formula.
size_t expectedOffset = alignUp(3 * hwInfo.gtSystemInfo.MaxSubSlicesSupported * sizeof(uint64_t), MemoryConstants::cacheLineSize);
|
||||
|
||||
EXPECT_EQ(patchData.second, expectedOffset);
|
||||
}
|
||||
|
||||
using MultiTileImmediateInternalCommandListTest = Test<MultiTileCommandListFixture<true, true, false, -1>>;
|
||||
|
||||
HWTEST2_F(MultiTileImmediateInternalCommandListTest, GivenMultiTileDeviceWhenCreatingInternalImmediateCommandListThenExpectPartitionCountEqualOne, IsWithinXeGfxFamily) {
|
||||
|
@ -532,7 +532,9 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingRegionGroupBarrierWhenA
|
||||
|
||||
auto patchPtr2 = *reinterpret_cast<uint64_t *>(ptrOffset(kernel.crossThreadData.get(), regionGroupBarrier.stateless));
|
||||
|
||||
EXPECT_EQ(patchPtr2, patchPtr + MemoryConstants::cacheLineSize);
|
||||
auto offset = alignUp(device->getHwInfo().gtSystemInfo.MaxSubSlicesSupported * sizeof(uint64_t), MemoryConstants::cacheLineSize);
|
||||
|
||||
EXPECT_EQ(patchPtr2, patchPtr + offset);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListAppendLaunchKernel, whenAppendLaunchCooperativeKernelAndQueryKernelTimestampsToTheSameCmdlistThenFronEndStateIsNotChanged, IsAtLeastSkl) {
|
||||
|
Reference in New Issue
Block a user