mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-11 08:07:19 +08:00
fix: always use the GRF count in calculateNumThreadsPerThreadGroup
GRF size != GRF count.
Related-To: GSD-8437
Signed-off-by: Katarzyna Cencelewska <katarzyna.cencelewska@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
df54d67f40
commit
da7b03dd15
@@ -1029,12 +1029,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, giv
|
||||
dispatchKernelWithImplicitArgs<FamilyType>();
|
||||
|
||||
auto grfSize = ImplicitArgsHelper::getGrfSize(expectedImplicitArgs.simdWidth);
|
||||
auto numGrf = GrfConfig::defaultGrfNumber;
|
||||
auto expectedLocalIds = alignedMalloc(implicitArgsProgrammingSize - ImplicitArgs::getSize(), MemoryConstants::cacheLineSize);
|
||||
const auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironment();
|
||||
generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, workgroupDimOrder, false, grfSize, rootDeviceEnvironment);
|
||||
generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, workgroupDimOrder, false, grfSize, numGrf, rootDeviceEnvironment);
|
||||
|
||||
auto localIdsProgrammingSize = implicitArgsProgrammingSize - ImplicitArgs::getSize();
|
||||
size_t sizeForLocalIds = NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(expectedImplicitArgs.simdWidth, grfSize, 3u, totalLocalSize, !kernelRequiresGenerationOfLocalIdsByRuntime, rootDeviceEnvironment);
|
||||
size_t sizeForLocalIds = NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(expectedImplicitArgs.simdWidth, grfSize, numGrf, 3u, totalLocalSize, !kernelRequiresGenerationOfLocalIdsByRuntime, rootDeviceEnvironment);
|
||||
|
||||
EXPECT_EQ(0, memcmp(expectedLocalIds, indirectHeapAllocation->getUnderlyingBuffer(), sizeForLocalIds));
|
||||
alignedFree(expectedLocalIds);
|
||||
@@ -1075,12 +1076,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, giv
|
||||
dispatchKernelWithImplicitArgs<FamilyType>();
|
||||
|
||||
auto grfSize = ImplicitArgsHelper::getGrfSize(expectedImplicitArgs.simdWidth);
|
||||
auto numGrf = GrfConfig::defaultGrfNumber;
|
||||
auto expectedLocalIds = alignedMalloc(implicitArgsProgrammingSize - ImplicitArgs::getSize(), MemoryConstants::cacheLineSize);
|
||||
const auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironment();
|
||||
generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, expectedDimOrder, false, grfSize, rootDeviceEnvironment);
|
||||
generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, expectedDimOrder, false, grfSize, numGrf, rootDeviceEnvironment);
|
||||
|
||||
auto localIdsProgrammingSize = implicitArgsProgrammingSize - ImplicitArgs::getSize();
|
||||
size_t sizeForLocalIds = NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(expectedImplicitArgs.simdWidth, grfSize, 3u, totalLocalSize, !kernelRequiresGenerationOfLocalIdsByRuntime, rootDeviceEnvironment);
|
||||
size_t sizeForLocalIds = NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(expectedImplicitArgs.simdWidth, grfSize, numGrf, 3u, totalLocalSize, !kernelRequiresGenerationOfLocalIdsByRuntime, rootDeviceEnvironment);
|
||||
|
||||
EXPECT_EQ(0, memcmp(expectedLocalIds, indirectHeapAllocation->getUnderlyingBuffer(), sizeForLocalIds));
|
||||
alignedFree(expectedLocalIds);
|
||||
|
||||
@@ -306,6 +306,7 @@ TEST_F(KernelImpSetGroupSizeTest, givenLocalIdGenerationByRuntimeEnabledWhenSett
|
||||
mockKernel.module = &mockModule;
|
||||
const auto &device = mockModule.getDevice();
|
||||
auto grfSize = device->getHwInfo().capabilityTable.grfSize;
|
||||
auto numGrf = GrfConfig::defaultGrfNumber;
|
||||
const auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironment();
|
||||
uint32_t groupSize[3] = {2, 3, 5};
|
||||
auto ret = mockKernel.setGroupSize(groupSize[0], groupSize[1], groupSize[2]);
|
||||
@@ -315,13 +316,14 @@ TEST_F(KernelImpSetGroupSizeTest, givenLocalIdGenerationByRuntimeEnabledWhenSett
|
||||
auto numThreadsPerTG = gfxHelper.calculateNumThreadsPerThreadGroup(
|
||||
mockKernel.descriptor.kernelAttributes.simdSize,
|
||||
groupSize[0] * groupSize[1] * groupSize[2],
|
||||
grfSize,
|
||||
numGrf,
|
||||
mockKernel.kernelRequiresGenerationOfLocalIdsByRuntime,
|
||||
rootDeviceEnvironment);
|
||||
auto perThreadDataSizeForWholeTGNeeded =
|
||||
static_cast<uint32_t>(NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(
|
||||
mockKernel.descriptor.kernelAttributes.simdSize,
|
||||
grfSize,
|
||||
numGrf,
|
||||
mockKernel.descriptor.kernelAttributes.numLocalIdChannels,
|
||||
groupSize[0] * groupSize[1] * groupSize[2],
|
||||
!mockKernel.kernelRequiresGenerationOfLocalIdsByRuntime,
|
||||
|
||||
Reference in New Issue
Block a user