mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 09:09:04 +08:00
fix: Unify logic calculating threads per work group part 3
Related-To: NEO-8087 Signed-off-by: Cencelewska, Katarzyna <katarzyna.cencelewska@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
d2f1cf98d7
commit
61f701aba5
@@ -345,7 +345,7 @@ ze_result_t KernelImp::setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
|
||||
auto grfSize = this->module->getDevice()->getHwInfo().capabilityTable.grfSize;
|
||||
uint32_t perThreadDataSizeForWholeThreadGroupNeeded =
|
||||
static_cast<uint32_t>(NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(
|
||||
simdSize, grfSize, numChannels, itemsInGroup));
|
||||
simdSize, grfSize, numChannels, itemsInGroup, !kernelRequiresGenerationOfLocalIdsByRuntime, gfxCoreHelper));
|
||||
if (perThreadDataSizeForWholeThreadGroupNeeded >
|
||||
perThreadDataSizeForWholeThreadGroupAllocated) {
|
||||
alignedFree(perThreadDataForWholeThreadGroup);
|
||||
|
||||
@@ -991,7 +991,8 @@ struct CmdlistAppendLaunchKernelWithImplicitArgsTests : CmdlistAppendLaunchKerne
|
||||
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
implicitArgsProgrammingSize = ImplicitArgsHelper::getSizeForImplicitArgsPatching(&expectedImplicitArgs, *kernelDescriptor);
|
||||
const auto &gfxCoreHelper = device->getGfxCoreHelper();
|
||||
implicitArgsProgrammingSize = ImplicitArgsHelper::getSizeForImplicitArgsPatching(&expectedImplicitArgs, *kernelDescriptor, !kernelRequiresGenerationOfLocalIdsByRuntime, gfxCoreHelper);
|
||||
auto sizeCrossThreadData = kernel->getCrossThreadDataSize();
|
||||
auto sizePerThreadDataForWholeGroup = kernel->getPerThreadDataSizeForWholeThreadGroup();
|
||||
EXPECT_EQ(indirectHeap->getUsed(), sizeCrossThreadData + sizePerThreadDataForWholeGroup + implicitArgsProgrammingSize);
|
||||
@@ -1027,7 +1028,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, giv
|
||||
generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, workgroupDimOrder, false, grfSize, gfxCoreHelper);
|
||||
|
||||
auto localIdsProgrammingSize = implicitArgsProgrammingSize - sizeof(ImplicitArgs);
|
||||
size_t sizeForLocalIds = NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(expectedImplicitArgs.simdWidth, grfSize, 3u, totalLocalSize);
|
||||
size_t sizeForLocalIds = NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(expectedImplicitArgs.simdWidth, grfSize, 3u, totalLocalSize, !kernelRequiresGenerationOfLocalIdsByRuntime, gfxCoreHelper);
|
||||
|
||||
EXPECT_EQ(0, memcmp(expectedLocalIds, indirectHeapAllocation->getUnderlyingBuffer(), sizeForLocalIds));
|
||||
alignedFree(expectedLocalIds);
|
||||
@@ -1073,7 +1074,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, giv
|
||||
generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, expectedDimOrder, false, grfSize, gfxCoreHelper);
|
||||
|
||||
auto localIdsProgrammingSize = implicitArgsProgrammingSize - sizeof(ImplicitArgs);
|
||||
size_t sizeForLocalIds = NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(expectedImplicitArgs.simdWidth, grfSize, 3u, totalLocalSize);
|
||||
size_t sizeForLocalIds = NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(expectedImplicitArgs.simdWidth, grfSize, 3u, totalLocalSize, !kernelRequiresGenerationOfLocalIdsByRuntime, gfxCoreHelper);
|
||||
|
||||
EXPECT_EQ(0, memcmp(expectedLocalIds, indirectHeapAllocation->getUnderlyingBuffer(), sizeForLocalIds));
|
||||
alignedFree(expectedLocalIds);
|
||||
|
||||
@@ -2062,7 +2062,8 @@ struct CommandListAppendLaunchKernelWithImplicitArgs : CommandListAppendLaunchKe
|
||||
template <typename FamilyType>
|
||||
uint64_t getIndirectHeapOffsetForImplicitArgsBuffer(const Mock<::L0::KernelImp> &kernel) {
|
||||
if (FamilyType::supportsCmdSet(IGFX_XE_HP_CORE)) {
|
||||
auto implicitArgsProgrammingSize = ImplicitArgsHelper::getSizeForImplicitArgsPatching(kernel.pImplicitArgs.get(), kernel.getKernelDescriptor());
|
||||
const auto &gfxCoreHelper = device->getGfxCoreHelper();
|
||||
auto implicitArgsProgrammingSize = ImplicitArgsHelper::getSizeForImplicitArgsPatching(kernel.pImplicitArgs.get(), kernel.getKernelDescriptor(), !kernel.kernelRequiresGenerationOfLocalIdsByRuntime, gfxCoreHelper);
|
||||
return implicitArgsProgrammingSize - sizeof(ImplicitArgs);
|
||||
} else {
|
||||
return 0u;
|
||||
|
||||
Reference in New Issue
Block a user