From 31f32cc16e6600cd54f69c481b0af5fc2877d8ae Mon Sep 17 00:00:00 2001 From: Mateusz Jablonski Date: Fri, 7 Apr 2023 08:57:50 +0200 Subject: [PATCH] fix implicit args: generate local ids as for grf size 32 Related-To: IGC-6936 Signed-off-by: Mateusz Jablonski --- .../test_cmdlist_append_launch_kernel_2.cpp | 6 ++-- .../test_cmdlist_append_launch_kernel_3.cpp | 2 +- .../helpers/hardware_commands_helper_base.inl | 2 +- ...hardware_commands_helper_bdw_and_later.inl | 5 ++- ...ardware_commands_helper_xehp_and_later.inl | 5 ++- .../command_queue/dispatch_walker_tests.cpp | 2 +- .../hardware_commands_helper_tests.cpp | 6 ++-- .../command_encoder_bdw_and_later.inl | 4 +-- .../command_encoder_xehp_and_later.inl | 4 +-- shared/source/kernel/implicit_args.h | 7 ++-- shared/source/kernel/implicit_args_helper.cpp | 15 ++++---- .../kernel/implicit_args_helper_tests.cpp | 35 ++++++++----------- 12 files changed, 41 insertions(+), 52 deletions(-) diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp index f911c5c272..b8b7cd0fa1 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp @@ -991,7 +991,7 @@ struct CmdlistAppendLaunchKernelWithImplicitArgsTests : CmdlistAppendLaunchKerne result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - implicitArgsProgrammingSize = ImplicitArgsHelper::getSizeForImplicitArgsPatching(&expectedImplicitArgs, *kernelDescriptor, neoDevice->getHardwareInfo()); + implicitArgsProgrammingSize = ImplicitArgsHelper::getSizeForImplicitArgsPatching(&expectedImplicitArgs, *kernelDescriptor); auto sizeCrossThreadData = kernel->getCrossThreadDataSize(); auto sizePerThreadDataForWholeGroup = kernel->getPerThreadDataSizeForWholeThreadGroup(); EXPECT_EQ(indirectHeap->getUsed(), sizeCrossThreadData + sizePerThreadDataForWholeGroup + implicitArgsProgrammingSize); @@ -1021,7 +1021,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, giv dispatchKernelWithImplicitArgs(); - auto grfSize = ImplicitArgsHelper::getGrfSize(expectedImplicitArgs.simdWidth, sizeof(typename FamilyType::GRF)); + auto grfSize = ImplicitArgsHelper::getGrfSize(expectedImplicitArgs.simdWidth); auto expectedLocalIds = alignedMalloc(implicitArgsProgrammingSize - sizeof(ImplicitArgs), MemoryConstants::cacheLineSize); generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, workgroupDimOrder, false, grfSize); @@ -1066,7 +1066,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, giv dispatchKernelWithImplicitArgs(); - auto grfSize = ImplicitArgsHelper::getGrfSize(expectedImplicitArgs.simdWidth, sizeof(typename FamilyType::GRF)); + auto grfSize = ImplicitArgsHelper::getGrfSize(expectedImplicitArgs.simdWidth); auto expectedLocalIds = alignedMalloc(implicitArgsProgrammingSize - sizeof(ImplicitArgs), MemoryConstants::cacheLineSize); generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, expectedDimOrder, false, grfSize); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index 5677e8de4e..3d56476075 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -663,7 +663,7 @@ struct CommandListAppendLaunchKernelWithImplicitArgs : CommandListAppendLaunchKe template uint64_t getIndirectHeapOffsetForImplicitArgsBuffer(const Mock<::L0::Kernel> &kernel) { if (FamilyType::supportsCmdSet(IGFX_XE_HP_CORE)) { - auto implicitArgsProgrammingSize = ImplicitArgsHelper::getSizeForImplicitArgsPatching(kernel.pImplicitArgs.get(), kernel.getKernelDescriptor(), neoDevice->getHardwareInfo()); + auto implicitArgsProgrammingSize = ImplicitArgsHelper::getSizeForImplicitArgsPatching(kernel.pImplicitArgs.get(), kernel.getKernelDescriptor()); return implicitArgsProgrammingSize - sizeof(ImplicitArgs); } else { return 0u; diff --git a/opencl/source/helpers/hardware_commands_helper_base.inl b/opencl/source/helpers/hardware_commands_helper_base.inl index 17361fa5cf..6b7f52fd4c 100644 --- a/opencl/source/helpers/hardware_commands_helper_base.inl +++ b/opencl/source/helpers/hardware_commands_helper_base.inl @@ -58,7 +58,7 @@ size_t HardwareCommandsHelper::getSizeRequiredIOH(const Kernel &kerne auto pImplicitArgs = kernel.getImplicitArgs(); if (pImplicitArgs) { - size += ImplicitArgsHelper::getSizeForImplicitArgsPatching(pImplicitArgs, kernelDescriptor, hwInfo); + size += ImplicitArgsHelper::getSizeForImplicitArgsPatching(pImplicitArgs, kernelDescriptor); } return alignUp(size, WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); } diff --git a/opencl/source/helpers/hardware_commands_helper_bdw_and_later.inl b/opencl/source/helpers/hardware_commands_helper_bdw_and_later.inl index 87abb1f626..15c5dab2d9 100644 --- a/opencl/source/helpers/hardware_commands_helper_bdw_and_later.inl +++ b/opencl/source/helpers/hardware_commands_helper_bdw_and_later.inl @@ -87,12 +87,11 @@ size_t HardwareCommandsHelper::sendCrossThreadData( auto pImplicitArgs = kernel.getImplicitArgs(); if (pImplicitArgs) { const auto &kernelDescriptor = kernel.getDescriptor(); - const auto &hwInfo = kernel.getHardwareInfo(); - auto sizeForImplicitArgsProgramming = ImplicitArgsHelper::getSizeForImplicitArgsPatching(pImplicitArgs, kernelDescriptor, hwInfo); + auto sizeForImplicitArgsProgramming = ImplicitArgsHelper::getSizeForImplicitArgsPatching(pImplicitArgs, kernelDescriptor); auto implicitArgsGpuVA = indirectHeap.getGraphicsAllocation()->getGpuAddress() + indirectHeap.getUsed(); auto ptrToPatchImplicitArgs = indirectHeap.getSpace(sizeForImplicitArgsProgramming); - ImplicitArgsHelper::patchImplicitArgs(ptrToPatchImplicitArgs, *pImplicitArgs, kernelDescriptor, hwInfo, {}); + ImplicitArgsHelper::patchImplicitArgs(ptrToPatchImplicitArgs, *pImplicitArgs, kernelDescriptor, {}); auto implicitArgsCrossThreadPtr = ptrOffset(reinterpret_cast(kernel.getCrossThreadData()), kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer); *implicitArgsCrossThreadPtr = implicitArgsGpuVA; diff --git a/opencl/source/helpers/hardware_commands_helper_xehp_and_later.inl b/opencl/source/helpers/hardware_commands_helper_xehp_and_later.inl index 33a3565cb6..c463a5a4f9 100644 --- a/opencl/source/helpers/hardware_commands_helper_xehp_and_later.inl +++ b/opencl/source/helpers/hardware_commands_helper_xehp_and_later.inl @@ -76,8 +76,7 @@ size_t HardwareCommandsHelper::sendCrossThreadData( pImplicitArgs->localIdTablePtr = indirectHeap.getGraphicsAllocation()->getGpuAddress() + offsetCrossThreadData; const auto &kernelDescriptor = kernel.getDescriptor(); - const auto &hwInfo = kernel.getHardwareInfo(); - auto sizeForImplicitArgsProgramming = ImplicitArgsHelper::getSizeForImplicitArgsPatching(pImplicitArgs, kernelDescriptor, hwInfo); + auto sizeForImplicitArgsProgramming = ImplicitArgsHelper::getSizeForImplicitArgsPatching(pImplicitArgs, kernelDescriptor); auto sizeForLocalIdsProgramming = sizeForImplicitArgsProgramming - sizeof(ImplicitArgs); offsetCrossThreadData += sizeForLocalIdsProgramming; @@ -98,7 +97,7 @@ size_t HardwareCommandsHelper::sendCrossThreadData( requiredWalkOrder, kernelDescriptor.kernelAttributes.simdSize); - ImplicitArgsHelper::patchImplicitArgs(ptrToPatchImplicitArgs, *pImplicitArgs, kernelDescriptor, hwInfo, std::make_pair(generationOfLocalIdsByRuntime, requiredWalkOrder)); + ImplicitArgsHelper::patchImplicitArgs(ptrToPatchImplicitArgs, *pImplicitArgs, kernelDescriptor, std::make_pair(generationOfLocalIdsByRuntime, requiredWalkOrder)); } using InlineData = typename GfxFamily::INLINE_DATA; diff --git a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp index cb199430ea..a6f8044892 100644 --- a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp +++ b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp @@ -1483,7 +1483,7 @@ HWTEST_F(DispatchWalkerTest, WhenKernelRequiresImplicitArgsThenIohRequiresMoreSp uint32_t grfSize = sizeof(typename FamilyType::GRF); auto size = kernelWithImplicitArgs.getCrossThreadDataSize() + HardwareCommandsHelper::getPerThreadDataSizeTotal(simdSize, grfSize, numChannels, Math::computeTotalElementsCount(localWorkgroupSize)) + - ImplicitArgsHelper::getSizeForImplicitArgsPatching(kernelWithImplicitArgs.getImplicitArgs(), kernelWithImplicitArgs.getDescriptor(), *defaultHwInfo); + ImplicitArgsHelper::getSizeForImplicitArgsPatching(kernelWithImplicitArgs.getImplicitArgs(), kernelWithImplicitArgs.getDescriptor()); size = alignUp(size, MemoryConstants::cacheLineSize); EXPECT_EQ(size, iohSizeWithImplicitArgs); diff --git a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp index 69ec4f78c8..36f697ffcf 100644 --- a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp +++ b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp @@ -1312,7 +1312,7 @@ struct HardwareCommandsImplicitArgsTests : Test { kernel.setGlobalWorkOffsetValues(static_cast(expectedImplicitArgs.globalOffsetX), static_cast(expectedImplicitArgs.globalOffsetY), static_cast(expectedImplicitArgs.globalOffsetZ)); kernel.setNumWorkGroupsValues(expectedImplicitArgs.groupCountX, expectedImplicitArgs.groupCountY, expectedImplicitArgs.groupCountZ); - implicitArgsProgrammingSize = ImplicitArgsHelper::getSizeForImplicitArgsPatching(pImplicitArgs, kernel.getDescriptor(), pDevice->getHardwareInfo()); + implicitArgsProgrammingSize = ImplicitArgsHelper::getSizeForImplicitArgsPatching(pImplicitArgs, kernel.getDescriptor()); auto sizeCrossThreadData = kernel.getCrossThreadDataSize(); HardwareCommandsHelper::sendCrossThreadData( @@ -1372,7 +1372,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, HardwareCommandsImplicitArgsTests, givenKernelWithI dispatchKernelWithImplicitArgs(); - auto grfSize = ImplicitArgsHelper::getGrfSize(expectedImplicitArgs.simdWidth, sizeof(typename FamilyType::GRF)); + auto grfSize = ImplicitArgsHelper::getGrfSize(expectedImplicitArgs.simdWidth); auto expectedLocalIds = alignedMalloc(implicitArgsProgrammingSize - sizeof(ImplicitArgs), MemoryConstants::cacheLineSize); generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, workgroupDimOrder, false, grfSize); @@ -1405,7 +1405,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, HardwareCommandsImplicitArgsTests, givenKernelWithI dispatchKernelWithImplicitArgs(); - auto grfSize = ImplicitArgsHelper::getGrfSize(expectedImplicitArgs.simdWidth, sizeof(typename FamilyType::GRF)); + auto grfSize = ImplicitArgsHelper::getGrfSize(expectedImplicitArgs.simdWidth); auto expectedLocalIds = alignedMalloc(implicitArgsProgrammingSize - sizeof(ImplicitArgs), MemoryConstants::cacheLineSize); generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, expectedDimOrder, false, grfSize); diff --git a/shared/source/command_container/command_encoder_bdw_and_later.inl b/shared/source/command_container/command_encoder_bdw_and_later.inl index f19cab7ff1..fe21923a3d 100644 --- a/shared/source/command_container/command_encoder_bdw_and_later.inl +++ b/shared/source/command_container/command_encoder_bdw_and_later.inl @@ -149,7 +149,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis sizePerThreadData, hwInfo); uint32_t sizeThreadData = sizePerThreadDataForWholeGroup + sizeCrossThreadData; - uint32_t sizeForImplicitArgsPatching = NEO::ImplicitArgsHelper::getSizeForImplicitArgsPatching(pImplicitArgs, kernelDescriptor, hwInfo); + uint32_t sizeForImplicitArgsPatching = NEO::ImplicitArgsHelper::getSizeForImplicitArgsPatching(pImplicitArgs, kernelDescriptor); uint32_t iohRequiredSize = sizeThreadData + sizeForImplicitArgsPatching; uint64_t offsetThreadData = 0u; { @@ -171,7 +171,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis auto implicitArgsCrossThreadPtr = ptrOffset(const_cast(reinterpret_cast(args.dispatchInterface->getCrossThreadData())), kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer); *implicitArgsCrossThreadPtr = implicitArgsGpuVA; - ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, hwInfo, {}); + ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, {}); } memcpy_s(ptr, sizeCrossThreadData, diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index fbdb124f72..9e2a47ef52 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -178,7 +178,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis } uint32_t sizeThreadData = sizePerThreadDataForWholeGroup + sizeCrossThreadData; - uint32_t sizeForImplicitArgsPatching = NEO::ImplicitArgsHelper::getSizeForImplicitArgsPatching(pImplicitArgs, kernelDescriptor, hwInfo); + uint32_t sizeForImplicitArgsPatching = NEO::ImplicitArgsHelper::getSizeForImplicitArgsPatching(pImplicitArgs, kernelDescriptor); uint32_t iohRequiredSize = sizeThreadData + sizeForImplicitArgsPatching; { auto heap = container.getIndirectHeap(HeapType::INDIRECT_OBJECT); @@ -196,7 +196,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis if (pImplicitArgs) { offsetThreadData -= sizeof(ImplicitArgs); pImplicitArgs->localIdTablePtr = heap->getGraphicsAllocation()->getGpuAddress() + heap->getUsed() - iohRequiredSize; - ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, hwInfo, std::make_pair(localIdsGenerationByRuntime, requiredWorkgroupOrder)); + ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, std::make_pair(localIdsGenerationByRuntime, requiredWorkgroupOrder)); } if (sizeCrossThreadData > 0) { diff --git a/shared/source/kernel/implicit_args.h b/shared/source/kernel/implicit_args.h index ace79554c9..8079b78bc9 100644 --- a/shared/source/kernel/implicit_args.h +++ b/shared/source/kernel/implicit_args.h @@ -15,7 +15,6 @@ namespace NEO { struct KernelDescriptor; -struct HardwareInfo; struct ImplicitArgs { uint8_t structSize; @@ -49,8 +48,8 @@ inline constexpr const char *implicitArgsRelocationSymbolName = "__INTEL_PATCH_C namespace ImplicitArgsHelper { std::array getDimensionOrderForLocalIds(const uint8_t *workgroupDimensionsOrder, std::optional> hwGenerationOfLocalIdsParams); -uint32_t getGrfSize(uint32_t simd, uint32_t grfSize); -uint32_t getSizeForImplicitArgsPatching(const ImplicitArgs *pImplicitArgs, const KernelDescriptor &kernelDescriptor, const HardwareInfo &hardwareInfo); -void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, const KernelDescriptor &kernelDescriptor, const HardwareInfo &hardwareInfo, std::optional> hwGenerationOfLocalIdsParams); +uint32_t getGrfSize(uint32_t simd); +uint32_t getSizeForImplicitArgsPatching(const ImplicitArgs *pImplicitArgs, const KernelDescriptor &kernelDescriptor); +void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, const KernelDescriptor &kernelDescriptor, std::optional> hwGenerationOfLocalIdsParams); } // namespace ImplicitArgsHelper } // namespace NEO diff --git a/shared/source/kernel/implicit_args_helper.cpp b/shared/source/kernel/implicit_args_helper.cpp index 3d733d019b..24f783a705 100644 --- a/shared/source/kernel/implicit_args_helper.cpp +++ b/shared/source/kernel/implicit_args_helper.cpp @@ -7,7 +7,6 @@ #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/basic_math.h" -#include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/hw_walk_order.h" #include "shared/source/helpers/per_thread_data.h" #include "shared/source/helpers/ptr_math.h" @@ -35,14 +34,14 @@ std::array getDimensionOrderForLocalIds(const uint8_t *workgroupDime return HwWalkOrderHelper::compatibleDimensionOrders[walkOrderForHwGenerationOfLocalIds]; } -uint32_t getGrfSize(uint32_t simd, uint32_t grfSize) { +uint32_t getGrfSize(uint32_t simd) { if (simd == 1u) { return 3 * sizeof(uint16_t); } - return grfSize; + return 32u; } -uint32_t getSizeForImplicitArgsPatching(const ImplicitArgs *pImplicitArgs, const KernelDescriptor &kernelDescriptor, const HardwareInfo &hardwareInfo) { +uint32_t getSizeForImplicitArgsPatching(const ImplicitArgs *pImplicitArgs, const KernelDescriptor &kernelDescriptor) { if (!pImplicitArgs) { return 0; } @@ -52,7 +51,7 @@ uint32_t getSizeForImplicitArgsPatching(const ImplicitArgs *pImplicitArgs, const return alignUp(implicitArgsSize, MemoryConstants::cacheLineSize); } else { auto simdSize = pImplicitArgs->simdWidth; - auto grfSize = NEO::ImplicitArgsHelper::getGrfSize(simdSize, hardwareInfo.capabilityTable.grfSize); + auto grfSize = NEO::ImplicitArgsHelper::getGrfSize(simdSize); Vec3 localWorkSize = {pImplicitArgs->localSizeX, pImplicitArgs->localSizeY, pImplicitArgs->localSizeZ}; auto itemsInGroup = Math::computeTotalElementsCount(localWorkSize); uint32_t localIdsSizeNeeded = @@ -63,15 +62,15 @@ uint32_t getSizeForImplicitArgsPatching(const ImplicitArgs *pImplicitArgs, const } } -void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, const KernelDescriptor &kernelDescriptor, const HardwareInfo &hardwareInfo, std::optional> hwGenerationOfLocalIdsParams) { +void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, const KernelDescriptor &kernelDescriptor, std::optional> hwGenerationOfLocalIdsParams) { - auto totalSizeToProgram = getSizeForImplicitArgsPatching(&implicitArgs, kernelDescriptor, hardwareInfo); + auto totalSizeToProgram = getSizeForImplicitArgsPatching(&implicitArgs, kernelDescriptor); auto retVal = ptrOffset(ptrToPatch, totalSizeToProgram); auto patchImplicitArgsBufferInCrossThread = NEO::isValidOffset<>(kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer); if (!patchImplicitArgsBufferInCrossThread) { auto simdSize = implicitArgs.simdWidth; - auto grfSize = getGrfSize(simdSize, hardwareInfo.capabilityTable.grfSize); + auto grfSize = getGrfSize(simdSize); auto dimensionOrder = getDimensionOrderForLocalIds(kernelDescriptor.kernelAttributes.workgroupDimensionsOrder, hwGenerationOfLocalIdsParams); NEO::generateLocalIDs( diff --git a/shared/test/unit_test/kernel/implicit_args_helper_tests.cpp b/shared/test/unit_test/kernel/implicit_args_helper_tests.cpp index c054a849d5..aefe5d7590 100644 --- a/shared/test/unit_test/kernel/implicit_args_helper_tests.cpp +++ b/shared/test/unit_test/kernel/implicit_args_helper_tests.cpp @@ -11,7 +11,6 @@ #include "shared/source/helpers/ptr_math.h" #include "shared/source/kernel/implicit_args.h" #include "shared/source/kernel/kernel_descriptor.h" -#include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/test_macros/hw_test.h" using namespace NEO; @@ -43,23 +42,21 @@ TEST(ImplicitArgsHelperTest, whenLocalIdsAreGeneratedByHwThenProperDimensionOrde } TEST(ImplicitArgsHelperTest, whenGettingGrfSizeForSimd1ThenSizeOfSingleLocalIdIsReturned) { - auto regularGrfsize = 32u; - EXPECT_EQ(3 * sizeof(uint16_t), ImplicitArgsHelper::getGrfSize(1u, regularGrfsize)); + EXPECT_EQ(3 * sizeof(uint16_t), ImplicitArgsHelper::getGrfSize(1u)); } -TEST(ImplicitArgsHelperTest, givenSimdGreaterThanOneWhenGettingGrfSizeThenInputGrfSizeIsReturned) { +TEST(ImplicitArgsHelperTest, givenSimdGreaterThanOneWhenGettingGrfSizeThenGrfSize32IsReturned) { auto regularGrfsize = 32u; - EXPECT_EQ(regularGrfsize, ImplicitArgsHelper::getGrfSize(8u, regularGrfsize)); - EXPECT_EQ(regularGrfsize, ImplicitArgsHelper::getGrfSize(16u, regularGrfsize)); - EXPECT_EQ(regularGrfsize, ImplicitArgsHelper::getGrfSize(32u, regularGrfsize)); + EXPECT_EQ(regularGrfsize, ImplicitArgsHelper::getGrfSize(8u)); + EXPECT_EQ(regularGrfsize, ImplicitArgsHelper::getGrfSize(16u)); + EXPECT_EQ(regularGrfsize, ImplicitArgsHelper::getGrfSize(32u)); } TEST(ImplicitArgsHelperTest, givenNoImplicitArgsWhenGettingSizeForImplicitArgsProgrammingThenZeroIsReturned) { KernelDescriptor kernelDescriptor{}; - const auto &hwInfo = *defaultHwInfo; - EXPECT_EQ(0u, ImplicitArgsHelper::getSizeForImplicitArgsPatching(nullptr, kernelDescriptor, hwInfo)); + EXPECT_EQ(0u, ImplicitArgsHelper::getSizeForImplicitArgsPatching(nullptr, kernelDescriptor)); } TEST(ImplicitArgsHelperTest, givenImplicitArgsWithoutImplicitArgsBufferOffsetInPayloadMappingWhenGettingSizeForImplicitArgsProgrammingThenCorrectSizeIsReturned) { @@ -68,7 +65,6 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithoutImplicitArgsBufferOffsetInP KernelDescriptor kernelDescriptor{}; EXPECT_TRUE(isUndefinedOffset<>(kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer)); - const auto &hwInfo = *defaultHwInfo; implicitArgs.simdWidth = 32; implicitArgs.localSizeX = 2; @@ -77,8 +73,8 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithoutImplicitArgsBufferOffsetInP auto totalWorkgroupSize = implicitArgs.localSizeX * implicitArgs.localSizeY * implicitArgs.localSizeZ; - auto localIdsSize = alignUp(PerThreadDataHelper::getPerThreadDataSizeTotal(implicitArgs.simdWidth, hwInfo.capabilityTable.grfSize, 3u, totalWorkgroupSize), MemoryConstants::cacheLineSize); - EXPECT_EQ(localIdsSize + implicitArgs.structSize, ImplicitArgsHelper::getSizeForImplicitArgsPatching(&implicitArgs, kernelDescriptor, hwInfo)); + auto localIdsSize = alignUp(PerThreadDataHelper::getPerThreadDataSizeTotal(implicitArgs.simdWidth, 32u /* grfSize */, 3u /* num channels */, totalWorkgroupSize), MemoryConstants::cacheLineSize); + EXPECT_EQ(localIdsSize + implicitArgs.structSize, ImplicitArgsHelper::getSizeForImplicitArgsPatching(&implicitArgs, kernelDescriptor)); } TEST(ImplicitArgsHelperTest, givenImplicitArgsWithImplicitArgsBufferOffsetInPayloadMappingWhenGettingSizeForImplicitArgsProgrammingThenCorrectSizeIsReturned) { @@ -87,14 +83,13 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithImplicitArgsBufferOffsetInPayl KernelDescriptor kernelDescriptor{}; kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer = 0x10; EXPECT_TRUE(isValidOffset<>(kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer)); - const auto &hwInfo = *defaultHwInfo; implicitArgs.simdWidth = 32; implicitArgs.localSizeX = 2; implicitArgs.localSizeY = 3; implicitArgs.localSizeZ = 4; - EXPECT_EQ(alignUp(implicitArgs.structSize, MemoryConstants::cacheLineSize), ImplicitArgsHelper::getSizeForImplicitArgsPatching(&implicitArgs, kernelDescriptor, hwInfo)); + EXPECT_EQ(alignUp(implicitArgs.structSize, MemoryConstants::cacheLineSize), ImplicitArgsHelper::getSizeForImplicitArgsPatching(&implicitArgs, kernelDescriptor)); } TEST(ImplicitArgsHelperTest, givenImplicitArgsWithoutImplicitArgsBufferOffsetInPayloadMappingWhenPatchingImplicitArgsThenOnlyProperRegionIsPatched) { @@ -106,14 +101,13 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithoutImplicitArgsBufferOffsetInP kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2] = 2; EXPECT_TRUE(isUndefinedOffset<>(kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer)); - const auto &hwInfo = *defaultHwInfo; implicitArgs.simdWidth = 1; implicitArgs.localSizeX = 2; implicitArgs.localSizeY = 3; implicitArgs.localSizeZ = 4; - auto totalSizeForPatching = ImplicitArgsHelper::getSizeForImplicitArgsPatching(&implicitArgs, kernelDescriptor, hwInfo); + auto totalSizeForPatching = ImplicitArgsHelper::getSizeForImplicitArgsPatching(&implicitArgs, kernelDescriptor); auto totalWorkgroupSize = implicitArgs.localSizeX * implicitArgs.localSizeY * implicitArgs.localSizeZ; auto localIdsPatchingSize = totalWorkgroupSize * 3 * sizeof(uint16_t); @@ -124,7 +118,7 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithoutImplicitArgsBufferOffsetInP memset(memoryToPatch.get(), pattern, totalSizeForPatching); - auto retVal = ImplicitArgsHelper::patchImplicitArgs(memoryToPatch.get(), implicitArgs, kernelDescriptor, hwInfo, {}); + auto retVal = ImplicitArgsHelper::patchImplicitArgs(memoryToPatch.get(), implicitArgs, kernelDescriptor, {}); EXPECT_EQ(retVal, ptrOffset(memoryToPatch.get(), totalSizeForPatching)); @@ -149,14 +143,13 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithImplicitArgsBufferOffsetInPayl KernelDescriptor kernelDescriptor{}; kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer = 0x10; EXPECT_TRUE(isValidOffset<>(kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer)); - const auto &hwInfo = *defaultHwInfo; implicitArgs.simdWidth = 32; implicitArgs.localSizeX = 2; implicitArgs.localSizeY = 3; implicitArgs.localSizeZ = 4; - auto totalSizeForPatching = ImplicitArgsHelper::getSizeForImplicitArgsPatching(&implicitArgs, kernelDescriptor, hwInfo); + auto totalSizeForPatching = ImplicitArgsHelper::getSizeForImplicitArgsPatching(&implicitArgs, kernelDescriptor); EXPECT_EQ(0x80u, totalSizeForPatching); @@ -166,7 +159,7 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithImplicitArgsBufferOffsetInPayl memset(memoryToPatch.get(), pattern, totalSizeForPatching); - auto retVal = ImplicitArgsHelper::patchImplicitArgs(memoryToPatch.get(), implicitArgs, kernelDescriptor, hwInfo, {}); + auto retVal = ImplicitArgsHelper::patchImplicitArgs(memoryToPatch.get(), implicitArgs, kernelDescriptor, {}); EXPECT_EQ(retVal, ptrOffset(memoryToPatch.get(), totalSizeForPatching)); @@ -179,4 +172,4 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithImplicitArgsBufferOffsetInPayl for (; offset < totalSizeForPatching; offset++) { EXPECT_EQ(pattern, memoryToPatch.get()[offset]); } -} \ No newline at end of file +}