diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index 8e8d13b91e..3d0379126e 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -1028,19 +1028,17 @@ void KernelImp::patchImplicitArgs(void *&pOut) const { if (!pImplicitArgs) { return; } - const NEO::KernelDescriptor &kernelDescriptor = kernelImmData->getDescriptor(); + const auto &kernelAttributes = kernelImmData->getDescriptor().kernelAttributes; auto grfSize = this->module->getDevice()->getHwInfo().capabilityTable.grfSize; + auto dimensionOrder = NEO::ImplicitArgsHelper::getDimensionOrderForLocalIds(kernelAttributes.workgroupDimensionsOrder, kernelRequiresGenerationOfLocalIdsByRuntime, requiredWorkgroupOrder); + NEO::generateLocalIDs( pOut, - static_cast(kernelDescriptor.kernelAttributes.simdSize), + static_cast(kernelAttributes.simdSize), std::array{{static_cast(groupSize[0]), static_cast(groupSize[1]), static_cast(groupSize[2])}}, - std::array{{ - kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0], - kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1], - kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2], - }}, + dimensionOrder, false, grfSize); auto sizeForLocalIdsProgramming = getSizeForImplicitArgsPatching() - sizeof(NEO::ImplicitArgs); pOut = ptrOffset(pOut, sizeForLocalIdsProgramming); diff --git a/level_zero/core/test/unit_tests/fixtures/module_fixture.h b/level_zero/core/test/unit_tests/fixtures/module_fixture.h index 7c73fbee39..e2bfd11143 100644 --- a/level_zero/core/test/unit_tests/fixtures/module_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/module_fixture.h @@ -122,7 +122,9 @@ struct ModuleImmutableDataFixture : public DeviceFixture { using KernelImp::crossThreadDataSize; using KernelImp::kernelArgHandlers; using KernelImp::kernelHasIndirectAccess; + using KernelImp::kernelRequiresGenerationOfLocalIdsByRuntime; using KernelImp::privateMemoryGraphicsAllocation; + using KernelImp::requiredWorkgroupOrder; MockKernel(MockModule *mockModule) : WhiteBox(mockModule) { } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp index 134cc108eb..cf69f01c36 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp @@ -998,6 +998,85 @@ HWTEST_F(CmdlistAppendLaunchKernelTests, givenKernelWithImplicitArgsWhenAppendLa alignedFree(expectedLocalIds); } +HWTEST_F(CmdlistAppendLaunchKernelTests, givenKernelWithImplicitArgsAndHwGeneratedLocalIdsWhenAppendLaunchKernelThenImplicitArgsLocalIdsRespectWalkOrder) { + std::unique_ptr mockKernelImmData = std::make_unique(0u); + auto kernelDescriptor = mockKernelImmData->kernelDescriptor; + kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = true; + auto simd = kernelDescriptor->kernelAttributes.simdSize; + kernelDescriptor->kernelAttributes.workgroupDimensionsOrder[0] = 2; + kernelDescriptor->kernelAttributes.workgroupDimensionsOrder[1] = 1; + kernelDescriptor->kernelAttributes.workgroupDimensionsOrder[2] = 0; + createModuleFromBinary(0u, false, mockKernelImmData.get()); + + auto kernel = std::make_unique(module.get()); + + ze_kernel_desc_t kernelDesc{ZE_STRUCTURE_TYPE_KERNEL_DESC}; + kernel->initialize(&kernelDesc); + kernel->kernelRequiresGenerationOfLocalIdsByRuntime = false; + kernel->requiredWorkgroupOrder = 2; // walk order 1 0 2 + + EXPECT_TRUE(kernel->getKernelDescriptor().kernelAttributes.flags.requiresImplicitArgs); + ASSERT_NE(nullptr, kernel->getImplicitArgs()); + + kernel->setGroupSize(4, 5, 6); + kernel->setGroupCount(3, 2, 1); + kernel->setGlobalOffsetExp(1, 2, 3); + kernel->patchGlobalOffset(); + + ze_result_t result{}; + std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result)); + + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + auto indirectHeap = commandList->commandContainer.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT); + memset(indirectHeap->getSpace(0), 0, kernel->getSizeForImplicitArgsPatching()); + + ze_group_count_t groupCount{3, 2, 1}; + result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + auto sizeCrossThreadData = kernel->getCrossThreadDataSize(); + auto sizePerThreadDataForWholeGroup = kernel->getPerThreadDataSizeForWholeThreadGroup(); + EXPECT_EQ(indirectHeap->getUsed(), sizeCrossThreadData + sizePerThreadDataForWholeGroup + kernel->getSizeForImplicitArgsPatching()); + + ImplicitArgs expectedImplicitArgs{sizeof(ImplicitArgs)}; + expectedImplicitArgs.numWorkDim = 3; + expectedImplicitArgs.simdWidth = simd; + expectedImplicitArgs.localSizeX = 4; + expectedImplicitArgs.localSizeY = 5; + expectedImplicitArgs.localSizeZ = 6; + expectedImplicitArgs.globalSizeX = 12; + expectedImplicitArgs.globalSizeY = 10; + expectedImplicitArgs.globalSizeZ = 6; + expectedImplicitArgs.globalOffsetX = 1; + expectedImplicitArgs.globalOffsetY = 2; + expectedImplicitArgs.globalOffsetZ = 3; + expectedImplicitArgs.groupCountX = 3; + expectedImplicitArgs.groupCountY = 2; + expectedImplicitArgs.groupCountZ = 1; + expectedImplicitArgs.localIdTablePtr = indirectHeap->getGraphicsAllocation()->getGpuAddress(); + expectedImplicitArgs.printfBufferPtr = kernel->getPrintfBufferAllocation()->getGpuAddress(); + + auto sizeForImplicitArgPatching = kernel->getSizeForImplicitArgsPatching(); + + EXPECT_LT(0u, sizeForImplicitArgPatching); + + auto localIdsProgrammingSize = sizeForImplicitArgPatching - sizeof(ImplicitArgs); + + auto expectedLocalIds = alignedMalloc(localIdsProgrammingSize, 64); + memset(expectedLocalIds, 0, localIdsProgrammingSize); + constexpr uint32_t grfSize = sizeof(typename FamilyType::GRF); + NEO::generateLocalIDs(expectedLocalIds, simd, + std::array{{4, 5, 6}}, + std::array{{1, 0, 2}}, + false, grfSize); + + EXPECT_EQ(0, memcmp(expectedLocalIds, indirectHeap->getCpuBase(), localIdsProgrammingSize)); + auto pImplicitArgs = reinterpret_cast(ptrOffset(indirectHeap->getCpuBase(), localIdsProgrammingSize)); + EXPECT_EQ(0, memcmp(&expectedImplicitArgs, pImplicitArgs, sizeof(ImplicitArgs))); + + alignedFree(expectedLocalIds); +} HWTEST_F(CmdlistAppendLaunchKernelTests, givenKernelWithoutImplicitArgsWhenAppendLaunchKernelThenImplicitArgsAreNotSentToIndirectHeap) { std::unique_ptr mockKernelImmData = std::make_unique(0u); auto kernelDescriptor = mockKernelImmData->kernelDescriptor; diff --git a/opencl/source/helpers/hardware_commands_helper_base.inl b/opencl/source/helpers/hardware_commands_helper_base.inl index 8d599624fc..9688ef6271 100644 --- a/opencl/source/helpers/hardware_commands_helper_base.inl +++ b/opencl/source/helpers/hardware_commands_helper_base.inl @@ -219,15 +219,28 @@ size_t HardwareCommandsHelper::sendIndirectState( auto pImplicitArgs = kernel.getImplicitArgs(); if (pImplicitArgs) { constexpr uint32_t grfSize = sizeof(typename GfxFamily::GRF); + const auto &kernelAttributes = kernelInfo.kernelDescriptor.kernelAttributes; + uint32_t requiredWalkOrder = 0u; + auto generationOfLocalIdsByRuntime = EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( + 3, + localWorkSize, + std::array{ + {kernelAttributes.workgroupWalkOrder[0], + kernelAttributes.workgroupWalkOrder[1], + kernelAttributes.workgroupWalkOrder[2]}}, + kernelAttributes.flags.requiresWorkgroupWalkOrder, + requiredWalkOrder, + simd); + + auto dimensionOrder = ImplicitArgsHelper::getDimensionOrderForLocalIds(kernelAttributes.workgroupDimensionsOrder, generationOfLocalIdsByRuntime, requiredWalkOrder); + auto offsetLocalIds = sendPerThreadData( ioh, simd, grfSize, 3u, // all channels for implicit args std::array{{static_cast(localWorkSize[0]), static_cast(localWorkSize[1]), static_cast(localWorkSize[2])}}, - {{kernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0], - kernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1], - kernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2]}}, + dimensionOrder, kernel.usesOnlyImages()); pImplicitArgs->localIdTablePtr = offsetLocalIds + ioh.getGraphicsAllocation()->getGpuAddress(); diff --git a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp index 448c139ff7..71cfc8c74c 100644 --- a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp +++ b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp @@ -1261,7 +1261,9 @@ HWTEST_F(HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsWhenSendi pDevice->getMemoryManager()->freeGraphicsMemory(indirectHeapAllocation); } -HWTEST_F(HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsWhenSendingIndirectStateThenLocalIdsAreGeneratedAndCorrectlyProgrammedInCrossThreadData) { +HWTEST_F(HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsAndRuntimeLocalIdsGenerationWhenSendingIndirectStateThenLocalIdsAreGeneratedAndCorrectlyProgrammedInCrossThreadData) { + DebugManagerStateRestore restorer; + DebugManager.flags.EnableHwGenerationLocalIds.set(0); auto pKernelInfo = std::make_unique(); uint32_t simd = 32; pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = simd; @@ -1330,6 +1332,74 @@ HWTEST_F(HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsWhenSendi EXPECT_EQ(ioh.getGraphicsAllocation()->getGpuAddress(), pImplicitArgs->localIdTablePtr); } +HWCMDTEST_F(IGFX_XE_HP_CORE, HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsAndHwLocalIdsGenerationWhenSendingIndirectStateThenLocalIdsAreGeneratedAndCorrectlyProgrammedInCrossThreadData) { + auto pKernelInfo = std::make_unique(); + uint32_t simd = 32; + pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = simd; + pKernelInfo->kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = true; + pKernelInfo->kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0] = 2; + pKernelInfo->kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1] = 1; + pKernelInfo->kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2] = 0; + + MockContext context(pClDevice); + CommandQueueHw cmdQ(&context, pClDevice, 0, false); + MockProgram program(&context, false, toClDeviceVector(*pClDevice)); + + MockKernel kernel(&program, *pKernelInfo, *pClDevice); + ASSERT_EQ(CL_SUCCESS, kernel.initialize()); + + const size_t localWorkSizeX = 2; + const size_t localWorkSizeY = 3; + const size_t localWorkSizeZ = 4; + const size_t localWorkSizes[3]{localWorkSizeX, localWorkSizeY, localWorkSizeZ}; + std::array expectedDimOrder = {0, 2, 1}; + + auto &commandStream = cmdQ.getCS(1024); + auto pWalkerCmd = reinterpret_cast(commandStream.getSpace(0)); + + auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 8192); + auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 8192); + auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 8192); + + dsh.align(EncodeStates::alignInterfaceDescriptorData); + auto interfaceDescriptor = reinterpret_cast(dsh.getSpace(0)); + uint32_t interfaceDescriptorIndex = 0u; + + HardwareCommandsHelper::sendIndirectState( + commandStream, + dsh, + ioh, + ssh, + kernel, + 0u, + simd, + localWorkSizes, + 0u, + interfaceDescriptorIndex, + pDevice->getPreemptionMode(), + pWalkerCmd, + interfaceDescriptor, + false, + *pDevice); + + constexpr uint32_t grfSize = sizeof(typename FamilyType::GRF); + size_t localWorkSize = localWorkSizeX * localWorkSizeY * localWorkSizeZ; + size_t expectedIohSize = PerThreadDataHelper::getPerThreadDataSizeTotal(simd, grfSize, 3u, localWorkSize); + ASSERT_LE(expectedIohSize, ioh.getUsed()); + + auto expectedLocalIds = alignedMalloc(expectedIohSize, 64); + generateLocalIDs(expectedLocalIds, simd, + std::array{{localWorkSizeX, localWorkSizeY, localWorkSizeZ}}, + expectedDimOrder, + false, grfSize); + + EXPECT_EQ(0, memcmp(expectedLocalIds, ioh.getCpuBase(), expectedIohSize)); + alignedFree(expectedLocalIds); + + auto pImplicitArgs = reinterpret_cast(ptrOffset(ioh.getCpuBase(), alignUp(expectedIohSize, MemoryConstants::cacheLineSize))); + EXPECT_EQ(ioh.getGraphicsAllocation()->getGpuAddress(), pImplicitArgs->localIdTablePtr); +} + using HardwareCommandsTestXeHpAndLater = HardwareCommandsTest; HWCMDTEST_F(IGFX_XE_HP_CORE, HardwareCommandsTestXeHpAndLater, givenIndirectHeapNotAllocatedFromInternalPoolWhenSendCrossThreadDataIsCalledThenOffsetZeroIsReturned) { diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index b98c3a45b3..e6d1c6d4d4 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -64,7 +64,7 @@ struct EncodeDispatchKernel { static void *getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset); static bool isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels, - size_t *lws, + const size_t *lws, std::array walkOrder, bool requireInputWalkOrder, uint32_t &requiredWalkOrder, diff --git a/shared/source/command_container/command_encoder_bdw_and_later.inl b/shared/source/command_container/command_encoder_bdw_and_later.inl index 6c66b1d3f6..79c2e6cdd1 100644 --- a/shared/source/command_container/command_encoder_bdw_and_later.inl +++ b/shared/source/command_container/command_encoder_bdw_and_later.inl @@ -263,7 +263,7 @@ void EncodeMediaInterfaceDescriptorLoad::encode(CommandContainer &contai template inline bool EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels, - size_t *lws, + const size_t *lws, std::array walkOrder, bool requireInputWalkOrder, uint32_t &requiredWalkOrder, diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index 651c7ce6cb..ecec97504f 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -19,6 +19,7 @@ #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/constants.h" #include "shared/source/helpers/hw_helper.h" +#include "shared/source/helpers/hw_walk_order.h" #include "shared/source/helpers/pipe_control_args.h" #include "shared/source/helpers/pipeline_select_helper.h" #include "shared/source/helpers/ray_tracing_helper.h" @@ -298,7 +299,7 @@ inline void EncodeDispatchKernel::encodeAdditionalWalkerFields(const Har template bool EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels, - size_t *lws, + const size_t *lws, std::array walkOrder, bool requireInputWalkOrder, uint32_t &requiredWalkOrder, @@ -324,18 +325,6 @@ bool EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired(uint32_t return true; } - //make sure table below matches Hardware Spec - constexpr uint32_t walkOrderPossibilties = 6u; - constexpr uint8_t X = 0; - constexpr uint8_t Y = 1; - constexpr uint8_t Z = 2; - constexpr uint8_t possibleWalkOrders[walkOrderPossibilties][3] = {{X, Y, Z}, // 0 1 2 - {X, Z, Y}, // 0 2 1 - {Y, X, Z}, // 1 0 2 - {Z, X, Y}, // 1 2 0 - {Y, Z, X}, // 2 0 1 - {Z, Y, X}}; // 2 1 0 - //check if we need to follow kernel requirements if (requireInputWalkOrder) { for (uint32_t dimension = 0; dimension < activeChannels - 1; dimension++) { @@ -345,24 +334,24 @@ bool EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired(uint32_t } auto index = 0u; - while (index < walkOrderPossibilties) { - if (walkOrder[0] == possibleWalkOrders[index][0] && - walkOrder[1] == possibleWalkOrders[index][1]) { + while (index < HwWalkOrderHelper::walkOrderPossibilties) { + if (walkOrder[0] == HwWalkOrderHelper::compatibleDimensionOrders[index][0] && + walkOrder[1] == HwWalkOrderHelper::compatibleDimensionOrders[index][1]) { break; }; index++; } - DEBUG_BREAK_IF(index >= walkOrderPossibilties); + DEBUG_BREAK_IF(index >= HwWalkOrderHelper::walkOrderPossibilties); requiredWalkOrder = index; return false; } //kernel doesn't specify any walk order requirements, check if we have any compatible - for (uint32_t walkOrder = 0; walkOrder < walkOrderPossibilties; walkOrder++) { + for (uint32_t walkOrder = 0; walkOrder < HwWalkOrderHelper::walkOrderPossibilties; walkOrder++) { bool allDimensionsCompatible = true; for (uint32_t dimension = 0; dimension < activeChannels - 1; dimension++) { - if (!Math::isPow2(lws[possibleWalkOrders[walkOrder][dimension]])) { + if (!Math::isPow2(lws[HwWalkOrderHelper::compatibleDimensionOrders[walkOrder][dimension]])) { allDimensionsCompatible = false; break; } diff --git a/shared/source/helpers/CMakeLists.txt b/shared/source/helpers/CMakeLists.txt index 942f78c030..dd0a94da8e 100644 --- a/shared/source/helpers/CMakeLists.txt +++ b/shared/source/helpers/CMakeLists.txt @@ -74,6 +74,7 @@ set(NEO_CORE_HELPERS ${CMAKE_CURRENT_SOURCE_DIR}/hw_info.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_info.h ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}hw_info_extended.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/hw_walk_order.h ${CMAKE_CURRENT_SOURCE_DIR}/interlocked_max.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel_helpers.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_helpers.h diff --git a/shared/source/helpers/hw_walk_order.h b/shared/source/helpers/hw_walk_order.h new file mode 100644 index 0000000000..4f5977b93e --- /dev/null +++ b/shared/source/helpers/hw_walk_order.h @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +#include +#include + +namespace NEO { +namespace HwWalkOrderHelper { +//make sure table below matches Hardware Spec +constexpr uint32_t walkOrderPossibilties = 6u; +constexpr uint8_t X = 0; +constexpr uint8_t Y = 1; +constexpr uint8_t Z = 2; +constexpr std::array compatibleDimensionOrders[walkOrderPossibilties] = {{X, Y, Z}, // 0 1 2 + {X, Z, Y}, // 0 2 1 + {Y, X, Z}, // 1 0 2 + {Z, X, Y}, // 1 2 0 + {Y, Z, X}, // 2 0 1 + {Z, Y, X}}; // 2 1 0 +} // namespace HwWalkOrderHelper +} // namespace NEO diff --git a/shared/source/kernel/CMakeLists.txt b/shared/source/kernel/CMakeLists.txt index bf90676672..a43ca526ec 100644 --- a/shared/source/kernel/CMakeLists.txt +++ b/shared/source/kernel/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2019-2021 Intel Corporation +# Copyright (C) 2019-2022 Intel Corporation # # SPDX-License-Identifier: MIT # @@ -10,6 +10,7 @@ set(NEO_CORE_KERNEL ${CMAKE_CURRENT_SOURCE_DIR}/dispatch_kernel_encoder_interface.h ${CMAKE_CURRENT_SOURCE_DIR}/grf_config.h ${CMAKE_CURRENT_SOURCE_DIR}/implicit_args.h + ${CMAKE_CURRENT_SOURCE_DIR}/implicit_args_helper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_descriptor.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_descriptor_extended_device_side_enqueue.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_descriptor_extended_vme.h diff --git a/shared/source/kernel/implicit_args.h b/shared/source/kernel/implicit_args.h index 75745ced42..b5775f5e80 100644 --- a/shared/source/kernel/implicit_args.h +++ b/shared/source/kernel/implicit_args.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -7,6 +7,7 @@ #pragma once +#include #include #include @@ -36,4 +37,8 @@ static_assert((sizeof(ImplicitArgs) & 31) == 0, "Implicit args size need to be a static_assert(std::is_pod::value); constexpr const char *implicitArgsRelocationSymbolName = "INTEL_PATCH_CROSS_THREAD_OFFSET_OFF_R0"; + +namespace ImplicitArgsHelper { +std::array getDimensionOrderForLocalIds(const uint8_t *workgroupDimensionsOrder, bool generationOfLocalIdsByRuntime, uint32_t walkOrderForHwGenerationOfLocalIds); +} } // namespace NEO diff --git a/shared/source/kernel/implicit_args_helper.cpp b/shared/source/kernel/implicit_args_helper.cpp new file mode 100644 index 0000000000..18ca65d9a6 --- /dev/null +++ b/shared/source/kernel/implicit_args_helper.cpp @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/helpers/hw_walk_order.h" +#include "shared/source/kernel/implicit_args.h" +#include "shared/source/kernel/kernel_descriptor.h" + +namespace NEO { + +std::array ImplicitArgsHelper::getDimensionOrderForLocalIds(const uint8_t *workgroupDimensionsOrder, bool generationOfLocalIdsByRuntime, uint32_t walkOrderForHwGenerationOfLocalIds) { + if (generationOfLocalIdsByRuntime) { + UNRECOVERABLE_IF(!workgroupDimensionsOrder); + return {{ + workgroupDimensionsOrder[0], + workgroupDimensionsOrder[1], + workgroupDimensionsOrder[2], + }}; + } + + UNRECOVERABLE_IF(walkOrderForHwGenerationOfLocalIds >= HwWalkOrderHelper::walkOrderPossibilties); + return HwWalkOrderHelper::compatibleDimensionOrders[walkOrderForHwGenerationOfLocalIds]; +} +} // namespace NEO diff --git a/shared/test/unit_test/kernel/CMakeLists.txt b/shared/test/unit_test/kernel/CMakeLists.txt index 870d911f7e..abf4e199af 100644 --- a/shared/test/unit_test/kernel/CMakeLists.txt +++ b/shared/test/unit_test/kernel/CMakeLists.txt @@ -1,11 +1,12 @@ # -# Copyright (C) 2020-2021 Intel Corporation +# Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt + ${CMAKE_CURRENT_SOURCE_DIR}/implicit_args_helper_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_descriptor_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_metadata_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor_from_patchtokens_tests.cpp diff --git a/shared/test/unit_test/kernel/implicit_args_helper_tests.cpp b/shared/test/unit_test/kernel/implicit_args_helper_tests.cpp new file mode 100644 index 0000000000..cf28af59d2 --- /dev/null +++ b/shared/test/unit_test/kernel/implicit_args_helper_tests.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/helpers/hw_walk_order.h" +#include "shared/source/kernel/implicit_args.h" +#include "shared/test/common/test_macros/test.h" + +using namespace NEO; + +TEST(ImplicitArgsHelperTest, whenLocalIdsAreGeneratedByRuntimeThenDimensionOrderIsTakedFromInput) { + for (auto i = 0u; i < HwWalkOrderHelper::walkOrderPossibilties; i++) { + uint8_t inputDimensionOrder[3] = {2, 0, 1}; + auto dimOrderForImplicitArgs = ImplicitArgsHelper::getDimensionOrderForLocalIds(inputDimensionOrder, true, i); + EXPECT_EQ(inputDimensionOrder[0], dimOrderForImplicitArgs[0]); + EXPECT_EQ(inputDimensionOrder[1], dimOrderForImplicitArgs[1]); + EXPECT_EQ(inputDimensionOrder[2], dimOrderForImplicitArgs[2]); + } +} + +TEST(ImplicitArgsHelperTest, givenIncorrectcInputWhenGettingDimensionOrderThenAbortIsCalled) { + EXPECT_THROW(ImplicitArgsHelper::getDimensionOrderForLocalIds(nullptr, true, 0), std::runtime_error); + EXPECT_THROW(ImplicitArgsHelper::getDimensionOrderForLocalIds(nullptr, false, HwWalkOrderHelper::walkOrderPossibilties), std::runtime_error); +} + +TEST(ImplicitArgsHelperTest, whenLocalIdsAreGeneratedByHwThenProperDimensionOrderIsReturned) { + for (auto i = 0u; i < HwWalkOrderHelper::walkOrderPossibilties; i++) { + auto dimOrderForImplicitArgs = ImplicitArgsHelper::getDimensionOrderForLocalIds(nullptr, false, i); + EXPECT_EQ(HwWalkOrderHelper::compatibleDimensionOrders[i], dimOrderForImplicitArgs); + } +}