From 2a9bcdeb83d17d9549504ecb54d0b583dbffab6b Mon Sep 17 00:00:00 2001 From: Kamil Kopryk Date: Mon, 5 Aug 2024 16:40:51 +0000 Subject: [PATCH] refactor: pass outImplicitArgs to patchImplicitArgs function Signed-off-by: Kamil Kopryk --- .../source/cmdlist/cmdlist_hw_skl_to_tgllp.inl | 1 + .../source/cmdlist/cmdlist_hw_xehp_and_later.inl | 1 + .../test_cmdlist_append_launch_kernel_1.cpp | 1 + .../test_cmdlist_append_launch_kernel_3.cpp | 1 + .../hardware_commands_helper_bdw_and_later.inl | 2 +- .../hardware_commands_helper_xehp_and_later.inl | 2 +- shared/source/command_container/command_encoder.h | 2 +- .../source/command_container/command_encoder.inl | 5 ----- .../command_encoder_bdw_and_later.inl | 2 +- .../command_encoder_xehp_and_later.inl | 2 +- shared/source/kernel/implicit_args_helper.cpp | 7 ++++++- shared/source/kernel/implicit_args_helper.h | 2 +- .../command_container/command_encoder_tests.cpp | 8 -------- .../fixtures/command_container_fixture.cpp | 1 + .../kernel/implicit_args_helper_tests.cpp | 14 +++++++++++--- 15 files changed, 28 insertions(+), 23 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl b/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl index a7210db0b0..fba2d7856a 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl @@ -203,6 +203,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K nullptr, // outWalkerPtr nullptr, // cpuWalkerBuffer nullptr, // cpuPayloadBuffer + nullptr, // outImplicitArgsPtr &additionalCommands, // additionalCommands commandListPreemptionMode, // preemptionMode launchParams.requiredPartitionDim, // requiredPartitionDim diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index b49a5ccb0d..2feb0e3a03 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -348,6 +348,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K nullptr, // outWalkerPtr launchParams.cmdWalkerBuffer, // cpuWalkerBuffer launchParams.hostPayloadBuffer, // cpuPayloadBuffer + nullptr, // outImplicitArgsPtr &additionalCommands, // additionalCommands kernelPreemptionMode, // preemptionMode launchParams.requiredPartitionDim, // requiredPartitionDim diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp index da1b702e26..271c3a82f1 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp @@ -202,6 +202,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA nullptr, // outWalkerPtr nullptr, // cpuWalkerBuffer nullptr, // cpuPayloadBuffer + nullptr, // outImplicitArgsPtr nullptr, // additionalCommands PreemptionMode::MidBatch, // preemptionMode NEO::RequiredPartitionDim::none, // requiredPartitionDim diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index 33c5b096a7..fcbc577efe 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -749,6 +749,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA nullptr, // outWalkerPtr nullptr, // cpuWalkerBuffer nullptr, // cpuPayloadBuffer + nullptr, // outImplicitArgsPtr nullptr, // additionalCommands PreemptionMode::MidBatch, // preemptionMode NEO::RequiredPartitionDim::none, // requiredPartitionDim diff --git a/opencl/source/helpers/hardware_commands_helper_bdw_and_later.inl b/opencl/source/helpers/hardware_commands_helper_bdw_and_later.inl index 4aa24161de..06aec3aaee 100644 --- a/opencl/source/helpers/hardware_commands_helper_bdw_and_later.inl +++ b/opencl/source/helpers/hardware_commands_helper_bdw_and_later.inl @@ -92,7 +92,7 @@ size_t HardwareCommandsHelper::sendCrossThreadData( auto implicitArgsGpuVA = indirectHeap.getGraphicsAllocation()->getGpuAddress() + indirectHeap.getUsed(); auto ptrToPatchImplicitArgs = indirectHeap.getSpace(sizeForImplicitArgsProgramming); - ImplicitArgsHelper::patchImplicitArgs(ptrToPatchImplicitArgs, *pImplicitArgs, kernelDescriptor, {}, rootDeviceEnvironment); + ImplicitArgsHelper::patchImplicitArgs(ptrToPatchImplicitArgs, *pImplicitArgs, kernelDescriptor, {}, rootDeviceEnvironment, nullptr); auto implicitArgsCrossThreadPtr = ptrOffset(reinterpret_cast(kernel.getCrossThreadData()), kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer); *implicitArgsCrossThreadPtr = implicitArgsGpuVA; diff --git a/opencl/source/helpers/hardware_commands_helper_xehp_and_later.inl b/opencl/source/helpers/hardware_commands_helper_xehp_and_later.inl index 6953bd565c..c89dd4a416 100644 --- a/opencl/source/helpers/hardware_commands_helper_xehp_and_later.inl +++ b/opencl/source/helpers/hardware_commands_helper_xehp_and_later.inl @@ -95,7 +95,7 @@ size_t HardwareCommandsHelper::sendCrossThreadData( auto ptrToPatchImplicitArgs = indirectHeap.getSpace(sizeForImplicitArgsProgramming); - ImplicitArgsHelper::patchImplicitArgs(ptrToPatchImplicitArgs, *pImplicitArgs, kernelDescriptor, std::make_pair(generationOfLocalIdsByRuntime, requiredWalkOrder), rootDeviceEnvironment); + ImplicitArgsHelper::patchImplicitArgs(ptrToPatchImplicitArgs, *pImplicitArgs, kernelDescriptor, std::make_pair(generationOfLocalIdsByRuntime, requiredWalkOrder), rootDeviceEnvironment, nullptr); } uint32_t sizeToCopy = sizeCrossThreadData; diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index aec28d99f0..d5f52d680b 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -58,6 +58,7 @@ struct EncodeDispatchKernelArgs { void *outWalkerPtr = nullptr; void *cpuWalkerBuffer = nullptr; void *cpuPayloadBuffer = nullptr; + void *outImplicitArgsPtr = nullptr; std::list *additionalCommands = nullptr; PreemptionMode preemptionMode = PreemptionMode::Initial; NEO::RequiredPartitionDim requiredPartitionDim = NEO::RequiredPartitionDim::none; @@ -210,7 +211,6 @@ struct EncodeDispatchKernel { static void patchScratchAddressInImplicitArgs(ImplicitArgs &implicitArgs, uint64_t scratchAddress, bool scratchPtrPatchingRequired); static size_t getInlineDataOffset(EncodeDispatchKernelArgs &args); - static void *getImplicitArgsAddress(EncodeDispatchKernelArgs &args, const KernelDescriptor &kernelDescriptor); static size_t getScratchPtrOffsetOfImplicitArgs(); template diff --git a/shared/source/command_container/command_encoder.inl b/shared/source/command_container/command_encoder.inl index 1c848c86fc..40c6ff6687 100644 --- a/shared/source/command_container/command_encoder.inl +++ b/shared/source/command_container/command_encoder.inl @@ -906,11 +906,6 @@ size_t EncodeDispatchKernel::getDefaultDshAlignment() { return Family::cacheLineSize; } -template -void *EncodeDispatchKernel::getImplicitArgsAddress(EncodeDispatchKernelArgs &args, const KernelDescriptor &kernelDescriptor) { - return nullptr; -} - template size_t EncodeDispatchKernel::getScratchPtrOffsetOfImplicitArgs() { return 0; diff --git a/shared/source/command_container/command_encoder_bdw_and_later.inl b/shared/source/command_container/command_encoder_bdw_and_later.inl index d615a9892c..854fecd4da 100644 --- a/shared/source/command_container/command_encoder_bdw_and_later.inl +++ b/shared/source/command_container/command_encoder_bdw_and_later.inl @@ -198,7 +198,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis auto implicitArgsCrossThreadPtr = ptrOffset(const_cast(reinterpret_cast(args.dispatchInterface->getCrossThreadData())), kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer); *implicitArgsCrossThreadPtr = implicitArgsGpuVA; - ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, {}, rootDeviceEnvironment); + ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, {}, rootDeviceEnvironment, nullptr); } memcpy_s(ptr, sizeCrossThreadData, diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index f3c3a148fc..b54b2c70f5 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -267,7 +267,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis pImplicitArgs->localIdTablePtr = heap->getGraphicsAllocation()->getGpuAddress() + heap->getUsed() - iohRequiredSize; EncodeDispatchKernel::patchScratchAddressInImplicitArgs(*pImplicitArgs, scratchAddressForImmediatePatching, args.immediateScratchAddressPatching); - ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, std::make_pair(localIdsGenerationByRuntime, requiredWorkgroupOrder), rootDeviceEnvironment); + ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, std::make_pair(localIdsGenerationByRuntime, requiredWorkgroupOrder), rootDeviceEnvironment, &args.outImplicitArgsPtr); } if (args.isIndirect) { diff --git a/shared/source/kernel/implicit_args_helper.cpp b/shared/source/kernel/implicit_args_helper.cpp index 3621f04166..ca85e81e85 100644 --- a/shared/source/kernel/implicit_args_helper.cpp +++ b/shared/source/kernel/implicit_args_helper.cpp @@ -66,7 +66,7 @@ uint32_t getSizeForImplicitArgsPatching(const ImplicitArgs *pImplicitArgs, const } } -void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, const KernelDescriptor &kernelDescriptor, std::optional> hwGenerationOfLocalIdsParams, const RootDeviceEnvironment &rootDeviceEnvironment) { +void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, const KernelDescriptor &kernelDescriptor, std::optional> hwGenerationOfLocalIdsParams, const RootDeviceEnvironment &rootDeviceEnvironment, void **outImplicitArgsAddress) { auto localIdsGeneratedByHw = hwGenerationOfLocalIdsParams.has_value() ? hwGenerationOfLocalIdsParams.value().first : false; auto totalSizeToProgram = getSizeForImplicitArgsPatching(&implicitArgs, kernelDescriptor, localIdsGeneratedByHw, rootDeviceEnvironment); auto retVal = ptrOffset(ptrToPatch, totalSizeToProgram); @@ -89,6 +89,11 @@ void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, cons auto sizeForLocalIdsProgramming = totalSizeToProgram - ImplicitArgs::getSize(); ptrToPatch = ptrOffset(ptrToPatch, sizeForLocalIdsProgramming); } + + if (outImplicitArgsAddress) { + *outImplicitArgsAddress = ptrToPatch; + } + memcpy_s(ptrToPatch, ImplicitArgs::getSize(), &implicitArgs, ImplicitArgs::getSize()); return retVal; diff --git a/shared/source/kernel/implicit_args_helper.h b/shared/source/kernel/implicit_args_helper.h index f31de5faa4..0103063e6a 100644 --- a/shared/source/kernel/implicit_args_helper.h +++ b/shared/source/kernel/implicit_args_helper.h @@ -25,6 +25,6 @@ namespace ImplicitArgsHelper { std::array getDimensionOrderForLocalIds(const uint8_t *workgroupDimensionsOrder, std::optional> hwGenerationOfLocalIdsParams); uint32_t getGrfSize(uint32_t simd); uint32_t getSizeForImplicitArgsPatching(const ImplicitArgs *pImplicitArgs, const KernelDescriptor &kernelDescriptor, bool localIdsGeneratedByRuntime, const RootDeviceEnvironment &rootDeviceEnvironment); -void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, const KernelDescriptor &kernelDescriptor, std::optional> hwGenerationOfLocalIdsParams, const RootDeviceEnvironment &rootDeviceEnvironment); +void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, const KernelDescriptor &kernelDescriptor, std::optional> hwGenerationOfLocalIdsParams, const RootDeviceEnvironment &rootDeviceEnvironment, void **outImplicitArgs); } // namespace ImplicitArgsHelper } // namespace NEO diff --git a/shared/test/unit_test/command_container/command_encoder_tests.cpp b/shared/test/unit_test/command_container/command_encoder_tests.cpp index 613e6a4bf8..6bb82a41a2 100644 --- a/shared/test/unit_test/command_container/command_encoder_tests.cpp +++ b/shared/test/unit_test/command_container/command_encoder_tests.cpp @@ -778,14 +778,6 @@ HWTEST_F(CommandEncoderTests, givenInterfaceDescriptorWhenEncodeEuSchedulingPoli } } -HWTEST_F(CommandEncoderTests, whenGetImplicitArgsAddressIsCalledThenNullptrIsReturned) { - - KernelDescriptor kernelDescriptor{}; - EncodeDispatchKernelArgs args{}; - auto implicitArgsPtr = EncodeDispatchKernel::getImplicitArgsAddress(args, kernelDescriptor); - EXPECT_EQ(nullptr, implicitArgsPtr); -} - HWTEST_F(CommandEncoderTests, whenGetScratchPtrOffsetOfImplicitArgsIsCalledThenZeroIsReturned) { auto scratchOffset = EncodeDispatchKernel::getScratchPtrOffsetOfImplicitArgs(); diff --git a/shared/test/unit_test/fixtures/command_container_fixture.cpp b/shared/test/unit_test/fixtures/command_container_fixture.cpp index 6582e38100..7f4dae6fd1 100644 --- a/shared/test/unit_test/fixtures/command_container_fixture.cpp +++ b/shared/test/unit_test/fixtures/command_container_fixture.cpp @@ -51,6 +51,7 @@ EncodeDispatchKernelArgs CommandEncodeStatesFixture::createDefaultDispatchKernel nullptr, // outWalkerPtr nullptr, // cpuWalkerBuffer nullptr, // cpuPayloadBuffer + nullptr, // outImplicitArgsPtr nullptr, // additionalCommands PreemptionMode::Disabled, // preemptionMode NEO::RequiredPartitionDim::none, // requiredPartitionDim diff --git a/shared/test/unit_test/kernel/implicit_args_helper_tests.cpp b/shared/test/unit_test/kernel/implicit_args_helper_tests.cpp index 68b1b51ac8..f804b8d100 100644 --- a/shared/test/unit_test/kernel/implicit_args_helper_tests.cpp +++ b/shared/test/unit_test/kernel/implicit_args_helper_tests.cpp @@ -102,6 +102,7 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithImplicitArgsBufferOffsetInPayl TEST(ImplicitArgsHelperTest, givenImplicitArgsWithoutImplicitArgsBufferOffsetInPayloadMappingWhenPatchingImplicitArgsThenOnlyProperRegionIsPatched) { ImplicitArgs implicitArgs{ImplicitArgs::getSize()}; + void *outImplicitArgs = nullptr; KernelDescriptor kernelDescriptor{}; kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0] = 0; kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1] = 1; @@ -119,6 +120,7 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithoutImplicitArgsBufferOffsetInP auto totalWorkgroupSize = implicitArgs.localSizeX * implicitArgs.localSizeY * implicitArgs.localSizeZ; auto localIdsPatchingSize = totalWorkgroupSize * 3 * sizeof(uint16_t); + auto localIdsOffset = alignUp(localIdsPatchingSize, MemoryConstants::cacheLineSize); auto memoryToPatch = std::make_unique(totalSizeForPatching); @@ -126,10 +128,13 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithoutImplicitArgsBufferOffsetInP memset(memoryToPatch.get(), pattern, totalSizeForPatching); - auto retVal = ImplicitArgsHelper::patchImplicitArgs(memoryToPatch.get(), implicitArgs, kernelDescriptor, {}, rootDeviceEnvironment); + auto retVal = ImplicitArgsHelper::patchImplicitArgs(memoryToPatch.get(), implicitArgs, kernelDescriptor, {}, rootDeviceEnvironment, &outImplicitArgs); EXPECT_EQ(retVal, ptrOffset(memoryToPatch.get(), totalSizeForPatching)); + void *expectedImplicitArgsPtr = ptrOffset(memoryToPatch.get(), localIdsOffset); + EXPECT_EQ(expectedImplicitArgsPtr, outImplicitArgs); + uint32_t offset = 0; for (; offset < localIdsPatchingSize; offset++) { @@ -147,7 +152,7 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithoutImplicitArgsBufferOffsetInP TEST(ImplicitArgsHelperTest, givenImplicitArgsWithImplicitArgsBufferOffsetInPayloadMappingWhenPatchingImplicitArgsThenOnlyProperRegionIsPatched) { ImplicitArgs implicitArgs{ImplicitArgs::getSize()}; - + void *outImplicitArgs = nullptr; KernelDescriptor kernelDescriptor{}; kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer = 0x10; EXPECT_TRUE(isValidOffset<>(kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer)); @@ -168,10 +173,13 @@ TEST(ImplicitArgsHelperTest, givenImplicitArgsWithImplicitArgsBufferOffsetInPayl memset(memoryToPatch.get(), pattern, totalSizeForPatching); - auto retVal = ImplicitArgsHelper::patchImplicitArgs(memoryToPatch.get(), implicitArgs, kernelDescriptor, {}, rootDeviceEnvironment); + auto retVal = ImplicitArgsHelper::patchImplicitArgs(memoryToPatch.get(), implicitArgs, kernelDescriptor, {}, rootDeviceEnvironment, &outImplicitArgs); EXPECT_EQ(retVal, ptrOffset(memoryToPatch.get(), totalSizeForPatching)); + void *expectedImplicitArgsPtr = memoryToPatch.get(); + EXPECT_EQ(expectedImplicitArgsPtr, outImplicitArgs); + uint32_t offset = 0; for (; offset < ImplicitArgs::getSize(); offset++) {