diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl b/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl index 0b0242119d..b275178113 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl @@ -207,6 +207,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K launchParams.requiredDispatchWalkOrder, // requiredDispatchWalkOrder launchParams.additionalSizeParam, // additionalSizeParam 0, // partitionCount + launchParams.reserveExtraPayloadSpace, // reserveExtraPayloadSpace launchParams.isIndirect, // isIndirect launchParams.isPredicate, // isPredicate false, // isTimestampEvent diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index 0b5964aad0..9eef65d624 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -334,6 +334,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K launchParams.requiredDispatchWalkOrder, // requiredDispatchWalkOrder launchParams.additionalSizeParam, // additionalSizeParam this->partitionCount, // partitionCount + launchParams.reserveExtraPayloadSpace, // reserveExtraPayloadSpace launchParams.isIndirect, // isIndirect launchParams.isPredicate, // isPredicate isTimestampEvent, // isTimestampEvent diff --git a/level_zero/core/source/cmdlist/cmdlist_launch_params.h b/level_zero/core/source/cmdlist/cmdlist_launch_params.h index 91295efeda..0c6611b75b 100644 --- a/level_zero/core/source/cmdlist/cmdlist_launch_params.h +++ b/level_zero/core/source/cmdlist/cmdlist_launch_params.h @@ -54,6 +54,7 @@ struct CmdListKernelLaunchParams { uint32_t additionalSizeParam = NEO::additionalKernelLaunchSizeParamNotSet; uint32_t numKernelsInSplitLaunch = 0; uint32_t numKernelsExecutedInSplitLaunch = 0; + uint32_t reserveExtraPayloadSpace = 0; bool isIndirect = false; bool isPredicate = false; bool isCooperative = false; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp index 735577c2fe..53500c6653 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp @@ -202,6 +202,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder NEO::additionalKernelLaunchSizeParamNotSet, // additionalSizeParam 0, // partitionCount + 0, // reserveExtraPayloadSpace false, // isIndirect false, // isPredicate false, // isTimestampEvent diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index b98eb1641d..fc34531a5f 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -699,6 +699,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder NEO::additionalKernelLaunchSizeParamNotSet, // additionalSizeParam 0, // partitionCount + 0, // reserveExtraPayloadSpace false, // isIndirect false, // isPredicate false, // isTimestampEvent diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp index 2220eca2ad..6995a72377 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp @@ -9,8 +9,10 @@ #include "shared/source/command_stream/scratch_space_controller_base.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/gmm_helper/gmm_lib.h" +#include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/definitions/command_encoder_args.h" +#include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/preamble.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/os_interface/product_helper.h" @@ -2871,5 +2873,35 @@ HWTEST2_F(CommandListAppendLaunchKernel, EXPECT_EQ(0, memcmp(walkerGfxMemory, launchParams.cmdWalkerBuffer, sizeof(DefaultWalkerType))); } +HWTEST2_F(CommandListAppendLaunchKernel, + givenCmdListParamHasExtraSpaceReserveWhenAppendingKernelThenExtraSpaceIsConsumed, + IsAtLeastXeHpCore) { + Mock<::L0::KernelImp> kernel; + auto mockModule = std::unique_ptr(new Mock(device, nullptr)); + kernel.module = mockModule.get(); + kernel.descriptor.kernelAttributes.flags.passInlineData = false; + kernel.perThreadDataSizeForWholeThreadGroup = 0; + kernel.crossThreadDataSize = 64; + kernel.crossThreadData = std::make_unique(kernel.crossThreadDataSize); + + auto commandList = std::make_unique>>(); + auto result = commandList->initialize(device, NEO::EngineGroupType::compute, 0); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + auto &commandContainer = commandList->getCmdContainer(); + + ze_group_count_t groupCount{1, 1, 1}; + CmdListKernelLaunchParams launchParams = {}; + launchParams.reserveExtraPayloadSpace = 1024; + result = commandList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + auto ioh = commandContainer.getIndirectHeap(NEO::IndirectHeapType::indirectObject); + + size_t totalSize = 1024 + 64; + size_t expectedSize = alignUp(totalSize, device->getGfxCoreHelper().getIOHAlignment()); + EXPECT_EQ(expectedSize, ioh->getUsed()); +} + } // namespace ult } // namespace L0 diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index db64e46b68..42462c2a72 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -61,6 +61,7 @@ struct EncodeDispatchKernelArgs { NEO::RequiredDispatchWalkOrder requiredDispatchWalkOrder = NEO::RequiredDispatchWalkOrder::none; uint32_t additionalSizeParam = NEO::additionalKernelLaunchSizeParamNotSet; uint32_t partitionCount = 0u; + uint32_t reserveExtraPayloadSpace = 0; bool isIndirect = false; bool isPredicate = false; bool isTimestampEvent = false; diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index 7bb55e15dd..40ab0e71f7 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -241,7 +241,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis uint32_t sizeThreadData = sizePerThreadDataForWholeGroup + sizeCrossThreadData; uint32_t sizeForImplicitArgsPatching = NEO::ImplicitArgsHelper::getSizeForImplicitArgsPatching(pImplicitArgs, kernelDescriptor, !localIdsGenerationByRuntime, rootDeviceEnvironment); - uint32_t iohRequiredSize = sizeThreadData + sizeForImplicitArgsPatching; + uint32_t iohRequiredSize = sizeThreadData + sizeForImplicitArgsPatching + args.reserveExtraPayloadSpace; { auto heap = container.getIndirectHeap(HeapType::indirectObject); UNRECOVERABLE_IF(!heap); @@ -253,7 +253,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis ptr = container.getHeapSpaceAllowGrow(HeapType::indirectObject, iohRequiredSize); } UNRECOVERABLE_IF(!ptr); - offsetThreadData = (is64bit ? heap->getHeapGpuStartOffset() : heap->getHeapGpuBase()) + static_cast(heap->getUsed() - sizeThreadData); + offsetThreadData = (is64bit ? heap->getHeapGpuStartOffset() : heap->getHeapGpuBase()) + static_cast(heap->getUsed() - sizeThreadData - args.reserveExtraPayloadSpace); auto &rootDeviceEnvironment = args.device->getRootDeviceEnvironment(); if (pImplicitArgs) { offsetThreadData -= ImplicitArgs::getSize(); diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp index 7c32e4032d..7c79e7f932 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp @@ -1594,3 +1594,24 @@ HWTEST2_F(CommandEncodeStatesTest, givenEncodeDispatchKernelWhenCpuWalkerPointer EXPECT_EQ(0, memcmp(cmdWalkerGfxMemory, cpuWalkerPointer, sizeof(DefaultWalkerType))); } + +HWTEST2_F(CommandEncodeStatesTest, givenEncodeDispatchKernelWhenRequestingExtraPayloadSpaceThenConsumeExtraIndirectHeapSpace, IsAtLeastXeHpCore) { + using DefaultWalkerType = typename FamilyType::DefaultWalkerType; + uint32_t dims[] = {1, 1, 1}; + std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); + + dispatchInterface->kernelDescriptor.kernelAttributes.flags.passInlineData = false; + dispatchInterface->getCrossThreadDataSizeResult = 64; + + bool requiresUncachedMocs = false; + EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); + dispatchArgs.reserveExtraPayloadSpace = 1024; + + EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); + + auto heap = cmdContainer->getIndirectHeap(HeapType::indirectObject); + + size_t expectedConsumedSize = 64 + 1024; + expectedConsumedSize = alignUp(expectedConsumedSize, pDevice->getGfxCoreHelper().getIOHAlignment()); + EXPECT_EQ(expectedConsumedSize, heap->getUsed()); +} diff --git a/shared/test/unit_test/fixtures/command_container_fixture.cpp b/shared/test/unit_test/fixtures/command_container_fixture.cpp index 24e227a21e..577b310a20 100644 --- a/shared/test/unit_test/fixtures/command_container_fixture.cpp +++ b/shared/test/unit_test/fixtures/command_container_fixture.cpp @@ -56,6 +56,7 @@ EncodeDispatchKernelArgs CommandEncodeStatesFixture::createDefaultDispatchKernel NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder NEO::additionalKernelLaunchSizeParamNotSet, // additionalSizeParam 1, // partitionCount + 0, // reserveExtraPayloadSpace false, // isIndirect false, // isPredicate false, // isTimestampEvent