diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index 64928bd3bb..c26bddd6ce 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -424,14 +424,14 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K NEO::EncodeDispatchKernel::encodeCommon(commandContainer, dispatchKernelArgs); launchParams.outWalker = dispatchKernelArgs.outWalkerPtr; - if (this->heaplessModeEnabled && this->scratchAddressPatchingEnabled && kernelNeedsScratchSpace) { + auto &scratchPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.scratchPointerAddress; + if (this->heaplessModeEnabled && this->scratchAddressPatchingEnabled && kernelNeedsScratchSpace && NEO::isDefined(scratchPointerAddress.pointerSize) && NEO::isValidOffset(scratchPointerAddress.offset)) { CommandToPatch scratchInlineData; scratchInlineData.pDestination = dispatchKernelArgs.outWalkerPtr; scratchInlineData.pCommand = nullptr; scratchInlineData.type = CommandToPatch::CommandType::ComputeWalkerInlineDataScratch; - scratchInlineData.offset = NEO::EncodeDispatchKernel::getInlineDataOffset(dispatchKernelArgs) + - kernelDescriptor.payloadMappings.implicitArgs.scratchPointerAddress.offset; - scratchInlineData.patchSize = kernelDescriptor.payloadMappings.implicitArgs.scratchPointerAddress.pointerSize; + scratchInlineData.offset = NEO::EncodeDispatchKernel::getInlineDataOffset(dispatchKernelArgs) + scratchPointerAddress.offset; + scratchInlineData.patchSize = scratchPointerAddress.pointerSize; auto ssh = commandContainer.getIndirectHeap(NEO::HeapType::surfaceState); if (ssh != nullptr) { scratchInlineData.baseAddress = ssh->getGpuBase(); diff --git a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h index 95e038ca05..f268a4c52b 100644 --- a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -405,6 +405,9 @@ struct CommandListScratchPatchFixtureInit : public ModuleMutableCommandListFixtu template void testExternalScratchPatching(); + template + void testScratchUndefinedNoPatching(); + int32_t fixtureGlobalStatelessMode = 0; uint32_t scratchInlineOffset = 8; uint32_t scratchInlinePointerSize = sizeof(uint64_t); diff --git a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl index c5ba2bd6aa..db8fc8eaf3 100644 --- a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl +++ b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl @@ -1910,5 +1910,43 @@ void CommandListScratchPatchFixtureInit::testExternalScratchPatching() { EXPECT_EQ(fullScratchAddress, scratchInlineValue); } +template +void CommandListScratchPatchFixtureInit::testScratchUndefinedNoPatching() { + const ze_group_count_t groupCount{1, 1, 1}; + CmdListKernelLaunchParams launchParams = {}; + auto result = ZE_RESULT_SUCCESS; + + struct TestParam { + uint8_t pointerSize; + InlineDataOffset offset; + }; + + std::vector testParams = { + {undefined, 0u}, + {8u, undefined}}; + + for (const auto &testParam : testParams) { + mockKernelImmData->kernelDescriptor->payloadMappings.implicitArgs.scratchPointerAddress.pointerSize = testParam.pointerSize; + mockKernelImmData->kernelDescriptor->payloadMappings.implicitArgs.scratchPointerAddress.offset = testParam.offset; + + result = commandList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + const auto &cmdsToPatch = commandList->getCommandsToPatch(); + bool foundScratchPatchCmd = false; + + for (const auto &cmdToPatch : cmdsToPatch) { + if (cmdToPatch.type == CommandToPatch::CommandType::ComputeWalkerInlineDataScratch) { + foundScratchPatchCmd = true; + break; + } + } + EXPECT_FALSE(foundScratchPatchCmd); + + result = commandList->reset(); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + } +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp index e9b94d04d0..085dde7589 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp @@ -1493,6 +1493,11 @@ HWTEST2_F(CommandListScratchPatchPrivateHeapsTest, testExternalScratchPatching(); } +HWTEST2_F(CommandListScratchPatchPrivateHeapsTest, + givenHeaplessWithScratchPatchEnabledOnRegularCmdListWhenAppendingKernelWithUndefinedScratchAddressThenScratchIsNotStoredToPatch, IsHeapfulSupportedAndAtLeastXeHpcCore) { + testScratchUndefinedNoPatching(); +} + HWTEST2_F(CommandListScratchPatchGlobalStatelessHeapsTest, givenHeaplessWithScratchPatchEnabledOnRegularCmdListWhenAppendingAndExecutingKernelWithScratchThenExpectCorrectAddressPatched, IsAtLeastXeHpcCore) { testScratchInline(false); @@ -1518,6 +1523,11 @@ HWTEST2_F(CommandListScratchPatchGlobalStatelessHeapsTest, testExternalScratchPatching(); } +HWTEST2_F(CommandListScratchPatchGlobalStatelessHeapsTest, + givenHeaplessWithScratchPatchEnabledOnRegularCmdListWhenAppendingKernelWithUndefinedScratchAddressThenScratchIsNotStoredToPatch, IsAtLeastXeHpcCore) { + testScratchUndefinedNoPatching(); +} + HWTEST2_F(CommandListScratchPatchPrivateHeapsStateInitTest, givenHeaplessWithScratchPatchEnabledOnRegularCmdListWhenAppendingAndExecutingKernelWithScratchThenExpectCorrectAddressPatched, IsHeapfulSupportedAndAtLeastXeHpcCore) { testScratchInline(false); @@ -1543,6 +1553,11 @@ HWTEST2_F(CommandListScratchPatchPrivateHeapsStateInitTest, testExternalScratchPatching(); } +HWTEST2_F(CommandListScratchPatchPrivateHeapsStateInitTest, + givenHeaplessWithScratchPatchEnabledOnRegularCmdListWhenAppendingKernelWithUndefinedScratchAddressThenScratchIsNotStoredToPatch, IsHeapfulSupportedAndAtLeastXeHpcCore) { + testScratchUndefinedNoPatching(); +} + HWTEST2_F(CommandListScratchPatchGlobalStatelessHeapsStateInitTest, givenHeaplessWithScratchPatchEnabledOnRegularCmdListWhenAppendingAndExecutingKernelWithScratchThenExpectCorrectAddressPatched, IsAtLeastXeHpcCore) { testScratchInline(false); @@ -1568,6 +1583,11 @@ HWTEST2_F(CommandListScratchPatchGlobalStatelessHeapsStateInitTest, testExternalScratchPatching(); } +HWTEST2_F(CommandListScratchPatchGlobalStatelessHeapsStateInitTest, + givenHeaplessWithScratchPatchEnabledOnRegularCmdListWhenAppendingKernelWithUndefinedScratchAddressThenScratchIsNotStoredToPatch, IsAtLeastXeHpcCore) { + testScratchUndefinedNoPatching(); +} + HWTEST2_F(ImmediateCommandListTest, givenImmediateCmdListWhenAppendingRegularThenImmediateStreamIsSelected, MatchAny) { commandList->close(); auto cmdListHandle = commandList->toHandle(); diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index d35e5e3de7..a4e66e730d 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -345,8 +345,8 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis } } - uint8_t *inlineData = reinterpret_cast(walkerCmd.getInlineDataPointer()); - EncodeDispatchKernel::programInlineDataHeapless(inlineData, args, container, offsetThreadData, scratchAddressForImmediatePatching); + uint8_t *inlineDataPtr = reinterpret_cast(walkerCmd.getInlineDataPointer()); + EncodeDispatchKernel::programInlineDataHeapless(inlineDataPtr, args, container, offsetThreadData, scratchAddressForImmediatePatching); if constexpr (heaplessModeEnabled == false) { if (!args.makeCommandView) { diff --git a/shared/source/kernel/kernel_arg_descriptor.h b/shared/source/kernel/kernel_arg_descriptor.h index c81481e841..84ec05f63f 100644 --- a/shared/source/kernel/kernel_arg_descriptor.h +++ b/shared/source/kernel/kernel_arg_descriptor.h @@ -24,14 +24,23 @@ template static constexpr T undefined = std::numeric_limits::max(); template -bool isUndefinedOffset(T offset) { - static_assert(!std::is_pointer_v); +constexpr bool isUndefined(T value) { + return value == undefined; +} +template +constexpr bool isDefined(T value) { + return value != undefined; +} + +template + requires(!std::is_pointer_v) +constexpr bool isUndefinedOffset(T offset) { return undefined == offset; } template -bool isValidOffset(T offset) { +constexpr bool isValidOffset(T offset) { return false == isUndefinedOffset(offset); } @@ -52,8 +61,8 @@ struct ArgDescPointer final { }; struct ArgDescInlineDataPointer { - InlineDataOffset offset = 0u; - uint8_t pointerSize = 0u; + InlineDataOffset offset = undefined; + uint8_t pointerSize = undefined; }; enum class NEOImageType : uint8_t {