// Review notes for this patch (placed ahead of the first `diff --git` header).
//
// level_zero/core/source/cmdqueue/cmdqueue_hw.inl -- setupCmdListsAndContextParams:
//   The code that pushes the SURFACE_STATE indirect heap's graphics allocation
//   and every entry of commandContainer.sshAllocations into heapContainer is now
//   wrapped in an extra guard:
//   commandList->getCmdListHeapAddressModel() == NEO::HeapAddressModel::PrivateHeaps.
//   The inner condition (either per-thread scratch size is non-zero) is unchanged.
//   The range-for variable changes from `auto element` to `auto &element`.
//
// level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl -- handleScratchSpace:
//   Inside the existing "either scratch size > 0" branch, a new check on
//   this->cmdListHeapAddressModel == NEO::HeapAddressModel::GlobalStateless calls
//   scratchController->setRequiredScratchSpace(), passing the underlying buffer of
//   csr->getGlobalStatelessHeapAllocation() as the first argument and 0 as the second.
//   The pre-existing `sshHeaps.size() > 0` branch follows as a separate `if`
//   (not `else if`), so the two branches are not mutually exclusive in this code.
//   NOTE(review): the pointer returned by getGlobalStatelessHeapAllocation() is
//   dereferenced without a null check -- confirm it is always set in this mode.
//
// level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp:
//   Adds an include of shared/source/command_stream/scratch_space_controller.h and
//   two HWTEST2_F cases in CommandListStateBaseAddressGlobalStatelessTest, both
//   restricted with IsAtLeastXeHpCore. Each test:
//     1. sets kernelAttributes.perThreadScratchSize[0] = 0x100;
//     2. appends one kernel launch -- the first test on commandList, followed by
//        close() and commandQueue->executeCommandLists(); the second test on
//        commandListImmediate;
//     3. parses the bytes written between two recorded getUsed() offsets of the
//        queue stream (first test) or the CSR stream (second test);
//     4. expects expectedSbaCmds entries in sbaCmds, with the first one reporting
//        getSurfaceStateBaseAddressModifyEnable() and a surface state base address
//        equal to the global stateless heap's getHeapGpuBase();
//     5. expects exactly one entry in frontEndCmds whose getScratchSpaceBuffer()
//        equals 2 * sizeof(RENDER_SURFACE_STATE), and the same value from the
//        scratch space controller's getScratchPatchAddress();
//     6. expects the object at that offset inside the heap allocation's underlying
//        buffer to report getSurfaceBaseAddress() equal to the scratch
//        allocation's getGpuAddress().
//   NOTE(review): the immediate-list test fetches the scratch space controller via
//   commandQueue->csr while the rest of that test uses csrImmediate -- presumably
//   both refer to the same CSR; confirm.
//   NOTE(review): the first test uses both commandQueue->getCsr() and
//   commandQueue->csr -- presumably equivalent; consider using one form.
//
// NOTE(review): template argument lists appear to be missing throughout this text
// (`findAll(`, `reinterpret_cast(`, `CommandQueueHw::`, `getUltCommandStreamReceiver()`),
// one parameter list is truncated (`&sshHe`), and line breaks are collapsed --
// presumably an extraction artifact; confirm against the original patch before applying.
diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index d3967d02bc..d5c90f705c 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -542,12 +542,14 @@ void CommandQueueHw::setupCmdListsAndContextParams( ctx.perThreadScratchSpaceSize = std::max(ctx.perThreadScratchSpaceSize, commandList->getCommandListPerThreadScratchSize()); ctx.perThreadPrivateScratchSize = std::max(ctx.perThreadPrivateScratchSize, commandList->getCommandListPerThreadPrivateScratchSize()); - if (commandList->getCommandListPerThreadScratchSize() != 0 || commandList->getCommandListPerThreadPrivateScratchSize() != 0) { - if (commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE) != nullptr) { - heapContainer.push_back(commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE)->getGraphicsAllocation()); - } - for (auto element : commandContainer.sshAllocations) { - heapContainer.push_back(element); + if (commandList->getCmdListHeapAddressModel() == NEO::HeapAddressModel::PrivateHeaps) { + if (commandList->getCommandListPerThreadScratchSize() != 0 || commandList->getCommandListPerThreadPrivateScratchSize() != 0) { + if (commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE) != nullptr) { + heapContainer.push_back(commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE)->getGraphicsAllocation()); + } + for (auto &element : commandContainer.sshAllocations) { + heapContainer.push_back(element); + } } } } diff --git a/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl b/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl index 78098da1eb..68ada17028 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl @@ -150,6 +150,11 @@ void CommandQueueHw::handleScratchSpace(NEO::HeapContainer &sshHe bool &gsbaState, bool &frontEndState, uint32_t 
perThreadScratchSpaceSize, uint32_t perThreadPrivateScratchSize) { if (perThreadScratchSpaceSize > 0 || perThreadPrivateScratchSize > 0) { + if (this->cmdListHeapAddressModel == NEO::HeapAddressModel::GlobalStateless) { + auto globalStatelessHeapAllocation = csr->getGlobalStatelessHeapAllocation(); + scratchController->setRequiredScratchSpace(globalStatelessHeapAllocation->getUnderlyingBuffer(), 0, perThreadScratchSpaceSize, perThreadPrivateScratchSize, csr->peekTaskCount(), + csr->getOsContext(), gsbaState, frontEndState); + } if (sshHeaps.size() > 0) { uint32_t offsetIndex = maxPtssIndex * csr->getOsContext().getEngineType() + 1u; scratchController->programHeaps(sshHeaps, offsetIndex, perThreadScratchSpaceSize, perThreadPrivateScratchSize, csr->peekTaskCount(), diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp index 6aff3f773a..72e487f4e2 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp @@ -6,6 +6,7 @@ */ #include "shared/source/command_container/command_encoder.h" +#include "shared/source/command_stream/scratch_space_controller.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/indirect_heap/indirect_heap.h" @@ -2139,5 +2140,120 @@ HWTEST2_F(CommandListStateBaseAddressGlobalStatelessTest, EXPECT_EQ((statlessMocs << 1), sbaCmd->getStatelessDataPortAccessMemoryObjectControlState()); } +HWTEST2_F(CommandListStateBaseAddressGlobalStatelessTest, + givenGlobalStatelessKernelUsingScratchSpaceWhenExecutingRegularCommandListThenBaseAddressAndFrontEndStateCommandsProperlyDispatched, + IsAtLeastXeHpCore) { + using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; + using CFE_STATE = typename FamilyType::CFE_STATE; + using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; 
+ + mockKernelImmData->kernelDescriptor->kernelAttributes.perThreadScratchSize[0] = 0x100; + + ze_group_count_t groupCount{1, 1, 1}; + CmdListKernelLaunchParams launchParams = {}; + auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + result = commandList->close(); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + auto &cmdQueueStream = commandQueue->commandStream; + + size_t queueBefore = cmdQueueStream.getUsed(); + ze_command_list_handle_t cmdListHandle = commandList->toHandle(); + result = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, true); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + size_t queueAfter = cmdQueueStream.getUsed(); + + auto globalSurfaceHeap = commandQueue->getCsr()->getGlobalStatelessHeap(); + + auto ssBaseAddress = globalSurfaceHeap->getHeapGpuBase(); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, + ptrOffset(cmdQueueStream.getCpuBase(), queueBefore), + queueAfter - queueBefore)); + auto sbaCmds = findAll(cmdList.begin(), cmdList.end()); + ASSERT_EQ(expectedSbaCmds, sbaCmds.size()); + + auto sbaCmd = reinterpret_cast(*sbaCmds[0]); + + EXPECT_TRUE(sbaCmd->getSurfaceStateBaseAddressModifyEnable()); + EXPECT_EQ(ssBaseAddress, sbaCmd->getSurfaceStateBaseAddress()); + + auto frontEndCmds = findAll(cmdList.begin(), cmdList.end()); + ASSERT_EQ(1u, frontEndCmds.size()); + + constexpr size_t expectedScratchOffset = 2 * sizeof(RENDER_SURFACE_STATE); + + auto frontEndCmd = reinterpret_cast(*frontEndCmds[0]); + EXPECT_EQ(expectedScratchOffset, frontEndCmd->getScratchSpaceBuffer()); + + auto scratchSpaceController = commandQueue->csr->getScratchSpaceController(); + EXPECT_EQ(expectedScratchOffset, scratchSpaceController->getScratchPatchAddress()); + + auto surfaceStateHeapAlloc = globalSurfaceHeap->getGraphicsAllocation(); + void *scratchSurfaceStateBuffer = 
ptrOffset(surfaceStateHeapAlloc->getUnderlyingBuffer(), expectedScratchOffset); + auto scratchSurfaceState = reinterpret_cast(scratchSurfaceStateBuffer); + + auto scratchAllocation = scratchSpaceController->getScratchSpaceAllocation(); + EXPECT_EQ(scratchAllocation->getGpuAddress(), scratchSurfaceState->getSurfaceBaseAddress()); +} + +HWTEST2_F(CommandListStateBaseAddressGlobalStatelessTest, + givenGlobalStatelessKernelUsingScratchSpaceWhenExecutingImmediateCommandListThenBaseAddressAndFrontEndStateCommandsProperlyDispatched, + IsAtLeastXeHpCore) { + using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; + using CFE_STATE = typename FamilyType::CFE_STATE; + using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; + + mockKernelImmData->kernelDescriptor->kernelAttributes.perThreadScratchSize[0] = 0x100; + + auto &csrImmediate = neoDevice->getUltCommandStreamReceiver(); + auto &csrStream = csrImmediate.commandStream; + auto globalSurfaceHeap = csrImmediate.getGlobalStatelessHeap(); + + size_t csrUsedBefore = csrStream.getUsed(); + ze_group_count_t groupCount{1, 1, 1}; + CmdListKernelLaunchParams launchParams = {}; + auto result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + size_t csrUsedAfter = csrStream.getUsed(); + + auto ssBaseAddress = globalSurfaceHeap->getHeapGpuBase(); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, + ptrOffset(csrStream.getCpuBase(), csrUsedBefore), + csrUsedAfter - csrUsedBefore)); + auto sbaCmds = findAll(cmdList.begin(), cmdList.end()); + ASSERT_EQ(expectedSbaCmds, sbaCmds.size()); + + auto sbaCmd = reinterpret_cast(*sbaCmds[0]); + + EXPECT_TRUE(sbaCmd->getSurfaceStateBaseAddressModifyEnable()); + EXPECT_EQ(ssBaseAddress, sbaCmd->getSurfaceStateBaseAddress()); + + auto frontEndCmds = findAll(cmdList.begin(), cmdList.end()); + ASSERT_EQ(1u, 
frontEndCmds.size()); + + constexpr size_t expectedScratchOffset = 2 * sizeof(RENDER_SURFACE_STATE); + + auto frontEndCmd = reinterpret_cast(*frontEndCmds[0]); + EXPECT_EQ(expectedScratchOffset, frontEndCmd->getScratchSpaceBuffer()); + + auto scratchSpaceController = commandQueue->csr->getScratchSpaceController(); + EXPECT_EQ(expectedScratchOffset, scratchSpaceController->getScratchPatchAddress()); + + auto surfaceStateHeapAlloc = globalSurfaceHeap->getGraphicsAllocation(); + void *scratchSurfaceStateBuffer = ptrOffset(surfaceStateHeapAlloc->getUnderlyingBuffer(), expectedScratchOffset); + auto scratchSurfaceState = reinterpret_cast(scratchSurfaceStateBuffer); + + auto scratchAllocation = scratchSpaceController->getScratchSpaceAllocation(); + EXPECT_EQ(scratchAllocation->getGpuAddress(), scratchSurfaceState->getSurfaceBaseAddress()); +} + } // namespace ult } // namespace L0