diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index 189c43d234..8578572c8a 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -297,6 +297,10 @@ struct CommandList : _ze_command_list_handle_t { return this->isTbxMode && !this->isSyncModeQueue; } + NEO::HeapAddressModel getCmdListHeapAddressModel() const { + return this->cmdListHeapAddressModel; + } + void setCmdListContext(ze_context_handle_t contextHandle) { this->hContext = contextHandle; } diff --git a/level_zero/core/source/cmdqueue/cmdqueue.cpp b/level_zero/core/source/cmdqueue/cmdqueue.cpp index 94ceb10377..9de382b885 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue.cpp +++ b/level_zero/core/source/cmdqueue/cmdqueue.cpp @@ -205,14 +205,16 @@ CommandQueue *CommandQueue::create(uint32_t productFamily, Device *device, NEO:: CommandQueueImp *commandQueue = nullptr; returnValue = ZE_RESULT_ERROR_UNINITIALIZED; + if (!allocator) { + return nullptr; + } - if (allocator) { - commandQueue = static_cast<CommandQueueImp *>((*allocator)(device, csr, desc)); - returnValue = commandQueue->initialize(isCopyOnly, isInternal); - if (returnValue != ZE_RESULT_SUCCESS) { - commandQueue->destroy(); - commandQueue = nullptr; - } + commandQueue = static_cast<CommandQueueImp *>((*allocator)(device, csr, desc)); + returnValue = commandQueue->initialize(isCopyOnly, isInternal); + if (returnValue != ZE_RESULT_SUCCESS) { + commandQueue->destroy(); + commandQueue = nullptr; + return nullptr; } auto &osContext = csr->getOsContext(); @@ -223,8 +225,11 @@ CommandQueue *CommandQueue::create(uint32_t productFamily, Device *device, NEO:: } csr->initializeResources(); - csr->initDirectSubmission(); + if (commandQueue->cmdListHeapAddressModel == NEO::HeapAddressModel::GlobalStateless) { + csr->createGlobalStatelessHeap(); + } + return commandQueue; } diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.h b/level_zero/core/source/cmdqueue/cmdqueue_hw.h index 
adeac10fba..27d9d90cf6 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.h +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.h @@ -125,9 +125,10 @@ struct CommandQueueHw : public CommandQueueImp { ze_command_list_handle_t *phCommandLists, uint32_t numCommandLists); MOCKABLE_VIRTUAL ze_result_t makeAlignedChildStreamAndSetGpuBase(NEO::LinearStream &child, size_t requiredSize); - inline void allocateGlobalFenceAndMakeItResident(); - inline void allocateWorkPartitionAndMakeItResident(); - inline void allocateTagsManagerHeapsAndMakeThemResidentIfSWTagsEnabled(NEO::LinearStream &commandStream); + inline void getGlobalFenceAndMakeItResident(); + inline void getWorkPartitionAndMakeItResident(); + inline void getGlobalStatelessHeapAndMakeItResident(); + inline void getTagsManagerHeapsAndMakeThemResidentIfSWTagsEnabled(NEO::LinearStream &commandStream); inline void makeSbaTrackingBufferResidentIfL0DebuggerEnabled(bool isDebugEnabled); inline void programCommandQueueDebugCmdsForSourceLevelOrL0DebuggerIfEnabled(bool isDebugEnabled, NEO::LinearStream &commandStream); inline void programStateBaseAddressWithGsbaIfDirty(CommandListExecutionContext &ctx, diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index 60252a5aad..29a2abb149 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -124,9 +124,10 @@ ze_result_t CommandQueueHw::executeCommandListsRegular( return ret; } - this->allocateGlobalFenceAndMakeItResident(); - this->allocateWorkPartitionAndMakeItResident(); - this->allocateTagsManagerHeapsAndMakeThemResidentIfSWTagsEnabled(child); + this->getGlobalFenceAndMakeItResident(); + this->getWorkPartitionAndMakeItResident(); + this->getGlobalStatelessHeapAndMakeItResident(); + this->getTagsManagerHeapsAndMakeThemResidentIfSWTagsEnabled(child); this->csr->programHardwareContext(child); 
this->makeSbaTrackingBufferResidentIfL0DebuggerEnabled(ctx.isDebugEnabled); @@ -226,8 +227,8 @@ ze_result_t CommandQueueHw::executeCommandListsCopyOnly( return ret; } - this->allocateGlobalFenceAndMakeItResident(); - this->allocateTagsManagerHeapsAndMakeThemResidentIfSWTagsEnabled(child); + this->getGlobalFenceAndMakeItResident(); + this->getTagsManagerHeapsAndMakeThemResidentIfSWTagsEnabled(child); this->csr->programHardwareContext(child); this->encodeKernelArgsBufferAndMakeItResident(); @@ -688,7 +689,7 @@ ze_result_t CommandQueueHw::makeAlignedChildStreamAndSetGpuBase(N } template -void CommandQueueHw::allocateGlobalFenceAndMakeItResident() { +void CommandQueueHw::getGlobalFenceAndMakeItResident() { const auto globalFenceAllocation = this->csr->getGlobalFenceAllocation(); if (globalFenceAllocation) { this->csr->makeResident(*globalFenceAllocation); @@ -696,7 +697,7 @@ void CommandQueueHw::allocateGlobalFenceAndMakeItResident() { } template -void CommandQueueHw::allocateWorkPartitionAndMakeItResident() { +void CommandQueueHw::getWorkPartitionAndMakeItResident() { const auto workPartitionAllocation = this->csr->getWorkPartitionAllocation(); if (workPartitionAllocation) { this->csr->makeResident(*workPartitionAllocation); @@ -704,7 +705,15 @@ void CommandQueueHw::allocateWorkPartitionAndMakeItResident() { } template -void CommandQueueHw::allocateTagsManagerHeapsAndMakeThemResidentIfSWTagsEnabled(NEO::LinearStream &cmdStream) { +void CommandQueueHw::getGlobalStatelessHeapAndMakeItResident() { + const auto globalStatelessAllocation = this->csr->getGlobalStatelessHeapAllocation(); + if (globalStatelessAllocation) { + this->csr->makeResident(*globalStatelessAllocation); + } +} + +template +void CommandQueueHw::getTagsManagerHeapsAndMakeThemResidentIfSWTagsEnabled(NEO::LinearStream &cmdStream) { if (NEO::DebugManager.flags.EnableSWTags.get()) { NEO::Device *neoDevice = this->device->getNEODevice(); NEO::SWTagsManager *tagsManager = 
neoDevice->getRootDeviceEnvironment().tagsManager.get(); diff --git a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp index afc9fa4b27..cb4425dce9 100644 --- a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp +++ b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp @@ -184,6 +184,27 @@ uint32_t CommandListStateBaseAddressFixture::getMocs(bool l3On) { return device->getMOCS(l3On, false) >> 1; } +void CommandListGlobalHeapsFixtureInit::setUp() { + CommandListGlobalHeapsFixtureInit::setUpParams(static_cast(NEO::HeapAddressModel::GlobalStateless)); +} + +void CommandListGlobalHeapsFixtureInit::setUpParams(int32_t globalHeapMode) { + DebugManager.flags.SelectCmdListHeapAddressModel.set(globalHeapMode); + CommandListStateBaseAddressFixture::setUp(); + + DebugManager.flags.SelectCmdListHeapAddressModel.set(static_cast(NEO::HeapAddressModel::PrivateHeaps)); + + ze_result_t returnValue; + commandListPrivateHeap.reset(whiteboxCast(CommandList::create(productFamily, device, engineGroupType, 0u, returnValue))); + + DebugManager.flags.SelectCmdListHeapAddressModel.set(globalHeapMode); +} + +void CommandListGlobalHeapsFixtureInit::tearDown() { + commandListPrivateHeap.reset(nullptr); + CommandListStateBaseAddressFixture::tearDown(); +} + void ImmediateCmdListSharedHeapsFixture::setUp() { DebugManager.flags.EnableFlushTaskSubmission.set(1); DebugManager.flags.EnableImmediateCmdListHeapSharing.set(1); diff --git a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h index 0132a6c927..c3f496fde6 100644 --- a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h @@ -129,6 +129,20 @@ struct CommandListStateBaseAddressFixture : public ModuleMutableCommandListFixtu bool dshRequired = false; }; +struct CommandListGlobalHeapsFixtureInit : public 
CommandListStateBaseAddressFixture { + void setUp(); + void setUpParams(int32_t globalHeapMode); + void tearDown(); + std::unique_ptr commandListPrivateHeap; +}; + +template +struct CommandListGlobalHeapsFixture : public CommandListGlobalHeapsFixtureInit { + void setUp() { + CommandListGlobalHeapsFixtureInit::setUpParams(globalHeapMode); + } +}; + struct ImmediateCmdListSharedHeapsFixture : public ModuleMutableCommandListFixture { void setUp(); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp index 9099c0b687..983a02c649 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp @@ -2424,7 +2424,7 @@ TEST_F(CommandListCreate, givenCreatedCommandListWhenGettingTrackingFlagsThenDef EXPECT_EQ(expectedDoubleSbaWa, commandList->doubleSbaWa); auto expectedHeapAddressModel = l0GfxCoreHelper.getPlatformHeapAddressModel(); - EXPECT_EQ(expectedHeapAddressModel, commandList->cmdListHeapAddressModel); + EXPECT_EQ(expectedHeapAddressModel, commandList->getCmdListHeapAddressModel()); EXPECT_EQ(expectedHeapAddressModel, commandList->getCmdContainer().getHeapAddressModel()); } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp index 2807789156..34b794746a 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp @@ -974,5 +974,24 @@ HWTEST2_F(ImmediateCmdListSharedHeapsTest, givenMultipleCommandListsUsingSharedH EXPECT_GE(expectedSshAlignedSize, sshUsed); } +using CommandListGlobalStatelessTest = Test(NEO::HeapAddressModel::GlobalStateless)>>; +HWTEST2_F(CommandListGlobalStatelessTest, givenGlobalStatelessWhenExecutingCommandListThenMakeAllocationResident, IsAtLeastXeHpCore) { + 
EXPECT_EQ(NEO::HeapAddressModel::GlobalStateless, commandList->cmdListHeapAddressModel); + EXPECT_EQ(NEO::HeapAddressModel::GlobalStateless, commandListImmediate->cmdListHeapAddressModel); + EXPECT_EQ(NEO::HeapAddressModel::GlobalStateless, commandQueue->cmdListHeapAddressModel); + + ASSERT_EQ(commandListImmediate->csr, commandQueue->getCsr()); + auto globalStatelessAlloc = commandListImmediate->csr->getGlobalStatelessHeapAllocation(); + EXPECT_NE(nullptr, globalStatelessAlloc); + + auto ultCsr = static_cast *>(commandListImmediate->csr); + ultCsr->storeMakeResidentAllocations = true; + + ze_command_list_handle_t cmdListHandle = commandList->toHandle(); + auto result = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, true); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_TRUE(ultCsr->isMadeResident(globalStatelessAlloc)); +} } // namespace ult } // namespace L0 diff --git a/shared/source/command_stream/command_stream_receiver.cpp b/shared/source/command_stream/command_stream_receiver.cpp index d1062b786c..bd3bd82575 100644 --- a/shared/source/command_stream/command_stream_receiver.cpp +++ b/shared/source/command_stream/command_stream_receiver.cpp @@ -384,6 +384,11 @@ void CommandStreamReceiver::cleanupResources() { getMemoryManager()->freeGraphicsMemory(kernelArgsBufferAllocation); kernelArgsBufferAllocation = nullptr; } + + if (globalStatelessHeapAllocation) { + getMemoryManager()->freeGraphicsMemory(globalStatelessHeapAllocation); + globalStatelessHeapAllocation = nullptr; + } } WaitStatus CommandStreamReceiver::waitForCompletionWithTimeout(const WaitParams &params, TaskCountType taskCountToWait) { @@ -1033,5 +1038,22 @@ uint64_t CommandStreamReceiver::getCompletionAddress() const { return completionFenceAddress; } +void CommandStreamReceiver::createGlobalStatelessHeap() { + if (this->globalStatelessHeapAllocation == nullptr) { + auto lock = obtainUniqueOwnership(); + if (this->globalStatelessHeapAllocation == nullptr) { + constexpr size_t heapSize = 
16 * MemoryConstants::kiloByte; + constexpr AllocationType allocationType = AllocationType::LINEAR_STREAM; + + AllocationProperties properties{rootDeviceIndex, true, heapSize, allocationType, + isMultiOsContextCapable(), false, osContext->getDeviceBitfield()}; + + this->globalStatelessHeapAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); + + this->globalStatelessHeap = std::make_unique<IndirectHeap>(this->globalStatelessHeapAllocation); + } + } +} + std::function<void()> CommandStreamReceiver::debugConfirmationFunction = []() { std::cin.get(); }; } // namespace NEO diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index 67c763fbc0..72c2e0ef2d 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -171,6 +171,7 @@ class CommandStreamReceiver { GraphicsAllocation *getPreemptionAllocation() const { return preemptionAllocation; } GraphicsAllocation *getGlobalFenceAllocation() const { return globalFenceAllocation; } GraphicsAllocation *getWorkPartitionAllocation() const { return workPartitionAllocation; } + GraphicsAllocation *getGlobalStatelessHeapAllocation() const { return globalStatelessHeapAllocation; } GraphicsAllocation *getKernelArgsBufferAllocation() const { return kernelArgsBufferAllocation; } virtual WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) = 0; @@ -396,6 +397,10 @@ class CommandStreamReceiver { L1CachePolicy *getStoredL1CachePolicy() { return &l1CachePolicyData; } + void createGlobalStatelessHeap(); + IndirectHeap *getGlobalStatelessHeap() { + return globalStatelessHeap.get(); + } protected: void cleanupResources(); @@ -418,6 +423,7 @@ class CommandStreamReceiver { std::unique_ptr timestampPacketAllocator; std::unique_ptr userPauseConfirmation; std::unique_ptr logicalStateHelper; + 
std::unique_ptr globalStatelessHeap; ResidencyContainer residencyAllocations; ResidencyContainer evictionAllocations; @@ -450,6 +456,7 @@ class CommandStreamReceiver { GraphicsAllocation *clearColorAllocation = nullptr; GraphicsAllocation *workPartitionAllocation = nullptr; GraphicsAllocation *kernelArgsBufferAllocation = nullptr; + GraphicsAllocation *globalStatelessHeapAllocation = nullptr; MultiGraphicsAllocation *tagsMultiAllocation = nullptr; diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index e92156cd77..0686bc9bba 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -626,6 +626,10 @@ CompletionStamp CommandStreamReceiverHw::flushTask( makeResident(*globalFenceAllocation); } + if (globalStatelessHeapAllocation) { + makeResident(*globalStatelessHeapAllocation); + } + if (preemptionAllocation) { makeResident(*preemptionAllocation); } diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index ef8177a8f3..f0a8b732d3 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -13,6 +13,7 @@ #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/gmm_helper/page_table_mngr.h" #include "shared/source/helpers/api_specific_config.h" +#include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/surface.h" #include "shared/source/memory_manager/unified_memory_manager.h" @@ -2951,3 +2952,37 @@ HWTEST_F(CommandStreamReceiverTest, givenL1CachePolicyInitializedInCsrWhenGettin EXPECT_EQ(productHelper.getL1CachePolicy(true), 
l1CachePolicy->getL1CacheValue(true)); EXPECT_EQ(productHelper.getL1CachePolicy(false), l1CachePolicy->getL1CacheValue(false)); } + +HWTEST_F(CommandStreamReceiverHwTest, givenCreateGlobalStatelessHeapAllocationWhenGettingIndirectHeapObjectThenHeapAndAllocationAreInitialized) { + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + EXPECT_EQ(nullptr, commandStreamReceiver.getGlobalStatelessHeap()); + + commandStreamReceiver.createGlobalStatelessHeap(); + auto heap = commandStreamReceiver.getGlobalStatelessHeap(); + ASSERT_NE(nullptr, heap); + EXPECT_EQ(commandStreamReceiver.getGlobalStatelessHeapAllocation(), heap->getGraphicsAllocation()); + + auto heapAllocation = commandStreamReceiver.getGlobalStatelessHeapAllocation(); + commandStreamReceiver.createGlobalStatelessHeap(); + EXPECT_EQ(commandStreamReceiver.getGlobalStatelessHeap(), heap); + EXPECT_EQ(commandStreamReceiver.getGlobalStatelessHeapAllocation(), heap->getGraphicsAllocation()); + EXPECT_EQ(commandStreamReceiver.getGlobalStatelessHeapAllocation(), heapAllocation); +} + +HWTEST_F(CommandStreamReceiverHwTest, givenCreateGlobalStatelessHeapAllocationWhenFlushingTaskThenGlobalStatelessHeapAllocationIsResident) { + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + commandStreamReceiver.storeMakeResidentAllocations = true; + EXPECT_EQ(nullptr, commandStreamReceiver.getGlobalStatelessHeap()); + + commandStreamReceiver.createGlobalStatelessHeap(); + + commandStreamReceiver.flushTask(commandStream, + 0, + &dsh, + &ioh, + &ssh, + taskLevel, + flushTaskFlags, + *pDevice); + EXPECT_TRUE(commandStreamReceiver.isMadeResident(commandStreamReceiver.getGlobalStatelessHeapAllocation())); +}