diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index 7c22b88a46..a2eebfecf5 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -11,6 +11,7 @@ #include "shared/source/command_stream/preemption_mode.h" #include "shared/source/command_stream/stream_properties.h" #include "shared/source/helpers/cache_policy.h" +#include "shared/source/helpers/common_types.h" #include "shared/source/helpers/definitions/command_encoder_args.h" #include "shared/source/helpers/heap_base_address_model.h" #include "shared/source/memory_manager/prefetch_manager.h" @@ -356,7 +357,7 @@ struct CommandList : _ze_command_list_handle_t { MOCKABLE_VIRTUAL void synchronizeEventList(uint32_t numWaitEvents, ze_event_handle_t *waitEventList); std::map hostPtrMap; - std::unordered_map ownedPrivateAllocations; + NEO::PrivateAllocsToReuseContainer ownedPrivateAllocations; std::vector patternAllocations; std::vector printfKernelContainer; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index 792c12c462..2149233dfb 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -309,7 +309,7 @@ struct CommandListCoreFamily : CommandListImp { return (this->pipeControlMultiKernelEventSync && splitKernel) || compactL3FlushEvent(dcFlush); } - MOCKABLE_VIRTUAL void allocateOrReuseKernelPrivateMemory(Kernel *kernel, uint32_t sizePerHwThread, std::unordered_map &privateAllocsToReuse); + MOCKABLE_VIRTUAL void allocateOrReuseKernelPrivateMemory(Kernel *kernel, uint32_t sizePerHwThread, NEO::PrivateAllocsToReuseContainer &privateAllocsToReuse); virtual void allocateOrReuseKernelPrivateMemoryIfNeeded(Kernel *kernel, uint32_t sizePerHwThread); CmdListEventOperation estimateEventPostSync(Event *event, uint32_t operations); void dispatchPostSyncCopy(uint64_t gpuAddress, uint32_t value, bool workloadPartition); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 6009e0501e..e073fc6e6d 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -3182,15 +3182,22 @@ void CommandListCoreFamily::allocateOrReuseKernelPrivateMemoryIfN } template -void CommandListCoreFamily::allocateOrReuseKernelPrivateMemory(Kernel *kernel, uint32_t sizePerHwThread, std::unordered_map &privateAllocsToReuse) { +void CommandListCoreFamily::allocateOrReuseKernelPrivateMemory(Kernel *kernel, uint32_t sizePerHwThread, NEO::PrivateAllocsToReuseContainer &privateAllocsToReuse) { L0::KernelImp *kernelImp = static_cast(kernel); NEO::GraphicsAllocation *privateAlloc = nullptr; - if (privateAllocsToReuse[sizePerHwThread] != nullptr) { - privateAlloc = privateAllocsToReuse[sizePerHwThread]; - } else { + bool allocToReuseFound = false; + + for (auto &alloc : privateAllocsToReuse) { + if (sizePerHwThread == alloc.first) { + privateAlloc = alloc.second; + allocToReuseFound = true; + break; + } + } + if (!allocToReuseFound) { privateAlloc = kernelImp->allocatePrivateMemoryGraphicsAllocation(); - privateAllocsToReuse[sizePerHwThread] = privateAlloc; + privateAllocsToReuse.push_back({sizePerHwThread, privateAlloc}); } kernelImp->patchAndMoveToResidencyContainerPrivateSurface(privateAlloc); } diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index 674ac6a126..43ddd80788 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -534,7 +534,7 @@ class MockCommandListCoreFamily : public CommandListCoreFamily { false, (L0::Kernel * kernel, uint32_t sizePerHwThread, - std::unordered_map &privateAllocsToReuse), + NEO::PrivateAllocsToReuseContainer &privateAllocsToReuse), (kernel, sizePerHwThread, privateAllocsToReuse)); ADDMETHOD_VOIDRETURN(allocateOrReuseKernelPrivateMemoryIfNeeded, @@ -631,7 +631,7 @@ class MockCommandListImmediateHw : public WhiteBox<::L0::CommandListCoreFamilyIm false, (L0::Kernel * kernel, uint32_t sizePerHwThread, - std::unordered_map &privateAllocsToReuse), + NEO::PrivateAllocsToReuseContainer &privateAllocsToReuse), (kernel, sizePerHwThread, privateAllocsToReuse)); ADDMETHOD_VOIDRETURN(allocateOrReuseKernelPrivateMemoryIfNeeded, diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp index 5610ee0457..b52412b2c0 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp @@ -3193,11 +3193,11 @@ HWTEST2_F(CommandListMappedTimestampTest, givenEventIsAddedToMappedEventListWhen template class MockCommandListCoreFamilyIfPrivateNeeded : public BaseMock { public: - void allocateOrReuseKernelPrivateMemory(Kernel *kernel, uint32_t sizePerHwThread, std::unordered_map &privateAllocsToReuse) override { + void allocateOrReuseKernelPrivateMemory(Kernel *kernel, uint32_t sizePerHwThread, PrivateAllocsToReuseContainer &privateAllocsToReuse) override { passedContainer = &privateAllocsToReuse; BaseMock::allocateOrReuseKernelPrivateMemory(kernel, sizePerHwThread, privateAllocsToReuse); } - std::unordered_map *passedContainer; + PrivateAllocsToReuseContainer *passedContainer; }; HWTEST2_F(CommandListCreate, givenPrivatePerDispatchDisabledWhenAllocatingPrivateMemoryThenAllocateIsNotCalled, IsAtLeastSkl) { @@ -3281,8 +3281,8 @@ HWTEST2_F(CommandListCreate, givenCmdListWhenAllocateOrReuseCalledForSizeThatIsS const_cast(mockKernel.kernelImmData->getDescriptor().kernelAttributes.perHwThreadPrivateMemorySize) = 0x1000; mockKernel.module = &mockModule; MockGraphicsAllocation mockGA(mockMem.get(), 2 * sizePerHwThread * this->neoDevice->getDeviceInfo().computeUnitsUsedForScratch); - std::unordered_map mapForReuse; - mapForReuse[sizePerHwThread] = &mockGA; + PrivateAllocsToReuseContainer mapForReuse; + mapForReuse.push_back({sizePerHwThread, &mockGA}); commandList->allocateOrReuseKernelPrivateMemory(&mockKernel, sizePerHwThread, mapForReuse); EXPECT_EQ(mockKernel.residencyContainer[0], &mockGA); } @@ -3298,8 +3298,8 @@ HWTEST2_F(CommandListCreate, givenNewSizeDifferentThanSizesInMapWhenAllocatingPr const_cast(mockKernel.kernelImmData->getDescriptor().kernelAttributes.perHwThreadPrivateMemorySize) = 0x1000; mockKernel.module = &mockModule; MockGraphicsAllocation mockGA(mockMem.get(), sizePerHwThread * this->neoDevice->getDeviceInfo().computeUnitsUsedForScratch / 2); - std::unordered_map mapForReuse; - mapForReuse[sizePerHwThread] = &mockGA; + PrivateAllocsToReuseContainer mapForReuse; + mapForReuse.push_back({sizePerHwThread, &mockGA}); commandList->allocateOrReuseKernelPrivateMemory(&mockKernel, sizePerHwThread / 2, mapForReuse); EXPECT_NE(mockKernel.residencyContainer[0], &mockGA); neoDevice->getMemoryManager()->freeGraphicsMemory(mockKernel.residencyContainer[0]); diff --git a/shared/source/command_stream/command_stream_receiver.cpp b/shared/source/command_stream/command_stream_receiver.cpp index 6b1a9eb7c7..438ba573ac 100644 --- a/shared/source/command_stream/command_stream_receiver.cpp +++ b/shared/source/command_stream/command_stream_receiver.cpp @@ -571,7 +571,7 @@ ResidencyContainer &CommandStreamReceiver::getResidencyAllocations() { ResidencyContainer &CommandStreamReceiver::getEvictionAllocations() { return this->evictionAllocations; } -std::unordered_map &CommandStreamReceiver::getOwnedPrivateAllocations() { +PrivateAllocsToReuseContainer &CommandStreamReceiver::getOwnedPrivateAllocations() { return this->ownedPrivateAllocations; } diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index 6dfc2ec890..690fecbcf2 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -11,6 +11,7 @@ #include "shared/source/command_stream/stream_properties.h" #include "shared/source/helpers/blit_properties_container.h" #include "shared/source/helpers/cache_policy.h" +#include "shared/source/helpers/common_types.h" #include "shared/source/helpers/completion_stamp.h" #include "shared/source/helpers/options.h" #include "shared/source/utilities/spinlock.h" @@ -118,7 +119,7 @@ class CommandStreamReceiver { ResidencyContainer &getResidencyAllocations(); ResidencyContainer &getEvictionAllocations(); - std::unordered_map &getOwnedPrivateAllocations(); + PrivateAllocsToReuseContainer &getOwnedPrivateAllocations(); virtual GmmPageTableMngr *createPageTableManager() { return nullptr; } bool needsPageTableManager() const; @@ -461,7 +462,7 @@ class CommandStreamReceiver { ResidencyContainer residencyAllocations; ResidencyContainer evictionAllocations; - std::unordered_map ownedPrivateAllocations; + PrivateAllocsToReuseContainer ownedPrivateAllocations; MutexType ownershipMutex; MutexType hostPtrSurfaceCreationMutex; diff --git a/shared/source/helpers/common_types.h b/shared/source/helpers/common_types.h index 1d7acea801..7d7515f342 100644 --- a/shared/source/helpers/common_types.h +++ b/shared/source/helpers/common_types.h @@ -13,11 +13,13 @@ #include namespace NEO { +class GraphicsAllocation; struct EngineControl; using EngineControlContainer = std::vector; using MultiDeviceEngineControlContainer = StackVec; class Device; using DeviceVector = std::vector>; +using PrivateAllocsToReuseContainer = StackVec, 8>; enum class DebugPauseState : uint32_t { disabled, diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index 0dd4a43566..06a66d1e7a 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -4491,7 +4491,7 @@ HWTEST_F(CommandStreamReceiverTest, givenCsrWhenCleanUpResourcesThenOwnedPrivate auto mockGA = std::make_unique(); auto mapForReuse = &csr.getOwnedPrivateAllocations(); - mapForReuse->insert({0x100, mockGA.release()}); + mapForReuse->push_back({0x100, mockGA.release()}); csr.cleanupResources(); EXPECT_EQ(mapForReuse->size(), 0u); }