From ffad5c6c093d18af533e87285c9a3f3ec824c7a1 Mon Sep 17 00:00:00 2001 From: Maciej Plewka Date: Wed, 14 Sep 2022 09:39:13 +0000 Subject: [PATCH] Store indirect residency at command queue level Signed-off-by: Maciej Plewka Related-To: NEO-7211 --- level_zero/core/source/cmdlist/cmdlist.cpp | 22 ---- level_zero/core/source/cmdlist/cmdlist.h | 2 - .../source/cmdlist/cmdlist_hw_immediate.inl | 6 +- level_zero/core/source/cmdqueue/cmdqueue.cpp | 17 +++ level_zero/core/source/cmdqueue/cmdqueue.h | 5 + level_zero/core/source/cmdqueue/cmdqueue_hw.h | 3 + .../core/source/cmdqueue/cmdqueue_hw.inl | 15 ++- .../core/source/cmdqueue/cmdqueue_imp.h | 3 + .../core/test/unit_tests/mocks/mock_cmdlist.h | 1 + .../unit_tests/mocks/mock_driver_handle.h | 1 + .../sources/cmdlist/test_cmdlist_6.cpp | 48 +++++++ .../sources/cmdqueue/test_cmdqueue_1.cpp | 122 ++++++++++++++++++ .../sources/cmdqueue/test_cmdqueue_3.cpp | 121 +++++++++-------- .../memory_manager/unified_memory_manager.cpp | 37 +++--- .../memory_manager/unified_memory_manager.h | 11 +- shared/test/common/mocks/mock_svm_manager.h | 1 + .../unit_test/memory_manager/CMakeLists.txt | 1 + .../unified_memory_manager_tests.cpp | 34 +++++ 18 files changed, 336 insertions(+), 114 deletions(-) create mode 100644 shared/test/unit_test/memory_manager/unified_memory_manager_tests.cpp diff --git a/level_zero/core/source/cmdlist/cmdlist.cpp b/level_zero/core/source/cmdlist/cmdlist.cpp index 3148d0ce01..1263ec8db2 100644 --- a/level_zero/core/source/cmdlist/cmdlist.cpp +++ b/level_zero/core/source/cmdlist/cmdlist.cpp @@ -163,28 +163,6 @@ void CommandList::migrateSharedAllocations() { } } -void CommandList::handleIndirectAllocationResidency() { - bool indirectAllocationsAllowed = this->hasIndirectAllocationsAllowed(); - NEO::Device *neoDevice = this->device->getNEODevice(); - if (indirectAllocationsAllowed) { - auto svmAllocsManager = this->device->getDriverHandle()->getSvmAllocsManager(); - auto submitAsPack = this->device->getDriverHandle()->getMemoryManager()->allowIndirectAllocationsAsPack(neoDevice->getRootDeviceIndex()); - if (NEO::DebugManager.flags.MakeIndirectAllocationsResidentAsPack.get() != -1) { - submitAsPack = !!NEO::DebugManager.flags.MakeIndirectAllocationsResidentAsPack.get(); - } - - if (submitAsPack) { - svmAllocsManager->makeIndirectAllocationsResident(*(this->csr), this->csr->peekTaskCount() + 1u); - } else { - UnifiedMemoryControls unifiedMemoryControls = this->getUnifiedMemoryControls(); - - svmAllocsManager->addInternalAllocationsToResidencyContainer(neoDevice->getRootDeviceIndex(), - this->commandContainer.getResidencyContainer(), - unifiedMemoryControls.generateMask()); - } - } -} - bool CommandList::setupTimestampEventForMultiTile(Event *signalEvent) { if (this->partitionCount > 1 && signalEvent) { diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index 07b0779f0d..e088cf0305 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -217,8 +217,6 @@ struct CommandList : _ze_command_list_handle_t { return indirectAllocationsAllowed; } - void handleIndirectAllocationResidency(); - NEO::PreemptionMode obtainKernelPreemptionMode(Kernel *kernel); std::vector &getPrintfKernelContainer() { diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl index 098972f283..a998c89f0d 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl @@ -91,8 +91,10 @@ ze_result_t CommandListCoreFamilyImmediate::executeCommandListImm size_t commandStreamStart = this->cmdListCurrentStartOffset; auto lockCSR = this->csr->obtainUniqueOwnership(); - - this->handleIndirectAllocationResidency(); + std::unique_lock lockForIndirect; + if (this->hasIndirectAllocationsAllowed()) { + this->cmdQImmediate->handleIndirectAllocationResidency(this->getUnifiedMemoryControls(), lockForIndirect); + } this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(), this->getCommandListPerThreadPrivateScratchSize()); diff --git a/level_zero/core/source/cmdqueue/cmdqueue.cpp b/level_zero/core/source/cmdqueue/cmdqueue.cpp index 7dd627496b..770f3ae8fd 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue.cpp +++ b/level_zero/core/source/cmdqueue/cmdqueue.cpp @@ -256,4 +256,21 @@ NEO::WaitStatus CommandQueueImp::CommandBufferManager::switchBuffers(NEO::Comman return waitStatus; } +void CommandQueueImp::handleIndirectAllocationResidency(UnifiedMemoryControls unifiedMemoryControls, std::unique_lock &lockForIndirect) { + NEO::Device *neoDevice = this->device->getNEODevice(); + auto svmAllocsManager = this->device->getDriverHandle()->getSvmAllocsManager(); + auto submitAsPack = this->device->getDriverHandle()->getMemoryManager()->allowIndirectAllocationsAsPack(neoDevice->getRootDeviceIndex()); + if (NEO::DebugManager.flags.MakeIndirectAllocationsResidentAsPack.get() != -1) { + submitAsPack = !!NEO::DebugManager.flags.MakeIndirectAllocationsResidentAsPack.get(); + } + + if (submitAsPack) { + svmAllocsManager->makeIndirectAllocationsResident(*(this->csr), this->csr->peekTaskCount() + 1u); + } else { + lockForIndirect = this->device->getDriverHandle()->getSvmAllocsManager()->obtainOwnership(); + svmAllocsManager->addInternalAllocationsToResidencyContainer(neoDevice->getRootDeviceIndex(), + this->csr->getResidencyAllocations(), + unifiedMemoryControls.generateMask()); + } +} } // namespace L0 diff --git a/level_zero/core/source/cmdqueue/cmdqueue.h b/level_zero/core/source/cmdqueue/cmdqueue.h index ce4ab7579b..601a187697 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue.h +++ b/level_zero/core/source/cmdqueue/cmdqueue.h @@ -12,6 +12,7 @@ #include #include +#include struct _ze_command_queue_handle_t {}; @@ -19,6 +20,8 @@ namespace NEO { class CommandStreamReceiver; } +struct UnifiedMemoryControls; + namespace L0 { struct Device; @@ -49,6 +52,8 @@ struct CommandQueue : _ze_command_queue_handle_t { return static_cast(handle); } + virtual void handleIndirectAllocationResidency(UnifiedMemoryControls unifiedMemoryControls, std::unique_lock &lockForIndirect) = 0; + ze_command_queue_handle_t toHandle() { return this; } bool peekIsCopyOnlyCommandQueue() const { return this->isCopyOnlyCommandQueue; } diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.h b/level_zero/core/source/cmdqueue/cmdqueue_hw.h index 1456cebd9c..486ad74138 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.h +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.h @@ -9,6 +9,7 @@ #include "shared/source/command_stream/stream_properties.h" #include "shared/source/helpers/hw_info.h" +#include "shared/source/unified_memory/unified_memory.h" #include "level_zero/core/source/cmdqueue/cmdqueue_imp.h" @@ -89,6 +90,8 @@ struct CommandQueueHw : public CommandQueueImp { bool isMigrationRequested{}; bool isDirectSubmissionEnabled{}; bool isDispatchTaskCountPostSyncRequired{}; + bool hasIndirectAccess{}; + UnifiedMemoryControls unifiedMemoryControls; }; ze_result_t validateCommandListsParams(CommandListExecutionContext &ctx, diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index 3d52594796..18d5f35468 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -93,7 +93,6 @@ ze_result_t CommandQueueHw::executeCommandLists( } this->device->activateMetricGroups(); - if (this->isCopyOnlyCommandQueue) { ret = this->executeCommandListsCopyOnly(ctx, numCommandLists, phCommandLists, hFence); } else { @@ -117,9 +116,13 @@ ze_result_t CommandQueueHw::executeCommandListsRegular( this->setupCmdListsAndContextParams(ctx, phCommandLists, numCommandLists, hFence); ctx.isDirectSubmissionEnabled = this->csr->isDirectSubmissionEnabled(); + std::unique_lock lockForIndirect; + if (ctx.hasIndirectAccess) { + handleIndirectAllocationResidency(ctx.unifiedMemoryControls, lockForIndirect); + } + size_t linearStreamSizeEstimate = this->estimateLinearStreamSizeInitial(ctx, phCommandLists, numCommandLists); - this->csr->getResidencyAllocations().reserve(ctx.spaceForResidency); this->handleScratchSpaceAndUpdateGSBAStateDirtyFlag(ctx); this->setFrontEndStateProperties(ctx); @@ -443,6 +446,12 @@ CommandQueueHw::CommandListExecutionContext::CommandListExecution if (commandList->isMemoryPrefetchRequested()) { this->performMemoryPrefetch = true; } + hasIndirectAccess |= commandList->hasIndirectAllocationsAllowed(); + if (commandList->hasIndirectAllocationsAllowed()) { + unifiedMemoryControls.indirectDeviceAllocationsAllowed |= commandList->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed; + unifiedMemoryControls.indirectHostAllocationsAllowed |= commandList->getUnifiedMemoryControls().indirectHostAllocationsAllowed; + unifiedMemoryControls.indirectSharedAllocationsAllowed |= commandList->getUnifiedMemoryControls().indirectSharedAllocationsAllowed; + } } this->isDevicePreemptionModeMidThread = device->getDevicePreemptionMode() == NEO::PreemptionMode::MidThread; this->stateSipRequired = (this->isPreemptionModeInitial && this->isDevicePreemptionModeMidThread) || @@ -522,10 +531,8 @@ void CommandQueueHw::setupCmdListsAndContextParams( auto commandList = CommandList::fromHandle(phCommandLists[i]); commandList->csr = this->csr; - commandList->handleIndirectAllocationResidency(); ctx.containsAnyRegularCmdList |= commandList->cmdListType == CommandList::CommandListType::TYPE_REGULAR; - ctx.spaceForResidency += commandList->commandContainer.getResidencyContainer().size(); if (!isCopyOnlyCommandQueue) { ctx.perThreadScratchSpaceSize = std::max(ctx.perThreadScratchSpaceSize, commandList->getCommandListPerThreadScratchSize()); ctx.perThreadPrivateScratchSize = std::max(ctx.perThreadPrivateScratchSize, commandList->getCommandListPerThreadPrivateScratchSize()); diff --git a/level_zero/core/source/cmdqueue/cmdqueue_imp.h b/level_zero/core/source/cmdqueue/cmdqueue_imp.h index b4d137dd69..3b2b800896 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_imp.h +++ b/level_zero/core/source/cmdqueue/cmdqueue_imp.h @@ -17,6 +17,8 @@ #include +struct UnifiedMemoryControls; + namespace NEO { class LinearStream; class GraphicsAllocation; @@ -81,6 +83,7 @@ struct CommandQueueImp : public CommandQueue { MOCKABLE_VIRTUAL NEO::WaitStatus reserveLinearStreamSize(size_t size); ze_command_queue_mode_t getSynchronousMode() const; virtual bool getPreemptionCmdProgramming() = 0; + void handleIndirectAllocationResidency(UnifiedMemoryControls unifiedMemoryControls, std::unique_lock &lockForIndirect) override; protected: MOCKABLE_VIRTUAL NEO::SubmissionStatus submitBatchBuffer(size_t offset, NEO::ResidencyContainer &residencyContainer, void *endingCmdPtr, diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index 5eb29267b1..653986fd28 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -132,6 +132,7 @@ template struct MockCommandListImmediate : public CommandListCoreFamilyImmediate { using CommandListCoreFamilyImmediate::requiredStreamState; using CommandListCoreFamilyImmediate::containsAnyKernel; + using CommandListCoreFamilyImmediate::indirectAllocationsAllowed; }; template <> diff --git a/level_zero/core/test/unit_tests/mocks/mock_driver_handle.h b/level_zero/core/test/unit_tests/mocks/mock_driver_handle.h index a35fef219e..f899737cdc 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_driver_handle.h +++ b/level_zero/core/test/unit_tests/mocks/mock_driver_handle.h @@ -19,6 +19,7 @@ namespace ult { template <> struct WhiteBox<::L0::DriverHandle> : public ::L0::DriverHandleImp { using ::L0::DriverHandleImp::enableProgramDebugging; + using ::L0::DriverHandleImp::svmAllocsManager; }; using DriverHandle = WhiteBox<::L0::DriverHandle>; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp index 1891f45488..0d9f23ee42 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp @@ -5,6 +5,7 @@ * */ +#include "shared/test/common/mocks/mock_command_stream_receiver.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/hw_test.h" @@ -647,5 +648,52 @@ TEST(CommandList, whenAsMutableIsCalledNullptrIsReturned) { EXPECT_EQ(nullptr, cmdList.asMutable()); } +class MockCommandQueueIndirectAccess : public Mock { + public: + MockCommandQueueIndirectAccess(L0::Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) : Mock(device, csr, desc) {} + void handleIndirectAllocationResidency(UnifiedMemoryControls unifiedMemoryControls, std::unique_lock &lockForIndirect) override { + handleIndirectAllocationResidencyCalledTimes++; + } + uint32_t handleIndirectAllocationResidencyCalledTimes = 0; +}; + +HWTEST2_F(CommandListTest, givenCmdListWithIndirectAccessWhenExecutingCommandListImmediateWithFlushTaskThenHandleIndirectAccessCalled, IsAtLeastSkl) { + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; + + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); + auto &commandListImmediate = static_cast &>(*commandList); + + MockCommandStreamReceiver mockCommandStreamReceiver(*neoDevice->executionEnvironment, neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()); + MockCommandQueueIndirectAccess mockCommandQueue(device, &mockCommandStreamReceiver, &desc); + + auto oldCommandQueue = commandList->cmdQImmediate; + commandList->cmdQImmediate = &mockCommandQueue; + commandListImmediate.indirectAllocationsAllowed = true; + commandListImmediate.executeCommandListImmediateWithFlushTask(false); + EXPECT_EQ(mockCommandQueue.handleIndirectAllocationResidencyCalledTimes, 1u); + commandList->cmdQImmediate = oldCommandQueue; +} + +HWTEST2_F(CommandListTest, givenCmdListWithNoIndirectAccessWhenExecutingCommandListImmediateWithFlushTaskThenHandleIndirectAccessNotCalled, IsAtLeastSkl) { + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; + + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); + auto &commandListImmediate = static_cast &>(*commandList); + + MockCommandStreamReceiver mockCommandStreamReceiver(*neoDevice->executionEnvironment, neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()); + MockCommandQueueIndirectAccess mockCommandQueue(device, &mockCommandStreamReceiver, &desc); + + auto oldCommandQueue = commandList->cmdQImmediate; + commandList->cmdQImmediate = &mockCommandQueue; + commandListImmediate.indirectAllocationsAllowed = false; + commandListImmediate.executeCommandListImmediateWithFlushTask(false); + EXPECT_EQ(mockCommandQueue.handleIndirectAllocationResidencyCalledTimes, 0u); + commandList->cmdQImmediate = oldCommandQueue; +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp index 92ebcb1978..6a4be8b70e 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp @@ -19,6 +19,7 @@ #include "level_zero/core/test/unit_tests/fixtures/module_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" +#include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h" #include "level_zero/core/test/unit_tests/mocks/mock_memory_manager.h" namespace L0 { @@ -1815,5 +1816,126 @@ TEST_F(CommandQueueCreate, givenCreatedCommandQueueWhenGettingMultiReturnPointFl commandQueue->destroy(); } +struct SVMAllocsManagerMock : public NEO::SVMAllocsManager { + using SVMAllocsManager::mtx; + SVMAllocsManagerMock(MemoryManager *memoryManager) : NEO::SVMAllocsManager(memoryManager, false) {} + void makeIndirectAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t taskCount) override { + makeIndirectAllocationsResidentCalledTimes++; + } + void addInternalAllocationsToResidencyContainer(uint32_t rootDeviceIndex, + ResidencyContainer &residencyContainer, + uint32_t requestedTypesMask) override { + addInternalAllocationsToResidencyContainerCalledTimes++; + passedContainer = residencyContainer.data(); + } + uint32_t makeIndirectAllocationsResidentCalledTimes = 0; + uint32_t addInternalAllocationsToResidencyContainerCalledTimes = 0; + GraphicsAllocation **passedContainer; +}; + +TEST_F(CommandQueueCreate, givenCommandQueueWhenHandleIndirectAllocationResidencyCalledAndSubmiPackEnabledThenMakeIndirectAllocResidentCalled) { + DebugManagerStateRestore restore; + DebugManager.flags.MakeIndirectAllocationsResidentAsPack.set(1); + const ze_command_queue_desc_t desc{}; + ze_result_t returnValue; + + auto prevSvmAllocsManager = device->getDriverHandle()->getSvmAllocsManager(); + auto commandQueue = whiteboxCast(CommandQueue::create(productFamily, + device, + neoDevice->getDefaultEngine().commandStreamReceiver, + &desc, + false, + false, + returnValue)); + std::unique_lock lock; + auto mockSvmAllocsManager = std::make_unique(device->getDriverHandle()->getMemoryManager()); + reinterpret_cast *>(device->getDriverHandle())->svmAllocsManager = mockSvmAllocsManager.get(); + + commandQueue->handleIndirectAllocationResidency({true, true, true}, lock); + EXPECT_EQ(mockSvmAllocsManager->makeIndirectAllocationsResidentCalledTimes, 1u); + EXPECT_EQ(mockSvmAllocsManager->addInternalAllocationsToResidencyContainerCalledTimes, 0u); + reinterpret_cast *>(device->getDriverHandle())->svmAllocsManager = prevSvmAllocsManager; + commandQueue->destroy(); +} + +TEST_F(CommandQueueCreate, givenCommandQueueWhenHandleIndirectAllocationResidencyCalledAndSubmiPackDisabeldThenAddInternalAllocationsToResidencyContainer) { + DebugManagerStateRestore restore; + DebugManager.flags.MakeIndirectAllocationsResidentAsPack.set(0); + const ze_command_queue_desc_t desc{}; + ze_result_t returnValue; + + auto prevSvmAllocsManager = device->getDriverHandle()->getSvmAllocsManager(); + auto commandQueue = whiteboxCast(CommandQueue::create(productFamily, + device, + neoDevice->getDefaultEngine().commandStreamReceiver, + &desc, + false, + false, + returnValue)); + std::unique_lock lock; + auto mockSvmAllocsManager = std::make_unique(device->getDriverHandle()->getMemoryManager()); + reinterpret_cast *>(device->getDriverHandle())->svmAllocsManager = mockSvmAllocsManager.get(); + + commandQueue->handleIndirectAllocationResidency({true, true, true}, lock); + EXPECT_EQ(mockSvmAllocsManager->makeIndirectAllocationsResidentCalledTimes, 0u); + EXPECT_EQ(mockSvmAllocsManager->addInternalAllocationsToResidencyContainerCalledTimes, 1u); + reinterpret_cast *>(device->getDriverHandle())->svmAllocsManager = prevSvmAllocsManager; + lock.unlock(); + commandQueue->destroy(); +} + +TEST_F(CommandQueueCreate, givenCommandQueueWhenHandleIndirectAllocationResidencyCalledAndSubmiPackDisabeldThenSVMAllocsMtxIsLocked) { + DebugManagerStateRestore restore; + DebugManager.flags.MakeIndirectAllocationsResidentAsPack.set(0); + const ze_command_queue_desc_t desc{}; + ze_result_t returnValue; + + auto prevSvmAllocsManager = device->getDriverHandle()->getSvmAllocsManager(); + auto commandQueue = whiteboxCast(CommandQueue::create(productFamily, + device, + neoDevice->getDefaultEngine().commandStreamReceiver, + &desc, + false, + false, + returnValue)); + std::unique_lock lock; + auto mockSvmAllocsManager = std::make_unique(device->getDriverHandle()->getMemoryManager()); + reinterpret_cast *>(device->getDriverHandle())->svmAllocsManager = mockSvmAllocsManager.get(); + + commandQueue->handleIndirectAllocationResidency({true, true, true}, lock); + std::thread th([&] { + EXPECT_FALSE(mockSvmAllocsManager->mtx.try_lock()); + }); + th.join(); + reinterpret_cast *>(device->getDriverHandle())->svmAllocsManager = prevSvmAllocsManager; + lock.unlock(); + commandQueue->destroy(); +} + +TEST_F(CommandQueueCreate, givenCommandQueueWhenHandleIndirectAllocationResidencyCalledAndSubmiPackDisabeldThenResidencyContainerFromCsrIsUsed) { + DebugManagerStateRestore restore; + DebugManager.flags.MakeIndirectAllocationsResidentAsPack.set(0); + const ze_command_queue_desc_t desc{}; + ze_result_t returnValue; + + auto prevSvmAllocsManager = device->getDriverHandle()->getSvmAllocsManager(); + auto commandQueue = whiteboxCast(CommandQueue::create(productFamily, + device, + neoDevice->getDefaultEngine().commandStreamReceiver, + &desc, + false, + false, + returnValue)); + std::unique_lock lock; + auto mockSvmAllocsManager = std::make_unique(device->getDriverHandle()->getMemoryManager()); + reinterpret_cast *>(device->getDriverHandle())->svmAllocsManager = mockSvmAllocsManager.get(); + + commandQueue->handleIndirectAllocationResidency({true, true, true}, lock); + EXPECT_EQ(commandQueue->csr->getResidencyAllocations().data(), mockSvmAllocsManager->passedContainer); + reinterpret_cast *>(device->getDriverHandle())->svmAllocsManager = prevSvmAllocsManager; + lock.unlock(); + commandQueue->destroy(); +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_3.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_3.cpp index 7eb6db5061..745cd7b856 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_3.cpp @@ -14,6 +14,8 @@ #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/hw_test.h" +#include "level_zero/core/source/cmdqueue/cmdqueue_hw.h" +#include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/module_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" @@ -386,68 +388,6 @@ HWTEST2_F(CommandQueueCommandsMultiTile, givenCommandQueueOnMultiTileWhenWalkerP } using CommandQueueIndirectAllocations = Test; -HWTEST_F(CommandQueueIndirectAllocations, givenCommandQueueWhenExecutingCommandListsThenExpectedIndirectAllocationsAddedToResidencyContainer) { - const ze_command_queue_desc_t desc = {}; - - MockCsrHw2 csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); - csr.initializeTagAllocation(); - csr.createKernelArgsBufferAllocation(); - csr.setupContext(*neoDevice->getDefaultEngine().osContext); - if (device->getNEODevice()->getPreemptionMode() == PreemptionMode::MidThread || device->getNEODevice()->isDebuggerActive()) { - csr.createPreemptionAllocation(); - } - - ze_result_t returnValue; - L0::CommandQueue *commandQueue = CommandQueue::create(productFamily, - device, - &csr, - &desc, - false, - false, - returnValue); - ASSERT_NE(nullptr, commandQueue); - - std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue)); - - void *deviceAlloc = nullptr; - ze_device_mem_alloc_desc_t deviceDesc = {}; - auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &deviceAlloc); - ASSERT_EQ(ZE_RESULT_SUCCESS, result); - - auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(deviceAlloc)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); - ASSERT_NE(nullptr, gpuAlloc); - - createKernel(); - kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true; - EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed); - - ze_group_count_t groupCount{1, 1, 1}; - CmdListKernelLaunchParams launchParams = {}; - result = commandList->appendLaunchKernel(kernel->toHandle(), - &groupCount, - nullptr, - 0, - nullptr, - launchParams); - ASSERT_EQ(ZE_RESULT_SUCCESS, result); - - auto itorEvent = std::find(std::begin(commandList->commandContainer.getResidencyContainer()), - std::end(commandList->commandContainer.getResidencyContainer()), - gpuAlloc); - EXPECT_EQ(itorEvent, std::end(commandList->commandContainer.getResidencyContainer())); - - auto commandListHandle = commandList->toHandle(); - result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false); - ASSERT_EQ(ZE_RESULT_SUCCESS, result); - - itorEvent = std::find(std::begin(commandList->commandContainer.getResidencyContainer()), - std::end(commandList->commandContainer.getResidencyContainer()), - gpuAlloc); - EXPECT_NE(itorEvent, std::end(commandList->commandContainer.getResidencyContainer())); - - device->getDriverHandle()->getSvmAllocsManager()->freeSVMAlloc(deviceAlloc); - commandQueue->destroy(); -} HWTEST_F(CommandQueueIndirectAllocations, givenDebugModeToTreatIndirectAllocationsAsOnePackWhenIndirectAccessIsUsedThenWholePackIsMadeResident) { DebugManagerStateRestore restorer; @@ -776,6 +716,63 @@ HWTEST2_F(EngineInstancedDeviceExecuteTests, givenEngineInstancedDeviceWhenExecu commandQueue->destroy(); } +template +class MockCommandQueueHandleIndirectAllocs : public MockCommandQueueHw { + public: + using typename MockCommandQueueHw::CommandListExecutionContext; + using MockCommandQueueHw::executeCommandListsRegular; + MockCommandQueueHandleIndirectAllocs(L0::Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) : MockCommandQueueHw(device, csr, desc) {} + void handleIndirectAllocationResidency(UnifiedMemoryControls unifiedMemoryControls, std::unique_lock &lockForIndirect) override { + handleIndirectAllocationResidencyCalledTimes++; + } + uint32_t handleIndirectAllocationResidencyCalledTimes = 0; +}; + +HWTEST2_F(CommandQueueIndirectAllocations, givenCtxWithIndirectAccessWhenExecutingCommandListImmediateWithFlushTaskThenHandleIndirectAccessCalled, IsAtLeastSkl) { + ze_command_queue_desc_t desc = {}; + auto csr = neoDevice->getDefaultEngine().commandStreamReceiver; + auto commandQueue = new MockCommandQueueHandleIndirectAllocs(device, csr, &desc); + commandQueue->initialize(false, false); + auto ctx = typename MockCommandQueueHandleIndirectAllocs::CommandListExecutionContext{nullptr, + 0, + csr->getPreemptionMode(), + device, + false, + csr->isProgramActivePartitionConfigRequired(), + false}; + + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue)); + ctx.hasIndirectAccess = true; + ctx.isDispatchTaskCountPostSyncRequired = false; + auto cmdListHandle = commandList.get()->toHandle(); + commandQueue->executeCommandListsRegular(ctx, 0, &cmdListHandle, nullptr); + EXPECT_EQ(commandQueue->handleIndirectAllocationResidencyCalledTimes, 1u); + commandQueue->destroy(); +} + +HWTEST2_F(CommandQueueIndirectAllocations, givenCtxWitNohIndirectAccessWhenExecutingCommandListImmediateWithFlushTaskThenHandleIndirectAccessNotCalled, IsAtLeastSkl) { + ze_command_queue_desc_t desc = {}; + auto csr = neoDevice->getDefaultEngine().commandStreamReceiver; + auto commandQueue = new MockCommandQueueHandleIndirectAllocs(device, csr, &desc); + commandQueue->initialize(false, false); + auto ctx = typename MockCommandQueueHandleIndirectAllocs::CommandListExecutionContext{nullptr, + 0, + csr->getPreemptionMode(), + device, + false, + csr->isProgramActivePartitionConfigRequired(), + false}; + + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue)); + ctx.hasIndirectAccess = false; + ctx.isDispatchTaskCountPostSyncRequired = false; + auto cmdListHandle = commandList.get()->toHandle(); + commandQueue->executeCommandListsRegular(ctx, 0, &cmdListHandle, nullptr); + EXPECT_EQ(commandQueue->handleIndirectAllocationResidencyCalledTimes, 0u); + commandQueue->destroy(); +} } // namespace ult } // namespace L0 diff --git a/shared/source/memory_manager/unified_memory_manager.cpp b/shared/source/memory_manager/unified_memory_manager.cpp index c968daf9ee..5cb54441a3 100644 --- a/shared/source/memory_manager/unified_memory_manager.cpp +++ b/shared/source/memory_manager/unified_memory_manager.cpp @@ -107,7 +107,7 @@ SvmMapOperation *SVMAllocsManager::MapOperationsTracker::get(const void *regionP void SVMAllocsManager::addInternalAllocationsToResidencyContainer(uint32_t rootDeviceIndex, ResidencyContainer &residencyContainer, uint32_t requestedTypesMask) { - std::shared_lock lock(mtx); + std::unique_lock lock(mtx); for (auto &allocation : this->SVMAllocs.allocations) { if (rootDeviceIndex >= allocation.second.gpuAllocations.getGraphicsAllocations().size()) { continue; @@ -124,7 +124,7 @@ void SVMAllocsManager::addInternalAllocationsToResidencyContainer(uint32_t rootD } void SVMAllocsManager::makeInternalAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t requestedTypesMask) { - std::shared_lock lock(mtx); + std::unique_lock lock(mtx); for (auto &allocation : this->SVMAllocs.allocations) { if (allocation.second.memoryType & requestedTypesMask) { auto gpuAllocation = allocation.second.gpuAllocations.getGraphicsAllocation(commandStreamReceiver.getRootDeviceIndex()); @@ -211,7 +211,7 @@ void *SVMAllocsManager::createHostUnifiedMemoryAllocation(size_t size, allocData.pageSizeForAlignment = pageSizeForAlignment; allocData.setAllocId(this->allocationsCounter++); - std::unique_lock lock(mtx); + std::unique_lock lock(mtx); this->SVMAllocs.insert(allocData); return usmPtr; @@ -288,7 +288,7 @@ void *SVMAllocsManager::createUnifiedMemoryAllocation(size_t size, allocData.device = memoryProperties.device; allocData.setAllocId(this->allocationsCounter++); - std::unique_lock lock(mtx); + std::unique_lock lock(mtx); this->SVMAllocs.insert(allocData); return reinterpret_cast(unifiedMemoryAllocation->getGpuAddress()); } @@ -370,7 +370,7 @@ void *SVMAllocsManager::createUnifiedKmdMigratedAllocation(size_t size, const Sv allocData.pageSizeForAlignment = pageSizeForAlignment; allocData.setAllocId(this->allocationsCounter++); - std::unique_lock lock(mtx); + std::unique_lock lock(mtx); this->SVMAllocs.insert(allocData); return allocationGpu->getUnderlyingBuffer(); } @@ -381,17 +381,17 @@ void SVMAllocsManager::setUnifiedAllocationProperties(GraphicsAllocation *alloca } SvmAllocationData *SVMAllocsManager::getSVMAlloc(const void *ptr) { - std::shared_lock lock(mtx); + std::unique_lock lock(mtx); return SVMAllocs.get(ptr); } void SVMAllocsManager::insertSVMAlloc(const SvmAllocationData &svmAllocData) { - std::unique_lock lock(mtx); + std::unique_lock lock(mtx); SVMAllocs.insert(svmAllocData); } void SVMAllocsManager::removeSVMAlloc(const SvmAllocationData &svmAllocData) { - std::unique_lock lock(mtx); + std::unique_lock lock(mtx); SVMAllocs.remove(svmAllocData); } @@ -428,7 +428,7 @@ void SVMAllocsManager::freeSVMAllocImpl(void *ptr, bool blocking, SvmAllocationD if (pageFaultManager) { pageFaultManager->removeAllocation(ptr); } - std::unique_lock lock(mtx); + std::unique_lock lock(mtx); if (svmData->gpuAllocations.getAllocationType() == AllocationType::SVM_ZERO_COPY) { freeZeroCopySvmAllocation(svmData); } else { @@ -470,7 +470,7 @@ void *SVMAllocsManager::createZeroCopySvmAllocation(size_t size, const SvmAlloca } allocData.size = size; - std::unique_lock lock(mtx); + std::unique_lock lock(mtx); this->SVMAllocs.insert(allocData); return usmPtr; } @@ -534,7 +534,7 @@ void *SVMAllocsManager::createUnifiedAllocationWithDeviceStorage(size_t size, co allocData.size = size; allocData.setAllocId(this->allocationsCounter++); - std::unique_lock lock(mtx); + std::unique_lock lock(mtx); this->SVMAllocs.insert(allocData); return svmPtr; } @@ -564,7 +564,7 @@ void SVMAllocsManager::freeSvmAllocationWithDeviceStorage(SvmAllocationData *svm } bool SVMAllocsManager::hasHostAllocations() { - std::shared_lock lock(mtx); + std::unique_lock lock(mtx); for (auto &allocation : this->SVMAllocs.allocations) { if (allocation.second.memoryType == InternalMemoryType::HOST_UNIFIED_MEMORY) { return true; @@ -574,7 +574,7 @@ bool SVMAllocsManager::hasHostAllocations() { } void SVMAllocsManager::makeIndirectAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t taskCount) { - std::unique_lock lock(mtx); + std::unique_lock lock(mtx); bool parseAllAllocations = false; auto entry = indirectAllocationsResidency.find(&commandStreamReceiver); @@ -608,7 +608,7 @@ void SVMAllocsManager::makeIndirectAllocationsResident(CommandStreamReceiver &co } void SVMAllocsManager::prepareIndirectAllocationForDestruction(SvmAllocationData *allocationData) { - std::unique_lock lock(mtx); + std::unique_lock lock(mtx); if (this->indirectAllocationsResidency.size() > 0u) { for (auto &internalAllocationsHandling : this->indirectAllocationsResidency) { auto commandStreamReceiver = internalAllocationsHandling.first; @@ -627,7 +627,7 @@ void SVMAllocsManager::prepareIndirectAllocationForDestruction(SvmAllocationData } SvmMapOperation *SVMAllocsManager::getSvmMapOperation(const void *ptr) { - std::shared_lock lock(mtx); + std::unique_lock lock(mtx); return svmMapOperations.get(ptr); } @@ -638,12 +638,12 @@ void SVMAllocsManager::insertSvmMapOperation(void *regionSvmPtr, size_t regionSi svmMapOperation.offset = offset; svmMapOperation.regionSize = regionSize; svmMapOperation.readOnlyMap = readOnlyMap; - std::unique_lock lock(mtx); + std::unique_lock lock(mtx); svmMapOperations.insert(svmMapOperation); } void SVMAllocsManager::removeSvmMapOperation(const void *regionSvmPtr) { - std::unique_lock lock(mtx); + std::unique_lock lock(mtx); svmMapOperations.remove(regionSvmPtr); } @@ -686,4 +686,7 @@ void SVMAllocsManager::prefetchMemory(Device &device, SvmAllocationData &svmData } } +std::unique_lock SVMAllocsManager::obtainOwnership() { + return std::unique_lock(mtx); +} } // namespace NEO diff --git a/shared/source/memory_manager/unified_memory_manager.h b/shared/source/memory_manager/unified_memory_manager.h index 66739d174f..d996d3c4cd 100644 --- a/shared/source/memory_manager/unified_memory_manager.h +++ b/shared/source/memory_manager/unified_memory_manager.h @@ -173,17 +173,18 @@ class SVMAllocsManager { MOCKABLE_VIRTUAL void insertSvmMapOperation(void *regionSvmPtr, size_t regionSize, void *baseSvmPtr, size_t offset, bool readOnlyMap); void removeSvmMapOperation(const void *regionSvmPtr); SvmMapOperation *getSvmMapOperation(const void *regionPtr); - void addInternalAllocationsToResidencyContainer(uint32_t rootDeviceIndex, - ResidencyContainer &residencyContainer, - uint32_t requestedTypesMask); + MOCKABLE_VIRTUAL void addInternalAllocationsToResidencyContainer(uint32_t rootDeviceIndex, + ResidencyContainer &residencyContainer, + uint32_t requestedTypesMask); void makeInternalAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t requestedTypesMask); void *createUnifiedAllocationWithDeviceStorage(size_t size, const SvmAllocationProperties &svmProperties, const UnifiedMemoryProperties &unifiedMemoryProperties); void freeSvmAllocationWithDeviceStorage(SvmAllocationData *svmData); bool hasHostAllocations(); std::atomic allocationsCounter = 0; - void makeIndirectAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t taskCount); + MOCKABLE_VIRTUAL void makeIndirectAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t taskCount); void prepareIndirectAllocationForDestruction(SvmAllocationData *); void prefetchMemory(Device &device, SvmAllocationData &svmData); + std::unique_lock obtainOwnership(); std::map indirectAllocationsResidency; @@ -203,9 +204,9 @@ class SVMAllocsManager { MapBasedAllocationTracker SVMAllocs; MapOperationsTracker svmMapOperations; MemoryManager *memoryManager; - std::shared_mutex mtx; bool multiOsContextSupport; SvmAllocationCache usmDeviceAllocationsCache; bool usmDeviceAllocationsCacheEnabled = false; + std::recursive_mutex mtx; }; } // namespace NEO diff --git a/shared/test/common/mocks/mock_svm_manager.h b/shared/test/common/mocks/mock_svm_manager.h index 81d8557ef0..5fbb45eef6 100644 --- a/shared/test/common/mocks/mock_svm_manager.h +++ b/shared/test/common/mocks/mock_svm_manager.h @@ -11,6 +11,7 @@ namespace NEO { struct MockSVMAllocsManager : public SVMAllocsManager { public: using SVMAllocsManager::memoryManager; + using SVMAllocsManager::mtx; using SVMAllocsManager::multiOsContextSupport; using SVMAllocsManager::SVMAllocs; using SVMAllocsManager::SVMAllocsManager; diff --git a/shared/test/unit_test/memory_manager/CMakeLists.txt b/shared/test/unit_test/memory_manager/CMakeLists.txt index 7c0e3ed98e..5f2218424a 100644 --- a/shared/test/unit_test/memory_manager/CMakeLists.txt +++ b/shared/test/unit_test/memory_manager/CMakeLists.txt @@ -27,6 +27,7 @@ target_sources(neo_shared_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/storage_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/surface_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_manager_cache_tests.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_manager_tests.cpp ) add_subdirectories() \ No newline at end of file diff --git a/shared/test/unit_test/memory_manager/unified_memory_manager_tests.cpp b/shared/test/unit_test/memory_manager/unified_memory_manager_tests.cpp new file mode 100644 index 0000000000..2955148972 --- /dev/null +++ b/shared/test/unit_test/memory_manager/unified_memory_manager_tests.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/test/common/mocks/mock_device.h" +#include "shared/test/common/mocks/mock_memory_manager.h" +#include "shared/test/common/mocks/mock_svm_manager.h" +#include "shared/test/common/mocks/ult_device_factory.h" +#include "shared/test/common/test_macros/test.h" + +#include "gtest/gtest.h" + +using namespace NEO; + +TEST(SvmDeviceAllocationTest, givenGivenSvmAllocsManagerWhenObtainOwnershipCalledThenLockedUniqueLockReturned) { + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); + auto device = deviceFactory->rootDevices[0]; + auto svmManager = std::make_unique(device->getMemoryManager(), false); + + auto lock = svmManager->obtainOwnership(); + std::thread th1([&] { + EXPECT_FALSE(svmManager->mtx.try_lock()); + }); + th1.join(); + lock.unlock(); + std::thread th2([&] { + EXPECT_TRUE(svmManager->mtx.try_lock()); + svmManager->mtx.unlock(); + }); + th2.join(); +} \ No newline at end of file