From 046f9d95fc3d57287af1f4515ab0b5404ed1ea3b Mon Sep 17 00:00:00 2001 From: "Milczarek, Slawomir" Date: Tue, 2 Aug 2022 16:08:54 +0000 Subject: [PATCH] zeCommandListAppendMemoryPrefetch with memory prefetch manager Related-To: NEO-6740 Signed-off-by: Milczarek, Slawomir --- level_zero/core/source/cmdlist/cmdlist.h | 4 + level_zero/core/source/cmdlist/cmdlist_hw.inl | 1 + .../source/cmdlist/cmdlist_hw_immediate.inl | 7 + .../core/source/cmdqueue/cmdqueue_hw.inl | 12 + .../xe_hpc_core/cmdlist_xe_hpc_core.cpp | 8 +- .../xe_hpc_core/test_cmdlist_xe_hpc_core.cpp | 262 ++++++++++++------ shared/source/memory_manager/CMakeLists.txt | 2 + .../source/memory_manager/memory_manager.cpp | 2 + shared/source/memory_manager/memory_manager.h | 6 + .../memory_manager/prefetch_manager.cpp | 34 +++ .../source/memory_manager/prefetch_manager.h | 37 +++ .../test/common/mocks/mock_memory_manager.h | 1 + .../memory_manager/mock_prefetch_manager.h | 17 ++ 13 files changed, 305 insertions(+), 88 deletions(-) create mode 100644 shared/source/memory_manager/prefetch_manager.cpp create mode 100644 shared/source/memory_manager/prefetch_manager.h create mode 100644 shared/test/unit_test/memory_manager/mock_prefetch_manager.h diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index e9c31d5077..c935ef1781 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -229,6 +229,9 @@ struct CommandList : _ze_command_list_handle_t { bool containsCooperativeKernels() const { return containsCooperativeKernelsFlag; } + bool isMemoryPrefetchRequested() const { + return performMemoryPrefetch; + } enum CommandListType : uint32_t { TYPE_REGULAR = 0u, @@ -293,6 +296,7 @@ struct CommandList : _ze_command_list_handle_t { bool internalUsage = false; bool containsCooperativeKernelsFlag = false; bool containsStatelessUncachedResource = false; + bool performMemoryPrefetch = false; }; using CommandListAllocatorFn = 
CommandList *(*)(uint32_t);
diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl
index b7ffafb3b7..1b8d42b16a 100644
--- a/level_zero/core/source/cmdlist/cmdlist_hw.inl
+++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl
@@ -85,6 +85,7 @@ ze_result_t CommandListCoreFamily::reset() {
     removeHostPtrAllocations();
     commandContainer.reset();
     containsStatelessUncachedResource = false;
+    performMemoryPrefetch = false;
     indirectAllocationsAllowed = false;
     unifiedMemoryControls.indirectHostAllocationsAllowed = false;
     unifiedMemoryControls.indirectSharedAllocationsAllowed = false;
diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl
index da9b67e86a..273e7e171f 100644
--- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl
+++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl
@@ -12,6 +12,7 @@
 #include "shared/source/helpers/hw_info.h"
 #include "shared/source/helpers/logical_state_helper.h"
 #include "shared/source/memory_manager/internal_allocation_storage.h"
+#include "shared/source/memory_manager/prefetch_manager.h"
 
 #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
 
@@ -108,6 +109,15 @@ ze_result_t CommandListCoreFamilyImmediate::executeCommandListImm
         this->migrateSharedAllocations();
     }
 
+    if (this->performMemoryPrefetch) {
+        // MemoryManager creates the prefetch manager only when local memory is supported, so it can be null here.
+        auto prefetchManager = this->device->getDriverHandle()->getMemoryManager()->getPrefetchManager();
+        if (prefetchManager) {
+            prefetchManager->migrateAllocationsToGpu(*this->device->getDriverHandle()->getSvmAllocsManager(), *this->device->getNEODevice());
+        }
+        this->performMemoryPrefetch = false;
+    }
+
     auto ioh = (this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::INDIRECT_OBJECT));
     NEO::IndirectHeap *dsh = nullptr;
     NEO::IndirectHeap *ssh = nullptr;
diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl
index db778118df..db13e0b89c 100644
--- 
a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl
+++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl
@@ -27,6 +27,7 @@
 #include "shared/source/helpers/preamble.h"
 #include "shared/source/memory_manager/graphics_allocation.h"
 #include "shared/source/memory_manager/memory_manager.h"
+#include "shared/source/memory_manager/prefetch_manager.h"
 #include "shared/source/memory_manager/residency_container.h"
 #include "shared/source/os_interface/hw_info_config.h"
 #include "shared/source/os_interface/os_context.h"
@@ -83,6 +84,7 @@ ze_result_t CommandQueueHw::executeCommandLists(
     auto anyCommandListWithoutCooperativeKernels = false;
     bool anyCommandListRequiresDisabledEUFusion = false;
     bool cachedMOCSAllowed = true;
+    bool performMemoryPrefetch = false;
 
     for (auto i = 0u; i < numCommandLists; i++) {
         auto commandList = CommandList::fromHandle(phCommandLists[i]);
@@ -108,6 +110,10 @@ ze_result_t CommandQueueHw::executeCommandLists(
         if (commandList->requiresQueueUncachedMocs && cachedMOCSAllowed == true) {
             cachedMOCSAllowed = false;
         }
+
+        if (commandList->isMemoryPrefetchRequested()) {
+            performMemoryPrefetch = true;
+        }
     }
 
     bool isMixingRegularAndCooperativeKernelsAllowed = NEO::DebugManager.flags.AllowMixingRegularAndCooperativeKernels.get();
@@ -446,6 +452,15 @@ ze_result_t CommandQueueHw::executeCommandLists(
         commandList->migrateSharedAllocations();
     }
 
+    if (performMemoryPrefetch) {
+        // MemoryManager creates the prefetch manager only when local memory is supported, so it can be null here.
+        auto prefetchManager = device->getDriverHandle()->getMemoryManager()->getPrefetchManager();
+        if (prefetchManager) {
+            prefetchManager->migrateAllocationsToGpu(*this->device->getDriverHandle()->getSvmAllocsManager(), *this->device->getNEODevice());
+        }
+        performMemoryPrefetch = false;
+    }
+
     if (!isCopyOnlyCommandQueue && stateSipRequired) {
         NEO::PreemptionHelper::programStateSipEndWa(child, *neoDevice);
     }
diff --git a/level_zero/core/source/xe_hpc_core/cmdlist_xe_hpc_core.cpp b/level_zero/core/source/xe_hpc_core/cmdlist_xe_hpc_core.cpp
index 34026cd1c4..96376e279e 100644
--- 
a/level_zero/core/source/xe_hpc_core/cmdlist_xe_hpc_core.cpp +++ b/level_zero/core/source/xe_hpc_core/cmdlist_xe_hpc_core.cpp @@ -5,6 +5,8 @@ * */ +#include "shared/source/memory_manager/prefetch_manager.h" + #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.inl" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" @@ -33,7 +35,11 @@ ze_result_t CommandListCoreFamily::appendMemoryPrefetch(const } if (NEO::DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.get() > 0) { - svmAllocMgr->prefetchMemory(*device->getNEODevice(), *allocData); + this->performMemoryPrefetch = true; + auto prefetchManager = device->getDriverHandle()->getMemoryManager()->getPrefetchManager(); + if (prefetchManager) { + prefetchManager->insertAllocation(*allocData); + } } if (NEO::DebugManager.flags.AddStatePrefetchCmdToMemoryPrefetchAPI.get() != 1) { diff --git a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp index 87e91023fc..840e28d16c 100644 --- a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp +++ b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp @@ -9,6 +9,7 @@ #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/hw_test.h" +#include "shared/test/unit_test/memory_manager/mock_prefetch_manager.h" #include "level_zero/core/source/event/event.h" #include "level_zero/core/test/unit_tests/fixtures/module_fixture.h" @@ -64,7 +65,7 @@ HWTEST2_F(CommandListAppendLaunchKernelXeHpcCore, givenKernelUsingSyncBufferWhen using CommandListStatePrefetchXeHpcCore = Test; -HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenUnifiedSharedMemoryWhenPrefetchApiCalledThenDontSetMemPrefetch, IsXeHpcCore) { +HWTEST2_F(CommandListStatePrefetchXeHpcCore, 
givenUnifiedSharedMemoryWhenPrefetchApiIsCalledThenDontRequestMemoryPrefetchByDefault, IsXeHpcCore) { auto pCommandList = std::make_unique>>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); @@ -75,20 +76,19 @@ HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenUnifiedSharedMemoryWhenPrefetc ze_device_mem_alloc_desc_t deviceDesc = {}; ze_host_mem_alloc_desc_t hostDesc = {}; - auto res = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr); - EXPECT_EQ(ZE_RESULT_SUCCESS, res); + result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); - auto ret = pCommandList->appendMemoryPrefetch(ptr, size); - EXPECT_EQ(ZE_RESULT_SUCCESS, ret); + result = pCommandList->appendMemoryPrefetch(ptr, size); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); - auto memoryManager = static_cast(device->getDriverHandle()->getMemoryManager()); - EXPECT_FALSE(memoryManager->setMemPrefetchCalled); + EXPECT_FALSE(pCommandList->isMemoryPrefetchRequested()); context->freeMem(ptr); } -HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigratedSharedAllocationsWhenPrefetchApiCalledThenDontCallSetMemPrefetchByDefault, IsXeHpcCore) { +HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigratedSharedAllocationsWhenPrefetchApiIsCalledThenRequestMemoryPrefetch, IsXeHpcCore) { DebugManagerStateRestore restore; DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1); @@ -102,60 +102,195 @@ HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigr ze_device_mem_alloc_desc_t deviceDesc = {}; ze_host_mem_alloc_desc_t hostDesc = {}; - auto res = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr); - EXPECT_EQ(ZE_RESULT_SUCCESS, res); + result = 
context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); - auto ret = pCommandList->appendMemoryPrefetch(ptr, size); - EXPECT_EQ(ZE_RESULT_SUCCESS, ret); + result = pCommandList->appendMemoryPrefetch(ptr, size); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); - auto memoryManager = static_cast(device->getDriverHandle()->getMemoryManager()); - EXPECT_FALSE(memoryManager->setMemPrefetchCalled); + EXPECT_TRUE(pCommandList->isMemoryPrefetchRequested()); context->freeMem(ptr); } -HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigratedSharedAllocationsSetWhenPrefetchApiCalledOnUnifiedSharedMemoryThenCallSetMemPrefetch, IsXeHpcCore) { +HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigratedSharedAllocationsSetWhenPrefetchApiIsCalledOnUnifiedSharedMemoryThenAppendAllocationForPrefetch, IsXeHpcCore) { + DebugManagerStateRestore restore; + DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1); + DebugManager.flags.UseKmdMigration.set(1); + + auto memoryManager = static_cast(device->getDriverHandle()->getMemoryManager()); + memoryManager->prefetchManager.reset(new MockPrefetchManager()); + + auto pCommandList = std::make_unique>>(); + auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + size_t size = 10; + size_t alignment = 1u; + void *ptr = nullptr; + + ze_device_mem_alloc_desc_t deviceDesc = {}; + ze_host_mem_alloc_desc_t hostDesc = {}; + result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, ptr); + + result = pCommandList->appendMemoryPrefetch(ptr, size); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_TRUE(pCommandList->isMemoryPrefetchRequested()); + + auto prefetchManager = 
static_cast(memoryManager->prefetchManager.get()); + EXPECT_EQ(1u, prefetchManager->allocations.size()); + + context->freeMem(ptr); +} + +HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigratedSharedAllocationsSetWhenPrefetchApiIsCalledOnUnifiedDeviceMemoryThenDontAppendAllocationForPrefetch, IsXeHpcCore) { + DebugManagerStateRestore restore; + DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1); + DebugManager.flags.UseKmdMigration.set(1); + + auto memoryManager = static_cast(device->getDriverHandle()->getMemoryManager()); + memoryManager->prefetchManager.reset(new MockPrefetchManager()); + + auto pCommandList = std::make_unique>>(); + auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + size_t size = 10; + size_t alignment = 1u; + void *ptr = nullptr; + + ze_device_mem_alloc_desc_t deviceDesc = {}; + result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, ptr); + + result = pCommandList->appendMemoryPrefetch(ptr, size); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_TRUE(pCommandList->isMemoryPrefetchRequested()); + + auto prefetchManager = static_cast(memoryManager->prefetchManager.get()); + EXPECT_EQ(0u, prefetchManager->allocations.size()); + + context->freeMem(ptr); +} + +HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigratedSharedAllocationsSetWhenPrefetchApiIsCalledOnUnifiedHostMemoryThenDontAppendAllocationForPrefetch, IsXeHpcCore) { + DebugManagerStateRestore restore; + DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1); + DebugManager.flags.UseKmdMigration.set(1); + + auto memoryManager = static_cast(device->getDriverHandle()->getMemoryManager()); + memoryManager->prefetchManager.reset(new MockPrefetchManager()); + + auto pCommandList = std::make_unique>>(); + auto result = 
pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + size_t size = 10; + size_t alignment = 1u; + void *ptr = nullptr; + + ze_host_mem_alloc_desc_t hostDesc = {}; + result = context->allocHostMem(&hostDesc, size, alignment, &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, ptr); + + result = pCommandList->appendMemoryPrefetch(ptr, size); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_TRUE(pCommandList->isMemoryPrefetchRequested()); + + auto prefetchManager = static_cast(memoryManager->prefetchManager.get()); + EXPECT_EQ(0u, prefetchManager->allocations.size()); + + context->freeMem(ptr); +} + +HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigratedSharedAllocationsSetWhenPrefetchApiIsCalledOnUnifiedDeviceMemoryThenDontCallSetMemPrefetchOnTheAssociatedDevice, IsXeHpcCore) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA; + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; + DebugManagerStateRestore restore; DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1); DebugManager.flags.UseKmdMigration.set(1); EXPECT_EQ(0b0001u, neoDevice->deviceBitfield.to_ulong()); - auto pCommandList = std::make_unique>>(); - auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); - ASSERT_EQ(ZE_RESULT_SUCCESS, result); + auto memoryManager = static_cast(device->getDriverHandle()->getMemoryManager()); + memoryManager->prefetchManager.reset(new MockPrefetchManager()); + + createKernel(); + ze_result_t returnValue; + ze_command_queue_desc_t queueDesc = {}; + auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::RenderCompute, returnValue); + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t 
eventDesc = {}; + eventDesc.index = 0; + + auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; - ze_host_mem_alloc_desc_t hostDesc = {}; - auto res = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr); - EXPECT_EQ(ZE_RESULT_SUCCESS, res); + auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); - auto ret = pCommandList->appendMemoryPrefetch(ptr, size); - EXPECT_EQ(ZE_RESULT_SUCCESS, ret); + result = commandList->appendMemoryPrefetch(ptr, size); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); - auto memoryManager = static_cast(device->getDriverHandle()->getMemoryManager()); - EXPECT_TRUE(memoryManager->setMemPrefetchCalled); - EXPECT_EQ(0u, memoryManager->memPrefetchSubDeviceId); + ze_group_count_t groupCount{1, 1, 1}; + CmdListKernelLaunchParams launchParams = {}; + result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, event->toHandle(), 0, nullptr, launchParams); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_FALSE(memoryManager->setMemPrefetchCalled); context->freeMem(ptr); + commandList->destroy(); } -HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigratedSharedAllocationsSetWhenPrefetchApiCalledOnUnifiedSharedMemoryThenCallSetMemPrefetchOnTheAssociatedDevice, IsXeHpcCore) { +HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigratedSharedAllocationsSetWhenPrefetchApiIsCalledOnUnifiedSharedMemoryThenCallSetMemPrefetchOnTheAssociatedDevice, IsXeHpcCore) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + using POSTSYNC_DATA = typename 
FamilyType::POSTSYNC_DATA; + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; + DebugManagerStateRestore restore; DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1); DebugManager.flags.UseKmdMigration.set(1); neoDevice->deviceBitfield = 0b0010; - auto pCommandList = std::make_unique>>(); - auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); - ASSERT_EQ(ZE_RESULT_SUCCESS, result); + auto memoryManager = static_cast(device->getDriverHandle()->getMemoryManager()); + memoryManager->prefetchManager.reset(new MockPrefetchManager()); + + createKernel(); + ze_result_t returnValue; + ze_command_queue_desc_t queueDesc = {}; + auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::RenderCompute, returnValue); + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + + auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); size_t size = 10; size_t alignment = 1u; @@ -163,70 +298,23 @@ HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigr ze_device_mem_alloc_desc_t deviceDesc = {}; ze_host_mem_alloc_desc_t hostDesc = {}; - auto res = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr); - EXPECT_EQ(ZE_RESULT_SUCCESS, res); + auto result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); - auto ret = pCommandList->appendMemoryPrefetch(ptr, size); - EXPECT_EQ(ZE_RESULT_SUCCESS, ret); + result = commandList->appendMemoryPrefetch(ptr, size); + 
EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + ze_group_count_t groupCount{1, 1, 1}; + CmdListKernelLaunchParams launchParams = {}; + result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, event->toHandle(), 0, nullptr, launchParams); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); - auto memoryManager = static_cast(device->getDriverHandle()->getMemoryManager()); EXPECT_TRUE(memoryManager->setMemPrefetchCalled); EXPECT_EQ(1u, memoryManager->memPrefetchSubDeviceId); context->freeMem(ptr); -} - -HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigratedSharedAllocationsSetWhenPrefetchApiCalledOnUnifiedDeviceMemoryThenDontCallSetMemPrefetch, IsXeHpcCore) { - DebugManagerStateRestore restore; - DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1); - DebugManager.flags.UseKmdMigration.set(1); - - auto pCommandList = std::make_unique>>(); - auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); - ASSERT_EQ(ZE_RESULT_SUCCESS, result); - - size_t size = 10; - size_t alignment = 1u; - void *ptr = nullptr; - - ze_device_mem_alloc_desc_t deviceDesc = {}; - context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); - EXPECT_NE(nullptr, ptr); - - auto ret = pCommandList->appendMemoryPrefetch(ptr, size); - EXPECT_EQ(ZE_RESULT_SUCCESS, ret); - - auto memoryManager = static_cast(device->getDriverHandle()->getMemoryManager()); - EXPECT_FALSE(memoryManager->setMemPrefetchCalled); - - context->freeMem(ptr); -} - -HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigratedSharedAllocationsSetWhenPrefetchApiCalledOnUnifiedHostMemoryThenDontCallSetMemPrefetch, IsXeHpcCore) { - DebugManagerStateRestore restore; - DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1); - DebugManager.flags.UseKmdMigration.set(1); - - auto pCommandList = std::make_unique>>(); - auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 
0u); - ASSERT_EQ(ZE_RESULT_SUCCESS, result); - - size_t size = 10; - size_t alignment = 1u; - void *ptr = nullptr; - - ze_host_mem_alloc_desc_t hostDesc = {}; - context->allocHostMem(&hostDesc, size, alignment, &ptr); - EXPECT_NE(nullptr, ptr); - - auto ret = pCommandList->appendMemoryPrefetch(ptr, size); - EXPECT_EQ(ZE_RESULT_SUCCESS, ret); - - auto memoryManager = static_cast(device->getDriverHandle()->getMemoryManager()); - EXPECT_FALSE(memoryManager->setMemPrefetchCalled); - - context->freeMem(ptr); + commandList->destroy(); } using CommandListEventFenceTestsXeHpcCore = Test; diff --git a/shared/source/memory_manager/CMakeLists.txt b/shared/source/memory_manager/CMakeLists.txt index 0fe156f348..b7c32edce7 100644 --- a/shared/source/memory_manager/CMakeLists.txt +++ b/shared/source/memory_manager/CMakeLists.txt @@ -57,6 +57,8 @@ set(NEO_CORE_MEMORY_MANAGER ${CMAKE_CURRENT_SOURCE_DIR}/page_table.cpp ${CMAKE_CURRENT_SOURCE_DIR}/page_table.h ${CMAKE_CURRENT_SOURCE_DIR}/page_table.inl + ${CMAKE_CURRENT_SOURCE_DIR}/prefetch_manager.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/prefetch_manager.h ) if(ENABLE_DYNAMIC_MEMORY_TRACKING) diff --git a/shared/source/memory_manager/memory_manager.cpp b/shared/source/memory_manager/memory_manager.cpp index c9f29e37e3..fabf27f49b 100644 --- a/shared/source/memory_manager/memory_manager.cpp +++ b/shared/source/memory_manager/memory_manager.cpp @@ -27,6 +27,7 @@ #include "shared/source/memory_manager/deferred_deleter.h" #include "shared/source/memory_manager/host_ptr_manager.h" #include "shared/source/memory_manager/internal_allocation_storage.h" +#include "shared/source/memory_manager/prefetch_manager.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/os_context.h" #include "shared/source/os_interface/os_interface.h" @@ -62,6 +63,7 @@ MemoryManager::MemoryManager(ExecutionEnvironment &executionEnvironment) : execu if (anyLocalMemorySupported) { pageFaultManager = PageFaultManager::create(); + 
prefetchManager = PrefetchManager::create(); } if (DebugManager.flags.EnableMultiStorageResources.get() != -1) { diff --git a/shared/source/memory_manager/memory_manager.h b/shared/source/memory_manager/memory_manager.h index d6337c9381..c22508835c 100644 --- a/shared/source/memory_manager/memory_manager.h +++ b/shared/source/memory_manager/memory_manager.h @@ -34,6 +34,7 @@ class ExecutionEnvironment; class Gmm; class HostPtrManager; class OsContext; +class PrefetchManager; enum AllocationUsage { TEMPORARY_ALLOCATION, @@ -154,6 +155,10 @@ class MemoryManager { return pageFaultManager.get(); } + PrefetchManager *getPrefetchManager() const { + return prefetchManager.get(); + } + void waitForDeletions(); MOCKABLE_VIRTUAL void waitForEnginesCompletion(GraphicsAllocation &graphicsAllocation); void cleanTemporaryAllocationListOnAllEngines(bool waitForCompletion); @@ -313,6 +318,7 @@ class MemoryManager { std::vector> externalLocalMemoryUsageBankSelector; void *reservedMemory = nullptr; std::unique_ptr pageFaultManager; + std::unique_ptr prefetchManager; OSMemory::ReservedCpuAddressRange reservedCpuAddressRange; HeapAssigner heapAssigner; AlignmentSelector alignmentSelector = {}; diff --git a/shared/source/memory_manager/prefetch_manager.cpp b/shared/source/memory_manager/prefetch_manager.cpp new file mode 100644 index 0000000000..096700c897 --- /dev/null +++ b/shared/source/memory_manager/prefetch_manager.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/memory_manager/prefetch_manager.h" + +#include "shared/source/device/device.h" +#include "shared/source/memory_manager/unified_memory_manager.h" + +namespace NEO { + +std::unique_ptr PrefetchManager::create() { + return std::make_unique(); +} + +void PrefetchManager::insertAllocation(SvmAllocationData &svmData) { + std::unique_lock lock{mtx}; + if (svmData.memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY) { + 
allocations.push_back(svmData);
+    }
+}
+
+void PrefetchManager::migrateAllocationsToGpu(SVMAllocsManager &unifiedMemoryManager, Device &device) {
+    std::unique_lock<SpinLock> lock{mtx};
+    for (auto &allocData : allocations) {
+        unifiedMemoryManager.prefetchMemory(device, allocData);
+    }
+    allocations.clear();
+}
+
+} // namespace NEO
diff --git a/shared/source/memory_manager/prefetch_manager.h b/shared/source/memory_manager/prefetch_manager.h
new file mode 100644
index 0000000000..001eae45a9
--- /dev/null
+++ b/shared/source/memory_manager/prefetch_manager.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2022 Intel Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ */
+
+#pragma once
+
+#include "shared/source/helpers/non_copyable_or_moveable.h"
+#include "shared/source/memory_manager/unified_memory_manager.h"
+#include "shared/source/utilities/spinlock.h"
+
+#include <memory>
+#include <vector>
+
+namespace NEO {
+
+class Device;
+class SVMAllocsManager;
+
+// Records shared unified memory allocations requested for prefetch and passes them to SVMAllocsManager::prefetchMemory.
+class PrefetchManager : public NonCopyableOrMovableClass {
+  public:
+    static std::unique_ptr<PrefetchManager> create();
+
+    virtual ~PrefetchManager() = default;
+
+    // Records svmData for a later prefetch; allocations that are not SHARED_UNIFIED_MEMORY are ignored.
+    void insertAllocation(SvmAllocationData &svmData);
+
+    // Calls unifiedMemoryManager.prefetchMemory(device, ...) for every recorded allocation, then clears the list.
+    void migrateAllocationsToGpu(SVMAllocsManager &unifiedMemoryManager, Device &device);
+
+  protected:
+    std::vector<SvmAllocationData> allocations;
+    // Held while allocations is read or modified.
+    SpinLock mtx;
+};
+
+} // namespace NEO
diff --git a/shared/test/common/mocks/mock_memory_manager.h b/shared/test/common/mocks/mock_memory_manager.h
index 134c1726d5..a7323dcc29 100644
--- a/shared/test/common/mocks/mock_memory_manager.h
+++ b/shared/test/common/mocks/mock_memory_manager.h
@@ -43,6 +43,7 @@ class MockMemoryManager : public MemoryManagerCreate {
     using MemoryManager::multiContextResourceDestructor;
     using MemoryManager::overrideAllocationData;
     using MemoryManager::pageFaultManager;
+    using MemoryManager::prefetchManager;
     using MemoryManager::registeredEngines;
     using MemoryManager::supportsMultiStorageResources;
     using MemoryManager::useNonSvmHostPtrAlloc;
diff --git 
a/shared/test/unit_test/memory_manager/mock_prefetch_manager.h b/shared/test/unit_test/memory_manager/mock_prefetch_manager.h new file mode 100644 index 0000000000..44bc1894d7 --- /dev/null +++ b/shared/test/unit_test/memory_manager/mock_prefetch_manager.h @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +#include "shared/source/memory_manager/prefetch_manager.h" + +using namespace NEO; + +class MockPrefetchManager : public PrefetchManager { + public: + using PrefetchManager::allocations; +};