feature: Implement appendMemoryPrefetch for Shared System USM allocations

Related-To: NEO-12989

Signed-off-by: John Falkowski <john.falkowski@intel.com>
This commit is contained in:
John Falkowski
2025-03-12 22:47:22 +00:00
committed by Compute-Runtime-Automation
parent cb68ada102
commit 4d281cf51d
29 changed files with 579 additions and 141 deletions

View File

@@ -1991,13 +1991,46 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(Align
}
// Handles a memory-prefetch request for the range starting at ptr.
//
// ptr  - start of the range to prefetch; either a pointer known to the SVM allocations manager
//        or, on devices that allow shared system allocations, an arbitrary system pointer.
// size - size of the range in bytes.
//
// Returns ZE_RESULT_ERROR_INVALID_ARGUMENT when ptr is not a known SVM allocation and the device
// does not allow shared system allocations; ZE_RESULT_SUCCESS otherwise.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryPrefetch(const void *ptr, size_t size) {
    auto svmAllocMgr = device->getDriverHandle()->getSvmAllocsManager();
    auto neoDevice = device->getNEODevice();

    // Flags this command list as requiring a prefetch and, when a prefetch manager is available,
    // registers the range in this list's prefetch context.
    auto requestPrefetch = [&]() {
        this->performMemoryPrefetch = true;
        auto prefetchManager = device->getDriverHandle()->getMemoryManager()->getPrefetchManager();
        if (prefetchManager) {
            prefetchManager->insertAllocation(this->prefetchContext, *svmAllocMgr, *neoDevice, ptr, size);
        }
    };

    auto allocData = svmAllocMgr->getSVMAlloc(ptr);
    if (!allocData) {
        // Not a USM allocation: only acceptable when the device may access shared system memory.
        if (!neoDevice->areSharedSystemAllocationsAllowed()) {
            return ZE_RESULT_ERROR_INVALID_ARGUMENT;
        }
        requestPrefetch();
        return ZE_RESULT_SUCCESS;
    }

    if (NEO::debugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.get() == true) {
        requestPrefetch();
    }

    // The explicit prefetch command is only programmed when the debug key is set to 1.
    if (NEO::debugManager.flags.AddStatePrefetchCmdToMemoryPrefetchAPI.get() != 1) {
        return ZE_RESULT_SUCCESS;
    }

    auto gpuAlloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
    commandContainer.addToResidencyContainer(gpuAlloc);

    size_t offset = ptrDiff(ptr, gpuAlloc->getGpuAddress());
    NEO::LinearStream &cmdStream = *commandContainer.getCommandStream();

    // NOTE(review): size is narrowed to 32 bits here - confirm callers never pass larger ranges.
    NEO::EncodeMemoryPrefetch<GfxFamily>::programMemoryPrefetch(cmdStream, *gpuAlloc, static_cast<uint32_t>(size), offset, neoDevice->getRootDeviceEnvironment());

    return ZE_RESULT_SUCCESS;
}
template <GFXCORE_FAMILY gfxCoreFamily>

View File

@@ -20,40 +20,6 @@
namespace L0 {
// XE_HPC specialization of the prefetch entry point.
// Rejects pointers unknown to the SVM allocations manager; otherwise optionally registers the
// allocation with the prefetch manager and optionally programs an explicit prefetch command,
// each step being controlled by its own debug key.
template <>
ze_result_t CommandListCoreFamily<IGFX_XE_HPC_CORE>::appendMemoryPrefetch(const void *ptr, size_t size) {
    auto driverHandle = device->getDriverHandle();
    auto allocData = driverHandle->getSvmAllocsManager()->getSVMAlloc(ptr);
    if (!allocData) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    const bool prefetchKmdMigrated = (NEO::debugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.get() == true);
    if (prefetchKmdMigrated) {
        this->performMemoryPrefetch = true;
        if (auto prefetchManager = device->getDriverHandle()->getMemoryManager()->getPrefetchManager()) {
            prefetchManager->insertAllocation(this->prefetchContext, ptr, *allocData);
        }
    }

    // Only emit the prefetch command into the stream when the debug key equals 1.
    if (NEO::debugManager.flags.AddStatePrefetchCmdToMemoryPrefetchAPI.get() == 1) {
        auto gpuAlloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
        commandContainer.addToResidencyContainer(gpuAlloc);

        NEO::LinearStream &cmdStream = *commandContainer.getCommandStream();
        size_t offset = ptrDiff(ptr, gpuAlloc->getGpuAddress());
        NEO::EncodeMemoryPrefetch<GfxFamily>::programMemoryPrefetch(cmdStream, *gpuAlloc, static_cast<uint32_t>(size), offset, device->getNEODevice()->getRootDeviceEnvironment());
    }

    return ZE_RESULT_SUCCESS;
}
template struct CommandListCoreFamily<IGFX_XE_HPC_CORE>;
template struct CommandListCoreFamilyImmediate<IGFX_XE_HPC_CORE>;

View File

@@ -177,6 +177,8 @@ TEST_F(CommandListCreateTests, givenNonExistingPtrThenAppendMemoryPrefetchReturn
ze_result_t returnValue;
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false));
ASSERT_NE(nullptr, commandList);
DebugManagerStateRestore restorer;
debugManager.flags.EnableSharedSystemUsmSupport.set(0);
auto res = commandList->appendMemoryPrefetch(nullptr, 0);
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, res);

View File

@@ -6,8 +6,10 @@
*/
#include "shared/source/os_interface/product_helper.h"
#include "shared/source/unified_memory/usm_memory_support.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/memory_manager/mock_prefetch_manager.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "level_zero/core/source/event/event.h"
@@ -456,5 +458,160 @@ HWTEST2_F(CmdListLargeGrfTestXe2Hpg,
testBody<FamilyType>();
}
// Fixture alias used by the Xe2 HPG appendMemoryPrefetch tests below; it is ModuleFixture wrapped in Test<>.
using CommandListStatePrefetchXe2HpgCore = Test<ModuleFixture>;
// Shared USM allocation, prefetch debug key enabled, mock prefetch manager installed:
// appendMemoryPrefetch succeeds and the command list reports a requested prefetch.
HWTEST2_F(CommandListStatePrefetchXe2HpgCore, givenUnifiedSharedMemoryWhenPrefetchApiIsCalledThenRequestMemoryPrefetchByDefault, IsXe2HpgCore) {
    auto *mockMemoryManager = static_cast<MockMemoryManager *>(device->getDriverHandle()->getMemoryManager());
    mockMemoryManager->prefetchManager.reset(new MockPrefetchManager());

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    ASSERT_EQ(ZE_RESULT_SUCCESS, commandList->initialize(device, NEO::EngineGroupType::compute, 0u));

    DebugManagerStateRestore flagsRestorer;
    debugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(true);

    size_t allocSize = 10;
    size_t allocAlignment = 1u;
    void *sharedPtr = nullptr;
    ze_device_mem_alloc_desc_t deviceAllocDesc = {};
    ze_host_mem_alloc_desc_t hostAllocDesc = {};
    EXPECT_EQ(ZE_RESULT_SUCCESS, context->allocSharedMem(device->toHandle(), &deviceAllocDesc, &hostAllocDesc, allocSize, allocAlignment, &sharedPtr));
    EXPECT_NE(nullptr, sharedPtr);

    EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendMemoryPrefetch(sharedPtr, allocSize));
    EXPECT_TRUE(commandList->isMemoryPrefetchRequested());

    context->freeMem(sharedPtr);
}
// Shared USM allocation with the prefetch debug key enabled but NO prefetch manager available:
// appendMemoryPrefetch must still succeed and mark the command list as requesting a prefetch.
HWTEST2_F(CommandListStatePrefetchXe2HpgCore, givenUnifiedSharedMemoryWhenPrefetchApiIsCalledThenRequestMemoryPrefetchByDefaultWithNoPrefetchManager, IsXe2HpgCore) {
    auto memoryManager = static_cast<MockMemoryManager *>(device->getDriverHandle()->getMemoryManager());
    // Drop any prefetch manager so the null-manager branch is the one exercised; installing a
    // mock here would make this test a duplicate of the variant without the suffix.
    memoryManager->prefetchManager.reset();

    auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    auto result = pCommandList->initialize(device, NEO::EngineGroupType::compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    DebugManagerStateRestore restore;
    debugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(true);

    size_t size = 10;
    size_t alignment = 1u;
    void *ptr = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    ze_host_mem_alloc_desc_t hostDesc = {};
    result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr);
    // Without a valid allocation the rest of the test is meaningless, so stop here on failure.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    ASSERT_NE(nullptr, ptr);

    result = pCommandList->appendMemoryPrefetch(ptr, size);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_TRUE(pCommandList->isMemoryPrefetchRequested());

    context->freeMem(ptr);
}
// Shared USM allocation with the prefetch debug key disabled:
// appendMemoryPrefetch succeeds but the command list does not report a requested prefetch.
HWTEST2_F(CommandListStatePrefetchXe2HpgCore, givenUnifiedSharedMemoryWhenPrefetchApiCalledAndDebugFlagFalseThenRequestMemoryPrefetchNotCalled, IsXe2HpgCore) {
    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    ASSERT_EQ(ZE_RESULT_SUCCESS, commandList->initialize(device, NEO::EngineGroupType::compute, 0u));

    DebugManagerStateRestore flagsRestorer;
    debugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(false);

    size_t allocSize = 10;
    size_t allocAlignment = 1u;
    void *sharedPtr = nullptr;
    ze_device_mem_alloc_desc_t deviceAllocDesc = {};
    ze_host_mem_alloc_desc_t hostAllocDesc = {};
    EXPECT_EQ(ZE_RESULT_SUCCESS, context->allocSharedMem(device->toHandle(), &deviceAllocDesc, &hostAllocDesc, allocSize, allocAlignment, &sharedPtr));
    EXPECT_NE(nullptr, sharedPtr);

    EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendMemoryPrefetch(sharedPtr, allocSize));
    EXPECT_FALSE(commandList->isMemoryPrefetchRequested());

    context->freeMem(sharedPtr);
}
// Plain malloc'ed pointer on a device whose capability table advertises shared system memory
// access with page faults, mock prefetch manager installed: the prefetch call succeeds and is recorded.
HWTEST2_F(CommandListStatePrefetchXe2HpgCore, givenSharedSystemAllocationOnSupportedDeviceWhenPrefetchApiIsCalledThenRequestMemoryPrefetchCalled, IsXe2HpgCore) {
    auto *mockMemoryManager = static_cast<MockMemoryManager *>(device->getDriverHandle()->getMemoryManager());
    mockMemoryManager->prefetchManager.reset(new MockPrefetchManager());

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    ASSERT_EQ(ZE_RESULT_SUCCESS, commandList->initialize(device, NEO::EngineGroupType::compute, 0u));

    // Override the capability for the duration of the test; the backup object restores it on scope exit.
    auto &hardwareInfo = *device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo();
    VariableBackup<uint64_t> capabilitiesBackup{&hardwareInfo.capabilityTable.sharedSystemMemCapabilities};
    capabilitiesBackup = UnifiedSharedMemoryFlags::access | UnifiedSharedMemoryFlags::sharedSystemPageFaultEnabled;

    size_t bufferSize = 10;
    void *systemPtr = malloc(bufferSize);
    EXPECT_NE(nullptr, systemPtr);

    EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendMemoryPrefetch(systemPtr, bufferSize));
    EXPECT_TRUE(commandList->isMemoryPrefetchRequested());

    free(systemPtr);
}
// Plain malloc'ed pointer on a device whose capability table advertises shared system memory
// access with page faults, and NO prefetch manager available: the prefetch call must still
// succeed and mark the command list as requesting a prefetch.
HWTEST2_F(CommandListStatePrefetchXe2HpgCore, givenSharedSystemAllocationOnSupportedDeviceWhenPrefetchApiIsCalledThenRequestMemoryPrefetchCalledWithNoPrefetchManager, IsXe2HpgCore) {
    // Explicitly remove the prefetch manager so the test does not depend on what the fixture's
    // memory manager holds by default.
    auto memoryManager = static_cast<MockMemoryManager *>(device->getDriverHandle()->getMemoryManager());
    memoryManager->prefetchManager.reset();

    auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    auto result = pCommandList->initialize(device, NEO::EngineGroupType::compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    // Override the capability for the duration of the test; the backup object restores it on scope exit.
    auto &hwInfo = *device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo();
    VariableBackup<uint64_t> sharedSystemMemCapabilities{&hwInfo.capabilityTable.sharedSystemMemCapabilities};
    sharedSystemMemCapabilities = UnifiedSharedMemoryFlags::access | UnifiedSharedMemoryFlags::sharedSystemPageFaultEnabled;

    size_t size = 10;
    void *ptr = malloc(size);
    ASSERT_NE(nullptr, ptr);

    result = pCommandList->appendMemoryPrefetch(ptr, size);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_TRUE(pCommandList->isMemoryPrefetchRequested());

    free(ptr);
}
// Plain malloc'ed pointer with EnableSharedSystemUsmSupport forced to 0:
// the prefetch call is rejected with INVALID_ARGUMENT and no prefetch is recorded.
HWTEST2_F(CommandListStatePrefetchXe2HpgCore, givenSharedSystemAllocationOnUnSupportedDeviceWhenPrefetchApiIsCalledThenRequestMemoryPrefetchNotCalled, IsXe2HpgCore) {
    auto *mockMemoryManager = static_cast<MockMemoryManager *>(device->getDriverHandle()->getMemoryManager());
    mockMemoryManager->prefetchManager.reset(new MockPrefetchManager());

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    ASSERT_EQ(ZE_RESULT_SUCCESS, commandList->initialize(device, NEO::EngineGroupType::compute, 0u));

    DebugManagerStateRestore flagsRestorer;
    debugManager.flags.EnableSharedSystemUsmSupport.set(0);

    size_t bufferSize = 10;
    void *systemPtr = malloc(bufferSize);
    EXPECT_NE(nullptr, systemPtr);

    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, commandList->appendMemoryPrefetch(systemPtr, bufferSize));
    EXPECT_FALSE(commandList->isMemoryPrefetchRequested());

    free(systemPtr);
}
} // namespace ult
} // namespace L0

View File

@@ -7,6 +7,7 @@
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/os_interface/product_helper.h"
#include "shared/source/unified_memory/usm_memory_support.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/memory_manager/mock_prefetch_manager.h"
@@ -91,7 +92,7 @@ HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenUnifiedSharedMemoryWhenPrefetc
context->freeMem(ptr);
}
HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenUnifiedSharedMemoryWhenPrefetchApiAndDebuKeyDisabledIsCalledThenRequestMemoryPrefetchIsNotPerformed, IsXeHpcCore) {
HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenUnifiedSharedMemoryWhenPrefetchApiAndDebugKeyDisabledIsCalledThenRequestMemoryPrefetchIsNotPerformed, IsXeHpcCore) {
DebugManagerStateRestore restore;
debugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(0);
@@ -117,6 +118,75 @@ HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenUnifiedSharedMemoryWhenPrefetc
context->freeMem(ptr);
}
// Plain malloc'ed pointer on a device whose capability table advertises shared system memory
// access with page faults, mock prefetch manager installed: the prefetch call succeeds and is recorded.
HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenSharedSystemAllocationOnSupportedDeviceWhenPrefetchApiIsCalledThenRequestMemoryPrefetchCalled, IsXeHpcCore) {
    auto *mockMemoryManager = static_cast<MockMemoryManager *>(device->getDriverHandle()->getMemoryManager());
    mockMemoryManager->prefetchManager.reset(new MockPrefetchManager());

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    ASSERT_EQ(ZE_RESULT_SUCCESS, commandList->initialize(device, NEO::EngineGroupType::compute, 0u));

    // Override the capability for the duration of the test; the backup object restores it on scope exit.
    auto &hardwareInfo = *device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo();
    VariableBackup<uint64_t> capabilitiesBackup{&hardwareInfo.capabilityTable.sharedSystemMemCapabilities};
    capabilitiesBackup = UnifiedSharedMemoryFlags::access | UnifiedSharedMemoryFlags::sharedSystemPageFaultEnabled;

    size_t bufferSize = 10;
    void *systemPtr = malloc(bufferSize);
    EXPECT_NE(nullptr, systemPtr);

    EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendMemoryPrefetch(systemPtr, bufferSize));
    EXPECT_TRUE(commandList->isMemoryPrefetchRequested());

    free(systemPtr);
}
// Plain malloc'ed pointer on a device whose capability table advertises shared system memory
// access with page faults, and NO prefetch manager available: the prefetch call must still
// succeed and mark the command list as requesting a prefetch.
HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenSharedSystemAllocationOnSupportedDeviceWhenPrefetchApiIsCalledThenRequestMemoryPrefetchCalledWithNoPrefetchManager, IsXeHpcCore) {
    // Explicitly remove the prefetch manager so the test does not depend on what the fixture's
    // memory manager holds by default.
    auto memoryManager = static_cast<MockMemoryManager *>(device->getDriverHandle()->getMemoryManager());
    memoryManager->prefetchManager.reset();

    auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    auto result = pCommandList->initialize(device, NEO::EngineGroupType::compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    // Override the capability for the duration of the test; the backup object restores it on scope exit.
    auto &hwInfo = *device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo();
    VariableBackup<uint64_t> sharedSystemMemCapabilities{&hwInfo.capabilityTable.sharedSystemMemCapabilities};
    sharedSystemMemCapabilities = UnifiedSharedMemoryFlags::access | UnifiedSharedMemoryFlags::sharedSystemPageFaultEnabled;

    size_t size = 10;
    void *ptr = malloc(size);
    ASSERT_NE(nullptr, ptr);

    result = pCommandList->appendMemoryPrefetch(ptr, size);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_TRUE(pCommandList->isMemoryPrefetchRequested());

    free(ptr);
}
// Plain malloc'ed pointer with EnableSharedSystemUsmSupport forced to 0:
// the prefetch call is rejected with INVALID_ARGUMENT and no prefetch is recorded.
HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenSharedSystemAllocationOnUnSupportedDeviceWhenPrefetchApiIsCalledThenRequestMemoryPrefetchNotCalled, IsXeHpcCore) {
    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    ASSERT_EQ(ZE_RESULT_SUCCESS, commandList->initialize(device, NEO::EngineGroupType::compute, 0u));

    DebugManagerStateRestore flagsRestorer;
    debugManager.flags.EnableSharedSystemUsmSupport.set(0);

    size_t bufferSize = 10;
    void *systemPtr = malloc(bufferSize);
    EXPECT_NE(nullptr, systemPtr);

    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, commandList->appendMemoryPrefetch(systemPtr, bufferSize));
    EXPECT_FALSE(commandList->isMemoryPrefetchRequested());

    free(systemPtr);
}
HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenForceMemoryPrefetchForKmdMigratedSharedAllocationsWhenExecutingCommandListsOnCommandQueueThenMemoryPrefetchIsCalled, IsXeHpcCore) {
DebugManagerStateRestore restore;
debugManager.flags.UseKmdMigration.set(true);