mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-24 21:18:24 +08:00
feature: Implement appendMemoryPrefetch for Shared System USM allocations
Related-To: NEO-12989 Signed-off-by: John Falkowski <john.falkowski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
cb68ada102
commit
4d281cf51d
@@ -1991,13 +1991,46 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(Align
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryPrefetch(const void *ptr,
|
||||
size_t count) {
|
||||
auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr);
|
||||
if (allocData) {
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryPrefetch(const void *ptr, size_t size) {
|
||||
auto svmAllocMgr = device->getDriverHandle()->getSvmAllocsManager();
|
||||
auto allocData = svmAllocMgr->getSVMAlloc(ptr);
|
||||
|
||||
if (!allocData) {
|
||||
if (device->getNEODevice()->areSharedSystemAllocationsAllowed()) {
|
||||
this->performMemoryPrefetch = true;
|
||||
auto prefetchManager = device->getDriverHandle()->getMemoryManager()->getPrefetchManager();
|
||||
if (prefetchManager) {
|
||||
prefetchManager->insertAllocation(this->prefetchContext, *device->getDriverHandle()->getSvmAllocsManager(), *device->getNEODevice(), ptr, size);
|
||||
}
|
||||
return ZE_RESULT_SUCCESS;
|
||||
} else {
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
}
|
||||
|
||||
if (NEO::debugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.get() == true) {
|
||||
this->performMemoryPrefetch = true;
|
||||
auto prefetchManager = device->getDriverHandle()->getMemoryManager()->getPrefetchManager();
|
||||
if (prefetchManager) {
|
||||
prefetchManager->insertAllocation(this->prefetchContext, *device->getDriverHandle()->getSvmAllocsManager(), *device->getNEODevice(), ptr, size);
|
||||
}
|
||||
}
|
||||
|
||||
if (NEO::debugManager.flags.AddStatePrefetchCmdToMemoryPrefetchAPI.get() != 1) {
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
|
||||
auto gpuAlloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
|
||||
|
||||
commandContainer.addToResidencyContainer(gpuAlloc);
|
||||
|
||||
size_t offset = ptrDiff(ptr, gpuAlloc->getGpuAddress());
|
||||
|
||||
NEO::LinearStream &cmdStream = *commandContainer.getCommandStream();
|
||||
|
||||
NEO::EncodeMemoryPrefetch<GfxFamily>::programMemoryPrefetch(cmdStream, *gpuAlloc, static_cast<uint32_t>(size), offset, device->getNEODevice()->getRootDeviceEnvironment());
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
|
||||
@@ -20,40 +20,6 @@
|
||||
|
||||
namespace L0 {
|
||||
|
||||
template <>
|
||||
ze_result_t CommandListCoreFamily<IGFX_XE_HPC_CORE>::appendMemoryPrefetch(const void *ptr, size_t size) {
|
||||
auto svmAllocMgr = device->getDriverHandle()->getSvmAllocsManager();
|
||||
auto allocData = svmAllocMgr->getSVMAlloc(ptr);
|
||||
|
||||
if (!allocData) {
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
if (NEO::debugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.get() == true) {
|
||||
this->performMemoryPrefetch = true;
|
||||
auto prefetchManager = device->getDriverHandle()->getMemoryManager()->getPrefetchManager();
|
||||
if (prefetchManager) {
|
||||
prefetchManager->insertAllocation(this->prefetchContext, ptr, *allocData);
|
||||
}
|
||||
}
|
||||
|
||||
if (NEO::debugManager.flags.AddStatePrefetchCmdToMemoryPrefetchAPI.get() != 1) {
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
auto gpuAlloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
|
||||
|
||||
commandContainer.addToResidencyContainer(gpuAlloc);
|
||||
|
||||
size_t offset = ptrDiff(ptr, gpuAlloc->getGpuAddress());
|
||||
|
||||
NEO::LinearStream &cmdStream = *commandContainer.getCommandStream();
|
||||
|
||||
NEO::EncodeMemoryPrefetch<GfxFamily>::programMemoryPrefetch(cmdStream, *gpuAlloc, static_cast<uint32_t>(size), offset, device->getNEODevice()->getRootDeviceEnvironment());
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
template struct CommandListCoreFamily<IGFX_XE_HPC_CORE>;
|
||||
template struct CommandListCoreFamilyImmediate<IGFX_XE_HPC_CORE>;
|
||||
|
||||
|
||||
@@ -177,6 +177,8 @@ TEST_F(CommandListCreateTests, givenNonExistingPtrThenAppendMemoryPrefetchReturn
|
||||
ze_result_t returnValue;
|
||||
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false));
|
||||
ASSERT_NE(nullptr, commandList);
|
||||
DebugManagerStateRestore restorer;
|
||||
debugManager.flags.EnableSharedSystemUsmSupport.set(0);
|
||||
|
||||
auto res = commandList->appendMemoryPrefetch(nullptr, 0);
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, res);
|
||||
|
||||
@@ -6,8 +6,10 @@
|
||||
*/
|
||||
|
||||
#include "shared/source/os_interface/product_helper.h"
|
||||
#include "shared/source/unified_memory/usm_memory_support.h"
|
||||
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/memory_manager/mock_prefetch_manager.h"
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
|
||||
#include "level_zero/core/source/event/event.h"
|
||||
@@ -456,5 +458,160 @@ HWTEST2_F(CmdListLargeGrfTestXe2Hpg,
|
||||
testBody<FamilyType>();
|
||||
}
|
||||
|
||||
using CommandListStatePrefetchXe2HpgCore = Test<ModuleFixture>;
|
||||
|
||||
HWTEST2_F(CommandListStatePrefetchXe2HpgCore, givenUnifiedSharedMemoryWhenPrefetchApiIsCalledThenRequestMemoryPrefetchByDefault, IsXe2HpgCore) {
|
||||
auto memoryManager = static_cast<MockMemoryManager *>(device->getDriverHandle()->getMemoryManager());
|
||||
memoryManager->prefetchManager.reset(new MockPrefetchManager());
|
||||
auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
auto result = pCommandList->initialize(device, NEO::EngineGroupType::compute, 0u);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
DebugManagerStateRestore restore;
|
||||
debugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(true);
|
||||
|
||||
size_t size = 10;
|
||||
size_t alignment = 1u;
|
||||
void *ptr = nullptr;
|
||||
|
||||
ze_device_mem_alloc_desc_t deviceDesc = {};
|
||||
ze_host_mem_alloc_desc_t hostDesc = {};
|
||||
result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_NE(nullptr, ptr);
|
||||
|
||||
result = pCommandList->appendMemoryPrefetch(ptr, size);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_TRUE(pCommandList->isMemoryPrefetchRequested());
|
||||
|
||||
context->freeMem(ptr);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListStatePrefetchXe2HpgCore, givenUnifiedSharedMemoryWhenPrefetchApiIsCalledThenRequestMemoryPrefetchByDefaultWithNoPrefetchManager, IsXe2HpgCore) {
|
||||
auto memoryManager = static_cast<MockMemoryManager *>(device->getDriverHandle()->getMemoryManager());
|
||||
memoryManager->prefetchManager.reset(new MockPrefetchManager());
|
||||
auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
auto result = pCommandList->initialize(device, NEO::EngineGroupType::compute, 0u);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
DebugManagerStateRestore restore;
|
||||
debugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(true);
|
||||
|
||||
size_t size = 10;
|
||||
size_t alignment = 1u;
|
||||
void *ptr = nullptr;
|
||||
|
||||
ze_device_mem_alloc_desc_t deviceDesc = {};
|
||||
ze_host_mem_alloc_desc_t hostDesc = {};
|
||||
result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_NE(nullptr, ptr);
|
||||
|
||||
result = pCommandList->appendMemoryPrefetch(ptr, size);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_TRUE(pCommandList->isMemoryPrefetchRequested());
|
||||
|
||||
context->freeMem(ptr);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListStatePrefetchXe2HpgCore, givenUnifiedSharedMemoryWhenPrefetchApiCalledAndDebugFlagFalseThenRequestMemoryPrefetchNotCalled, IsXe2HpgCore) {
|
||||
auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
auto result = pCommandList->initialize(device, NEO::EngineGroupType::compute, 0u);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
DebugManagerStateRestore restore;
|
||||
debugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(false);
|
||||
|
||||
size_t size = 10;
|
||||
size_t alignment = 1u;
|
||||
void *ptr = nullptr;
|
||||
|
||||
ze_device_mem_alloc_desc_t deviceDesc = {};
|
||||
ze_host_mem_alloc_desc_t hostDesc = {};
|
||||
result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_NE(nullptr, ptr);
|
||||
|
||||
result = pCommandList->appendMemoryPrefetch(ptr, size);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_FALSE(pCommandList->isMemoryPrefetchRequested());
|
||||
|
||||
context->freeMem(ptr);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListStatePrefetchXe2HpgCore, givenSharedSystemAllocationOnSupportedDeviceWhenPrefetchApiIsCalledThenRequestMemoryPrefetchCalled, IsXe2HpgCore) {
|
||||
auto memoryManager = static_cast<MockMemoryManager *>(device->getDriverHandle()->getMemoryManager());
|
||||
memoryManager->prefetchManager.reset(new MockPrefetchManager());
|
||||
auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
auto result = pCommandList->initialize(device, NEO::EngineGroupType::compute, 0u);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto &hwInfo = *device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo();
|
||||
|
||||
VariableBackup<uint64_t> sharedSystemMemCapabilities{&hwInfo.capabilityTable.sharedSystemMemCapabilities};
|
||||
sharedSystemMemCapabilities = UnifiedSharedMemoryFlags::access | UnifiedSharedMemoryFlags::sharedSystemPageFaultEnabled;
|
||||
|
||||
size_t size = 10;
|
||||
void *ptr = malloc(size);
|
||||
|
||||
EXPECT_NE(nullptr, ptr);
|
||||
|
||||
result = pCommandList->appendMemoryPrefetch(ptr, size);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_TRUE(pCommandList->isMemoryPrefetchRequested());
|
||||
|
||||
free(ptr);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListStatePrefetchXe2HpgCore, givenSharedSystemAllocationOnSupportedDeviceWhenPrefetchApiIsCalledThenRequestMemoryPrefetchCalledWithNoPrefetchManager, IsXe2HpgCore) {
|
||||
auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
auto result = pCommandList->initialize(device, NEO::EngineGroupType::compute, 0u);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto &hwInfo = *device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo();
|
||||
|
||||
VariableBackup<uint64_t> sharedSystemMemCapabilities{&hwInfo.capabilityTable.sharedSystemMemCapabilities};
|
||||
sharedSystemMemCapabilities = UnifiedSharedMemoryFlags::access | UnifiedSharedMemoryFlags::sharedSystemPageFaultEnabled;
|
||||
|
||||
size_t size = 10;
|
||||
void *ptr = malloc(size);
|
||||
|
||||
EXPECT_NE(nullptr, ptr);
|
||||
|
||||
result = pCommandList->appendMemoryPrefetch(ptr, size);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_TRUE(pCommandList->isMemoryPrefetchRequested());
|
||||
|
||||
free(ptr);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListStatePrefetchXe2HpgCore, givenSharedSystemAllocationOnUnSupportedDeviceWhenPrefetchApiIsCalledThenRequestMemoryPrefetchNotCalled, IsXe2HpgCore) {
|
||||
auto memoryManager = static_cast<MockMemoryManager *>(device->getDriverHandle()->getMemoryManager());
|
||||
memoryManager->prefetchManager.reset(new MockPrefetchManager());
|
||||
auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
auto result = pCommandList->initialize(device, NEO::EngineGroupType::compute, 0u);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
DebugManagerStateRestore restore;
|
||||
debugManager.flags.EnableSharedSystemUsmSupport.set(0);
|
||||
|
||||
size_t size = 10;
|
||||
void *ptr = malloc(size);
|
||||
|
||||
EXPECT_NE(nullptr, ptr);
|
||||
|
||||
result = pCommandList->appendMemoryPrefetch(ptr, size);
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result);
|
||||
|
||||
EXPECT_FALSE(pCommandList->isMemoryPrefetchRequested());
|
||||
|
||||
free(ptr);
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
#include "shared/source/os_interface/product_helper.h"
|
||||
#include "shared/source/unified_memory/usm_memory_support.h"
|
||||
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/memory_manager/mock_prefetch_manager.h"
|
||||
@@ -91,7 +92,7 @@ HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenUnifiedSharedMemoryWhenPrefetc
|
||||
context->freeMem(ptr);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenUnifiedSharedMemoryWhenPrefetchApiAndDebuKeyDisabledIsCalledThenRequestMemoryPrefetchIsNotPerformed, IsXeHpcCore) {
|
||||
HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenUnifiedSharedMemoryWhenPrefetchApiAndDebugKeyDisabledIsCalledThenRequestMemoryPrefetchIsNotPerformed, IsXeHpcCore) {
|
||||
DebugManagerStateRestore restore;
|
||||
debugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(0);
|
||||
|
||||
@@ -117,6 +118,75 @@ HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenUnifiedSharedMemoryWhenPrefetc
|
||||
context->freeMem(ptr);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenSharedSystemAllocationOnSupportedDeviceWhenPrefetchApiIsCalledThenRequestMemoryPrefetchCalled, IsXeHpcCore) {
|
||||
auto memoryManager = static_cast<MockMemoryManager *>(device->getDriverHandle()->getMemoryManager());
|
||||
memoryManager->prefetchManager.reset(new MockPrefetchManager());
|
||||
auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
auto result = pCommandList->initialize(device, NEO::EngineGroupType::compute, 0u);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto &hwInfo = *device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo();
|
||||
|
||||
VariableBackup<uint64_t> sharedSystemMemCapabilities{&hwInfo.capabilityTable.sharedSystemMemCapabilities};
|
||||
sharedSystemMemCapabilities = UnifiedSharedMemoryFlags::access | UnifiedSharedMemoryFlags::sharedSystemPageFaultEnabled;
|
||||
|
||||
size_t size = 10;
|
||||
void *ptr = malloc(size);
|
||||
|
||||
EXPECT_NE(nullptr, ptr);
|
||||
|
||||
result = pCommandList->appendMemoryPrefetch(ptr, size);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_TRUE(pCommandList->isMemoryPrefetchRequested());
|
||||
|
||||
free(ptr);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenSharedSystemAllocationOnSupportedDeviceWhenPrefetchApiIsCalledThenRequestMemoryPrefetchCalledWithNoPrefetchManager, IsXeHpcCore) {
|
||||
auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
auto result = pCommandList->initialize(device, NEO::EngineGroupType::compute, 0u);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto &hwInfo = *device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo();
|
||||
|
||||
VariableBackup<uint64_t> sharedSystemMemCapabilities{&hwInfo.capabilityTable.sharedSystemMemCapabilities};
|
||||
sharedSystemMemCapabilities = UnifiedSharedMemoryFlags::access | UnifiedSharedMemoryFlags::sharedSystemPageFaultEnabled;
|
||||
|
||||
size_t size = 10;
|
||||
void *ptr = malloc(size);
|
||||
|
||||
EXPECT_NE(nullptr, ptr);
|
||||
|
||||
result = pCommandList->appendMemoryPrefetch(ptr, size);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_TRUE(pCommandList->isMemoryPrefetchRequested());
|
||||
|
||||
free(ptr);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenSharedSystemAllocationOnUnSupportedDeviceWhenPrefetchApiIsCalledThenRequestMemoryPrefetchNotCalled, IsXeHpcCore) {
|
||||
auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
auto result = pCommandList->initialize(device, NEO::EngineGroupType::compute, 0u);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
DebugManagerStateRestore restore;
|
||||
debugManager.flags.EnableSharedSystemUsmSupport.set(0);
|
||||
|
||||
size_t size = 10;
|
||||
void *ptr = malloc(size);
|
||||
|
||||
EXPECT_NE(nullptr, ptr);
|
||||
|
||||
result = pCommandList->appendMemoryPrefetch(ptr, size);
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result);
|
||||
|
||||
EXPECT_FALSE(pCommandList->isMemoryPrefetchRequested());
|
||||
|
||||
free(ptr);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenForceMemoryPrefetchForKmdMigratedSharedAllocationsWhenExecutingCommandListsOnCommandQueueThenMemoryPrefetchIsCalled, IsXeHpcCore) {
|
||||
DebugManagerStateRestore restore;
|
||||
debugManager.flags.UseKmdMigration.set(true);
|
||||
|
||||
Reference in New Issue
Block a user