Add memory prefetch for kmd migrated shared allocations

This feature is disabled by default, controlled with the knob
AppendMemoryPrefetchForKmdMigratedSharedAllocations

Related-To: NEO-6740

Signed-off-by: Milczarek, Slawomir <slawomir.milczarek@intel.com>
This commit is contained in:
Milczarek, Slawomir
2022-03-02 13:38:28 +00:00
committed by Compute-Runtime-Automation
parent 10e7b9d5be
commit c0b7f05897
16 changed files with 244 additions and 0 deletions

View File

@@ -31,6 +31,20 @@ ze_result_t CommandListCoreFamily<IGFX_XE_HPC_CORE>::appendMemoryPrefetch(const
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
auto allowPrefetchingKmdMigratedSharedAllocation = false;
if (NEO::DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.get() != -1) {
allowPrefetchingKmdMigratedSharedAllocation = !!NEO::DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.get();
}
if (allowPrefetchingKmdMigratedSharedAllocation) {
auto memoryManager = device->getDriverHandle()->getMemoryManager();
if (memoryManager->isKmdMigrationAvailable(device->getRootDeviceIndex()) &&
(allocData->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY)) {
auto alloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
memoryManager->setMemPrefetch(alloc, device->getRootDeviceIndex());
}
}
if (NEO::DebugManager.flags.AddStatePrefetchCmdToMemoryPrefetchAPI.get() != 1) {
return ZE_RESULT_SUCCESS;
}

View File

@@ -105,6 +105,137 @@ HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenDebugFlagSetWhenPrefetchApiCal
context->freeMem(ptr);
}
HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenUnifiedSharedMemoryWhenPrefetchApiCalledThenDontSetMemPrefetch, IsXeHpcCore) {
auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
size_t size = 10;
size_t alignment = 1u;
void *ptr = nullptr;
ze_device_mem_alloc_desc_t deviceDesc = {};
ze_host_mem_alloc_desc_t hostDesc = {};
auto res = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
EXPECT_NE(nullptr, ptr);
auto ret = pCommandList->appendMemoryPrefetch(ptr, size);
EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
auto memoryManager = static_cast<MockMemoryManager *>(device->getDriverHandle()->getMemoryManager());
EXPECT_FALSE(memoryManager->setMemPrefetchCalled);
context->freeMem(ptr);
}
HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigratedSharedAllocationsWhenPrefetchApiCalledThenDontCallSetMemPrefetchByDefault, IsXeHpcCore) {
DebugManagerStateRestore restore;
DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1);
auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
size_t size = 10;
size_t alignment = 1u;
void *ptr = nullptr;
ze_device_mem_alloc_desc_t deviceDesc = {};
ze_host_mem_alloc_desc_t hostDesc = {};
auto res = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
EXPECT_NE(nullptr, ptr);
auto ret = pCommandList->appendMemoryPrefetch(ptr, size);
EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
auto memoryManager = static_cast<MockMemoryManager *>(device->getDriverHandle()->getMemoryManager());
EXPECT_FALSE(memoryManager->setMemPrefetchCalled);
context->freeMem(ptr);
}
HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigratedSharedAllocationsSetWhenPrefetchApiCalledOnUnifiedSharedMemoryThenCallSetMemPrefetch, IsXeHpcCore) {
DebugManagerStateRestore restore;
DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1);
DebugManager.flags.UseKmdMigration.set(1);
auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
size_t size = 10;
size_t alignment = 1u;
void *ptr = nullptr;
ze_device_mem_alloc_desc_t deviceDesc = {};
ze_host_mem_alloc_desc_t hostDesc = {};
auto res = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
EXPECT_NE(nullptr, ptr);
auto ret = pCommandList->appendMemoryPrefetch(ptr, size);
EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
auto memoryManager = static_cast<MockMemoryManager *>(device->getDriverHandle()->getMemoryManager());
EXPECT_TRUE(memoryManager->setMemPrefetchCalled);
context->freeMem(ptr);
}
HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigratedSharedAllocationsSetWhenPrefetchApiCalledOnUnifiedDeviceMemoryThenDontCallSetMemPrefetch, IsXeHpcCore) {
DebugManagerStateRestore restore;
DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1);
DebugManager.flags.UseKmdMigration.set(1);
auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
size_t size = 10;
size_t alignment = 1u;
void *ptr = nullptr;
ze_device_mem_alloc_desc_t deviceDesc = {};
context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr);
EXPECT_NE(nullptr, ptr);
auto ret = pCommandList->appendMemoryPrefetch(ptr, size);
EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
auto memoryManager = static_cast<MockMemoryManager *>(device->getDriverHandle()->getMemoryManager());
EXPECT_FALSE(memoryManager->setMemPrefetchCalled);
context->freeMem(ptr);
}
HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigratedSharedAllocationsSetWhenPrefetchApiCalledOnUnifiedHostMemoryThenDontCallSetMemPrefetch, IsXeHpcCore) {
DebugManagerStateRestore restore;
DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1);
DebugManager.flags.UseKmdMigration.set(1);
auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
size_t size = 10;
size_t alignment = 1u;
void *ptr = nullptr;
ze_host_mem_alloc_desc_t hostDesc = {};
context->allocHostMem(&hostDesc, size, alignment, &ptr);
EXPECT_NE(nullptr, ptr);
auto ret = pCommandList->appendMemoryPrefetch(ptr, size);
EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
auto memoryManager = static_cast<MockMemoryManager *>(device->getDriverHandle()->getMemoryManager());
EXPECT_FALSE(memoryManager->setMemPrefetchCalled);
context->freeMem(ptr);
}
HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenCommandBufferIsExhaustedWhenPrefetchApiCalledThenProgramStatePrefetch, IsXeHpcCore) {
using STATE_PREFETCH = typename FamilyType::STATE_PREFETCH;
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;

View File

@@ -4941,6 +4941,16 @@ TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenSetMemAdviseIsCalledThenUp
}
}
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenSetMemPrefetchIsCalledThenReturnTrue) {
TestedDrmMemoryManager memoryManager(false, false, false, *executionEnvironment);
BufferObject bo(mock, 1, 1024, 0);
DrmAllocation drmAllocation(0, AllocationType::UNIFIED_SHARED_MEMORY, &bo, nullptr, 0u, 0u, MemoryPool::LocalMemory);
EXPECT_EQ(&bo, drmAllocation.getBO());
EXPECT_TRUE(memoryManager.setMemPrefetch(&drmAllocation, rootDeviceIndex));
}
TEST_F(DrmMemoryManagerTest, givenPageFaultIsUnSupportedWhenCallingBindBoOnBufferAllocationThenAllocationShouldNotPageFaultAndExplicitResidencyIsNotRequired) {
auto executionEnvironment = std::make_unique<ExecutionEnvironment>();
executionEnvironment->prepareRootDeviceEnvironments(1);

View File

@@ -389,6 +389,7 @@ UseDrmVirtualEnginesForBcs = -1
LimitEngineCountForVirtualBcs = -1
LimitEngineCountForVirtualCcs = -1
ForceRunAloneContext = -1
AppendMemoryPrefetchForKmdMigratedSharedAllocations = -1
CreateContextWithAccessCounters = -1
AccessCountersTrigger = -1
AccessCountersGranularity = -1

View File

@@ -187,6 +187,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, UseDrmVirtualEnginesForBcs, -1, "-1: default, 0:
DECLARE_DEBUG_VARIABLE(int32_t, LimitEngineCountForVirtualBcs, -1, "-1: default, >0 Only use VirtualEngine with limited amount of engines, not max ")
DECLARE_DEBUG_VARIABLE(int32_t, LimitEngineCountForVirtualCcs, -1, "-1: default, >0 Only use VirtualEngine with limited amount of engines, not max ")
DECLARE_DEBUG_VARIABLE(int32_t, CreateContextWithAccessCounters, -1, "-1: default, 0: ignore, 1: create context with Access Counter programming")
DECLARE_DEBUG_VARIABLE(int32_t, AppendMemoryPrefetchForKmdMigratedSharedAllocations, -1, "-1: default, 0: ignore, 1: allow prefetching shared memory to the device associated with the specified command list")
DECLARE_DEBUG_VARIABLE(int32_t, AccessCountersTrigger, -1, "-1: default - disabled, 0: disabled, >= 0: triggering thresholds")
DECLARE_DEBUG_VARIABLE(int32_t, AccessCountersGranularity, -1, "-1: default - ACG_2MB, >= 0: granularites - 0: ACG_128K, 1: ACG_2M, 2: ACG_16M, 3: ACG_16M")
DECLARE_DEBUG_VARIABLE(int32_t, OverridePatIndex, -1, "-1: default, >=0: PatIndex to override")

View File

@@ -219,6 +219,7 @@ class MemoryManager {
virtual void registerLocalMemAlloc(GraphicsAllocation *allocation, uint32_t rootDeviceIndex){};
virtual bool setMemAdvise(GraphicsAllocation *gfxAllocation, MemAdviseFlags flags, uint32_t rootDeviceIndex) { return true; }
virtual bool setMemPrefetch(GraphicsAllocation *gfxAllocation, uint32_t rootDeviceIndex) { return true; }
bool isExternalAllocation(AllocationType allocationType);
LocalMemoryUsageBankSelector *getLocalMemoryUsageBankSelector(AllocationType allocationType, uint32_t rootDeviceIndex);

View File

@@ -291,6 +291,20 @@ bool DrmAllocation::setMemAdvise(Drm *drm, MemAdviseFlags flags) {
return success;
}
bool DrmAllocation::setMemPrefetch(Drm *drm) {
bool success = true;
auto ioctlHelper = drm->getIoctlHelper();
for (auto bo : bufferObjects) {
if (bo != nullptr) {
auto region = static_cast<uint32_t>((I915_MEMORY_CLASS_DEVICE << 16u) | 0u);
success &= ioctlHelper->setVmPrefetch(drm, bo->peekAddress(), bo->peekSize(), region);
}
}
return success;
}
void DrmAllocation::registerMemoryToUnmap(void *pointer, size_t size, DrmAllocation::MemoryUnmapFunction unmapFunction) {
this->memoryToUnmap.push_back({pointer, size, unmapFunction});
}

View File

@@ -85,6 +85,7 @@ class DrmAllocation : public GraphicsAllocation {
void setCachePolicy(CachePolicy memType);
bool setMemAdvise(Drm *drm, MemAdviseFlags flags);
bool setMemPrefetch(Drm *drm);
void *getMmapPtr() { return this->mmapPtr; }
void setMmapPtr(void *ptr) { this->mmapPtr = ptr; }

View File

@@ -229,6 +229,12 @@ bool DrmMemoryManager::setMemAdvise(GraphicsAllocation *gfxAllocation, MemAdvise
return drmAllocation->setMemAdvise(&this->getDrm(rootDeviceIndex), flags);
}
bool DrmMemoryManager::setMemPrefetch(GraphicsAllocation *gfxAllocation, uint32_t rootDeviceIndex) {
auto drmAllocation = static_cast<DrmAllocation *>(gfxAllocation);
return drmAllocation->setMemPrefetch(&this->getDrm(rootDeviceIndex));
}
NEO::BufferObject *DrmMemoryManager::allocUserptr(uintptr_t address, size_t size, uint64_t flags, uint32_t rootDeviceIndex) {
drm_i915_gem_userptr userptr = {};
userptr.user_ptr = address;

View File

@@ -68,6 +68,7 @@ class DrmMemoryManager : public MemoryManager {
bool isKmdMigrationAvailable(uint32_t rootDeviceIndex) override;
bool setMemAdvise(GraphicsAllocation *gfxAllocation, MemAdviseFlags flags, uint32_t rootDeviceIndex) override;
bool setMemPrefetch(GraphicsAllocation *gfxAllocation, uint32_t rootDeviceIndex) override;
std::unique_lock<std::mutex> acquireAllocLock();
std::vector<GraphicsAllocation *> &getSysMemAllocs();

View File

@@ -92,6 +92,7 @@ class IoctlHelper {
virtual uint32_t getAtomicAdvise(bool isNonAtomic) = 0;
virtual uint32_t getPreferredLocationAdvise() = 0;
virtual bool setVmBoAdvise(Drm *drm, int32_t handle, uint32_t attribute, void *region) = 0;
virtual bool setVmPrefetch(Drm *drm, uint64_t start, uint64_t length, uint32_t region) = 0;
virtual uint32_t getDirectSubmissionFlag() = 0;
virtual int32_t getMemRegionsIoctlVal() = 0;
virtual int32_t getEngineInfoIoctlVal() = 0;
@@ -142,6 +143,7 @@ class IoctlHelperUpstream : public IoctlHelper {
uint32_t getAtomicAdvise(bool isNonAtomic) override;
uint32_t getPreferredLocationAdvise() override;
bool setVmBoAdvise(Drm *drm, int32_t handle, uint32_t attribute, void *region) override;
bool setVmPrefetch(Drm *drm, uint64_t start, uint64_t length, uint32_t region) override;
uint32_t getDirectSubmissionFlag() override;
int32_t getMemRegionsIoctlVal() override;
int32_t getEngineInfoIoctlVal() override;
@@ -205,6 +207,7 @@ class IoctlHelperPrelim20 : public IoctlHelper {
uint32_t getAtomicAdvise(bool isNonAtomic) override;
uint32_t getPreferredLocationAdvise() override;
bool setVmBoAdvise(Drm *drm, int32_t handle, uint32_t attribute, void *region) override;
bool setVmPrefetch(Drm *drm, uint64_t start, uint64_t length, uint32_t region) override;
uint32_t getDirectSubmissionFlag() override;
int32_t getMemRegionsIoctlVal() override;
int32_t getEngineInfoIoctlVal() override;

View File

@@ -198,6 +198,23 @@ bool IoctlHelperPrelim20::setVmBoAdvise(Drm *drm, int32_t handle, uint32_t attri
return true;
}
bool IoctlHelperPrelim20::setVmPrefetch(Drm *drm, uint64_t start, uint64_t length, uint32_t region) {
prelim_drm_i915_gem_vm_prefetch vmPrefetch{};
vmPrefetch.length = length;
vmPrefetch.region = region;
vmPrefetch.start = start;
int ret = IoctlHelper::ioctl(drm, PRELIM_DRM_IOCTL_I915_GEM_VM_PREFETCH, &vmPrefetch);
if (ret != 0) {
int err = errno;
PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr, "ioctl(PRELIM_DRM_I915_GEM_VM_PREFETCH) failed with %d. errno=%d(%s)\n", ret, err, strerror(err));
DEBUG_BREAK_IF(true);
return false;
}
return true;
}
uint32_t IoctlHelperPrelim20::getDirectSubmissionFlag() {
return PRELIM_I915_CONTEXT_CREATE_FLAGS_ULLS;
}

View File

@@ -101,6 +101,10 @@ bool IoctlHelperUpstream::setVmBoAdvise(Drm *drm, int32_t handle, uint32_t attri
return true;
}
bool IoctlHelperUpstream::setVmPrefetch(Drm *drm, uint64_t start, uint64_t length, uint32_t region) {
return true;
}
uint32_t IoctlHelperUpstream::getDirectSubmissionFlag() {
return 0u;
}

View File

@@ -149,6 +149,18 @@ class MockMemoryManager : public MemoryManagerCreate<OsAgnosticMemoryManager> {
return MemoryManager::setMemAdvise(gfxAllocation, flags, rootDeviceIndex);
}
bool setMemPrefetch(GraphicsAllocation *gfxAllocation, uint32_t rootDeviceIndex) override {
setMemPrefetchCalled = true;
return MemoryManager::setMemPrefetch(gfxAllocation, rootDeviceIndex);
}
bool isKmdMigrationAvailable(uint32_t rootDeviceIndex) override {
if (DebugManager.flags.UseKmdMigration.get() != -1) {
return !!DebugManager.flags.UseKmdMigration.get();
}
return false;
}
struct CopyMemoryToAllocationBanksParams {
GraphicsAllocation *graphicsAllocation = nullptr;
size_t destinationOffset = 0u;
@@ -221,6 +233,7 @@ class MockMemoryManager : public MemoryManagerCreate<OsAgnosticMemoryManager> {
bool failAllocateSystemMemory = false;
bool failAllocate32Bit = false;
bool failSetMemAdvise = false;
bool setMemPrefetchCalled = false;
bool cpuCopyRequired = false;
bool forceCompressed = false;
bool forceFailureInPrimaryAllocation = false;

View File

@@ -252,6 +252,24 @@ TEST_F(IoctlHelperPrelimFixture, givenDrmAllocationWhenSetMemAdviseWithDevicePre
EXPECT_EQ(2u, drm->ioctlCallsCount);
}
TEST_F(IoctlHelperPrelimFixture, givenDrmAllocationWhenSetMemPrefetchSucceedsThenReturnTrue) {
MockBufferObject bo(drm.get(), 0, 0, 1);
MockDrmAllocation allocation(AllocationType::BUFFER, MemoryPool::LocalMemory);
allocation.bufferObjects[0] = &bo;
drm->ioctlRetVal = 0;
EXPECT_TRUE(allocation.setMemPrefetch(drm.get()));
}
TEST_F(IoctlHelperPrelimFixture, givenDrmAllocationWhenSetMemPrefetchFailsThenReturnFalse) {
MockBufferObject bo(drm.get(), 0, 0, 1);
MockDrmAllocation allocation(AllocationType::BUFFER, MemoryPool::LocalMemory);
allocation.bufferObjects[0] = &bo;
drm->ioctlRetVal = EINVAL;
EXPECT_FALSE(allocation.setMemPrefetch(drm.get()));
}
TEST_F(IoctlHelperPrelimFixture, givenVariousDirectSubmissionFlagSettingWhenCreateDrmContextIsCalledThenCorrectFlagsArePassedToIoctl) {
DebugManagerStateRestore stateRestore;
uint32_t vmId = 0u;

View File

@@ -169,6 +169,15 @@ TEST(IoctlHelperTestsUpstream, givenUpstreamWhenSetVmBoAdviseThenReturnTrue) {
EXPECT_TRUE(ioctlHelper->setVmBoAdvise(drm.get(), 0, 0, nullptr));
}
TEST(IoctlHelperTestsUpstream, givenUpstreamWhenSetVmPrefetchThenReturnTrue) {
auto executionEnvironment = std::make_unique<ExecutionEnvironment>();
executionEnvironment->prepareRootDeviceEnvironments(1);
auto drm = std::make_unique<DrmTipMock>(*executionEnvironment->rootDeviceEnvironments[0]);
auto ioctlHelper = drm->getIoctlHelper();
EXPECT_TRUE(ioctlHelper->setVmPrefetch(drm.get(), 0, 0, 0));
}
TEST(IoctlHelperTestsUpstream, givenUpstreamWhenDirectSubmissionEnabledThenNoFlagsAdded) {
DebugManagerStateRestore stateRestore;
DebugManager.flags.DirectSubmissionDrmContext.set(1);