mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 22:12:59 +08:00
feature: Set shared system USM madvise preferred location as prefetch region
Resolves: NEO-16482 Signed-off-by: John Falkowski <john.falkowski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
0f5381456d
commit
6b63304673
@@ -452,6 +452,7 @@ DECLARE_DEBUG_VARIABLE(int64_t, ForceGmmSystemMemoryBufferForAllocations, 0, "0:
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceLowLatencyHint, -1, "Force passing low latency hint during xe_exec_queue creation. -1: default, 0: disabled, 1: enabled");
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EmitMemAdvisePriorToCopyForNonUsm, -1, "Enable Memadvise to system memory for copy/fill with shared system input: -1: default, 0: disabled, 1: enabled")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, TreatNonUsmForTransfersAsSharedSystem, -1, "-1: default, 0: import non-usm as external host ptr on copy/fill (legacy mode), 1: treat non usm on copy/fill as shared system usm")
|
||||
// Debug override read by IoctlHelperXe::setVmSharedSystemMemPrefetch: when left at -1 the prefetch bind is issued
// with DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC as the target region; any other value makes the helper resolve an explicit
// memory region instance from the flag value and the requested sub-device instead.
DECLARE_DEBUG_VARIABLE(int32_t, OverrideMadviseSharedSystemPrefetchRegion, -1, "-1: default (madvise), 0: system memory, 1: same-tile local memory")
|
||||
|
||||
/*DIRECT SUBMISSION FLAGS*/
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableDirectSubmission, -1, "-1: default (disabled), 0: disable, 1:enable. Enables direct submission of command buffers bypassing KMD")
|
||||
|
||||
@@ -372,7 +372,7 @@ bool DrmMemoryManager::prefetchSharedSystemAlloc(const void *ptr, const size_t s
|
||||
auto memoryClassDevice = ioctlHelper->getDrmParamValue(DrmParam::memoryClassDevice);
|
||||
auto region = static_cast<uint32_t>((memoryClassDevice << 16u) | subDeviceIds[0]);
|
||||
auto vmId = drm.getVirtualMemoryAddressSpace(subDeviceIds[0]);
|
||||
return ioctlHelper->setVmPrefetch(reinterpret_cast<uint64_t>(ptr), size, region, vmId);
|
||||
return ioctlHelper->setVmSharedSystemMemPrefetch(reinterpret_cast<uint64_t>(ptr), size, region, vmId);
|
||||
}
|
||||
|
||||
bool DrmMemoryManager::setMemPrefetch(GraphicsAllocation *gfxAllocation, SubDeviceIdsVec &subDeviceIds, uint32_t rootDeviceIndex) {
|
||||
|
||||
@@ -137,6 +137,7 @@ class IoctlHelper {
|
||||
virtual AtomicAccessMode getVmSharedSystemAtomicAttribute(uint64_t handle, const size_t size, const uint32_t vmId) { return AtomicAccessMode::none; }
|
||||
virtual bool setVmBoAdviseForChunking(int32_t handle, uint64_t start, uint64_t length, uint32_t attribute, void *region) = 0;
|
||||
virtual bool setVmPrefetch(uint64_t start, uint64_t length, uint32_t region, uint32_t vmId) = 0;
|
||||
// Prefetch hook for shared system memory ranges. This base implementation issues no ioctl and simply reports
// success; helpers that support the operation (IoctlHelperXe declares an override) replace it.
virtual bool setVmSharedSystemMemPrefetch(uint64_t start, uint64_t length, uint32_t region, uint32_t vmId) { return true; }
|
||||
virtual bool setGemTiling(void *setTiling) = 0;
|
||||
virtual bool getGemTiling(void *setTiling) = 0;
|
||||
virtual uint32_t getDirectSubmissionFlag() = 0;
|
||||
|
||||
@@ -994,6 +994,47 @@ bool IoctlHelperXe::setVmPrefetch(uint64_t start, uint64_t length, uint32_t regi
|
||||
return true;
|
||||
}
|
||||
|
||||
bool IoctlHelperXe::setVmSharedSystemMemPrefetch(uint64_t start, uint64_t length, uint32_t region, uint32_t vmId) {
|
||||
xeLog(" -> IoctlHelperXe::%s s=0x%llx l=0x%llx align_s=0x%llx align_l=0x%llx vmid=0x%x\n", __FUNCTION__, start, length, alignDown(start, MemoryConstants::pageSize), alignSizeWholePage(reinterpret_cast<void *>(start), length), vmId);
|
||||
drm_xe_vm_bind bind = {};
|
||||
bind.vm_id = vmId;
|
||||
bind.num_binds = 1;
|
||||
|
||||
bind.bind.range = alignSizeWholePage(reinterpret_cast<void *>(start), length);
|
||||
bind.bind.addr = alignDown(start, MemoryConstants::pageSize);
|
||||
bind.bind.op = DRM_XE_VM_BIND_OP_PREFETCH;
|
||||
|
||||
auto pHwInfo = this->drm.getRootDeviceEnvironment().getHardwareInfo();
|
||||
if (debugManager.flags.OverrideMadviseSharedSystemPrefetchRegion.get() != -1) {
|
||||
constexpr uint32_t subDeviceMaskSize = DeviceBitfield().size();
|
||||
constexpr uint32_t subDeviceMaskMax = (1u << subDeviceMaskSize) - 1u;
|
||||
uint32_t subDeviceId = region & subDeviceMaskMax;
|
||||
DeviceBitfield subDeviceMask = (debugManager.flags.OverrideMadviseSharedSystemPrefetchRegion.get() << subDeviceId);
|
||||
MemoryClassInstance regionInstanceClass = this->drm.getMemoryInfo()->getMemoryRegionClassAndInstance(subDeviceMask, *pHwInfo);
|
||||
bind.bind.prefetch_mem_region_instance = regionInstanceClass.memoryInstance;
|
||||
} else {
|
||||
bind.bind.prefetch_mem_region_instance = DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC;
|
||||
}
|
||||
|
||||
int ret = IoctlHelper::ioctl(DrmIoctl::gemVmBind, &bind);
|
||||
|
||||
xeLog(" vm=%d addr=0x%lx range=0x%lx region=0x%x operation=%d(%s) ret=%d\n",
|
||||
bind.vm_id,
|
||||
bind.bind.addr,
|
||||
bind.bind.range,
|
||||
bind.bind.prefetch_mem_region_instance,
|
||||
bind.bind.op,
|
||||
xeGetBindOperationName(bind.bind.op),
|
||||
ret);
|
||||
|
||||
if (ret != 0) {
|
||||
xeLog("error: %s ret=%d\n", xeGetBindOperationName(bind.bind.op), ret);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
uint32_t IoctlHelperXe::getDirectSubmissionFlag() {
|
||||
xeLog(" -> IoctlHelperXe::%s\n", __FUNCTION__);
|
||||
return 0;
|
||||
|
||||
@@ -64,6 +64,7 @@ class IoctlHelperXe : public IoctlHelper {
|
||||
AtomicAccessMode getVmSharedSystemAtomicAttribute(uint64_t handle, const size_t size, const uint32_t vmId) override;
|
||||
bool setVmBoAdviseForChunking(int32_t handle, uint64_t start, uint64_t length, uint32_t attribute, void *region) override;
|
||||
bool setVmPrefetch(uint64_t start, uint64_t length, uint32_t region, uint32_t vmId) override;
|
||||
bool setVmSharedSystemMemPrefetch(uint64_t start, uint64_t length, uint32_t region, uint32_t vmId) override;
|
||||
bool setGemTiling(void *setTiling) override;
|
||||
bool getGemTiling(void *setTiling) override;
|
||||
uint32_t getDirectSubmissionFlag() override;
|
||||
|
||||
@@ -650,6 +650,7 @@ EnableDeferBacking = 0
|
||||
ForceLowLatencyHint = -1
|
||||
EmitMemAdvisePriorToCopyForNonUsm = -1
|
||||
TreatNonUsmForTransfersAsSharedSystem = -1
|
||||
OverrideMadviseSharedSystemPrefetchRegion = -1
|
||||
SetMaxBVHLevels = -1
|
||||
GetSipBinaryFromExternalLib = -1
|
||||
LogUsmReuse = 0
|
||||
|
||||
@@ -6472,46 +6472,36 @@ HWTEST_TEMPLATED_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenSetMemPrefetch
|
||||
}
|
||||
|
||||
HWTEST_TEMPLATED_F(DrmMemoryManagerTest, givenPrefetchSharedSystemAllocIsCalledThenReturnTrue) {
|
||||
SubDeviceIdsVec subDeviceIds{0};
|
||||
class MyMockIoctlHelper : public MockIoctlHelper {
|
||||
public:
|
||||
using MockIoctlHelper::MockIoctlHelper;
|
||||
|
||||
bool setVmPrefetch(uint64_t start, uint64_t length, uint32_t region, uint32_t vmId) override {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
auto mockIoctlHelper = new MyMockIoctlHelper(*mock);
|
||||
|
||||
auto &drm = static_cast<DrmMockCustom &>(memoryManager->getDrm(mockRootDeviceIndex));
|
||||
drm.ioctlHelper.reset(mockIoctlHelper);
|
||||
|
||||
auto ptr = malloc(1024);
|
||||
|
||||
EXPECT_TRUE(memoryManager->prefetchSharedSystemAlloc(ptr, 1024, subDeviceIds, rootDeviceIndex));
|
||||
void *ptr = malloc(1024);
|
||||
|
||||
auto subDeviceIds = NEO::SubDeviceIdsVec{0};
|
||||
EXPECT_TRUE(memoryManager->prefetchSharedSystemAlloc(ptr, 1024, subDeviceIds, mockRootDeviceIndex));
|
||||
free(ptr);
|
||||
}
|
||||
|
||||
HWTEST_TEMPLATED_F(DrmMemoryManagerTest, givenPrefetchSharedSystemAllocIsCalledThenReturnFalse) {
|
||||
SubDeviceIdsVec subDeviceIds{0};
|
||||
|
||||
class MyMockIoctlHelper : public MockIoctlHelper {
|
||||
public:
|
||||
using MockIoctlHelper::MockIoctlHelper;
|
||||
|
||||
bool setVmPrefetch(uint64_t start, uint64_t length, uint32_t region, uint32_t vmId) override {
|
||||
public:
|
||||
bool setVmSharedSystemMemPrefetch(uint64_t start, uint64_t length, uint32_t region, uint32_t vmId) override {
|
||||
setVmSharedSystemMemPrefetchCalled++;
|
||||
return false;
|
||||
}
|
||||
uint32_t setVmSharedSystemMemPrefetchCalled = 0;
|
||||
};
|
||||
auto mockIoctlHelper = new MyMockIoctlHelper(*mock);
|
||||
|
||||
auto &drm = static_cast<DrmMockCustom &>(memoryManager->getDrm(mockRootDeviceIndex));
|
||||
drm.ioctlHelper.reset(mockIoctlHelper);
|
||||
|
||||
auto ptr = malloc(1024);
|
||||
|
||||
EXPECT_TRUE(memoryManager->prefetchSharedSystemAlloc(ptr, 1024, subDeviceIds, rootDeviceIndex));
|
||||
void *ptr = malloc(1024);
|
||||
|
||||
auto subDeviceIds = NEO::SubDeviceIdsVec{0};
|
||||
EXPECT_FALSE(memoryManager->prefetchSharedSystemAlloc(ptr, 1024, subDeviceIds, mockRootDeviceIndex));
|
||||
EXPECT_EQ(1u, mockIoctlHelper->setVmSharedSystemMemPrefetchCalled);
|
||||
free(ptr);
|
||||
}
|
||||
|
||||
|
||||
@@ -3044,6 +3044,132 @@ TEST_F(IoctlHelperXeTest, givenIoctlHelperXeWhenCallingSetVmPrefetchThenVmBindIs
|
||||
EXPECT_EQ(drm->vmBindInputs[0].bind.prefetch_mem_region_instance, targetMemoryRegion.memoryInstance);
|
||||
}
|
||||
|
||||
struct DrmMockXePrefetchFail : public DrmMockXe {
|
||||
static auto create(RootDeviceEnvironment &rootDeviceEnvironment) {
|
||||
auto drm = std::unique_ptr<DrmMockXePrefetchFail>(new DrmMockXePrefetchFail{rootDeviceEnvironment});
|
||||
drm->initInstance();
|
||||
return drm;
|
||||
}
|
||||
|
||||
int ioctl(DrmIoctl request, void *arg) override {
|
||||
if (request == DrmIoctl::gemVmBind) {
|
||||
return -1;
|
||||
}
|
||||
return DrmMockXe::ioctl(request, arg);
|
||||
};
|
||||
|
||||
int gemVmAdviseReturn = 0;
|
||||
StackVec<drm_xe_madvise, 4> vmAdviseInputs;
|
||||
|
||||
protected:
|
||||
// Don't call directly, use the create() function
|
||||
DrmMockXePrefetchFail(RootDeviceEnvironment &rootDeviceEnvironment) : DrmMockXe(rootDeviceEnvironment) {}
|
||||
};
|
||||
|
||||
// With a DRM mock that rejects every gemVmBind ioctl, setVmSharedSystemMemPrefetch has to report failure.
TEST_F(IoctlHelperXeTest, givenIoctlHelperXeWhenCallingSetVmSharedSystemMemPrefetchThenFailureIsReturned) {
    auto executionEnvironment = std::make_unique<MockExecutionEnvironment>();
    auto &rootDeviceEnvironment = *executionEnvironment->rootDeviceEnvironments[0];
    auto failingDrm = DrmMockXePrefetchFail::create(rootDeviceEnvironment);
    auto ioctlHelper = static_cast<MockIoctlHelperXe *>(failingDrm->getIoctlHelper());

    const uint64_t prefetchStart = 0x12u;
    const uint64_t prefetchLength = 0x34u;
    const uint32_t tileId = 0u;
    const uint32_t vmId = 1u;

    // Region value: class shifted into the upper 16 bits, sub-device id in the low bits.
    const int deviceMemoryClass = static_cast<int>(DrmParam::memoryClassDevice);
    const uint32_t region = (deviceMemoryClass << 16u) | tileId;

    const bool prefetchSucceeded = ioctlHelper->setVmSharedSystemMemPrefetch(prefetchStart, prefetchLength, region, vmId);
    EXPECT_FALSE(prefetchSucceeded);
}
|
||||
|
||||
// Without the override debug flag the prefetch bind must carry DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC as its region
// instance, and the address/range must be page aligned.
TEST_F(IoctlHelperXeTest, givenIoctlHelperXeWhenCallingSetVmSharedSystemMemPrefetchThenMemRegionInstanceIsMemAdvisePreferredLocation) {
    DebugManagerStateRestore restorer;
    debugManager.flags.EnableLocalMemory.set(1);

    auto executionEnvironment = std::make_unique<MockExecutionEnvironment>();
    auto &rootDeviceEnvironment = *executionEnvironment->rootDeviceEnvironments[0];
    auto drm = DrmMockXe::create(rootDeviceEnvironment);
    auto ioctlHelper = static_cast<MockIoctlHelperXe *>(drm->getIoctlHelper());
    ioctlHelper->initialize();

    const uint64_t prefetchStart = 0x12u;
    const uint64_t prefetchLength = 0x34u;
    const uint32_t tileId = 0u;
    const uint32_t vmId = 1u;

    const int deviceMemoryClass = static_cast<int>(DrmParam::memoryClassDevice);
    const uint32_t region = (deviceMemoryClass << 16u) | tileId;

    EXPECT_TRUE(ioctlHelper->setVmSharedSystemMemPrefetch(prefetchStart, prefetchLength, region, vmId));
    EXPECT_EQ(1u, drm->vmBindInputs.size());

    // Inspect the single bind captured by the mock.
    const auto &capturedBind = drm->vmBindInputs[0];
    EXPECT_EQ(capturedBind.vm_id, vmId);
    EXPECT_EQ(capturedBind.bind.addr, alignDown(prefetchStart, MemoryConstants::pageSize));
    EXPECT_EQ(capturedBind.bind.range, alignSizeWholePage(reinterpret_cast<void *>(prefetchStart), prefetchLength));
    EXPECT_EQ(capturedBind.bind.prefetch_mem_region_instance, static_cast<uint64_t>(DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) & 0xffffffff);
}
|
||||
|
||||
// With OverrideMadviseSharedSystemPrefetchRegion=1 the prefetch bind must target the local memory region instance
// of the requested sub-device.
TEST_F(IoctlHelperXeTest, givenIoctlHelperXeWhenCallingSetVmSharedSystemMemPrefetchWithDebugVarThenMemRegionInstanceIsDeviceLocal) {
    DebugManagerStateRestore restorer;
    debugManager.flags.EnableLocalMemory.set(1);
    debugManager.flags.OverrideMadviseSharedSystemPrefetchRegion.set(1);

    auto executionEnvironment = std::make_unique<MockExecutionEnvironment>();
    auto &rootDeviceEnvironment = *executionEnvironment->rootDeviceEnvironments[0];
    auto drm = DrmMockXe::create(rootDeviceEnvironment);
    auto ioctlHelper = static_cast<MockIoctlHelperXe *>(drm->getIoctlHelper());
    ioctlHelper->initialize();

    const uint64_t prefetchStart = 0x12u;
    const uint64_t prefetchLength = 0x34u;
    const uint32_t tileId = 0u;
    const uint32_t vmId = 1u;

    // Record the expected local region before ownership of the memory info moves to the DRM mock.
    auto memoryInfo = ioctlHelper->createMemoryInfo();
    ASSERT_NE(nullptr, memoryInfo);
    MemoryClassInstance expectedRegion = memoryInfo->getLocalMemoryRegions()[tileId].region;
    drm->memoryInfo.reset(memoryInfo.release());

    const int deviceMemoryClass = static_cast<int>(DrmParam::memoryClassDevice);
    const uint32_t region = (deviceMemoryClass << 16u) | tileId;

    EXPECT_TRUE(ioctlHelper->setVmSharedSystemMemPrefetch(prefetchStart, prefetchLength, region, vmId));
    EXPECT_EQ(1u, drm->vmBindInputs.size());

    const auto &capturedBind = drm->vmBindInputs[0];
    EXPECT_EQ(capturedBind.vm_id, vmId);
    EXPECT_EQ(capturedBind.bind.addr, alignDown(prefetchStart, MemoryConstants::pageSize));
    EXPECT_EQ(capturedBind.bind.range, alignSizeWholePage(reinterpret_cast<void *>(prefetchStart), prefetchLength));
    EXPECT_EQ(capturedBind.bind.prefetch_mem_region_instance, expectedRegion.memoryInstance);
}
|
||||
|
||||
// With OverrideMadviseSharedSystemPrefetchRegion=0 the prefetch bind is expected to carry region instance 0.
TEST_F(IoctlHelperXeTest, givenIoctlHelperXeWhenCallingSetVmSharedSystemMemPrefetchWithDebugVarThenMemRegionInstanceIsSystem) {
    DebugManagerStateRestore restorer;
    debugManager.flags.EnableLocalMemory.set(1);
    debugManager.flags.OverrideMadviseSharedSystemPrefetchRegion.set(0);

    auto executionEnvironment = std::make_unique<MockExecutionEnvironment>();
    auto &rootDeviceEnvironment = *executionEnvironment->rootDeviceEnvironments[0];
    auto drm = DrmMockXe::create(rootDeviceEnvironment);
    auto ioctlHelper = static_cast<MockIoctlHelperXe *>(drm->getIoctlHelper());
    ioctlHelper->initialize();

    const uint64_t prefetchStart = 0x12u;
    const uint64_t prefetchLength = 0x34u;
    const uint32_t tileId = 0u;
    const uint32_t vmId = 1u;

    // The override path resolves the region through the DRM's memory info, so hand one over to the mock.
    auto memoryInfo = ioctlHelper->createMemoryInfo();
    ASSERT_NE(nullptr, memoryInfo);
    drm->memoryInfo.reset(memoryInfo.release());

    const int deviceMemoryClass = static_cast<int>(DrmParam::memoryClassDevice);
    const uint32_t region = (deviceMemoryClass << 16u) | tileId;

    EXPECT_TRUE(ioctlHelper->setVmSharedSystemMemPrefetch(prefetchStart, prefetchLength, region, vmId));
    EXPECT_EQ(1u, drm->vmBindInputs.size());

    const auto &capturedBind = drm->vmBindInputs[0];
    EXPECT_EQ(capturedBind.vm_id, vmId);
    EXPECT_EQ(capturedBind.bind.addr, alignDown(prefetchStart, MemoryConstants::pageSize));
    EXPECT_EQ(capturedBind.bind.range, alignSizeWholePage(reinterpret_cast<void *>(prefetchStart), prefetchLength));
    EXPECT_EQ(capturedBind.bind.prefetch_mem_region_instance, 0u);
}
|
||||
|
||||
TEST_F(IoctlHelperXeTest, givenIoctlHelperXeWhenCallingSetVmPrefetchOnSecondTileThenVmBindIsCalled) {
|
||||
DebugManagerStateRestore restorer;
|
||||
debugManager.flags.EnableLocalMemory.set(1);
|
||||
|
||||
Reference in New Issue
Block a user