performance: Make memory resident before cpu access

Related-To: NEO-13403

Signed-off-by: Bellekallu Rajkiran <bellekallu.rajkiran@intel.com>
This commit is contained in:
Bellekallu Rajkiran
2025-11-20 13:29:53 +00:00
committed by Compute-Runtime-Automation
parent f3a7278d8f
commit 2ea3686d86
3 changed files with 72 additions and 19 deletions

View File

@@ -1631,15 +1631,7 @@ void *DrmMemoryManager::lockResourceImpl(GraphicsAllocation &graphicsAllocation)
return cpuPtr;
}
auto rootDeviceIndex = graphicsAllocation.getRootDeviceIndex();
auto ioctlHelper = this->getDrm(rootDeviceIndex).getIoctlHelper();
if (ioctlHelper->makeResidentBeforeLockNeeded()) {
auto memoryOperationsInterface = static_cast<DrmMemoryOperationsHandler *>(executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->memoryOperationsInterface.get());
auto graphicsAllocationPtr = &graphicsAllocation;
[[maybe_unused]] auto ret = memoryOperationsInterface->makeResidentWithinOsContext(getDefaultOsContext(rootDeviceIndex), ArrayRef<NEO::GraphicsAllocation *>(&graphicsAllocationPtr, 1), false, false, true) == MemoryOperationsStatus::success;
DEBUG_BREAK_IF(!ret);
}
makeAllocationResidentIfNeeded(&graphicsAllocation);
auto bo = static_cast<DrmAllocation &>(graphicsAllocation).getBO();
if (graphicsAllocation.getAllocationType() == AllocationType::writeCombined) {
@@ -1698,6 +1690,16 @@ Drm &DrmMemoryManager::getDrm(uint32_t rootDeviceIndex) const {
return *this->executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->osInterface->getDriverModel()->as<Drm>();
}
// Makes `allocation` resident within the default OS context of its root device,
// but only when that device's ioctl helper reports makeResidentBeforeLockNeeded().
// A failed residency request is not reported to the caller; it only trips DEBUG_BREAK_IF.
void DrmMemoryManager::makeAllocationResidentIfNeeded(GraphicsAllocation *allocation) {
    const auto deviceIndex = allocation->getRootDeviceIndex();

    // Nothing to do when the ioctl helper does not require residency before locking.
    if (!this->getDrm(deviceIndex).getIoctlHelper()->makeResidentBeforeLockNeeded()) {
        return;
    }

    auto *residencyHandler = executionEnvironment.rootDeviceEnvironments[deviceIndex]->memoryOperationsInterface.get();

    // The residency API takes a range of allocations; wrap the single pointer as a one-element view.
    ArrayRef<NEO::GraphicsAllocation *> singleAllocation(&allocation, 1);

    const auto status = residencyHandler->makeResidentWithinOsContext(getDefaultOsContext(deviceIndex), singleAllocation, false, false, true);
    [[maybe_unused]] const bool madeResident = (status == MemoryOperationsStatus::success);
    DEBUG_BREAK_IF(!madeResident);
}
uint32_t DrmMemoryManager::getRootDeviceIndex(const Drm *drm) {
auto rootDeviceCount = this->executionEnvironment.rootDeviceEnvironments.size();
@@ -2676,12 +2678,7 @@ DrmAllocation *DrmMemoryManager::createAllocWithAlignment(const AllocationData &
auto canonizedGpuAddress = gmmHelper->canonize(bo->peekAddress());
auto allocation = std::make_unique<DrmAllocation>(allocationData.rootDeviceIndex, 1u /*num gmms*/, allocationData.type, bo.get(), nullptr, canonizedGpuAddress, alignedSize, memoryPool);
if (ioctlHelper->makeResidentBeforeLockNeeded()) {
auto memoryOperationsInterface = static_cast<DrmMemoryOperationsHandler *>(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->memoryOperationsInterface.get());
GraphicsAllocation *allocationPtr = allocation.get();
[[maybe_unused]] auto ret = memoryOperationsInterface->makeResidentWithinOsContext(getDefaultOsContext(allocationData.rootDeviceIndex), ArrayRef<NEO::GraphicsAllocation *>(&allocationPtr, 1), false, false, true) == MemoryOperationsStatus::success;
DEBUG_BREAK_IF(!ret);
}
makeAllocationResidentIfNeeded(allocation.get());
[[maybe_unused]] auto retPtr = ioctlHelper->mmapFunction(*this, cpuPointer, alignedSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, drm.getFileDescriptor(), static_cast<off_t>(offset));
DEBUG_BREAK_IF(retPtr != cpuPointer);
@@ -2826,6 +2823,7 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const
auto alignSize = alignUp(remainingSize, MemoryConstants::pageSize64k);
auto remainingMemoryBanks = allocationData.storageInfo.memoryBanks;
auto numHandles = GraphicsAllocation::getNumHandlesForKmdSharedAllocation(allocationData.storageInfo.getNumBanks());
bool makeResidentBeforeLock = ioctlHelper->makeResidentBeforeLockNeeded();
bool useChunking = false;
uint32_t numOfChunks = 0;
@@ -2879,6 +2877,12 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const
return nullptr;
}
if (makeResidentBeforeLock) {
bo->requireImmediateBinding(true);
[[maybe_unused]] auto ret = bo->bind(getDefaultOsContext(allocationData.rootDeviceIndex), 0, false);
DEBUG_BREAK_IF(ret != 0);
}
ioctlHelper->mmapFunction(*this, currentAddress, currentSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, drm.getFileDescriptor(), static_cast<off_t>(offset));
bo->setAddress(castToUint64(currentAddress));
@@ -2902,6 +2906,12 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const
allocation->storageInfo = allocationData.storageInfo;
allocation->storageInfo.isChunked = useChunking;
allocation->storageInfo.numOfChunks = numOfChunks;
if (makeResidentBeforeLock) {
auto osContext = getDefaultOsContext(allocationData.rootDeviceIndex);
allocation->updateResidencyTaskCount(GraphicsAllocation::objectAlwaysResident, osContext->getContextId());
}
if (!allocation->setCacheRegion(&drm, static_cast<CacheRegion>(allocationData.cacheRegion))) {
ioctlHelper->munmapFunction(*this, cpuBasePointer, totalSizeToAlloc);
for (auto bo : bos) {
@@ -3039,9 +3049,6 @@ DrmAllocation *DrmMemoryManager::createUSMHostAllocationFromSharedHandle(osHandl
return nullptr;
}
[[maybe_unused]] auto retPtr = this->mmapFunction(cpuPointer, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, drm.getFileDescriptor(), static_cast<off_t>(offset));
DEBUG_BREAK_IF(retPtr != cpuPointer);
AllocationData allocationData = {};
allocationData.rootDeviceIndex = properties.rootDeviceIndex;
allocationData.size = size;
@@ -3058,7 +3065,14 @@ DrmAllocation *DrmMemoryManager::createUSMHostAllocationFromSharedHandle(osHandl
pushSharedBufferObject(bo);
auto drmAllocation = std::make_unique<DrmAllocation>(properties.rootDeviceIndex, 1u /*num gmms*/, properties.allocationType, bo, cpuPointer, bo->peekAddress(), bo->peekSize(), memoryPool);
auto drmAllocation = std::make_unique<DrmAllocation>(properties.rootDeviceIndex, 1u /*num gmms*/, properties.allocationType, bo, nullptr, bo->peekAddress(), bo->peekSize(), memoryPool);
makeAllocationResidentIfNeeded(drmAllocation.get());
[[maybe_unused]] auto retPtr = this->mmapFunction(cpuPointer, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, drm.getFileDescriptor(), static_cast<off_t>(offset));
DEBUG_BREAK_IF(retPtr != cpuPointer);
drmAllocation->setCpuPtrAndGpuAddress(cpuPointer, bo->peekAddress());
drmAllocation->setMmapPtr(cpuPointer);
drmAllocation->setMmapSize(size);
drmAllocation->setReservedAddressRange(reinterpret_cast<void *>(cpuPointer), size);

View File

@@ -158,6 +158,7 @@ class DrmMemoryManager : public MemoryManager {
void emitPinningRequest(BufferObject *bo, const AllocationData &allocationData) const;
uint32_t getDefaultDrmContextId(uint32_t rootDeviceIndex) const;
OsContextLinux *getDefaultOsContext(uint32_t rootDeviceIndex) const;
// Makes the allocation resident within the default OS context of its root device when the
// ioctl helper reports makeResidentBeforeLockNeeded(); otherwise does nothing.
void makeAllocationResidentIfNeeded(GraphicsAllocation *allocation);
StorageInfo createStorageInfoFromProperties(const AllocationProperties &properties) override;
GraphicsAllocation *createGraphicsAllocation(OsHandleStorage &handleStorage, const AllocationData &allocationData) override;

View File

@@ -8540,6 +8540,44 @@ HWTEST_TEMPLATED_F(DrmMemoryManagerWithLocalMemoryTest, givenDrmWhenRetrieveMmap
}
}
// Creates a shared unified memory allocation while the mock ioctl helper is configured with
// makeResidentBeforeLockNeededResult = true, and checks that the resulting allocation is
// reported as always resident for the default engine's OS context.
// NOTE(review): the test name also mentions requireImmediateBinding/bind, but the only
// expectation below is the always-resident check.
HWTEST_TEMPLATED_F(DrmMemoryManagerWithLocalMemoryTest, givenMakeResidentBeforeLockNeededWhenCreateSharedUnifiedMemoryAllocationThenRequireImmediateBindingIsSetAndBindIsCalled) {
// Expected ioctl counts for this scenario (presumably checked by the fixture - confirm).
mock->ioctlExpected.gemWait = 1;
mock->ioctlExpected.gemClose = 1;
mock->ioctlExpected.gemCreateExt = 1;
mock->ioctlExpected.gemMmapOffset = 1;
// Raw pointer on purpose: ownership is handed to drm.ioctlHelper via reset() further down.
auto mockIoctlHelper = new MockIoctlHelper(*mock);
mockIoctlHelper->makeResidentBeforeLockNeededResult = true;
mockIoctlHelper->callBaseVmAdviseAtomicAttribute = false;
mockIoctlHelper->vmAdviseAtomicAttribute = std::nullopt;
auto &drm = static_cast<DrmMockCustom &>(memoryManager->getDrm(rootDeviceIndex));
// Memory info with a single region of class I915_MEMORY_CLASS_SYSTEM, instance 0.
std::vector<MemoryRegion> regionInfo(1);
regionInfo[0].region = {drm_i915_gem_memory_class::I915_MEMORY_CLASS_SYSTEM, 0};
// NOTE(review): MemoryInfo is constructed against `drm` before the mock helper is installed;
// keep this order unless confirmed that it does not matter.
drm.memoryInfo.reset(new MemoryInfo(regionInfo, drm));
drm.ioctlHelper.reset(mockIoctlHelper);
// Replace the root device's memory operations handler with a DrmMemoryOperationsHandlerBind instance.
executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->memoryOperationsInterface.reset(
new DrmMemoryOperationsHandlerBind(*executionEnvironment->rootDeviceEnvironments[rootDeviceIndex].get(), 0));
// Request a unifiedSharedMemory allocation of MemoryConstants::pageSize64k bytes on sub-device bit 0x1.
AllocationData allocationData{};
allocationData.size = MemoryConstants::pageSize64k;
allocationData.rootDeviceIndex = rootDeviceIndex;
allocationData.type = AllocationType::unifiedSharedMemory;
allocationData.storageInfo.subDeviceBitfield = 0x1;
allocationData.alignment = MemoryConstants::pageSize;
allocationData.useMmapObject = true;
auto sharedUSM = memoryManager->createSharedUnifiedMemoryAllocation(allocationData);
// ASSERT (not EXPECT) so the test stops here instead of dereferencing a null allocation below.
ASSERT_NE(nullptr, sharedUSM);
auto osContext = device->getDefaultEngine().osContext;
EXPECT_TRUE(sharedUSM->isAlwaysResident(osContext->getContextId()));
// Release the allocation created by this test.
memoryManager->freeGraphicsMemory(sharedUSM);
}
HWTEST_TEMPLATED_F(DrmMemoryManagerTest, givenDrmWhenRetrieveMmapOffsetForBufferObjectIsCalledForSystemMemoryThenApplyCorrectFlags) {
mock->ioctlExpected.gemMmapOffset = 8;
BufferObject bo(rootDeviceIndex, mock, 3, 1, 1024, 0);