mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 06:24:51 +08:00
performance: Make memory resident before cpu access
Related-To: NEO-13403
Signed-off-by: Bellekallu Rajkiran <bellekallu.rajkiran@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
f3a7278d8f
commit
2ea3686d86
@@ -1631,15 +1631,7 @@ void *DrmMemoryManager::lockResourceImpl(GraphicsAllocation &graphicsAllocation)
|
||||
return cpuPtr;
|
||||
}
|
||||
|
||||
auto rootDeviceIndex = graphicsAllocation.getRootDeviceIndex();
|
||||
auto ioctlHelper = this->getDrm(rootDeviceIndex).getIoctlHelper();
|
||||
|
||||
if (ioctlHelper->makeResidentBeforeLockNeeded()) {
|
||||
auto memoryOperationsInterface = static_cast<DrmMemoryOperationsHandler *>(executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->memoryOperationsInterface.get());
|
||||
auto graphicsAllocationPtr = &graphicsAllocation;
|
||||
[[maybe_unused]] auto ret = memoryOperationsInterface->makeResidentWithinOsContext(getDefaultOsContext(rootDeviceIndex), ArrayRef<NEO::GraphicsAllocation *>(&graphicsAllocationPtr, 1), false, false, true) == MemoryOperationsStatus::success;
|
||||
DEBUG_BREAK_IF(!ret);
|
||||
}
|
||||
makeAllocationResidentIfNeeded(&graphicsAllocation);
|
||||
|
||||
auto bo = static_cast<DrmAllocation &>(graphicsAllocation).getBO();
|
||||
if (graphicsAllocation.getAllocationType() == AllocationType::writeCombined) {
|
||||
@@ -1698,6 +1690,16 @@ Drm &DrmMemoryManager::getDrm(uint32_t rootDeviceIndex) const {
|
||||
return *this->executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->osInterface->getDriverModel()->as<Drm>();
|
||||
}
|
||||
|
||||
// Makes `allocation` resident within the default OS context of its root device,
// but only when that device's ioctl helper reports makeResidentBeforeLockNeeded().
// Otherwise this is a no-op.
void DrmMemoryManager::makeAllocationResidentIfNeeded(GraphicsAllocation *allocation) {
    const auto deviceIndex = allocation->getRootDeviceIndex();
    if (!this->getDrm(deviceIndex).getIoctlHelper()->makeResidentBeforeLockNeeded()) {
        return;
    }

    auto residencyHandler = executionEnvironment.rootDeviceEnvironments[deviceIndex]->memoryOperationsInterface.get();
    // The handler takes a range of allocations; wrap the single pointer in a one-element view.
    ArrayRef<NEO::GraphicsAllocation *> singleAllocation(&allocation, 1);
    const auto status = residencyHandler->makeResidentWithinOsContext(getDefaultOsContext(deviceIndex), singleAllocation, false, false, true);

    // A failure is only surfaced through DEBUG_BREAK_IF; the result is otherwise unused.
    [[maybe_unused]] const bool succeeded = (status == MemoryOperationsStatus::success);
    DEBUG_BREAK_IF(!succeeded);
}
|
||||
|
||||
uint32_t DrmMemoryManager::getRootDeviceIndex(const Drm *drm) {
|
||||
auto rootDeviceCount = this->executionEnvironment.rootDeviceEnvironments.size();
|
||||
|
||||
@@ -2676,12 +2678,7 @@ DrmAllocation *DrmMemoryManager::createAllocWithAlignment(const AllocationData &
|
||||
auto canonizedGpuAddress = gmmHelper->canonize(bo->peekAddress());
|
||||
auto allocation = std::make_unique<DrmAllocation>(allocationData.rootDeviceIndex, 1u /*num gmms*/, allocationData.type, bo.get(), nullptr, canonizedGpuAddress, alignedSize, memoryPool);
|
||||
|
||||
if (ioctlHelper->makeResidentBeforeLockNeeded()) {
|
||||
auto memoryOperationsInterface = static_cast<DrmMemoryOperationsHandler *>(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->memoryOperationsInterface.get());
|
||||
GraphicsAllocation *allocationPtr = allocation.get();
|
||||
[[maybe_unused]] auto ret = memoryOperationsInterface->makeResidentWithinOsContext(getDefaultOsContext(allocationData.rootDeviceIndex), ArrayRef<NEO::GraphicsAllocation *>(&allocationPtr, 1), false, false, true) == MemoryOperationsStatus::success;
|
||||
DEBUG_BREAK_IF(!ret);
|
||||
}
|
||||
makeAllocationResidentIfNeeded(allocation.get());
|
||||
|
||||
[[maybe_unused]] auto retPtr = ioctlHelper->mmapFunction(*this, cpuPointer, alignedSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, drm.getFileDescriptor(), static_cast<off_t>(offset));
|
||||
DEBUG_BREAK_IF(retPtr != cpuPointer);
|
||||
@@ -2826,6 +2823,7 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const
|
||||
auto alignSize = alignUp(remainingSize, MemoryConstants::pageSize64k);
|
||||
auto remainingMemoryBanks = allocationData.storageInfo.memoryBanks;
|
||||
auto numHandles = GraphicsAllocation::getNumHandlesForKmdSharedAllocation(allocationData.storageInfo.getNumBanks());
|
||||
bool makeResidentBeforeLock = ioctlHelper->makeResidentBeforeLockNeeded();
|
||||
|
||||
bool useChunking = false;
|
||||
uint32_t numOfChunks = 0;
|
||||
@@ -2879,6 +2877,12 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (makeResidentBeforeLock) {
|
||||
bo->requireImmediateBinding(true);
|
||||
[[maybe_unused]] auto ret = bo->bind(getDefaultOsContext(allocationData.rootDeviceIndex), 0, false);
|
||||
DEBUG_BREAK_IF(ret != 0);
|
||||
}
|
||||
|
||||
ioctlHelper->mmapFunction(*this, currentAddress, currentSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, drm.getFileDescriptor(), static_cast<off_t>(offset));
|
||||
|
||||
bo->setAddress(castToUint64(currentAddress));
|
||||
@@ -2902,6 +2906,12 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const
|
||||
allocation->storageInfo = allocationData.storageInfo;
|
||||
allocation->storageInfo.isChunked = useChunking;
|
||||
allocation->storageInfo.numOfChunks = numOfChunks;
|
||||
|
||||
if (makeResidentBeforeLock) {
|
||||
auto osContext = getDefaultOsContext(allocationData.rootDeviceIndex);
|
||||
allocation->updateResidencyTaskCount(GraphicsAllocation::objectAlwaysResident, osContext->getContextId());
|
||||
}
|
||||
|
||||
if (!allocation->setCacheRegion(&drm, static_cast<CacheRegion>(allocationData.cacheRegion))) {
|
||||
ioctlHelper->munmapFunction(*this, cpuBasePointer, totalSizeToAlloc);
|
||||
for (auto bo : bos) {
|
||||
@@ -3039,9 +3049,6 @@ DrmAllocation *DrmMemoryManager::createUSMHostAllocationFromSharedHandle(osHandl
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
[[maybe_unused]] auto retPtr = this->mmapFunction(cpuPointer, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, drm.getFileDescriptor(), static_cast<off_t>(offset));
|
||||
DEBUG_BREAK_IF(retPtr != cpuPointer);
|
||||
|
||||
AllocationData allocationData = {};
|
||||
allocationData.rootDeviceIndex = properties.rootDeviceIndex;
|
||||
allocationData.size = size;
|
||||
@@ -3058,7 +3065,14 @@ DrmAllocation *DrmMemoryManager::createUSMHostAllocationFromSharedHandle(osHandl
|
||||
|
||||
pushSharedBufferObject(bo);
|
||||
|
||||
auto drmAllocation = std::make_unique<DrmAllocation>(properties.rootDeviceIndex, 1u /*num gmms*/, properties.allocationType, bo, cpuPointer, bo->peekAddress(), bo->peekSize(), memoryPool);
|
||||
auto drmAllocation = std::make_unique<DrmAllocation>(properties.rootDeviceIndex, 1u /*num gmms*/, properties.allocationType, bo, nullptr, bo->peekAddress(), bo->peekSize(), memoryPool);
|
||||
|
||||
makeAllocationResidentIfNeeded(drmAllocation.get());
|
||||
|
||||
[[maybe_unused]] auto retPtr = this->mmapFunction(cpuPointer, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, drm.getFileDescriptor(), static_cast<off_t>(offset));
|
||||
DEBUG_BREAK_IF(retPtr != cpuPointer);
|
||||
|
||||
drmAllocation->setCpuPtrAndGpuAddress(cpuPointer, bo->peekAddress());
|
||||
drmAllocation->setMmapPtr(cpuPointer);
|
||||
drmAllocation->setMmapSize(size);
|
||||
drmAllocation->setReservedAddressRange(reinterpret_cast<void *>(cpuPointer), size);
|
||||
|
||||
@@ -158,6 +158,7 @@ class DrmMemoryManager : public MemoryManager {
|
||||
void emitPinningRequest(BufferObject *bo, const AllocationData &allocationData) const;
|
||||
uint32_t getDefaultDrmContextId(uint32_t rootDeviceIndex) const;
|
||||
OsContextLinux *getDefaultOsContext(uint32_t rootDeviceIndex) const;
|
||||
void makeAllocationResidentIfNeeded(GraphicsAllocation *allocation);
|
||||
|
||||
StorageInfo createStorageInfoFromProperties(const AllocationProperties &properties) override;
|
||||
GraphicsAllocation *createGraphicsAllocation(OsHandleStorage &handleStorage, const AllocationData &allocationData) override;
|
||||
|
||||
@@ -8540,6 +8540,44 @@ HWTEST_TEMPLATED_F(DrmMemoryManagerWithLocalMemoryTest, givenDrmWhenRetrieveMmap
|
||||
}
|
||||
}
|
||||
|
||||
// With makeResidentBeforeLockNeeded forced to true on the mock ioctl helper, a shared unified
// memory allocation must come back marked always-resident for the default engine's OS context.
// NOTE(review): the test name also mentions immediate binding and bind(), but only the
// always-resident state is asserted here.
HWTEST_TEMPLATED_F(DrmMemoryManagerWithLocalMemoryTest, givenMakeResidentBeforeLockNeededWhenCreateSharedUnifiedMemoryAllocationThenRequireImmediateBindingIsSetAndBindIsCalled) {
    // Each of these ioctls is expected exactly once over the course of the test.
    mock->ioctlExpected.gemCreateExt = 1;
    mock->ioctlExpected.gemMmapOffset = 1;
    mock->ioctlExpected.gemWait = 1;
    mock->ioctlExpected.gemClose = 1;

    // Mock helper that reports residency is required before lock.
    auto ioctlHelperMock = new MockIoctlHelper(*mock);
    ioctlHelperMock->makeResidentBeforeLockNeededResult = true;
    ioctlHelperMock->callBaseVmAdviseAtomicAttribute = false;
    ioctlHelperMock->vmAdviseAtomicAttribute = std::nullopt;

    auto &drmMock = static_cast<DrmMockCustom &>(memoryManager->getDrm(rootDeviceIndex));

    // Single system-memory region; memory info is installed before the helper is swapped in.
    std::vector<MemoryRegion> systemRegions(1);
    systemRegions[0].region = {drm_i915_gem_memory_class::I915_MEMORY_CLASS_SYSTEM, 0};
    drmMock.memoryInfo.reset(new MemoryInfo(systemRegions, drmMock));
    // The raw pointer created above is handed over to drmMock.ioctlHelper here.
    drmMock.ioctlHelper.reset(ioctlHelperMock);

    auto &rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[rootDeviceIndex];
    rootDeviceEnvironment->memoryOperationsInterface.reset(
        new DrmMemoryOperationsHandlerBind(*rootDeviceEnvironment.get(), 0));

    AllocationData usmRequest{};
    usmRequest.rootDeviceIndex = rootDeviceIndex;
    usmRequest.type = AllocationType::unifiedSharedMemory;
    usmRequest.size = MemoryConstants::pageSize64k;
    usmRequest.alignment = MemoryConstants::pageSize;
    usmRequest.storageInfo.subDeviceBitfield = 0x1;
    usmRequest.useMmapObject = true;

    auto usmAllocation = memoryManager->createSharedUnifiedMemoryAllocation(usmRequest);
    ASSERT_NE(nullptr, usmAllocation);

    const auto defaultContextId = device->getDefaultEngine().osContext->getContextId();
    EXPECT_TRUE(usmAllocation->isAlwaysResident(defaultContextId));

    memoryManager->freeGraphicsMemory(usmAllocation);
}
|
||||
|
||||
HWTEST_TEMPLATED_F(DrmMemoryManagerTest, givenDrmWhenRetrieveMmapOffsetForBufferObjectIsCalledForSystemMemoryThenApplyCorrectFlags) {
|
||||
mock->ioctlExpected.gemMmapOffset = 8;
|
||||
BufferObject bo(rootDeviceIndex, mock, 3, 1, 1024, 0);
|
||||
|
||||
Reference in New Issue
Block a user