diff --git a/opencl/test/unit_test/os_interface/linux/drm_memory_manager_allocate_in_device_pool_tests_dg1.cpp b/opencl/test/unit_test/os_interface/linux/drm_memory_manager_allocate_in_device_pool_tests_dg1.cpp index 18705cc941..7095345b5b 100644 --- a/opencl/test/unit_test/os_interface/linux/drm_memory_manager_allocate_in_device_pool_tests_dg1.cpp +++ b/opencl/test/unit_test/os_interface/linux/drm_memory_manager_allocate_in_device_pool_tests_dg1.cpp @@ -243,6 +243,59 @@ TEST_F(DrmMemoryManagerLocalMemoryTest, givenDrmMemoryManagerWhenCreateBufferObj EXPECT_EQ(nullptr, bo); } +TEST_F(DrmMemoryManagerLocalMemoryTest, givenMemoryInfoWhenAllocateWithAlignmentThenGemCreateExtIsUsed) { + drm_i915_memory_region_info regionInfo[2] = {}; + regionInfo[0].region = {I915_MEMORY_CLASS_SYSTEM, 0}; + regionInfo[1].region = {I915_MEMORY_CLASS_DEVICE, 0}; + + mock->memoryInfo.reset(new MemoryInfoImpl(regionInfo, 2)); + mock->ioctlCallsCount = 0; + + AllocationData allocationData; + allocationData.size = MemoryConstants::pageSize64k; + + auto allocation = memoryManager->allocateGraphicsMemoryWithAlignment(allocationData); + + EXPECT_NE(allocation, nullptr); + EXPECT_EQ(1u, mock->createExt.handle); + + memoryManager->freeGraphicsMemory(allocation); +} + +TEST_F(DrmMemoryManagerLocalMemoryTest, givenMemoryInfoAndFailedMmapOffsetWhenAllocateWithAlignmentThenNullptr) { + drm_i915_memory_region_info regionInfo[2] = {}; + regionInfo[0].region = {I915_MEMORY_CLASS_SYSTEM, 0}; + regionInfo[1].region = {I915_MEMORY_CLASS_DEVICE, 0}; + + mock->memoryInfo.reset(new MemoryInfoImpl(regionInfo, 2)); + mock->mmapOffsetRetVal = -1; + + AllocationData allocationData; + allocationData.size = MemoryConstants::pageSize64k; + + auto allocation = memoryManager->allocateGraphicsMemoryWithAlignment(allocationData); + + EXPECT_EQ(allocation, nullptr); + mock->mmapOffsetRetVal = 0; +} + +TEST_F(DrmMemoryManagerLocalMemoryTest, givenMemoryInfoAndFailedGemCreateExtWhenAllocateWithAlignmentThenNullptr) { + drm_i915_memory_region_info regionInfo[2] = {}; + regionInfo[0].region = {I915_MEMORY_CLASS_SYSTEM, 0}; + regionInfo[1].region = {I915_MEMORY_CLASS_DEVICE, 0}; + + mock->memoryInfo.reset(new MemoryInfoImpl(regionInfo, 2)); + mock->gemCreateExtRetVal = -1; + + AllocationData allocationData; + allocationData.size = MemoryConstants::pageSize64k; + + auto allocation = memoryManager->allocateGraphicsMemoryWithAlignment(allocationData); + + EXPECT_EQ(allocation, nullptr); + mock->gemCreateExtRetVal = 0; +} + TEST_F(DrmMemoryManagerLocalMemoryTest, givenUseSystemMemoryFlagWhenGraphicsAllocationInDevicePoolIsAllocatedThenNullptrIsReturned) { MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; AllocationData allocData; diff --git a/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp b/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp index f7d6e8a008..9ffe8a7485 100644 --- a/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp +++ b/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp @@ -1257,8 +1257,8 @@ TEST_F(DrmMemoryManagerTest, givenMemoryManagerWhenLimitedRangeAllocatorSetThenH } TEST_F(DrmMemoryManagerTest, givenMemoryManagerWhenAskedForAllocationWithAlignmentAndLimitedRangeAllocatorSetAndAcquireGpuRangeFailsThenNullIsReturned) { - mock->ioctl_expected.gemUserptr = 1; - mock->ioctl_expected.gemClose = 1; + mock->ioctl_expected.gemUserptr = 0; + mock->ioctl_expected.gemClose = 0; AllocationData allocationData; @@ -2784,6 +2784,39 @@ TEST_F(DrmMemoryManagerUSMHostAllocationTests, memoryManager->freeGraphicsMemoryImpl(alloc); } +TEST_F(DrmMemoryManagerUSMHostAllocationTests, givenCallToallocateGraphicsMemoryWithAlignmentWithisHostUSMAllocationSetToTrueThenGpuAddressIsNotFromGfxPartition) { + mock->ioctl_expected.gemUserptr = 1; + mock->ioctl_expected.gemClose = 1; + + AllocationData allocationData; + allocationData.size = 16384; + allocationData.rootDeviceIndex = rootDeviceIndex; + allocationData.flags.isUSMHostAllocation = true; + allocationData.type = GraphicsAllocation::AllocationType::SVM_CPU; + auto alloc = memoryManager->allocateGraphicsMemoryWithAlignment(allocationData); + + EXPECT_NE(nullptr, alloc); + EXPECT_EQ(reinterpret_cast(alloc->getUnderlyingBuffer()), alloc->getGpuAddress()); + + memoryManager->freeGraphicsMemoryImpl(alloc); +} + +TEST_F(DrmMemoryManagerUSMHostAllocationTests, givenMmapPtrWhenFreeGraphicsMemoryImplThenPtrIsDeallocated) { + mock->ioctl_expected.gemUserptr = 1; + mock->ioctl_expected.gemClose = 1; + + AllocationData allocationData; + allocationData.size = 16384; + allocationData.rootDeviceIndex = rootDeviceIndex; + auto alloc = memoryManager->allocateGraphicsMemoryWithAlignment(allocationData); + EXPECT_NE(nullptr, alloc); + + auto ptr = memoryManager->mmapFunction(0, alloc->getUnderlyingBufferSize(), PROT_READ | PROT_WRITE, MAP_ANONYMOUS, -1, 0); + static_cast(alloc)->setMmapPtr(ptr); + + memoryManager->freeGraphicsMemoryImpl(alloc); +} + TEST_F(DrmMemoryManagerUSMHostAllocationTests, givenCallToallocateGraphicsMemoryWithAlignmentWithisHostUSMAllocationSetToTrueThenTheExistingHostPointerIsUsedAndAllocationIsCreatedSuccesfully) { mock->ioctl_expected.gemUserptr = 1; @@ -3722,9 +3755,9 @@ TEST_F(DrmMemoryManagerTest, givenSvmCpuAllocationWhenSizeAndAlignmentProvidedTh } TEST_F(DrmMemoryManagerTest, givenSvmCpuAllocationWhenSizeAndAlignmentProvidedButFailsToReserveGpuVaThenNullAllocationIsReturned) { - mock->ioctl_expected.gemUserptr = 1; + mock->ioctl_expected.gemUserptr = 0; mock->ioctl_expected.gemWait = 0; - mock->ioctl_expected.gemClose = 1; + mock->ioctl_expected.gemClose = 0; memoryManager->getGfxPartition(rootDeviceIndex)->heapInit(HeapIndex::HEAP_STANDARD, 0, 0); diff --git a/opencl/test/unit_test/os_interface/linux/drm_mock_dg1.h b/opencl/test/unit_test/os_interface/linux/drm_mock_dg1.h index ce41e346da..2e2c5db9bb 100644 --- a/opencl/test/unit_test/os_interface/linux/drm_mock_dg1.h +++ b/opencl/test/unit_test/os_interface/linux/drm_mock_dg1.h @@ -24,9 +24,11 @@ class DrmMockDg1 : public DrmMock { drm_i915_gem_create_ext createExt{}; drm_i915_gem_create_ext_setparam setparamRegion{}; drm_i915_gem_memory_class_instance memRegions{}; + int gemCreateExtRetVal = 0; //DRM_IOCTL_I915_GEM_MMAP_OFFSET __u64 offset = 0; + int mmapOffsetRetVal = 0; virtual int handleRemainingRequests(unsigned long request, void *arg) { if ((request == DRM_IOCTL_I915_QUERY) && (arg != nullptr)) { @@ -69,12 +71,12 @@ class DrmMockDg1 : public DrmMock { if ((this->memRegions.memory_class != I915_MEMORY_CLASS_SYSTEM) && (this->memRegions.memory_class != I915_MEMORY_CLASS_DEVICE)) { return EINVAL; } - return 0; + return gemCreateExtRetVal; } else if (request == DRM_IOCTL_I915_GEM_MMAP_OFFSET) { auto mmap_arg = static_cast(arg); mmap_arg->offset = offset; - return 0; + return mmapOffsetRetVal; } return -1; } diff --git a/shared/source/memory_manager/memory_manager.cpp b/shared/source/memory_manager/memory_manager.cpp index f38456e7d5..d231629f0c 100644 --- a/shared/source/memory_manager/memory_manager.cpp +++ b/shared/source/memory_manager/memory_manager.cpp @@ -468,7 +468,7 @@ GraphicsAllocation *MemoryManager::allocateGraphicsMemory(const AllocationData & bool useLocalMem = heapAssigner.useExternal32BitHeap(allocationData.type) ? HwHelper::get(hwInfo->platform.eRenderCoreFamily).heapInLocalMem(*hwInfo) : false; return allocate32BitGraphicsMemoryImpl(allocationData, useLocalMem); } - if (allocationData.flags.isUSMHostAllocation) { + if (allocationData.flags.isUSMHostAllocation && allocationData.hostPtr) { return allocateUSMHostGraphicsMemory(allocationData); } if (allocationData.hostPtr) { diff --git a/shared/source/memory_manager/unified_memory_manager.cpp b/shared/source/memory_manager/unified_memory_manager.cpp index e175e572bb..290b17e218 100644 --- a/shared/source/memory_manager/unified_memory_manager.cpp +++ b/shared/source/memory_manager/unified_memory_manager.cpp @@ -181,6 +181,11 @@ void *SVMAllocsManager::createUnifiedMemoryAllocation(uint32_t rootDeviceIndex, unifiedMemoryProperties.flags.shareable = memoryProperties.allocationFlags.flags.shareable; unifiedMemoryProperties.flags.isUSMDeviceAllocation = true; + if (memoryProperties.memoryType == InternalMemoryType::HOST_UNIFIED_MEMORY) { + unifiedMemoryProperties.flags.isUSMHostAllocation = true; + unifiedMemoryProperties.flags.isUSMDeviceAllocation = false; + } + GraphicsAllocation *unifiedMemoryAllocation = memoryManager->allocateGraphicsMemoryWithProperties(unifiedMemoryProperties); if (!unifiedMemoryAllocation) { return nullptr; diff --git a/shared/source/os_interface/linux/drm_allocation.h b/shared/source/os_interface/linux/drm_allocation.h index d31bd3d603..708ca22091 100644 --- a/shared/source/os_interface/linux/drm_allocation.h +++ b/shared/source/os_interface/linux/drm_allocation.h @@ -63,6 +63,9 @@ class DrmAllocation : public GraphicsAllocation { uint64_t peekInternalHandle(MemoryManager *memoryManager) override; + void *getMmapPtr() { return this->mmapPtr; } + void setMmapPtr(void *ptr) { this->mmapPtr = ptr; } + void makeBOsResident(OsContext *osContext, uint32_t vmHandleId, std::vector *bufferObjects, bool bind); void bindBO(BufferObject *bo, OsContext *osContext, uint32_t vmHandleId, std::vector *bufferObjects, bool bind); void bindBOs(OsContext *osContext, uint32_t vmHandleId, std::vector *bufferObjects, bool bind); @@ -72,5 +75,7 @@ class DrmAllocation : public GraphicsAllocation { protected: BufferObjects bufferObjects{}; StackVec registeredBoBindHandles; + + void *mmapPtr = nullptr; }; } // namespace NEO diff --git a/shared/source/os_interface/linux/drm_memory_manager.cpp b/shared/source/os_interface/linux/drm_memory_manager.cpp index f1aab1ab7f..916c2bf273 100644 --- a/shared/source/os_interface/linux/drm_memory_manager.cpp +++ b/shared/source/os_interface/linux/drm_memory_manager.cpp @@ -217,19 +217,6 @@ DrmAllocation *DrmMemoryManager::allocateGraphicsMemoryWithAlignment(const Alloc // It's needed to prevent overlapping pages with user pointers size_t cSize = std::max(alignUp(allocationData.size, minAlignment), minAlignment); - auto res = alignedMallocWrapper(cSize, cAlignment); - - if (!res) - return nullptr; - - std::unique_ptr bo(allocUserptr(reinterpret_cast(res), cSize, 0, allocationData.rootDeviceIndex)); - - if (!bo) { - alignedFreeWrapper(res); - return nullptr; - } - - // if limitedRangeAlloction is enabled, memory allocation for bo in the limited Range heap is required uint64_t gpuAddress = 0; size_t alignedSize = cSize; auto svmCpuAllocation = allocationData.type == GraphicsAllocation::AllocationType::SVM_CPU; @@ -238,31 +225,51 @@ DrmAllocation *DrmMemoryManager::allocateGraphicsMemoryWithAlignment(const Alloc alignedSize = alignUp(cSize, cAlignment) + cAlignment; } - if (isLimitedRange(allocationData.rootDeviceIndex) || svmCpuAllocation) { + // if limitedRangeAlloction is enabled, memory allocation for bo in the limited Range heap is required + if ((isLimitedRange(allocationData.rootDeviceIndex) || svmCpuAllocation) && !allocationData.flags.isUSMHostAllocation) { gpuAddress = acquireGpuRange(alignedSize, false, allocationData.rootDeviceIndex, false); if (!gpuAddress) { - alignedFreeWrapper(res); return nullptr; } if (svmCpuAllocation) { - bo->gpuAddress = alignUp(gpuAddress, cAlignment); - } else { - bo->gpuAddress = gpuAddress; + gpuAddress = alignUp(gpuAddress, cAlignment); } } - emitPinningRequest(bo.get(), allocationData); + return createAllocWithAlignment(allocationData, cSize, cAlignment, alignedSize, gpuAddress); +} - auto allocation = new DrmAllocation(allocationData.rootDeviceIndex, allocationData.type, bo.get(), res, bo->gpuAddress, cSize, MemoryPool::System4KBPages); +DrmAllocation *DrmMemoryManager::createAllocWithAlignmentFromUserptr(const AllocationData &allocationData, size_t size, size_t alignment, size_t alignedSVMSize, uint64_t gpuAddress) { + auto res = alignedMallocWrapper(size, alignment); + if (!res) { + return nullptr; + } + + auto bo = allocUserptr(reinterpret_cast(res), size, 0, allocationData.rootDeviceIndex); + + if (!bo) { + alignedFreeWrapper(res); + return nullptr; + } + + obtainGpuAddress(allocationData, bo, gpuAddress); + emitPinningRequest(bo, allocationData); + + auto allocation = new DrmAllocation(allocationData.rootDeviceIndex, allocationData.type, bo, res, bo->gpuAddress, size, MemoryPool::System4KBPages); allocation->setDriverAllocatedCpuPtr(res); - - allocation->setReservedAddressRange(reinterpret_cast(gpuAddress), alignedSize); - bo.release(); + allocation->setReservedAddressRange(reinterpret_cast(gpuAddress), alignedSVMSize); return allocation; } +void DrmMemoryManager::obtainGpuAddress(const AllocationData &allocationData, BufferObject *bo, uint64_t gpuAddress) { + if ((isLimitedRange(allocationData.rootDeviceIndex) || allocationData.type == GraphicsAllocation::AllocationType::SVM_CPU) && + !allocationData.flags.isUSMHostAllocation) { + bo->gpuAddress = gpuAddress; + } +} + DrmAllocation *DrmMemoryManager::allocateUSMHostGraphicsMemory(const AllocationData &allocationData) { const size_t minAlignment = getUserptrAlignment(); // When size == 0 allocate allocationAlignment @@ -661,6 +668,10 @@ void DrmMemoryManager::freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation) DrmAllocation *drmAlloc = static_cast(gfxAllocation); this->unregisterAllocation(gfxAllocation); + if (drmAlloc->getMmapPtr()) { + this->munmapFunction(drmAlloc->getMmapPtr(), gfxAllocation->getUnderlyingBufferSize()); + } + for (auto &engine : this->registeredEngines) { auto memoryOperationsInterface = static_cast(executionEnvironment.rootDeviceEnvironments[gfxAllocation->getRootDeviceIndex()]->memoryOperationsInterface.get()); memoryOperationsInterface->evictWithinOsContext(engine.osContext, *gfxAllocation); diff --git a/shared/source/os_interface/linux/drm_memory_manager.h b/shared/source/os_interface/linux/drm_memory_manager.h index 6d9b076ae6..8e1c69d87e 100644 --- a/shared/source/os_interface/linux/drm_memory_manager.h +++ b/shared/source/os_interface/linux/drm_memory_manager.h @@ -83,6 +83,9 @@ class DrmMemoryManager : public MemoryManager { DrmAllocation *createGraphicsAllocation(OsHandleStorage &handleStorage, const AllocationData &allocationData) override; DrmAllocation *allocateGraphicsMemoryForNonSvmHostPtr(const AllocationData &allocationData) override; DrmAllocation *allocateGraphicsMemoryWithAlignment(const AllocationData &allocationData) override; + DrmAllocation *createAllocWithAlignmentFromUserptr(const AllocationData &allocationData, size_t size, size_t alignment, size_t alignedSVMSize, uint64_t gpuAddress); + DrmAllocation *createAllocWithAlignment(const AllocationData &allocationData, size_t size, size_t alignment, size_t alignedSVMSize, uint64_t gpuAddress); + void obtainGpuAddress(const AllocationData &allocationData, BufferObject *bo, uint64_t gpuAddress); DrmAllocation *allocateUSMHostGraphicsMemory(const AllocationData &allocationData) override; DrmAllocation *allocateGraphicsMemoryWithHostPtr(const AllocationData &allocationData) override; DrmAllocation *allocateGraphicsMemory64kb(const AllocationData &allocationData) override; diff --git a/shared/source/os_interface/linux/drm_memory_manager_allocate_in_device_pool.cpp b/shared/source/os_interface/linux/drm_memory_manager_allocate_in_device_pool.cpp index 6f33945196..8e78e954df 100644 --- a/shared/source/os_interface/linux/drm_memory_manager_allocate_in_device_pool.cpp +++ b/shared/source/os_interface/linux/drm_memory_manager_allocate_in_device_pool.cpp @@ -17,6 +17,10 @@ BufferObject *DrmMemoryManager::createBufferObjectInMemoryRegion(Drm *drm, uint6 return nullptr; } +DrmAllocation *DrmMemoryManager::createAllocWithAlignment(const AllocationData &allocationData, size_t size, size_t alignment, size_t alignedSVMSize, uint64_t gpuAddress) { + return createAllocWithAlignmentFromUserptr(allocationData, size, alignment, alignedSVMSize, gpuAddress); +} + GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryInDevicePool(const AllocationData &allocationData, AllocationStatus &status) { status = AllocationStatus::RetryInNonDevicePool; return nullptr; diff --git a/shared/source/os_interface/linux/drm_memory_manager_allocate_in_device_pool_dg1.cpp b/shared/source/os_interface/linux/drm_memory_manager_allocate_in_device_pool_dg1.cpp index f5fce0f8db..9dff13212f 100644 --- a/shared/source/os_interface/linux/drm_memory_manager_allocate_in_device_pool_dg1.cpp +++ b/shared/source/os_interface/linux/drm_memory_manager_allocate_in_device_pool_dg1.cpp @@ -65,6 +65,47 @@ BufferObject *DrmMemoryManager::createBufferObjectInMemoryRegion(Drm *drm, return bo; } +DrmAllocation *DrmMemoryManager::createAllocWithAlignment(const AllocationData &allocationData, size_t size, size_t alignment, size_t alignedSVMSize, uint64_t gpuAddress) { + if (this->getDrm(allocationData.rootDeviceIndex).getMemoryInfo()) { + std::unique_ptr bo(this->createBufferObjectInMemoryRegion(&this->getDrm(allocationData.rootDeviceIndex), 0u, alignedSVMSize, 0u, maxOsContextCount)); + + if (!bo) { + return nullptr; + } + + drm_i915_gem_mmap_offset gemMmap{}; + gemMmap.handle = bo->peekHandle(); + gemMmap.flags = I915_MMAP_OFFSET_WB; + + auto ret = this->getDrm(allocationData.rootDeviceIndex).ioctl(DRM_IOCTL_I915_GEM_MMAP_OFFSET, &gemMmap); + if (ret != 0) { + return nullptr; + } + + void *cpuPointer = reinterpret_cast(this->mmapFunction(0, alignedSVMSize, PROT_READ | PROT_WRITE, MAP_SHARED, getDrm(allocationData.rootDeviceIndex).getFileDescriptor(), static_cast(gemMmap.offset))); + + auto cpuBasePointer = cpuPointer; + cpuPointer = alignUp(cpuPointer, alignment); + auto offset = ptrDiff(cpuPointer, cpuBasePointer); + + bo->gpuAddress = reinterpret_cast(cpuPointer); + + obtainGpuAddress(allocationData, bo.get(), gpuAddress); + emitPinningRequest(bo.get(), allocationData); + + auto allocation = new DrmAllocation(allocationData.rootDeviceIndex, allocationData.type, bo.get(), cpuPointer, bo->gpuAddress, alignedSVMSize, MemoryPool::System4KBPages); + allocation->setMmapPtr(cpuBasePointer); + allocation->setAllocationOffset(offset); + allocation->setReservedAddressRange(reinterpret_cast(gpuAddress), alignedSVMSize); + + bo.release(); + + return allocation; + } else { + return createAllocWithAlignmentFromUserptr(allocationData, size, alignment, alignedSVMSize, gpuAddress); + } +} + uint64_t getGpuAddress(GraphicsAllocation::AllocationType allocType, GfxPartition *gfxPartition, size_t &sizeAllocated, const void *hostPtr, bool resource48Bit) { uint64_t gpuAddress = 0; switch (allocType) { diff --git a/shared/test/unit_test/mocks/linux/mock_drm_memory_manager.h b/shared/test/unit_test/mocks/linux/mock_drm_memory_manager.h index bc705b0976..fca4fc476d 100644 --- a/shared/test/unit_test/mocks/linux/mock_drm_memory_manager.h +++ b/shared/test/unit_test/mocks/linux/mock_drm_memory_manager.h @@ -64,6 +64,7 @@ class TestedDrmMemoryManager : public MemoryManagerCreate { using DrmMemoryManager::gfxPartitions; using DrmMemoryManager::lockResourceInLocalMemoryImpl; using DrmMemoryManager::memoryForPinBBs; + using DrmMemoryManager::mmapFunction; using DrmMemoryManager::pinBBs; using DrmMemoryManager::pinThreshold; using DrmMemoryManager::pushSharedBufferObject;