From 3adc3c5ca05e97bb82893207a633218790b8f3e3 Mon Sep 17 00:00:00 2001 From: Szymon Morek Date: Wed, 15 Sep 2021 15:46:29 +0000 Subject: [PATCH] Move allocation in device pool to a shared file Related-To: NEO-6149 Signed-off-by: Szymon Morek --- .../os_interface/linux/CMakeLists.txt | 4 +- ...drm_memory_manager_local_memory_tests.cpp} | 0 ...memory_manager_local_memory_tests_dg1.cpp} | 105 ++++++++++- .../source/os_interface/linux/CMakeLists.txt | 4 +- .../os_interface/linux/drm_memory_manager.cpp | 177 ++++++++++++++++++ ...pp => drm_memory_manager_local_memory.cpp} | 5 - ...> drm_memory_manager_local_memory_dg1.cpp} | 118 ------------ 7 files changed, 282 insertions(+), 131 deletions(-) rename opencl/test/unit_test/os_interface/linux/{drm_memory_manager_allocate_in_device_pool_tests.cpp => drm_memory_manager_local_memory_tests.cpp} (100%) rename opencl/test/unit_test/os_interface/linux/{drm_memory_manager_allocate_in_device_pool_tests_dg1.cpp => drm_memory_manager_local_memory_tests_dg1.cpp} (93%) rename shared/source/os_interface/linux/{drm_memory_manager_allocate_in_device_pool.cpp => drm_memory_manager_local_memory.cpp} (90%) rename shared/source/os_interface/linux/{drm_memory_manager_allocate_in_device_pool_dg1.cpp => drm_memory_manager_local_memory_dg1.cpp} (54%) diff --git a/opencl/test/unit_test/os_interface/linux/CMakeLists.txt b/opencl/test/unit_test/os_interface/linux/CMakeLists.txt index 052f05db36..82a21d87b8 100644 --- a/opencl/test/unit_test/os_interface/linux/CMakeLists.txt +++ b/opencl/test/unit_test/os_interface/linux/CMakeLists.txt @@ -48,12 +48,12 @@ set(IGDRCL_SRCS_tests_os_interface_linux if(SUPPORT_DG1 AND "${BRANCH_TYPE}" STREQUAL "") list(APPEND IGDRCL_SRCS_tests_os_interface_linux ${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_info_tests_dg1.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_manager_allocate_in_device_pool_tests_dg1.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_manager_local_memory_tests_dg1.cpp ) else() list(APPEND 
IGDRCL_SRCS_tests_os_interface_linux ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}drm_memory_info_tests.cpp - ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}drm_memory_manager_allocate_in_device_pool_tests.cpp + ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}drm_memory_manager_local_memory_tests.cpp ) endif() if(NEO__LIBVA_FOUND) diff --git a/opencl/test/unit_test/os_interface/linux/drm_memory_manager_allocate_in_device_pool_tests.cpp b/opencl/test/unit_test/os_interface/linux/drm_memory_manager_local_memory_tests.cpp similarity index 100% rename from opencl/test/unit_test/os_interface/linux/drm_memory_manager_allocate_in_device_pool_tests.cpp rename to opencl/test/unit_test/os_interface/linux/drm_memory_manager_local_memory_tests.cpp diff --git a/opencl/test/unit_test/os_interface/linux/drm_memory_manager_allocate_in_device_pool_tests_dg1.cpp b/opencl/test/unit_test/os_interface/linux/drm_memory_manager_local_memory_tests_dg1.cpp similarity index 93% rename from opencl/test/unit_test/os_interface/linux/drm_memory_manager_allocate_in_device_pool_tests_dg1.cpp rename to opencl/test/unit_test/os_interface/linux/drm_memory_manager_local_memory_tests_dg1.cpp index 122a1319fe..506d5207a7 100644 --- a/opencl/test/unit_test/os_interface/linux/drm_memory_manager_allocate_in_device_pool_tests_dg1.cpp +++ b/opencl/test/unit_test/os_interface/linux/drm_memory_manager_local_memory_tests_dg1.cpp @@ -951,12 +951,11 @@ TEST_F(DrmMemoryManagerLocalMemoryTest, givenSupportedTypeWhenAllocatingInDevice EXPECT_GT(GmmHelper::canonize(memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY)), gpuAddress); } else { const bool prefer2MBAlignment = allocation->getUnderlyingBufferSize() >= 2 * MemoryConstants::megaByte; - const bool prefer57bitAddressing = memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::HEAP_EXTENDED) > 0 && !allocData.flags.resource48Bit; auto heap = HeapIndex::HEAP_STANDARD64KB; if (prefer2MBAlignment) { heap = 
HeapIndex::HEAP_STANDARD2MB; - } else if (prefer57bitAddressing) { + } else if (memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::HEAP_EXTENDED) > 0 && !allocData.flags.resource48Bit) { heap = HeapIndex::HEAP_EXTENDED; } @@ -1173,14 +1172,14 @@ TEST_F(DrmMemoryManagerLocalMemoryAlignmentTest, givenCustomAlignmentWhenAllocat MemoryManager::AllocationStatus allocationStatus; { - // Too small allocation, fallback to 64KB heap + // Too small allocation, fallback to 2MB heap DebugManager.flags.AlignLocalMemoryVaTo2MB.set(0); DebugManager.flags.ExperimentalEnableCustomLocalMemoryAlignment.set(32 * MemoryConstants::megaByte); auto memoryManager = createMemoryManager(); auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocationData, allocationStatus); ASSERT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, allocationStatus); - EXPECT_TRUE(isAllocationWithinHeap(*memoryManager, *allocation, HeapIndex::HEAP_STANDARD64KB)); + EXPECT_TRUE(isAllocationWithinHeap(*memoryManager, *allocation, HeapIndex::HEAP_STANDARD2MB)); memoryManager->freeGraphicsMemory(allocation); } @@ -1635,4 +1634,102 @@ TEST_F(DrmMemoryManagerLocalMemoryTest, givenAlignmentAndSizeWhenMmapReturnsAlig memoryManager->freeGraphicsMemory(allocation); } +TEST_F(DrmMemoryManagerLocalMemoryTest, givenAllocationWithLargeBufferWhenAllocatingInDevicePoolOnAllMemoryBanksThenCreateFourBufferObjectsWithDifferentGpuVirtualAddressesAndPartialSizes) { + MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; + AllocationData allocData; + allocData.allFlags = 0; + allocData.size = 18 * MemoryConstants::pageSize64k; + allocData.flags.allocateMemory = true; + allocData.type = GraphicsAllocation::AllocationType::BUFFER; + allocData.storageInfo.memoryBanks = maxNBitValue(MemoryBanks::getBankForLocalMemory(3)); + allocData.storageInfo.multiStorage = true; + allocData.rootDeviceIndex = rootDeviceIndex; + + auto allocation = 
memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); + ASSERT_NE(nullptr, allocation); + EXPECT_EQ(MemoryManager::AllocationStatus::Success, status); + EXPECT_EQ(MemoryPool::LocalMemory, allocation->getMemoryPool()); + EXPECT_NE(0u, allocation->getGpuAddress()); + EXPECT_EQ(EngineLimits::maxHandleCount, allocation->getNumGmms()); + + auto drmAllocation = static_cast(allocation); + auto &bos = drmAllocation->getBOs(); + auto boAddress = drmAllocation->getGpuAddress(); + for (auto handleId = 0u; handleId < EngineLimits::maxHandleCount; handleId++) { + auto bo = bos[handleId]; + ASSERT_NE(nullptr, bo); + auto boSize = allocation->getGmm(handleId)->gmmResourceInfo->getSizeAllocation(); + EXPECT_EQ(boAddress, bo->peekAddress()); + EXPECT_EQ(boSize, bo->peekSize()); + EXPECT_EQ(boSize, handleId == 0 || handleId == 1 ? 5 * MemoryConstants::pageSize64k : 4 * MemoryConstants::pageSize64k); + boAddress += boSize; + } + memoryManager->freeGraphicsMemory(allocation); +} + +TEST_F(DrmMemoryManagerLocalMemoryTest, givenAllocationWithLargeBufferWhenAllocatingInDevicePoolOnAllMemoryBanksWithoutMultiStorageThenCreateFourBufferObjectsWithDifferentGpuVirtualAddressesAndPartialSizes) { + MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; + AllocationData allocData; + allocData.allFlags = 0; + allocData.size = 3 * MemoryConstants::pageSize64k; + allocData.flags.allocateMemory = true; + allocData.type = GraphicsAllocation::AllocationType::BUFFER; + allocData.storageInfo.memoryBanks = maxNBitValue(MemoryBanks::getBankForLocalMemory(3)); + allocData.storageInfo.multiStorage = false; + allocData.rootDeviceIndex = rootDeviceIndex; + + auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); + ASSERT_NE(nullptr, allocation); + EXPECT_EQ(MemoryManager::AllocationStatus::Success, status); + EXPECT_EQ(MemoryPool::LocalMemory, allocation->getMemoryPool()); + EXPECT_NE(0u, allocation->getGpuAddress()); + 
EXPECT_EQ(EngineLimits::maxHandleCount, allocation->getNumGmms()); + + auto drmAllocation = static_cast(allocation); + auto &bos = drmAllocation->getBOs(); + auto boAddress = drmAllocation->getGpuAddress(); + for (auto handleId = 0u; handleId < EngineLimits::maxHandleCount; handleId++) { + auto bo = bos[handleId]; + ASSERT_NE(nullptr, bo); + auto boSize = allocation->getGmm(handleId)->gmmResourceInfo->getSizeAllocation(); + EXPECT_EQ(boAddress, bo->peekAddress()); + EXPECT_EQ(boSize, bo->peekSize()); + EXPECT_EQ(boSize, 3 * MemoryConstants::pageSize64k); + } + + memoryManager->freeGraphicsMemory(allocation); +} + +TEST_F(DrmMemoryManagerLocalMemoryTest, givenAllocationWithInvalidCacheRegionWhenAllocatingInDevicePoolThenReturnNullptr) { + MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; + AllocationData allocData; + allocData.allFlags = 0; + allocData.size = 18 * MemoryConstants::pageSize64k; + allocData.flags.allocateMemory = true; + allocData.type = GraphicsAllocation::AllocationType::BUFFER; + allocData.storageInfo.memoryBanks = maxNBitValue(MemoryBanks::getBankForLocalMemory(3)); + allocData.storageInfo.multiStorage = true; + allocData.rootDeviceIndex = rootDeviceIndex; + allocData.cacheRegion = 0xFFFF; + + auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); + ASSERT_EQ(nullptr, allocation); + EXPECT_EQ(MemoryManager::AllocationStatus::Error, status); + memoryManager->freeGraphicsMemory(allocation); +} + +TEST_F(DrmMemoryManagerLocalMemoryTest, givenAllocationWithUnifiedMemoryAllocationThenReturnNullptr) { + MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; + AllocationData allocData; + allocData.allFlags = 0; + allocData.size = 18 * MemoryConstants::pageSize64k; + allocData.flags.allocateMemory = true; + allocData.type = GraphicsAllocation::AllocationType::UNIFIED_SHARED_MEMORY; + allocData.rootDeviceIndex = rootDeviceIndex; + + auto allocation = 
memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); + ASSERT_EQ(nullptr, allocation); + EXPECT_EQ(MemoryManager::AllocationStatus::Error, status); + memoryManager->freeGraphicsMemory(allocation); +} } // namespace NEO diff --git a/shared/source/os_interface/linux/CMakeLists.txt b/shared/source/os_interface/linux/CMakeLists.txt index accb67666e..964dff2efc 100644 --- a/shared/source/os_interface/linux/CMakeLists.txt +++ b/shared/source/os_interface/linux/CMakeLists.txt @@ -82,13 +82,13 @@ endif() if(SUPPORT_DG1 AND "${BRANCH_TYPE}" STREQUAL "") list(APPEND NEO_CORE_OS_INTERFACE_LINUX - ${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_manager_allocate_in_device_pool_dg1.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_manager_local_memory_dg1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_manager_create_multi_host_allocation_dg1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_query_dg1.cpp ) else() list(APPEND NEO_CORE_OS_INTERFACE_LINUX - ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}drm_memory_manager_allocate_in_device_pool.cpp + ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}drm_memory_manager_local_memory.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}drm_memory_manager_create_multi_host_allocation.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}drm_query.cpp ) diff --git a/shared/source/os_interface/linux/drm_memory_manager.cpp b/shared/source/os_interface/linux/drm_memory_manager.cpp index 0382042d76..0ba2f4fede 100644 --- a/shared/source/os_interface/linux/drm_memory_manager.cpp +++ b/shared/source/os_interface/linux/drm_memory_manager.cpp @@ -1112,4 +1112,181 @@ void DrmMemoryManager::unlockResourceInLocalMemoryImpl(BufferObject *bo) { bo->setLockedAddress(nullptr); } +void createColouredGmms(GmmClientContext *clientContext, DrmAllocation &allocation, const StorageInfo &storageInfo, bool compression) { + auto remainingSize = alignUp(allocation.getUnderlyingBufferSize(), MemoryConstants::pageSize64k); + auto handles = storageInfo.getNumBanks(); + /* This logic is 
to colour resource as equally as possible. + Divide size by number of devices and align result up to 64kb page, then subtract it from whole size and allocate it on the first tile. First tile has its chunk. + In the following iteration divide rest of a size by remaining devices and again subtract it. + Notice that if allocation size (in pages) is not divisible by 4 then remainder can be equal to 1,2,3 and by using this algorithm it can be spread efficiently. + + For example: 18 pages allocation and 4 devices. Page size is 64kb. + Divide by 4 and align up to page size and result is 5 pages. After subtracting, remaining size is 13 pages. + Now divide 13 by 3 and align up - result is 5 pages. After subtracting, remaining size is 8 pages. + Divide 8 by 2 - result is 4 pages. + In last iteration remaining 4 pages go to last tile. + 18 pages is coloured to (5, 5, 4, 4). + + It was tested and doesn't require any debug*/ + for (auto handleId = 0u; handleId < handles; handleId++) { + auto currentSize = alignUp(remainingSize / (handles - handleId), MemoryConstants::pageSize64k); + remainingSize -= currentSize; + StorageInfo limitedStorageInfo = storageInfo; + limitedStorageInfo.memoryBanks &= 1u << handleId; + auto gmm = new Gmm(clientContext, + nullptr, + currentSize, + 0u, + false, + compression, + false, + limitedStorageInfo); + allocation.setGmm(gmm, handleId); + } +} + +void fillGmmsInAllocation(GmmClientContext *clientContext, DrmAllocation *allocation, const StorageInfo &storageInfo) { + auto alignedSize = alignUp(allocation->getUnderlyingBufferSize(), MemoryConstants::pageSize64k); + for (auto handleId = 0u; handleId < storageInfo.getNumBanks(); handleId++) { + StorageInfo limitedStorageInfo = storageInfo; + limitedStorageInfo.memoryBanks &= 1u << handleId; + limitedStorageInfo.pageTablesVisibility &= 1u << handleId; + auto gmm = new Gmm(clientContext, nullptr, alignedSize, 0u, false, false, false, limitedStorageInfo); + allocation->setGmm(gmm, handleId); + } +} + 
+uint64_t getGpuAddress(const AlignmentSelector &alignmentSelector, HeapAssigner &heapAssigner, const HardwareInfo &hwInfo, GraphicsAllocation::AllocationType allocType, GfxPartition *gfxPartition, + size_t &sizeAllocated, const void *hostPtr, bool resource48Bit, bool useFrontWindow) { + uint64_t gpuAddress = 0; + switch (allocType) { + case GraphicsAllocation::AllocationType::SVM_GPU: + gpuAddress = reinterpret_cast(hostPtr); + sizeAllocated = 0; + break; + case GraphicsAllocation::AllocationType::KERNEL_ISA: + case GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL: + case GraphicsAllocation::AllocationType::INTERNAL_HEAP: + case GraphicsAllocation::AllocationType::DEBUG_MODULE_AREA: { + auto heap = heapAssigner.get32BitHeapIndex(allocType, true, hwInfo, useFrontWindow); + gpuAddress = GmmHelper::canonize(gfxPartition->heapAllocate(heap, sizeAllocated)); + } break; + case GraphicsAllocation::AllocationType::WRITE_COMBINED: + sizeAllocated = 0; + break; + default: + AlignmentSelector::CandidateAlignment alignment = alignmentSelector.selectAlignment(sizeAllocated); + if (gfxPartition->getHeapLimit(HeapIndex::HEAP_EXTENDED) > 0 && !resource48Bit) { + alignment.heap = HeapIndex::HEAP_EXTENDED; + } + gpuAddress = GmmHelper::canonize(gfxPartition->heapAllocateWithCustomAlignment(alignment.heap, sizeAllocated, alignment.alignment)); + break; + } + return gpuAddress; +} + +GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryInDevicePool(const AllocationData &allocationData, AllocationStatus &status) { + status = AllocationStatus::RetryInNonDevicePool; + if (!this->localMemorySupported[allocationData.rootDeviceIndex] || + allocationData.flags.useSystemMemory || + (allocationData.flags.allow32Bit && this->force32bitAllocations) || + allocationData.type == GraphicsAllocation::AllocationType::SHARED_RESOURCE_COPY) { + return nullptr; + } + + if (allocationData.type == GraphicsAllocation::AllocationType::UNIFIED_SHARED_MEMORY) { + auto allocation = 
this->createSharedUnifiedMemoryAllocation(allocationData); + status = allocation ? AllocationStatus::Success : AllocationStatus::Error; + return allocation; + } + + std::unique_ptr gmm; + size_t sizeAligned = 0; + auto numHandles = allocationData.storageInfo.getNumBanks(); + bool createSingleHandle = 1 == numHandles; + if (allocationData.type == GraphicsAllocation::AllocationType::IMAGE) { + allocationData.imgInfo->useLocalMemory = true; + gmm = std::make_unique(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), *allocationData.imgInfo, allocationData.storageInfo); + sizeAligned = alignUp(allocationData.imgInfo->size, MemoryConstants::pageSize64k); + } else { + if (allocationData.type == GraphicsAllocation::AllocationType::WRITE_COMBINED) { + sizeAligned = alignUp(allocationData.size + MemoryConstants::pageSize64k, 2 * MemoryConstants::megaByte) + 2 * MemoryConstants::megaByte; + } else { + sizeAligned = alignUp(allocationData.size, MemoryConstants::pageSize64k); + } + if (createSingleHandle) { + gmm = std::make_unique(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), + nullptr, + sizeAligned, + 0u, + allocationData.flags.uncacheable, + allocationData.flags.preferRenderCompressed, + false, + allocationData.storageInfo); + } + } + + auto sizeAllocated = sizeAligned; + auto gfxPartition = getGfxPartition(allocationData.rootDeviceIndex); + auto hwInfo = executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getHardwareInfo(); + auto gpuAddress = getGpuAddress(this->alignmentSelector, this->heapAssigner, *hwInfo, + allocationData.type, gfxPartition, sizeAllocated, + allocationData.hostPtr, allocationData.flags.resource48Bit, allocationData.flags.use32BitFrontWindow); + + auto allocation = std::make_unique(allocationData.rootDeviceIndex, numHandles, allocationData.type, nullptr, nullptr, gpuAddress, sizeAligned, MemoryPool::LocalMemory); + if 
(createSingleHandle) { + allocation->setDefaultGmm(gmm.release()); + } else if (allocationData.storageInfo.multiStorage) { + createColouredGmms(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), + *allocation, + allocationData.storageInfo, + allocationData.flags.preferRenderCompressed); + } else { + fillGmmsInAllocation(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), allocation.get(), allocationData.storageInfo); + } + allocation->storageInfo = allocationData.storageInfo; + allocation->setFlushL3Required(allocationData.flags.flushL3); + allocation->setUncacheable(allocationData.flags.uncacheable); + allocation->setReservedAddressRange(reinterpret_cast(gpuAddress), sizeAllocated); + + if (!createDrmAllocation(&getDrm(allocationData.rootDeviceIndex), allocation.get(), gpuAddress, maxOsContextCount)) { + for (auto handleId = 0u; handleId < allocationData.storageInfo.getNumBanks(); handleId++) { + delete allocation->getGmm(handleId); + } + gfxPartition->freeGpuAddressRange(GmmHelper::decanonize(gpuAddress), sizeAllocated); + status = AllocationStatus::Error; + return nullptr; + } + if (allocationData.type == GraphicsAllocation::AllocationType::WRITE_COMBINED) { + auto cpuAddress = lockResource(allocation.get()); + auto alignedCpuAddress = alignDown(cpuAddress, 2 * MemoryConstants::megaByte); + auto offset = ptrDiff(cpuAddress, alignedCpuAddress); + allocation->setAllocationOffset(offset); + allocation->setCpuPtrAndGpuAddress(cpuAddress, reinterpret_cast(alignedCpuAddress)); + DEBUG_BREAK_IF(allocation->storageInfo.multiStorage); + allocation->getBO()->setAddress(reinterpret_cast(cpuAddress)); + } + if (allocationData.flags.requiresCpuAccess) { + auto cpuAddress = lockResource(allocation.get()); + allocation->setCpuPtrAndGpuAddress(cpuAddress, gpuAddress); + } + if (heapAssigner.useInternal32BitHeap(allocationData.type)) { + 
allocation->setGpuBaseAddress(GmmHelper::canonize(getInternalHeapBaseAddress(allocationData.rootDeviceIndex, true))); + } + if (!allocation->setCacheRegion(&getDrm(allocationData.rootDeviceIndex), static_cast(allocationData.cacheRegion))) { + for (auto bo : allocation->getBOs()) { + delete bo; + } + for (auto handleId = 0u; handleId < allocationData.storageInfo.getNumBanks(); handleId++) { + delete allocation->getGmm(handleId); + } + gfxPartition->freeGpuAddressRange(GmmHelper::decanonize(gpuAddress), sizeAllocated); + status = AllocationStatus::Error; + return nullptr; + } + + status = AllocationStatus::Success; + return allocation.release(); +} + } // namespace NEO diff --git a/shared/source/os_interface/linux/drm_memory_manager_allocate_in_device_pool.cpp b/shared/source/os_interface/linux/drm_memory_manager_local_memory.cpp similarity index 90% rename from shared/source/os_interface/linux/drm_memory_manager_allocate_in_device_pool.cpp rename to shared/source/os_interface/linux/drm_memory_manager_local_memory.cpp index 2904dc0b79..8b5e95cfe7 100644 --- a/shared/source/os_interface/linux/drm_memory_manager_allocate_in_device_pool.cpp +++ b/shared/source/os_interface/linux/drm_memory_manager_local_memory.cpp @@ -45,11 +45,6 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const return nullptr; } -GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryInDevicePool(const AllocationData &allocationData, AllocationStatus &status) { - status = AllocationStatus::RetryInNonDevicePool; - return nullptr; -} - void *DrmMemoryManager::lockResourceInLocalMemoryImpl(BufferObject *bo) { return nullptr; } diff --git a/shared/source/os_interface/linux/drm_memory_manager_allocate_in_device_pool_dg1.cpp b/shared/source/os_interface/linux/drm_memory_manager_local_memory_dg1.cpp similarity index 54% rename from shared/source/os_interface/linux/drm_memory_manager_allocate_in_device_pool_dg1.cpp rename to 
shared/source/os_interface/linux/drm_memory_manager_local_memory_dg1.cpp index cad2afca30..c00baec8c7 100644 --- a/shared/source/os_interface/linux/drm_memory_manager_allocate_in_device_pool_dg1.cpp +++ b/shared/source/os_interface/linux/drm_memory_manager_local_memory_dg1.cpp @@ -145,49 +145,6 @@ DrmAllocation *DrmMemoryManager::createAllocWithAlignment(const AllocationData & } } -uint64_t getGpuAddress(HeapAssigner &heapAssigner, const HardwareInfo &hwInfo, GraphicsAllocation::AllocationType allocType, GfxPartition *gfxPartition, - size_t &sizeAllocated, const void *hostPtr, bool resource48Bit, bool useFrontWindow) { - uint64_t gpuAddress = 0; - switch (allocType) { - case GraphicsAllocation::AllocationType::SVM_GPU: - gpuAddress = reinterpret_cast(hostPtr); - sizeAllocated = 0; - break; - case GraphicsAllocation::AllocationType::KERNEL_ISA: - case GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL: - case GraphicsAllocation::AllocationType::INTERNAL_HEAP: - case GraphicsAllocation::AllocationType::DEBUG_MODULE_AREA: { - auto heap = heapAssigner.get32BitHeapIndex(allocType, true, hwInfo, useFrontWindow); - gpuAddress = GmmHelper::canonize(gfxPartition->heapAllocate(heap, sizeAllocated)); - } break; - case GraphicsAllocation::AllocationType::WRITE_COMBINED: - sizeAllocated = 0; - break; - default: - const size_t customAlignment = static_cast(DebugManager.flags.ExperimentalEnableCustomLocalMemoryAlignment.get()); - const bool preferCustomAlignment = customAlignment > 0 && sizeAllocated >= customAlignment; - const bool prefer2MBAlignment = DebugManager.flags.AlignLocalMemoryVaTo2MB.get() != 0 && - sizeAllocated >= 2 * MemoryConstants::megaByte && - (!preferCustomAlignment || customAlignment <= 2 * MemoryConstants::megaByte); - const bool prefer57bitAddressing = gfxPartition->getHeapLimit(HeapIndex::HEAP_EXTENDED) > 0 && !resource48Bit; - - auto heapIndex = HeapIndex::HEAP_STANDARD64KB; - size_t alignment = 0u; - if (prefer2MBAlignment) { - heapIndex = 
HeapIndex::HEAP_STANDARD2MB; - } else if (preferCustomAlignment) { - heapIndex = customAlignment > 2 * MemoryConstants::megaByte ? HeapIndex::HEAP_STANDARD2MB : HeapIndex::HEAP_STANDARD64KB; - alignment = customAlignment; - } else if (prefer57bitAddressing) { - heapIndex = HeapIndex::HEAP_EXTENDED; - } - - gpuAddress = GmmHelper::canonize(gfxPartition->heapAllocateWithCustomAlignment(heapIndex, sizeAllocated, alignment)); - break; - } - return gpuAddress; -} - bool DrmMemoryManager::createDrmAllocation(Drm *drm, DrmAllocation *allocation, uint64_t gpuAddress, size_t maxOsContextCount) { std::array, EngineLimits::maxHandleCount> bos{}; auto &storageInfo = allocation->storageInfo; @@ -213,81 +170,6 @@ bool DrmMemoryManager::createDrmAllocation(Drm *drm, DrmAllocation *allocation, return true; } -GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryInDevicePool(const AllocationData &allocationData, AllocationStatus &status) { - status = AllocationStatus::RetryInNonDevicePool; - if (!this->localMemorySupported[allocationData.rootDeviceIndex] || - allocationData.flags.useSystemMemory || - (allocationData.flags.allow32Bit && this->force32bitAllocations) || - allocationData.type == GraphicsAllocation::AllocationType::SHARED_RESOURCE_COPY) { - return nullptr; - } - - std::unique_ptr gmm; - size_t sizeAligned = 0; - auto numHandles = allocationData.storageInfo.getNumBanks(); - DEBUG_BREAK_IF(numHandles > 1); - if (allocationData.type == GraphicsAllocation::AllocationType::IMAGE) { - allocationData.imgInfo->useLocalMemory = true; - gmm = std::make_unique(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), *allocationData.imgInfo, allocationData.storageInfo); - sizeAligned = alignUp(allocationData.imgInfo->size, MemoryConstants::pageSize64k); - } else { - if (allocationData.type == GraphicsAllocation::AllocationType::WRITE_COMBINED) { - sizeAligned = alignUp(allocationData.size + MemoryConstants::pageSize64k, 2 * 
MemoryConstants::megaByte) + 2 * MemoryConstants::megaByte; - } else { - sizeAligned = alignUp(allocationData.size, MemoryConstants::pageSize64k); - } - gmm = std::make_unique(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), - nullptr, - sizeAligned, - 0, - allocationData.flags.uncacheable, - allocationData.flags.preferRenderCompressed, - false, - allocationData.storageInfo); - } - - auto sizeAllocated = sizeAligned; - auto gfxPartition = getGfxPartition(allocationData.rootDeviceIndex); - auto hwInfo = executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getHardwareInfo(); - auto gpuAddress = getGpuAddress(this->heapAssigner, *hwInfo, - allocationData.type, gfxPartition, sizeAllocated, - allocationData.hostPtr, allocationData.flags.resource48Bit, allocationData.flags.use32BitFrontWindow); - - auto allocation = std::make_unique(allocationData.rootDeviceIndex, numHandles, allocationData.type, nullptr, nullptr, gpuAddress, sizeAligned, MemoryPool::LocalMemory); - allocation->setDefaultGmm(gmm.release()); - allocation->storageInfo = allocationData.storageInfo; - allocation->setFlushL3Required(allocationData.flags.flushL3); - allocation->setReservedAddressRange(reinterpret_cast(gpuAddress), sizeAllocated); - - if (!createDrmAllocation(&getDrm(allocationData.rootDeviceIndex), allocation.get(), gpuAddress, maxOsContextCount)) { - for (auto handleId = 0u; handleId < allocationData.storageInfo.getNumBanks(); handleId++) { - delete allocation->getGmm(handleId); - } - gfxPartition->freeGpuAddressRange(GmmHelper::decanonize(gpuAddress), sizeAllocated); - status = AllocationStatus::Error; - return nullptr; - } - if (allocationData.type == GraphicsAllocation::AllocationType::WRITE_COMBINED) { - auto cpuAddress = lockResource(allocation.get()); - auto alignedCpuAddress = alignDown(cpuAddress, 2 * MemoryConstants::megaByte); - auto offset = ptrDiff(cpuAddress, alignedCpuAddress); - 
allocation->setAllocationOffset(offset); - allocation->setCpuPtrAndGpuAddress(cpuAddress, reinterpret_cast(alignedCpuAddress)); - DEBUG_BREAK_IF(allocation->storageInfo.multiStorage); - allocation->getBO()->setAddress(reinterpret_cast(cpuAddress)); - } - if (allocationData.flags.requiresCpuAccess) { - auto cpuAddress = lockResource(allocation.get()); - allocation->setCpuPtrAndGpuAddress(cpuAddress, gpuAddress); - } - if (heapAssigner.useInternal32BitHeap(allocationData.type)) { - allocation->setGpuBaseAddress(GmmHelper::canonize(getInternalHeapBaseAddress(allocationData.rootDeviceIndex, true))); - } - - status = AllocationStatus::Success; - return allocation.release(); -} - void *DrmMemoryManager::lockResourceInLocalMemoryImpl(BufferObject *bo) { if (bo == nullptr) return nullptr;