From 228da24b38a2f44fb1f4b81506c0b91e5789d645 Mon Sep 17 00:00:00 2001 From: Szymon Morek Date: Mon, 25 Aug 2025 13:22:55 +0000 Subject: [PATCH] performance: align alloc size to 2MB on XeKMD/iGPU Related-To: NEO-15905 Signed-off-by: Szymon Morek --- .../debug_settings/debug_variables_base.inl | 1 + .../os_interface/linux/drm_memory_manager.cpp | 10 +++-- .../source/os_interface/linux/ioctl_helper.h | 1 + .../os_interface/linux/xe/ioctl_helper_xe.cpp | 14 ++++++ .../os_interface/linux/xe/ioctl_helper_xe.h | 1 + .../common/mocks/linux/mock_ioctl_helper.h | 6 +++ shared/test/common/test_files/igdrcl.config | 1 + .../linux/drm_memory_manager_tests.cpp | 45 +++++++++++++++++++ .../linux/xe/ioctl_helper_xe_tests.cpp | 25 +++++++++++ 9 files changed, 101 insertions(+), 3 deletions(-) diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 2074899896..c9f39b10dc 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -388,6 +388,7 @@ DECLARE_DEBUG_VARIABLE(bool, SetAssumeNotInUse, true, "Set AssumeNotInUse flag i DECLARE_DEBUG_VARIABLE(bool, MitigateHostVisibleSignal, false, "Reset host visible signal in CB events, flush L3 when synchronize") DECLARE_DEBUG_VARIABLE(bool, ForceZeroCopyForUseHostPtr, false, "When active all buffer allocations created with CL_MEM_USE_HOST_PTR flag will use share memory with CPU.") DECLARE_DEBUG_VARIABLE(bool, DummyPageBackingEnabled, false, "When true, pass page backing flag to KMD to recover from page faults. Windows only."); +DECLARE_DEBUG_VARIABLE(bool, Disable2MBSizeAlignment, false, "Disable 2MB alignment of user allocations on iGPU/XeKMD") DECLARE_DEBUG_VARIABLE(int32_t, ForceNonCoherentModeForTimestamps, -1, "When active timestamp buffers are allocated in non coherent memory.") DECLARE_DEBUG_VARIABLE(int32_t, EnableReusingGpuTimestamps, -1, "Reuse GPU timestamp for next device time requests. -1: os-specific, 0: disable, 1: enable") DECLARE_DEBUG_VARIABLE(int32_t, AllowZeroCopyWithoutCoherency, -1, "Use cacheline flush instead of memory copy for map/unmap mem object") diff --git a/shared/source/os_interface/linux/drm_memory_manager.cpp b/shared/source/os_interface/linux/drm_memory_manager.cpp index 49926ee176..3bee79eac1 100644 --- a/shared/source/os_interface/linux/drm_memory_manager.cpp +++ b/shared/source/os_interface/linux/drm_memory_manager.cpp @@ -519,12 +519,13 @@ DrmAllocation *DrmMemoryManager::allocateGraphicsMemoryWithAlignmentImpl(const A size_t alignedStorageSize = cSize; size_t alignedVirtualAddressRangeSize = cSize; auto svmCpuAllocation = allocationData.type == AllocationType::svmCpu; - if (svmCpuAllocation) { + auto is2MBSizeAlignmentRequired = getDrm(allocationData.rootDeviceIndex).getIoctlHelper()->is2MBSizeAlignmentRequired(allocationData.type); + if (svmCpuAllocation || is2MBSizeAlignmentRequired) { // add padding in case reserved addr is not aligned auto &productHelper = getGmmHelper(allocationData.rootDeviceIndex)->getRootDeviceEnvironment().getHelper(); if (alignedStorageSize >= 2 * MemoryConstants::megaByte && - productHelper.is2MBLocalMemAlignmentEnabled() && + (is2MBSizeAlignmentRequired || productHelper.is2MBLocalMemAlignmentEnabled()) && cAlignment <= 2 * MemoryConstants::megaByte) { alignedStorageSize = alignUp(cSize, MemoryConstants::pageSize2M); } else { @@ -937,13 +938,16 @@ GraphicsAllocation *DrmMemoryManager::allocateMemoryByKMD(const AllocationData & auto gmm = std::make_unique(gmmHelper, allocationData.hostPtr, allocationData.size, allocationData.alignment, CacheSettingsHelper::getGmmUsageType(allocationData.type, allocationData.flags.uncacheable, productHelper, gmmHelper->getHardwareInfo()), systemMemoryStorageInfo, gmmRequirements); size_t bufferSize = allocationData.size; + auto &drm = getDrm(allocationData.rootDeviceIndex); auto alignment = allocationData.alignment; if (bufferSize >= 2 * MemoryConstants::megaByte) { alignment = MemoryConstants::pageSize2M; + if (drm.getIoctlHelper()->is2MBSizeAlignmentRequired(allocationData.type)) { + bufferSize = alignUp(bufferSize, MemoryConstants::pageSize2M); + } } uint64_t gpuRange = acquireGpuRangeWithCustomAlignment(bufferSize, allocationData.rootDeviceIndex, HeapIndex::heapStandard64KB, alignment); - auto &drm = getDrm(allocationData.rootDeviceIndex); int ret = -1; uint32_t handle; auto patIndex = drm.getPatIndex(gmm.get(), allocationData.type, CacheRegion::defaultRegion, CachePolicy::writeBack, false, MemoryPoolHelper::isSystemMemoryPool(memoryPool)); diff --git a/shared/source/os_interface/linux/ioctl_helper.h b/shared/source/os_interface/linux/ioctl_helper.h index 4f49ece523..421dc37d95 100644 --- a/shared/source/os_interface/linux/ioctl_helper.h +++ b/shared/source/os_interface/linux/ioctl_helper.h @@ -253,6 +253,7 @@ class IoctlHelper { virtual void fillExtSetparamLowLatency(GemContextCreateExtSetParam &extSetparam) { return; } virtual bool isSmallBarConfigAllowed() const = 0; virtual bool overrideMaxSlicesSupported() const { return false; } + virtual bool is2MBSizeAlignmentRequired(AllocationType allocationType) const { return false; } protected: Drm &drm; diff --git a/shared/source/os_interface/linux/xe/ioctl_helper_xe.cpp b/shared/source/os_interface/linux/xe/ioctl_helper_xe.cpp index 3d937f7337..5db8c73e2c 100644 --- a/shared/source/os_interface/linux/xe/ioctl_helper_xe.cpp +++ b/shared/source/os_interface/linux/xe/ioctl_helper_xe.cpp @@ -1896,4 +1896,18 @@ bool IoctlHelperXe::retrieveMmapOffsetForBufferObject(BufferObject &bo, uint64_t return true; } +bool IoctlHelperXe::is2MBSizeAlignmentRequired(AllocationType allocationType) const { + if (debugManager.flags.Disable2MBSizeAlignment.get()) { + return false; + } + + auto &rootDeviceEnvironment = drm.getRootDeviceEnvironment(); + auto hwInfo = rootDeviceEnvironment.getHardwareInfo(); + auto memoryManager = rootDeviceEnvironment.executionEnvironment.memoryManager.get(); + if (hwInfo->capabilityTable.isIntegratedDevice) { + return memoryManager->isExternalAllocation(allocationType); + } + return false; +} + } // namespace NEO diff --git a/shared/source/os_interface/linux/xe/ioctl_helper_xe.h b/shared/source/os_interface/linux/xe/ioctl_helper_xe.h index b120ca3aff..0b6573bcce 100644 --- a/shared/source/os_interface/linux/xe/ioctl_helper_xe.h +++ b/shared/source/os_interface/linux/xe/ioctl_helper_xe.h @@ -143,6 +143,7 @@ class IoctlHelperXe : public IoctlHelper { bool isSmallBarConfigAllowed() const override { return false; } void *pciBarrierMmap() override; bool retrieveMmapOffsetForBufferObject(BufferObject &bo, uint64_t flags, uint64_t &offset) override; + bool is2MBSizeAlignmentRequired(AllocationType allocationType) const override; protected: static constexpr uint32_t maxContextSetProperties = 4; diff --git a/shared/test/common/mocks/linux/mock_ioctl_helper.h b/shared/test/common/mocks/linux/mock_ioctl_helper.h index f83069860c..209b24583e 100644 --- a/shared/test/common/mocks/linux/mock_ioctl_helper.h +++ b/shared/test/common/mocks/linux/mock_ioctl_helper.h @@ -90,6 +90,11 @@ class MockIoctlHelper : public IoctlHelperPrelim20 { topologyMap = topologyMapToSet; return true; } + + bool is2MBSizeAlignmentRequired(AllocationType allocationType) const override { + return is2MBSizeAlignmentRequiredResult; + } + DrmQueryTopologyData topologyDataToSet{}; TopologyMap topologyMapToSet{}; int getDrmParamValueResult = 1234; @@ -98,6 +103,7 @@ class MockIoctlHelper : public IoctlHelperPrelim20 { bool releaseInterruptResult = true; bool callBaseVmAdviseAtomicAttribute = true; + bool is2MBSizeAlignmentRequiredResult = false; std::optional vmAdviseAtomicAttribute{}; }; } // namespace NEO diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index 422f62abe4..0600f0a4ff 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -668,5 +668,6 @@ SplitBcsRequiredTileCount = -1 SplitBcsRequiredEnginesCount = -1 SplitBcsTransferDirectionMask = -1 EnableShareableWithoutNTHandle = -1 +Disable2MBSizeAlignment = 0 InOrderCopyMiFlushSync = -1 # Please don't edit below this line diff --git a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp index a968978118..34297be4d9 100644 --- a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp @@ -9211,4 +9211,49 @@ TEST_F(DrmMemoryManagerTest, givenMmapAlignmentLessOrEqual2MBWhenAllocatingGraph EXPECT_NE(nullptr, allocation); EXPECT_EQ(2 * MemoryConstants::megaByte, memoryManager->passedAlignment); memoryManager->freeGraphicsMemory(allocation); +} + +HWTEST_TEMPLATED_F(DrmMemoryManagerTest, given2MBAlignmentRequiredWhenUnalignedSizePassedToAllocateWithAlignmentThenSizeAlignedTo2MB) { + mock->ioctlExpected.gemUserptr = 1; + mock->ioctlExpected.gemWait = 1; + mock->ioctlExpected.gemClose = 1; + auto mockIoctlHelper = new MockIoctlHelper(*mock); + auto &drm = static_cast(memoryManager->getDrm(mockRootDeviceIndex)); + drm.ioctlHelper.reset(mockIoctlHelper); + + AllocationData allocationData; + allocationData.size = 4 * MemoryConstants::megaByte + 1 * MemoryConstants::megaByte; + allocationData.alignment = MemoryConstants::pageSize64k; + allocationData.type = AllocationType::buffer; + allocationData.rootDeviceIndex = mockRootDeviceIndex; + + mockIoctlHelper->is2MBSizeAlignmentRequiredResult = true; + auto allocation = memoryManager->allocateGraphicsMemoryWithAlignmentImpl(allocationData); + + EXPECT_NE(nullptr, allocation); + EXPECT_EQ(2 * MemoryConstants::megaByte, memoryManager->passedAlignment); + EXPECT_EQ(6 * MemoryConstants::megaByte, allocation->getReservedAddressSize()); + memoryManager->freeGraphicsMemory(allocation); +} + +HWTEST_TEMPLATED_F(DrmMemoryManagerTest, given2MBAlignmentRequiredWhenUnalignedSizePassedToAllocateByKmdThenSizeAlignedTo2MB) { + mock->ioctlExpected.gemCreate = 1; + mock->ioctlExpected.gemWait = 1; + mock->ioctlExpected.gemClose = 1; + auto mockIoctlHelper = new MockIoctlHelper(*mock); + auto &drm = static_cast(memoryManager->getDrm(mockRootDeviceIndex)); + drm.ioctlHelper.reset(mockIoctlHelper); + + AllocationData allocationData; + allocationData.size = 4 * MemoryConstants::megaByte + 1 * MemoryConstants::megaByte; + allocationData.alignment = MemoryConstants::pageSize64k; + allocationData.type = AllocationType::buffer; + allocationData.rootDeviceIndex = mockRootDeviceIndex; + + mockIoctlHelper->is2MBSizeAlignmentRequiredResult = true; + auto allocation = memoryManager->allocateMemoryByKMD(allocationData); + + EXPECT_NE(nullptr, allocation); + EXPECT_EQ(6 * MemoryConstants::megaByte, allocation->getReservedAddressSize()); + memoryManager->freeGraphicsMemory(allocation); } \ No newline at end of file diff --git a/shared/test/unit_test/os_interface/linux/xe/ioctl_helper_xe_tests.cpp b/shared/test/unit_test/os_interface/linux/xe/ioctl_helper_xe_tests.cpp index 964aedb5d0..9f9416cac6 100644 --- a/shared/test/unit_test/os_interface/linux/xe/ioctl_helper_xe_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/xe/ioctl_helper_xe_tests.cpp @@ -3093,3 +3093,28 @@ TEST_F(IoctlHelperXeTest, givenXeIoctlHelperWhenCallingOverrideMaxSlicesSupporte auto xeIoctlHelper = std::make_unique(drm); EXPECT_FALSE(xeIoctlHelper->overrideMaxSlicesSupported()); } + +TEST_F(IoctlHelperXeTest, givenXeIoctlHelperWhenCallingIs2MBSizeAlignmentRequiredThenProperValueReturned) { + DebugManagerStateRestore restorer{}; + auto executionEnvironment = std::make_unique(); + auto &rootDeviceEnvironment = *executionEnvironment->rootDeviceEnvironments[0]; + rootDeviceEnvironment.osInterface = std::make_unique(); + rootDeviceEnvironment.osInterface->setDriverModel(std::make_unique(mockFd, rootDeviceEnvironment)); + auto drm = DrmMockXe::create(rootDeviceEnvironment); + auto xeIoctlHelper = static_cast(drm->getIoctlHelper()); + xeIoctlHelper->initialize(); + executionEnvironment->memoryManager.reset(new TestedDrmMemoryManager{*executionEnvironment}); + + auto hwInfo = drm->getRootDeviceEnvironment().getMutableHardwareInfo(); + hwInfo->capabilityTable.isIntegratedDevice = true; + EXPECT_TRUE(xeIoctlHelper->is2MBSizeAlignmentRequired(AllocationType::buffer)); + EXPECT_FALSE(xeIoctlHelper->is2MBSizeAlignmentRequired(AllocationType::linearStream)); + + hwInfo->capabilityTable.isIntegratedDevice = false; + EXPECT_FALSE(xeIoctlHelper->is2MBSizeAlignmentRequired(AllocationType::buffer)); + EXPECT_FALSE(xeIoctlHelper->is2MBSizeAlignmentRequired(AllocationType::linearStream)); + + hwInfo->capabilityTable.isIntegratedDevice = true; + debugManager.flags.Disable2MBSizeAlignment.set(true); + EXPECT_FALSE(xeIoctlHelper->is2MBSizeAlignmentRequired(AllocationType::buffer)); +}