performance: align alloc size to 2MB on XeKMD/iGPU

Related-To: NEO-15905

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek
2025-08-25 13:22:55 +00:00
committed by Compute-Runtime-Automation
parent b2ea1d98e6
commit 228da24b38
9 changed files with 101 additions and 3 deletions

View File

@@ -388,6 +388,7 @@ DECLARE_DEBUG_VARIABLE(bool, SetAssumeNotInUse, true, "Set AssumeNotInUse flag i
DECLARE_DEBUG_VARIABLE(bool, MitigateHostVisibleSignal, false, "Reset host visible signal in CB events, flush L3 when synchronize")
DECLARE_DEBUG_VARIABLE(bool, ForceZeroCopyForUseHostPtr, false, "When active all buffer allocations created with CL_MEM_USE_HOST_PTR flag will use share memory with CPU.")
DECLARE_DEBUG_VARIABLE(bool, DummyPageBackingEnabled, false, "When true, pass page backing flag to KMD to recover from page faults. Windows only.");
// Read by IoctlHelperXe::is2MBSizeAlignmentRequired(): when set to true, that query returns false.
DECLARE_DEBUG_VARIABLE(bool, Disable2MBSizeAlignment, false, "Disable 2MB alignment of user allocations on iGPU/XeKMD")
DECLARE_DEBUG_VARIABLE(int32_t, ForceNonCoherentModeForTimestamps, -1, "When active timestamp buffers are allocated in non coherent memory.")
DECLARE_DEBUG_VARIABLE(int32_t, EnableReusingGpuTimestamps, -1, "Reuse GPU timestamp for next device time requests. -1: os-specific, 0: disable, 1: enable")
DECLARE_DEBUG_VARIABLE(int32_t, AllowZeroCopyWithoutCoherency, -1, "Use cacheline flush instead of memory copy for map/unmap mem object")

View File

@@ -519,12 +519,13 @@ DrmAllocation *DrmMemoryManager::allocateGraphicsMemoryWithAlignmentImpl(const A
size_t alignedStorageSize = cSize;
size_t alignedVirtualAddressRangeSize = cSize;
auto svmCpuAllocation = allocationData.type == AllocationType::svmCpu;
if (svmCpuAllocation) {
auto is2MBSizeAlignmentRequired = getDrm(allocationData.rootDeviceIndex).getIoctlHelper()->is2MBSizeAlignmentRequired(allocationData.type);
if (svmCpuAllocation || is2MBSizeAlignmentRequired) {
// add padding in case reserved addr is not aligned
auto &productHelper = getGmmHelper(allocationData.rootDeviceIndex)->getRootDeviceEnvironment().getHelper<ProductHelper>();
if (alignedStorageSize >= 2 * MemoryConstants::megaByte &&
productHelper.is2MBLocalMemAlignmentEnabled() &&
(is2MBSizeAlignmentRequired || productHelper.is2MBLocalMemAlignmentEnabled()) &&
cAlignment <= 2 * MemoryConstants::megaByte) {
alignedStorageSize = alignUp(cSize, MemoryConstants::pageSize2M);
} else {
@@ -937,13 +938,16 @@ GraphicsAllocation *DrmMemoryManager::allocateMemoryByKMD(const AllocationData &
auto gmm = std::make_unique<Gmm>(gmmHelper, allocationData.hostPtr,
allocationData.size, allocationData.alignment, CacheSettingsHelper::getGmmUsageType(allocationData.type, allocationData.flags.uncacheable, productHelper, gmmHelper->getHardwareInfo()), systemMemoryStorageInfo, gmmRequirements);
size_t bufferSize = allocationData.size;
auto &drm = getDrm(allocationData.rootDeviceIndex);
auto alignment = allocationData.alignment;
if (bufferSize >= 2 * MemoryConstants::megaByte) {
alignment = MemoryConstants::pageSize2M;
if (drm.getIoctlHelper()->is2MBSizeAlignmentRequired(allocationData.type)) {
bufferSize = alignUp(bufferSize, MemoryConstants::pageSize2M);
}
}
uint64_t gpuRange = acquireGpuRangeWithCustomAlignment(bufferSize, allocationData.rootDeviceIndex, HeapIndex::heapStandard64KB, alignment);
auto &drm = getDrm(allocationData.rootDeviceIndex);
int ret = -1;
uint32_t handle;
auto patIndex = drm.getPatIndex(gmm.get(), allocationData.type, CacheRegion::defaultRegion, CachePolicy::writeBack, false, MemoryPoolHelper::isSystemMemoryPool(memoryPool));

View File

@@ -253,6 +253,7 @@ class IoctlHelper {
virtual void fillExtSetparamLowLatency(GemContextCreateExtSetParam &extSetparam) { return; }
virtual bool isSmallBarConfigAllowed() const = 0;
virtual bool overrideMaxSlicesSupported() const { return false; }
// Tells callers whether the size of an allocation of the given type must be aligned to 2MB.
// The base implementation returns false for every allocation type; derived helpers may override it.
virtual bool is2MBSizeAlignmentRequired(AllocationType allocationType) const { return false; }
protected:
Drm &drm;

View File

@@ -1896,4 +1896,18 @@ bool IoctlHelperXe::retrieveMmapOffsetForBufferObject(BufferObject &bo, uint64_t
return true;
}
// Decides whether the size of an allocation of the given type must be aligned to 2MB.
// Returns false when the Disable2MBSizeAlignment debug flag is set or when the device
// is not reported as integrated; otherwise the answer is whatever the memory manager's
// isExternalAllocation() returns for this allocation type.
bool IoctlHelperXe::is2MBSizeAlignmentRequired(AllocationType allocationType) const {
    // The debug flag takes precedence over every other condition.
    const bool alignmentDisabledByFlag = debugManager.flags.Disable2MBSizeAlignment.get();
    if (alignmentDisabledByFlag) {
        return false;
    }

    auto &rootDeviceEnv = drm.getRootDeviceEnvironment();
    auto hardwareInfo = rootDeviceEnv.getHardwareInfo();
    auto memManager = rootDeviceEnv.executionEnvironment.memoryManager.get();

    // Only integrated devices are candidates for the 2MB size alignment.
    if (!hardwareInfo->capabilityTable.isIntegratedDevice) {
        return false;
    }

    return memManager->isExternalAllocation(allocationType);
}
} // namespace NEO

View File

@@ -143,6 +143,7 @@ class IoctlHelperXe : public IoctlHelper {
bool isSmallBarConfigAllowed() const override { return false; }
void *pciBarrierMmap() override;
bool retrieveMmapOffsetForBufferObject(BufferObject &bo, uint64_t flags, uint64_t &offset) override;
// Overrides the IoctlHelper default. The result depends on the Disable2MBSizeAlignment
// debug flag, on whether the device is integrated, and on the allocation type.
bool is2MBSizeAlignmentRequired(AllocationType allocationType) const override;
protected:
static constexpr uint32_t maxContextSetProperties = 4;