feature: Support for pStart

Related-To: NEO-15156, GSD-9939

Support for start address hint in zeVirtualMemReserve.

If the allocator fails to reserve the range at pStart, it falls back to the baseline
allocateWithCustomAlignment(...)

Signed-off-by: Chandio, Bibrak Qamar <bibrak.qamar.chandio@intel.com>
This commit is contained in:
Chandio, Bibrak Qamar
2025-08-01 23:43:17 +00:00
committed by Compute-Runtime-Automation
parent 01a4769141
commit 47caeda487
26 changed files with 1256 additions and 182 deletions

View File

@@ -111,24 +111,26 @@ void GfxPartition::Heap::init(uint64_t base, uint64_t size, size_t allocationAli
heapGranularity = GfxPartition::heapGranularity2MB;
}
// Exclude very first and very last 64K from GPU address range allocation
// Exclude very first and very last page from GPU address range allocation
if (size > 2 * heapGranularity) {
size -= 2 * heapGranularity;
}
alloc = std::make_unique<HeapAllocator>(base + heapGranularity, size, allocationAlignment);
initialized = true;
}
void GfxPartition::Heap::initExternalWithFrontWindow(uint64_t base, uint64_t size) {
void GfxPartition::Heap::initExternalWithFrontWindow(uint64_t base, uint64_t size, size_t allocationAlignment) {
this->base = base;
this->size = size;
size -= GfxPartition::heapGranularity;
alloc = std::make_unique<HeapAllocator>(base, size, MemoryConstants::pageSize, 0u);
alloc = std::make_unique<HeapAllocator>(base, size, allocationAlignment, 0u);
initialized = true;
}
void GfxPartition::Heap::initWithFrontWindow(uint64_t base, uint64_t size, uint64_t frontWindowSize) {
void GfxPartition::Heap::initWithFrontWindow(uint64_t base, uint64_t size, uint64_t frontWindowSize, size_t allocationAlignment) {
this->base = base;
this->size = size;
@@ -136,24 +138,38 @@ void GfxPartition::Heap::initWithFrontWindow(uint64_t base, uint64_t size, uint6
size -= GfxPartition::heapGranularity;
size -= frontWindowSize;
alloc = std::make_unique<HeapAllocator>(base + frontWindowSize, size, MemoryConstants::pageSize);
alloc = std::make_unique<HeapAllocator>(base + frontWindowSize, size, allocationAlignment);
initialized = true;
}
void GfxPartition::Heap::initFrontWindow(uint64_t base, uint64_t size) {
void GfxPartition::Heap::initFrontWindow(uint64_t base, uint64_t size, size_t allocationAlignment) {
this->base = base;
this->size = size;
alloc = std::make_unique<HeapAllocator>(base, size, MemoryConstants::pageSize, 0u);
alloc = std::make_unique<HeapAllocator>(base, size, allocationAlignment, 0u);
initialized = true;
}
// Returns the allocation alignment this heap's HeapAllocator was constructed with.
size_t GfxPartition::Heap::getAllocAlignment() const {
return alloc->getAllocationAlignment();
}
// Allocates `size` bytes from this heap at the allocator's default alignment.
// `size` is passed by reference so the caller observes any rounding the
// underlying HeapAllocator applies.
uint64_t GfxPartition::Heap::allocate(size_t &size) {
return alloc->allocate(size);
}
// Allocates `size` bytes, preferring `requiredStartAddress` as the start of the
// returned range. Fallback behavior on a miss is defined by
// HeapAllocator::allocateWithStartAddressHint — TODO confirm against that implementation.
uint64_t GfxPartition::Heap::allocateWithStartAddressHint(const uint64_t requiredStartAddress, size_t &size) {
return alloc->allocateWithStartAddressHint(requiredStartAddress, size);
}
// Allocates `sizeToAllocate` bytes with a caller-specified `alignment`
// instead of the heap's default allocation alignment.
uint64_t GfxPartition::Heap::allocateWithCustomAlignment(size_t &sizeToAllocate, size_t alignment) {
return alloc->allocateWithCustomAlignment(sizeToAllocate, alignment);
}
// Combination of the two allocators above: caller-specified alignment plus a
// preferred start address. Used by the zeVirtualMemReserve pStart path.
uint64_t GfxPartition::Heap::allocateWithCustomAlignmentWithStartAddressHint(const uint64_t requiredStartAddress, size_t &sizeToAllocate, size_t alignment) {
return alloc->allocateWithCustomAlignmentWithStartAddressHint(requiredStartAddress, sizeToAllocate, alignment);
}
// Returns a previously allocated range [ptr, ptr + size) to the heap.
void GfxPartition::Heap::free(uint64_t ptr, size_t size) {
alloc->free(ptr, size);
}
@@ -248,7 +264,7 @@ bool GfxPartition::init(uint64_t gpuAddressSpace, size_t cpuAddressRangeSizeToRe
auto cpuVirtualAddressSize = CpuInfo::getInstance().getVirtualAddressSize();
if (cpuVirtualAddressSize == 48 && gpuAddressSpace == maxNBitValue(48)) {
gfxBase = maxNBitValue(48 - 1) + 1;
heapInit(HeapIndex::heapSvm, 0ull, gfxBase);
heapInitWithAllocationAlignment(HeapIndex::heapSvm, 0ull, gfxBase, MemoryConstants::pageSize2M);
} else if (gpuAddressSpace == maxNBitValue(47)) {
if (reservedCpuAddressRangeForHeapSvm.alignedPtr == nullptr) {
if (cpuAddressRangeSizeToReserve == 0) {
@@ -264,10 +280,10 @@ bool GfxPartition::init(uint64_t gpuAddressSpace, size_t cpuAddressRangeSizeToRe
}
gfxBase = reinterpret_cast<uint64_t>(reservedCpuAddressRangeForHeapSvm.alignedPtr);
gfxTop = gfxBase + cpuAddressRangeSizeToReserve;
heapInit(HeapIndex::heapSvm, 0ull, gpuAddressSpace + 1);
heapInitWithAllocationAlignment(HeapIndex::heapSvm, 0ull, gpuAddressSpace + 1, MemoryConstants::pageSize2M);
} else if (gpuAddressSpace < maxNBitValue(47)) {
gfxBase = 0ull;
heapInit(HeapIndex::heapSvm, 0ull, 0ull);
heapInitWithAllocationAlignment(HeapIndex::heapSvm, 0ull, 0ull, MemoryConstants::pageSize2M);
} else {
if (!initAdditionalRange(cpuVirtualAddressSize, gpuAddressSpace, gfxBase, gfxTop, rootDeviceIndex, systemMemorySize)) {
return false;
@@ -277,14 +293,14 @@ bool GfxPartition::init(uint64_t gpuAddressSpace, size_t cpuAddressRangeSizeToRe
for (auto heap : GfxPartition::heap32Names) {
if (useExternalFrontWindowPool && HeapAssigner::heapTypeExternalWithFrontWindowPool(heap)) {
heapInitExternalWithFrontWindow(heap, gfxBase, gfxHeap32Size);
heapInitExternalWithFrontWindow(heap, gfxBase, gfxHeap32Size, MemoryConstants::pageSize);
size_t externalFrontWindowSize = GfxPartition::externalFrontWindowPoolSize;
auto allocation = heapAllocate(heap, externalFrontWindowSize);
heapInitExternalWithFrontWindow(HeapAssigner::mapExternalWindowIndex(heap), allocation,
externalFrontWindowSize);
externalFrontWindowSize, MemoryConstants::pageSize);
} else if (HeapAssigner::isInternalHeap(heap)) {
heapInitWithFrontWindow(heap, gfxBase, gfxHeap32Size, GfxPartition::internalFrontWindowPoolSize);
heapInitFrontWindow(HeapAssigner::mapInternalWindowIndex(heap), gfxBase, GfxPartition::internalFrontWindowPoolSize);
heapInitWithFrontWindow(heap, gfxBase, gfxHeap32Size, GfxPartition::internalFrontWindowPoolSize, MemoryConstants::pageSize);
heapInitFrontWindow(HeapAssigner::mapInternalWindowIndex(heap), gfxBase, GfxPartition::internalFrontWindowPoolSize, MemoryConstants::pageSize);
} else {
heapInit(heap, gfxBase, gfxHeap32Size);
}
@@ -368,9 +384,9 @@ bool GfxPartition::initAdditionalRange(uint32_t cpuVirtualAddressSize, uint64_t
gfxBase = castToUint64(reservedCpuAddressRangeForHeapSvm.alignedPtr);
gfxTop = gfxBase + reservedCpuAddressRangeForHeapSvm.sizeToReserve;
if (gpuAddressSpace == maxNBitValue(57)) {
heapInit(HeapIndex::heapSvm, 0ull, maxNBitValue(57 - 1) + 1);
heapInitWithAllocationAlignment(HeapIndex::heapSvm, 0ull, maxNBitValue(57 - 1) + 1, MemoryConstants::pageSize2M);
} else {
heapInit(HeapIndex::heapSvm, 0ull, maxNBitValue(48) + 1);
heapInitWithAllocationAlignment(HeapIndex::heapSvm, 0ull, maxNBitValue(48) + 1, MemoryConstants::pageSize2M);
}
if (gpuAddressSpace == maxNBitValue(57)) {
@@ -384,7 +400,7 @@ bool GfxPartition::initAdditionalRange(uint32_t cpuVirtualAddressSize, uint64_t
// On 48 bit CPU this range is reserved for OS usage, do not reserve
gfxBase = maxNBitValue(48 - 1) + 1; // 0x800000000000
gfxTop = maxNBitValue(48) + 1; // 0x1000000000000
heapInit(HeapIndex::heapSvm, 0ull, gfxBase);
heapInitWithAllocationAlignment(HeapIndex::heapSvm, 0ull, gfxBase, MemoryConstants::pageSize2M);
}
// Init HEAP_EXTENDED only for 57 bit GPU

View File

@@ -49,26 +49,34 @@ class GfxPartition {
getHeap(heapIndex).init(base, size, allocationAlignment);
}
void heapInitExternalWithFrontWindow(HeapIndex heapIndex, uint64_t base, uint64_t size) {
getHeap(heapIndex).initExternalWithFrontWindow(base, size);
void heapInitExternalWithFrontWindow(HeapIndex heapIndex, uint64_t base, uint64_t size, size_t allocationAlignment) {
getHeap(heapIndex).initExternalWithFrontWindow(base, size, allocationAlignment);
}
void heapInitWithFrontWindow(HeapIndex heapIndex, uint64_t base, uint64_t size, uint64_t frontWindowSize) {
getHeap(heapIndex).initWithFrontWindow(base, size, frontWindowSize);
void heapInitWithFrontWindow(HeapIndex heapIndex, uint64_t base, uint64_t size, uint64_t frontWindowSize, size_t allocationAlignment) {
getHeap(heapIndex).initWithFrontWindow(base, size, frontWindowSize, allocationAlignment);
}
void heapInitFrontWindow(HeapIndex heapIndex, uint64_t base, uint64_t size) {
getHeap(heapIndex).initFrontWindow(base, size);
void heapInitFrontWindow(HeapIndex heapIndex, uint64_t base, uint64_t size, size_t allocationAlignment) {
getHeap(heapIndex).initFrontWindow(base, size, allocationAlignment);
}
// Allocates from the selected heap; `size` may be rounded up by the allocator.
MOCKABLE_VIRTUAL uint64_t heapAllocate(HeapIndex heapIndex, size_t &size) {
return getHeap(heapIndex).allocate(size);
}
// Allocates from the selected heap, preferring `requiredStartAddress` as the range start.
MOCKABLE_VIRTUAL uint64_t heapAllocateWithStartAddressHint(const uint64_t requiredStartAddress, HeapIndex heapIndex, size_t &size) {
return getHeap(heapIndex).allocateWithStartAddressHint(requiredStartAddress, size);
}
// Allocates from the selected heap with a caller-specified alignment.
MOCKABLE_VIRTUAL uint64_t heapAllocateWithCustomAlignment(HeapIndex heapIndex, size_t &size, size_t alignment) {
return getHeap(heapIndex).allocateWithCustomAlignment(size, alignment);
}
// Allocates with both a custom alignment and a preferred start address
// (zeVirtualMemReserve pStart support).
MOCKABLE_VIRTUAL uint64_t heapAllocateWithCustomAlignmentWithStartAddressHint(const uint64_t requiredStartAddress, HeapIndex heapIndex, size_t &size, size_t alignment) {
return getHeap(heapIndex).allocateWithCustomAlignmentWithStartAddressHint(requiredStartAddress, size, alignment);
}
// Releases a range previously obtained from the selected heap.
MOCKABLE_VIRTUAL void heapFree(HeapIndex heapIndex, uint64_t ptr, size_t size) {
getHeap(heapIndex).free(ptr, size);
}
@@ -83,8 +91,31 @@ class GfxPartition {
return getHeap(heapIndex).getLimit();
}
// Exposes the heap's allocation alignment (used by callers as the page size
// associated with addresses in that heap).
size_t getHeapAllocationAlignment(HeapIndex heapIndex) {
return getHeap(heapIndex).getAllocAlignment();
}
// True once the heap's init*() has run and its allocator exists.
bool isHeapInitialized(HeapIndex heapIndex) {
return getHeap(heapIndex).isInitialized();
}
uint64_t getHeapMinimalAddress(HeapIndex heapIndex);
// Maps a GPU address back to the heap containing it.
// Scans all initialized heaps; on the first heap whose [base, limit] range
// covers `ptr`, writes that heap's index and its allocation alignment
// (reported as the page size) into the out-parameters and returns true.
// Returns false when `ptr` lies outside every initialized heap; the
// out-parameters are left untouched in that case.
// NOTE(review): first match wins — assumes heap ranges do not overlap; confirm.
MOCKABLE_VIRTUAL bool getHeapIndexAndPageSizeBasedOnAddress(uint64_t ptr, HeapIndex &heapIndex, size_t &pageSize) {
for (size_t index = 0; index < heaps.size(); ++index) {
if (!isHeapInitialized(static_cast<HeapIndex>(index))) {
continue;
}
if (isAddressInHeapRange(static_cast<HeapIndex>(index), ptr)) {
heapIndex = static_cast<HeapIndex>(index);
pageSize = getHeapAllocationAlignment(heapIndex);
return true;
}
}
return false;
}
bool isLimitedRange() { return getHeap(HeapIndex::heapSvm).getSize() == 0ull; }
static bool isAnyHeap32(HeapIndex heapIndex) {
@@ -110,25 +141,34 @@ class GfxPartition {
public:
Heap() = default;
void init(uint64_t base, uint64_t size, size_t allocationAlignment);
void initExternalWithFrontWindow(uint64_t base, uint64_t size);
void initWithFrontWindow(uint64_t base, uint64_t size, uint64_t frontWindowSize);
void initFrontWindow(uint64_t base, uint64_t size);
void initExternalWithFrontWindow(uint64_t base, uint64_t size, size_t allocationAlignment);
void initWithFrontWindow(uint64_t base, uint64_t size, uint64_t frontWindowSize, size_t allocationAlignment);
void initFrontWindow(uint64_t base, uint64_t size, size_t allocationAlignment);
uint64_t getBase() const { return base; }
uint64_t getSize() const { return size; }
uint64_t getLimit() const { return size ? base + size - 1 : 0; }
size_t getAllocAlignment() const;
uint64_t allocate(size_t &size);
uint64_t allocateWithStartAddressHint(const uint64_t requiredStartAddress, size_t &size);
uint64_t allocateWithCustomAlignment(size_t &sizeToAllocate, size_t alignment);
uint64_t allocateWithCustomAlignmentWithStartAddressHint(const uint64_t requiredStartAddress, size_t &sizeToAllocate, size_t alignment);
void free(uint64_t ptr, size_t size);
bool isInitialized() const { return initialized; }
protected:
uint64_t base = 0, size = 0;
std::unique_ptr<HeapAllocator> alloc;
bool initialized = false;
};
// Internal accessor: heap index -> Heap slot in the fixed-size array.
Heap &getHeap(HeapIndex heapIndex) {
return heaps[static_cast<uint32_t>(heapIndex)];
}
// Inclusive range check: base <= ptr <= limit of the given heap.
// Note: for an uninitialized heap both base and limit are 0, so callers
// filter with isHeapInitialized() first.
bool isAddressInHeapRange(HeapIndex heapIndex, uint64_t ptr) {
return (ptr >= getHeap(heapIndex).getBase()) && (ptr <= getHeap(heapIndex).getLimit());
}
std::array<Heap, static_cast<uint32_t>(HeapIndex::totalHeaps)> heaps;
OSMemory::ReservedCpuAddressRange &reservedCpuAddressRangeForHeapSvm;

View File

@@ -262,6 +262,7 @@ class MemoryManager {
virtual AddressRange reserveGpuAddress(const uint64_t requiredStartAddress, size_t size, const RootDeviceIndicesContainer &rootDeviceIndices, uint32_t *reservedOnRootDeviceIndex) = 0;
virtual AddressRange reserveGpuAddressOnHeap(const uint64_t requiredStartAddress, size_t size, const RootDeviceIndicesContainer &rootDeviceIndices, uint32_t *reservedOnRootDeviceIndex, HeapIndex heap, size_t alignment) = 0;
virtual size_t selectAlignmentAndHeap(size_t size, HeapIndex *heap) = 0;
virtual size_t selectAlignmentAndHeap(const uint64_t requiredStartAddress, size_t size, HeapIndex *heap) = 0;
virtual void freeGpuAddress(AddressRange addressRange, uint32_t rootDeviceIndex) = 0;
virtual AddressRange reserveCpuAddress(const uint64_t requiredStartAddress, size_t size) = 0;
AddressRange reserveCpuAddressWithZeroBaseRetry(const uint64_t requiredStartAddress, size_t size);

View File

@@ -647,12 +647,29 @@ MemoryAllocation *OsAgnosticMemoryManager::createMemoryAllocation(AllocationType
}
size_t OsAgnosticMemoryManager::selectAlignmentAndHeap(size_t size, HeapIndex *heap) {
*heap = HeapIndex::heapStandard;
return MemoryConstants::pageSize64k;
return selectAlignmentAndHeap(0ULL, size, heap);
}
// Selects the heap and page-size alignment to use for a GPU VA reservation.
//
// Defaults to the 2MB standard heap with 2MB pages. When the caller supplies
// a non-zero start-address hint (zeVirtualMemReserve pStart), the heap that
// already contains that address is looked up so the reservation can be placed
// there with that heap's native page size; on a miss the defaults stand.
// `size` is currently unused in the selection. Returns the chosen alignment;
// *heap receives the chosen heap index.
size_t OsAgnosticMemoryManager::selectAlignmentAndHeap(const uint64_t requiredStartAddress, size_t size, HeapIndex *heap) {
    // Always default to HEAP STANDARD 2MB.
    *heap = HeapIndex::heapStandard2MB;
    size_t pageSizeAlignment = MemoryConstants::pageSize2M;
    if (requiredStartAddress != 0ULL) {
        // NOTE(review): heap lookup is done on root device 0 only — confirm
        // this is intended for multi-device configurations.
        auto rootDeviceIndex = 0u;
        auto gfxPartition = getGfxPartition(rootDeviceIndex);
        // On success *heap and pageSizeAlignment are overwritten; on failure
        // they keep the defaults set above, so no branch on the result is
        // needed (the original returned the same value on both paths).
        gfxPartition->getHeapIndexAndPageSizeBasedOnAddress(requiredStartAddress, *heap, pageSizeAlignment);
    }
    return pageSizeAlignment;
}
AddressRange OsAgnosticMemoryManager::reserveGpuAddress(const uint64_t requiredStartAddress, size_t size, const RootDeviceIndicesContainer &rootDeviceIndices, uint32_t *reservedOnRootDeviceIndex) {
return reserveGpuAddressOnHeap(requiredStartAddress, size, rootDeviceIndices, reservedOnRootDeviceIndex, HeapIndex::heapStandard, MemoryConstants::pageSize64k);
return reserveGpuAddressOnHeap(requiredStartAddress, size, rootDeviceIndices, reservedOnRootDeviceIndex, HeapIndex::heapStandard2MB, MemoryConstants::pageSize2M);
}
AddressRange OsAgnosticMemoryManager::reserveGpuAddressOnHeap(const uint64_t requiredStartAddress, size_t size, const RootDeviceIndicesContainer &rootDeviceIndices, uint32_t *reservedOnRootDeviceIndex, HeapIndex heap, size_t alignment) {
@@ -661,7 +678,7 @@ AddressRange OsAgnosticMemoryManager::reserveGpuAddressOnHeap(const uint64_t req
for (auto rootDeviceIndex : rootDeviceIndices) {
auto gfxPartition = getGfxPartition(rootDeviceIndex);
auto gmmHelper = getGmmHelper(rootDeviceIndex);
gpuVa = gmmHelper->canonize(gfxPartition->heapAllocate(heap, size));
gpuVa = requiredStartAddress == 0 ? gmmHelper->canonize(gfxPartition->heapAllocateWithCustomAlignment(heap, size, alignment)) : gmmHelper->canonize(gfxPartition->heapAllocateWithCustomAlignmentWithStartAddressHint(gmmHelper->decanonize(requiredStartAddress), heap, size, alignment));
if (gpuVa != 0u) {
*reservedOnRootDeviceIndex = rootDeviceIndex;
break;

View File

@@ -47,6 +47,7 @@ class OsAgnosticMemoryManager : public MemoryManager {
AddressRange reserveGpuAddress(const uint64_t requiredStartAddress, size_t size, const RootDeviceIndicesContainer &rootDeviceIndices, uint32_t *reservedOnRootDeviceIndex) override;
AddressRange reserveGpuAddressOnHeap(const uint64_t requiredStartAddress, size_t size, const RootDeviceIndicesContainer &rootDeviceIndices, uint32_t *reservedOnRootDeviceIndex, HeapIndex heap, size_t alignment) override;
size_t selectAlignmentAndHeap(size_t size, HeapIndex *heap) override;
size_t selectAlignmentAndHeap(const uint64_t requiredStartAddress, size_t size, HeapIndex *heap) override;
void freeGpuAddress(AddressRange addressRange, uint32_t rootDeviceIndex) override;
AddressRange reserveCpuAddress(const uint64_t requiredStartAddress, size_t size) override;
void freeCpuAddress(AddressRange addressRange) override;