Mirror of https://github.com/intel/compute-runtime.git
Use GfxPartition for GPU address range allocations [2/n] - OsAgnosticMemoryManager

Related-To: NEO-2877
Change-Id: I887126362381ac960608a2150fae211631d3cd5b
Signed-off-by: Venevtsev, Igor <igor.venevtsev@intel.com>

Commit 165d1e4e55, parent bb6dfd4fe6, committed by sys_ocldev
@@ -115,7 +115,7 @@ class Device : public BaseObject<_cl_device_id> {
    const HardwareCapabilities &getHardwareCapabilities() const { return hardwareCapabilities; }
    uint32_t getDeviceIndex() const { return deviceIndex; }
    bool isFullRangeSvm() const {
        return getHardwareInfo().capabilityTable.gpuAddressSpace == MemoryConstants::max48BitAddress;
        return executionEnvironment->isFullRangeSvm();
    }

  protected:
@@ -11,7 +11,13 @@
#include "runtime/helpers/aligned_memory.h"

namespace NEO {

size_t getSizeToMap() {
    return static_cast<size_t>(alignUp(4 * GB - 8096, 4096));
}

size_t getSizeToReserve() {
    return maxNBitValue<47> / 4;
}

} // namespace NEO
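(Not part of the commit.) For scale: maxNBitValue<47> is 2^47 - 1, so getSizeToReserve() asks for roughly a quarter of the 47-bit range, about 32 TB of CPU address space, reserved but never committed. A minimal standalone check, assuming the usual "lowest N bits set" definition of maxNBitValue:

#include <cstdint>

// Assumed equivalent of NEO's maxNBitValue<N>: the lowest N bits set.
template <uint64_t N>
constexpr uint64_t maxNBitValue = (1ull << N) - 1;

// (2^47 - 1) / 4 == 0x1FFFFFFFFFFF, just under 2^45 bytes (~32 TB).
constexpr uint64_t sizeToReserve = maxNBitValue<47> / 4;
static_assert(sizeToReserve == 0x1FFFFFFFFFFFull, "a quarter of the 47-bit range");

int main() { return 0; }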
@@ -54,7 +54,7 @@ class ExecutionEnvironment : public ReferenceTrackedObject<ExecutionEnvironment>
    const HardwareInfo *getHardwareInfo() const { return hwInfo.get(); }
    HardwareInfo *getMutableHardwareInfo() const { return hwInfo.get(); }
    bool isFullRangeSvm() const {
        return hwInfo->capabilityTable.gpuAddressSpace == MemoryConstants::max48BitAddress;
        return hwInfo->capabilityTable.gpuAddressSpace >= maxNBitValue<47>;
    }

    GmmHelper *getGmmHelper() const;
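(Not part of the commit.) The practical effect of the relaxed comparison: previously only a device reporting exactly the 48-bit address space counted as full-range SVM; with >= maxNBitValue<47>, a 47-bit space (the case the new GfxPartition::init handles via reserveCpuAddressRange further down) also qualifies. A small sketch with hypothetical stand-ins for the NEO constants:

#include <cassert>
#include <cstdint>

// Hypothetical stand-ins, for illustration only.
template <uint64_t N>
constexpr uint64_t maxNBitValue = (1ull << N) - 1;
constexpr uint64_t max48BitAddress = maxNBitValue<48>;

int main() {
    uint64_t space47 = maxNBitValue<47>; // device with a 47-bit GPU address space
    uint64_t space48 = maxNBitValue<48>; // device with a 48-bit GPU address space

    // Old predicate: only the exactly-48-bit case was treated as full-range SVM.
    assert(space48 == max48BitAddress && !(space47 == max48BitAddress));

    // New predicate: 47-bit and larger address spaces qualify as well.
    assert(space47 >= maxNBitValue<47> && space48 >= maxNBitValue<47>);
    return 0;
}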
@@ -8,6 +8,7 @@
#include "runtime/memory_manager/gfx_partition.h"

#include "runtime/helpers/aligned_memory.h"
#include "runtime/os_interface/os_memory.h"

namespace NEO {
@@ -16,33 +17,115 @@ const std::array<HeapIndex, 4> GfxPartition::heap32Names{{HeapIndex::HEAP_INTERN
                                                           HeapIndex::HEAP_EXTERNAL_DEVICE_MEMORY,
                                                           HeapIndex::HEAP_EXTERNAL}};

void GfxPartition::init(uint64_t gpuAddressSpace) {
const std::array<HeapIndex, 6> GfxPartition::heapNonSvmNames{{HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY,
                                                              HeapIndex::HEAP_INTERNAL,
                                                              HeapIndex::HEAP_EXTERNAL_DEVICE_MEMORY,
                                                              HeapIndex::HEAP_EXTERNAL,
                                                              HeapIndex::HEAP_STANDARD,
                                                              HeapIndex::HEAP_STANDARD64KB}};

GfxPartition::~GfxPartition() {
    if (reservedCpuAddressRange) {
        OSMemory::releaseCpuAddressRange(reservedCpuAddressRange, reservedCpuAddressRangeSize);
    }
}

    // 1) Full Range SVM gfx layout:
    //
    //                  SVM                     H0   H1   H2   H3      STANDARD      STANDARD64K
    //   |__________________________________|____|____|____|____|________________|______________|
    //   |                                  |    |    |    |    |                |              |
    //   |                                gfxBase                                            gfxTop
    //  0x0              0x0000800000000000 / 0x10000000 for 32 bit              0x0000FFFFFFFFFFFFFFFF
    //
    // 2) Limited Range gfx layout (no SVM):
    //
    //     H0   H1   H2   H3          STANDARD           STANDARD64K
    //   |____|____|____|____|____________________|__________________|
    //   |    |    |    |    |                    |                  |
    // gfxBase                                                     gfxTop
    //  0x0                                                   0xFFF...FFF < 48 bit

void GfxPartition::Heap::init(uint64_t base, uint64_t size) {
    this->base = base;
    this->size = size;

    uint64_t gfxTop = gpuAddressSpace + 1;
    uint64_t gfxBase = is64bit ? MemoryConstants::max64BitAppAddress + 1 : MemoryConstants::max32BitAddress + 1;
    const uint64_t gfxHeap32Size = 4 * MemoryConstants::gigaByte;

    if (gpuAddressSpace < MemoryConstants::max48BitAddress) {
        gfxBase = 0ull;
    // Exclude very first and very last 64K from GPU address range allocation
    if (size > 2 * GfxPartition::heapGranularity) {
        size -= 2 * GfxPartition::heapGranularity;
    }

    heapInit(HeapIndex::HEAP_SVM, 0ull, gfxBase);
    alloc = std::make_unique<HeapAllocator>(base + GfxPartition::heapGranularity, size);
}

void GfxPartition::freeGpuAddressRange(uint64_t ptr, size_t size) {
    for (auto heapName : GfxPartition::heapNonSvmNames) {
        auto &heap = getHeap(heapName);
        if ((ptr > heap.getBase()) && ((ptr + size) < heap.getLimit())) {
            heap.free(ptr, size);
            break;
        }
    }
}

void GfxPartition::init(uint64_t gpuAddressSpace, size_t cpuAddressRangeSizeToReserve) {

    /*
     * I. 64-bit builds:
     *
     * 1) 48-bit Full Range SVM gfx layout:
     *
     *                  SVM                     H0   H1   H2   H3      STANDARD      STANDARD64K
     *   |__________________________________|____|____|____|____|________________|______________|
     *   |                                  |    |    |    |    |                |              |
     *   |                                gfxBase                                            gfxTop
     *  0x0                       0x0000800000000000                              0x0000FFFFFFFFFFFF
     *
     *
     * 2) 47-bit Full Range SVM gfx layout:
     *
     *                              gfxSize = 2^47 / 4 = 0x200000000000
     *                      ________________________________________________
     *                     /                                                \
     *       SVM          /  H0   H1   H2   H3      STANDARD    STANDARD64K  \        SVM
     *   |________________|____|____|____|____|________________|______________|_______________|
     *   |                |    |    |    |    |                |              |               |
     *   |              gfxBase                                             gfxTop            |
     *  0x0    reserveCpuAddressRange(gfxSize)                                        0x00007FFFFFFFFFFF
     *   \_____________________________________ SVM _________________________________________/
     *
     *
     *
     * 3) Limited Range gfx layout (no SVM):
     *
     *     H0   H1   H2   H3          STANDARD           STANDARD64K
     *   |____|____|____|____|____________________|__________________|
     *   |    |    |    |    |                    |                  |
     * gfxBase                                                     gfxTop
     *  0x0                                                   0xFFF...FFF < 47 bit
     *
     *
     * II. 32-bit builds:
     *
     * 1) 32-bit Full Range SVM gfx layout:
     *
     *      SVM     H0   H1   H2   H3      STANDARD      STANDARD64K
     *   |_______|____|____|____|____|________________|______________|
     *   |       |    |    |    |    |                |              |
     *   |     gfxBase                                             gfxTop
     *  0x0  0x100000000                                       gpuAddressSpace
     */

    uint64_t gfxTop = gpuAddressSpace + 1;
    uint64_t gfxBase = 0x0ull;
    const uint64_t gfxHeap32Size = 4 * MemoryConstants::gigaByte;

    if (is32bit) {
        gfxBase = maxNBitValue<32> + 1;
        heapInit(HeapIndex::HEAP_SVM, 0ull, gfxBase);
    } else {
        if (gpuAddressSpace == maxNBitValue<48>) {
            gfxBase = maxNBitValue<48 - 1> + 1;
            heapInit(HeapIndex::HEAP_SVM, 0ull, gfxBase);
        } else if (gpuAddressSpace == maxNBitValue<47>) {
            reservedCpuAddressRangeSize = cpuAddressRangeSizeToReserve;
            UNRECOVERABLE_IF(reservedCpuAddressRangeSize == 0);
            reservedCpuAddressRange = OSMemory::reserveCpuAddressRange(reservedCpuAddressRangeSize);
            UNRECOVERABLE_IF(reservedCpuAddressRange == nullptr);
            UNRECOVERABLE_IF(!isAligned<GfxPartition::heapGranularity>(reservedCpuAddressRange));
            gfxBase = reinterpret_cast<uint64_t>(reservedCpuAddressRange);
            gfxTop = gfxBase + reservedCpuAddressRangeSize;
            heapInit(HeapIndex::HEAP_SVM, 0ull, gpuAddressSpace + 1);
        } else if (gpuAddressSpace < maxNBitValue<47>) {
            gfxBase = 0ull;
            heapInit(HeapIndex::HEAP_SVM, 0ull, 0ull);
        } else {
            UNRECOVERABLE_IF("Invalid GPU Address Range!");
        }
    }

    for (auto heap : GfxPartition::heap32Names) {
        heapInit(heap, gfxBase, gfxHeap32Size);
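(Not part of the commit.) To make the 48-bit full-range-SVM case of the layout comment concrete, here is a small standalone sketch that prints the carving it describes: HEAP_SVM covers [0, 2^47), the four 4 GB heap32 heaps are stacked at gfxBase = 2^47, and the remainder is assumed to be split between STANDARD and STANDARD64K (the tail of init that performs this split lies outside the quoted hunk):

#include <cstdint>
#include <cstdio>

int main() {
    const uint64_t GB = 1ull << 30;
    const uint64_t gpuAddressSpace = (1ull << 48) - 1; // maxNBitValue<48>
    const uint64_t gfxTop = gpuAddressSpace + 1;
    const uint64_t gfxHeap32Size = 4 * GB;
    uint64_t gfxBase = 1ull << 47;                     // maxNBitValue<48 - 1> + 1

    std::printf("%-28s: [0x%016llx, 0x%016llx)\n", "HEAP_SVM",
                0ull, (unsigned long long)gfxBase);

    const char *heap32[] = {"HEAP_INTERNAL_DEVICE_MEMORY", "HEAP_INTERNAL",
                            "HEAP_EXTERNAL_DEVICE_MEMORY", "HEAP_EXTERNAL"};
    for (const char *name : heap32) {
        std::printf("%-28s: [0x%016llx, 0x%016llx)\n", name,
                    (unsigned long long)gfxBase, (unsigned long long)(gfxBase + gfxHeap32Size));
        gfxBase += gfxHeap32Size; // 4 GB heaps placed back to back above gfxBase
    }

    // Assumption: the remaining range is split evenly between the two standard heaps.
    const uint64_t gfxStandardSize = (gfxTop - gfxBase) / 2;
    std::printf("%-28s: [0x%016llx, 0x%016llx)\n", "HEAP_STANDARD",
                (unsigned long long)gfxBase, (unsigned long long)(gfxBase + gfxStandardSize));
    std::printf("%-28s: [0x%016llx, 0x%016llx)\n", "HEAP_STANDARD64K",
                (unsigned long long)(gfxBase + gfxStandardSize), (unsigned long long)gfxTop);
    return 0;
}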
@@ -31,8 +31,9 @@ constexpr auto internalHeapIndex = is32bit ? HeapIndex::HEAP_INTERNAL : HeapInde
class GfxPartition {
  public:
    GfxPartition() {}
    ~GfxPartition();

    void init(uint64_t gpuAddressSpace);
    void init(uint64_t gpuAddressSpace, size_t cpuAddressRangeSizeToReserve);

    void heapInit(HeapIndex heapIndex, uint64_t base, uint64_t size) {
        getHeap(heapIndex).init(base, size);
@@ -46,33 +47,35 @@ class GfxPartition {
        getHeap(heapIndex).free(ptr, size);
    }

    void freeGpuAddressRange(uint64_t ptr, size_t size);

    uint64_t getHeapBase(HeapIndex heapIndex) {
        return getHeap(heapIndex).getBase();
    }

    uint64_t getHeapLimit(HeapIndex heapIndex) {
        return getHeap(heapIndex).getBase() + getHeap(heapIndex).getSize() - 1;
        return getHeap(heapIndex).getLimit();
    }

    uint64_t getHeapMinimalAddress(HeapIndex heapIndex) {
        return getHeapBase(heapIndex) + heapGranularity;
    }

    bool isLimitedRange() { return getHeap(HeapIndex::HEAP_SVM).getSize() == 0ull; }

    static const uint64_t heapGranularity = MemoryConstants::pageSize64k;

    static const std::array<HeapIndex, 4> heap32Names;
    static const std::array<HeapIndex, 6> heapNonSvmNames;

  protected:
    class Heap {
      public:
        Heap() = default;
        void init(uint64_t base, uint64_t size);
        uint64_t getBase() const { return base; }
        uint64_t getSize() const { return size; }
        void init(uint64_t base, uint64_t size) {
            this->base = base;
            this->size = size;
            alloc = std::make_unique<HeapAllocator>(base + heapGranularity, size ? size - heapGranularity : 0ull);
        }
        uint64_t getLimit() const { return base + size - 1; }
        uint64_t allocate(size_t &size) { return alloc->allocate(size); }
        void free(uint64_t ptr, size_t size) { alloc->free(ptr, size); }

@@ -82,10 +85,13 @@ class GfxPartition {
    };

    Heap &getHeap(HeapIndex heapIndex) {
        return heap[static_cast<uint32_t>(heapIndex)];
        return heaps[static_cast<uint32_t>(heapIndex)];
    }

    std::array<Heap, static_cast<uint32_t>(HeapIndex::TOTAL_HEAPS)> heap;
    std::array<Heap, static_cast<uint32_t>(HeapIndex::TOTAL_HEAPS)> heaps;

    void *reservedCpuAddressRange = nullptr;
    size_t reservedCpuAddressRangeSize = 0;
};

} // namespace NEO
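(Not part of the commit.) A toy stand-in for the Heap bookkeeping above, to pin down the base/limit/minimal-address relationships: the limit is the last byte inside the heap, and because the allocator starts at base + heapGranularity (and trims the tail), no allocation can ever land on the very first 64K page of a heap:

#include <cassert>
#include <cstdint>

// Toy model of GfxPartition::Heap from the header above; names mirror the diff,
// but this is illustrative code, not NEO's implementation.
struct Heap {
    uint64_t base;
    uint64_t size;
    static constexpr uint64_t heapGranularity = 64 * 1024; // MemoryConstants::pageSize64k
    uint64_t getBase() const { return base; }
    uint64_t getLimit() const { return base + size - 1; }           // inclusive upper bound
    uint64_t getMinimalAddress() const { return base + heapGranularity; }
};

int main() {
    Heap external{1ull << 47, 4ull * 1024 * 1024 * 1024}; // a 4 GB heap placed at 2^47

    // Allocations never start at the heap base: the first 64K is skipped.
    assert(external.getMinimalAddress() == external.getBase() + 64 * 1024);

    // The limit is the last addressable byte of the heap, not one past it.
    assert(external.getLimit() == external.getBase() + external.size - 1);
    return 0;
}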
@@ -111,6 +111,8 @@ class MemoryManager {

    virtual uint64_t getExternalHeapBaseAddress() = 0;

    bool isLimitedRange() { return gfxPartition.isLimitedRange(); }

    bool peek64kbPagesEnabled() const { return enable64kbpages; }
    bool peekForce32BitAllocations() const { return force32bitAllocations; }
    virtual void setForce32BitAllocations(bool newValue);
@@ -18,11 +18,20 @@
#include "runtime/helpers/options.h"
#include "runtime/helpers/surface_formats.h"
#include "runtime/memory_manager/host_ptr_manager.h"
#include "runtime/os_interface/os_memory.h"

#include <cassert>

namespace NEO {

OsAgnosticMemoryManager::OsAgnosticMemoryManager(bool aubUsage, ExecutionEnvironment &executionEnvironment) : MemoryManager(executionEnvironment) {
    auto gpuAddressSpace = executionEnvironment.getHardwareInfo()->capabilityTable.gpuAddressSpace;

    // 4 x sizeof(Heap32) + 2 x sizeof(Standard/Standard64k)
    size_t reservedCpuAddressRangeSize = is64bit ? (4 * 4 + 2 * (aubUsage ? 32 : 4)) * GB : 0;
    gfxPartition.init(gpuAddressSpace, reservedCpuAddressRangeSize);
}

OsAgnosticMemoryManager::~OsAgnosticMemoryManager() {
    applyCommonCleanup();
}
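(Not part of the commit.) The size comment translates to four 4 GB heap32 heaps plus two standard heaps at 4 GB apiece (32 GB apiece when aubUsage is set), so the 64-bit reservation comes out to 24 GB normally and 80 GB for AUB. A quick standalone check of the same expression, assuming GB is 2^30:

constexpr unsigned long long GB = 1ull << 30;
constexpr unsigned long long reservedSize(bool aubUsage) {
    return (4 * 4 + 2 * (aubUsage ? 32 : 4)) * GB;
}
static_assert(reservedSize(false) == 24 * GB, "4 x 4 GB heap32 + 2 x 4 GB standard heaps");
static_assert(reservedSize(true) == 80 * GB, "4 x 4 GB heap32 + 2 x 32 GB standard heaps (AUB)");

int main() { return 0; }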
@@ -35,22 +44,19 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocateGraphicsMemoryWithAlignment
    MemoryAllocation *memoryAllocation = nullptr;

    if (fakeBigAllocations && allocationData.size > bigAllocation) {
        memoryAllocation = new MemoryAllocation(
        memoryAllocation = createMemoryAllocation(
            allocationData.type, nullptr, (void *)dummyAddress, static_cast<uint64_t>(dummyAddress), allocationData.size, counter,
            MemoryPool::System4KBPages, allocationData.flags.multiOsContextCapable, allocationData.flags.uncacheable,
            allocationData.flags.flushL3);
            allocationData.flags.flushL3, false);
        counter++;
        return memoryAllocation;
    }
    auto ptr = allocateSystemMemory(sizeAligned, allocationData.alignment ? alignUp(allocationData.alignment, MemoryConstants::pageSize) : MemoryConstants::pageSize);
    if (ptr != nullptr) {
        memoryAllocation = new MemoryAllocation(allocationData.type, ptr, ptr, reinterpret_cast<uint64_t>(ptr), allocationData.size,
                                                counter, MemoryPool::System4KBPages, allocationData.flags.multiOsContextCapable,
                                                allocationData.flags.uncacheable, allocationData.flags.flushL3);
        if (!memoryAllocation) {
            alignedFreeWrapper(ptr);
            return nullptr;
        }
        memoryAllocation = createMemoryAllocation(allocationData.type, ptr, ptr, reinterpret_cast<uint64_t>(ptr), allocationData.size,
                                                  counter, MemoryPool::System4KBPages, allocationData.flags.multiOsContextCapable,
                                                  allocationData.flags.uncacheable, allocationData.flags.flushL3, false);

        if (allocationData.type == GraphicsAllocation::AllocationType::SVM_CPU) {
            //add 2MB padding in case mapPtr is not 2MB aligned
            size_t reserveSize = sizeAligned + allocationData.alignment;
@@ -73,11 +79,12 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocateGraphicsMemoryForNonSvmHost
    auto alignedPtr = alignDown(allocationData.hostPtr, MemoryConstants::pageSize);
    auto offsetInPage = ptrDiff(allocationData.hostPtr, alignedPtr);

    auto memoryAllocation = new MemoryAllocation(allocationData.type, nullptr, const_cast<void *>(allocationData.hostPtr),
                                                 reinterpret_cast<uint64_t>(alignedPtr), allocationData.size, counter,
                                                 MemoryPool::System4KBPages, false, false, allocationData.flags.flushL3);
    auto memoryAllocation = createMemoryAllocation(allocationData.type, nullptr, const_cast<void *>(allocationData.hostPtr),
                                                   reinterpret_cast<uint64_t>(alignedPtr), allocationData.size, counter,
                                                   MemoryPool::System4KBPages, false, false, allocationData.flags.flushL3, false);

    memoryAllocation->setAllocationOffset(offsetInPage);

    counter++;
    return memoryAllocation;
}
@@ -94,9 +101,10 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocateGraphicsMemory64kb(const Al
}

GraphicsAllocation *OsAgnosticMemoryManager::allocate32BitGraphicsMemoryImpl(const AllocationData &allocationData) {
    auto heap = useInternal32BitAllocator(allocationData.type) ? internalHeapIndex : HeapIndex::HEAP_EXTERNAL;
    if (allocationData.hostPtr) {
        auto allocationSize = alignSizeWholePage(allocationData.hostPtr, allocationData.size);
        auto gpuVirtualAddress = allocator32Bit->allocate(allocationSize);
        auto gpuVirtualAddress = gfxPartition.heapAllocate(heap, allocationSize);
        if (!gpuVirtualAddress) {
            return nullptr;
        }
@@ -104,8 +112,9 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocate32BitGraphicsMemoryImpl(con
        MemoryAllocation *memAlloc = new MemoryAllocation(
            allocationData.type, nullptr, const_cast<void *>(allocationData.hostPtr), GmmHelper::canonize(gpuVirtualAddress + offset),
            allocationData.size, counter, MemoryPool::System4KBPagesWith32BitGpuAddressing, false, false, false);

        memAlloc->set32BitAllocation(true);
        memAlloc->setGpuBaseAddress(GmmHelper::canonize(getExternalHeapBaseAddress()));
        memAlloc->setGpuBaseAddress(GmmHelper::canonize(gfxPartition.getHeapBase(heap)));
        memAlloc->sizeToFree = allocationSize;

        counter++;
@@ -114,7 +123,7 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocate32BitGraphicsMemoryImpl(con

    auto allocationSize = alignUp(allocationData.size, MemoryConstants::pageSize);
    void *ptrAlloc = nullptr;
    auto gpuAddress = allocator32Bit->allocate(allocationSize);
    auto gpuAddress = gfxPartition.heapAllocate(heap, allocationSize);

    if (allocationData.size < 0xfffff000) {
        if (fakeBigAllocations) {
@@ -129,8 +138,9 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocate32BitGraphicsMemoryImpl(con
        memoryAllocation = new MemoryAllocation(allocationData.type, ptrAlloc, ptrAlloc, GmmHelper::canonize(gpuAddress),
                                                allocationData.size, counter, MemoryPool::System4KBPagesWith32BitGpuAddressing, false,
                                                false, false);

        memoryAllocation->set32BitAllocation(true);
        memoryAllocation->setGpuBaseAddress(GmmHelper::canonize(getExternalHeapBaseAddress()));
        memoryAllocation->setGpuBaseAddress(GmmHelper::canonize(gfxPartition.getHeapBase(heap)));
        memoryAllocation->sizeToFree = allocationSize;
    }
    counter++;
@@ -138,9 +148,9 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocate32BitGraphicsMemoryImpl(con
}

GraphicsAllocation *OsAgnosticMemoryManager::createGraphicsAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool requireSpecificBitness) {
    auto graphicsAllocation = new MemoryAllocation(properties.allocationType, nullptr, reinterpret_cast<void *>(1), 1,
                                                   4096u, static_cast<uint64_t>(handle), MemoryPool::SystemCpuInaccessible, false,
                                                   false, false);
    auto graphicsAllocation = createMemoryAllocation(properties.allocationType, nullptr, reinterpret_cast<void *>(1), 1,
                                                     4096u, static_cast<uint64_t>(handle), MemoryPool::SystemCpuInaccessible, false,
                                                     false, false, requireSpecificBitness);
    graphicsAllocation->setSharedHandle(handle);
    graphicsAllocation->set32BitAllocation(requireSpecificBitness);
@@ -191,9 +201,12 @@ void OsAgnosticMemoryManager::freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllo
        return;
    }

    if (gfxAllocation->is32BitAllocation()) {
        auto gpuAddressToFree = gfxAllocation->getGpuAddress() & ~MemoryConstants::pageMask;
        allocator32Bit->free(gpuAddressToFree, static_cast<MemoryAllocation *>(gfxAllocation)->sizeToFree);
    auto memoryAllocation = static_cast<MemoryAllocation *>(gfxAllocation);
    auto sizeToFree = memoryAllocation->sizeToFree;

    if (sizeToFree) {
        auto gpuAddressToFree = GmmHelper::decanonize(memoryAllocation->getGpuAddress()) & ~MemoryConstants::pageMask;
        gfxPartition.freeGpuAddressRange(gpuAddressToFree, sizeToFree);
    }

    alignedFreeWrapper(gfxAllocation->getDriverAllocatedCpuPtr());
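(Not part of the commit.) The decanonize step matters because GPU virtual addresses are handed out in canonical 48-bit form, with bit 47 replicated into the upper bits; before an address can be matched against the partition's heap ranges it has to be stripped back to its plain 48-bit value. A minimal sketch of that convention; this is an assumption about what GmmHelper::canonize/decanonize do, not NEO code:

#include <cassert>
#include <cstdint>

// Assumed 48-bit canonical-address convention: bit 47 is replicated into bits 63:48.
uint64_t canonize(uint64_t address) {
    const uint64_t signBit = 1ull << 47;
    return (address & signBit) ? (address | 0xFFFF000000000000ull)
                               : (address & 0x0000FFFFFFFFFFFFull);
}
uint64_t decanonize(uint64_t address) {
    return address & 0x0000FFFFFFFFFFFFull; // drop the replicated upper bits
}

int main() {
    uint64_t gpuVa = 0x0000800000001000ull;            // bit 47 set
    assert(canonize(gpuVa) == 0xFFFF800000001000ull);  // upper 16 bits filled in
    assert(decanonize(canonize(gpuVa)) == gpuVa);      // round-trips back to the raw VA
    return 0;
}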
@@ -218,17 +231,22 @@ uint64_t OsAgnosticMemoryManager::getMaxApplicationAddress() {
}

uint64_t OsAgnosticMemoryManager::getInternalHeapBaseAddress() {
    return this->allocator32Bit->getBase();
    return gfxPartition.getHeapBase(internalHeapIndex);
}

uint64_t OsAgnosticMemoryManager::getExternalHeapBaseAddress() {
    return this->allocator32Bit->getBase();
    return gfxPartition.getHeapBase(HeapIndex::HEAP_EXTERNAL);
}

void OsAgnosticMemoryManager::setForce32BitAllocations(bool newValue) {
    force32bitAllocations = newValue;
}

GraphicsAllocation *OsAgnosticMemoryManager::createGraphicsAllocation(OsHandleStorage &handleStorage, const AllocationData &allocationData) {
    auto allocation = new MemoryAllocation(allocationData.type, nullptr, const_cast<void *>(allocationData.hostPtr),
                                           reinterpret_cast<uint64_t>(allocationData.hostPtr), allocationData.size, counter++,
                                           MemoryPool::System4KBPages, false, false, false);
    auto allocation = createMemoryAllocation(allocationData.type, nullptr, const_cast<void *>(allocationData.hostPtr),
                                             reinterpret_cast<uint64_t>(allocationData.hostPtr), allocationData.size, counter++,
                                             MemoryPool::System4KBPages, false, false, false, false);

    allocation->fragmentsStorage = handleStorage;
    return allocation;
}
@@ -276,8 +294,8 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocateGraphicsMemoryForImageImpl(

    auto ptr = allocateSystemMemory(alignUp(allocationData.imgInfo->size, MemoryConstants::pageSize), MemoryConstants::pageSize);
    if (ptr != nullptr) {
        alloc = new MemoryAllocation(allocationData.type, ptr, ptr, reinterpret_cast<uint64_t>(ptr), allocationData.imgInfo->size,
                                     counter, MemoryPool::SystemCpuInaccessible, false, allocationData.flags.uncacheable, allocationData.flags.flushL3);
        alloc = createMemoryAllocation(allocationData.type, ptr, ptr, reinterpret_cast<uint64_t>(ptr), allocationData.imgInfo->size,
                                       counter, MemoryPool::SystemCpuInaccessible, false, allocationData.flags.uncacheable, allocationData.flags.flushL3, false);
        counter++;
    }
@@ -288,21 +306,6 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocateGraphicsMemoryForImageImpl(
    return alloc;
}

Allocator32bit *OsAgnosticMemoryManager::create32BitAllocator(bool aubUsage) {
    uint64_t allocatorSize = MemoryConstants::gigaByte - 2 * 4096;
    uint64_t heap32Base = 0x80000000000ul;

    if (is64bit && this->localMemorySupported && aubUsage) {
        heap32Base = 0x40000000000ul;
    }

    if (is32bit) {
        heap32Base = 0x0;
    }

    return new Allocator32bit(heap32Base, allocatorSize);
}

void *OsAgnosticMemoryManager::reserveCpuAddressRange(size_t size) {
    void *reservePtr = allocateSystemMemory(size, MemoryConstants::preferredAlignment);
    return reservePtr;
@@ -311,4 +314,28 @@ void *OsAgnosticMemoryManager::reserveCpuAddressRange(size_t size) {
void OsAgnosticMemoryManager::releaseReservedCpuAddressRange(void *reserved, size_t size) {
    alignedFreeWrapper(reserved);
}

MemoryAllocation *OsAgnosticMemoryManager::createMemoryAllocation(GraphicsAllocation::AllocationType allocationType, void *driverAllocatedCpuPointer,
                                                                  void *pMem, uint64_t gpuAddress, size_t memSize, uint64_t count,
                                                                  MemoryPool::Type pool, bool multiOsContextCapable, bool uncacheable,
                                                                  bool flushL3Required, bool requireSpecificBitness) {
    if (!isLimitedRange()) {
        return new MemoryAllocation(allocationType, driverAllocatedCpuPointer, pMem, gpuAddress, memSize,
                                    count, pool, multiOsContextCapable, uncacheable, flushL3Required);
    }

    size_t alignedSize = alignSizeWholePage(pMem, memSize);

    auto heap = (force32bitAllocations || requireSpecificBitness) ? HeapIndex::HEAP_EXTERNAL : HeapIndex::HEAP_STANDARD;

    uint64_t limitedGpuAddress = gfxPartition.heapAllocate(heap, alignedSize);

    auto memoryAllocation = new MemoryAllocation(allocationType, driverAllocatedCpuPointer, pMem, limitedGpuAddress, memSize,
                                                 count, pool, multiOsContextCapable, uncacheable, flushL3Required);

    memoryAllocation->setGpuBaseAddress(GmmHelper::canonize(gfxPartition.getHeapBase(heap)));
    memoryAllocation->sizeToFree = alignedSize;

    return memoryAllocation;
}
} // namespace NEO
@@ -7,8 +7,6 @@

#pragma once
#include "core/helpers/basic_math.h"
#include "runtime/helpers/hw_info.h"
#include "runtime/helpers/options.h"
#include "runtime/memory_manager/memory_manager.h"

namespace NEO {
@@ -22,6 +20,15 @@ class MemoryAllocation : public GraphicsAllocation {

    void setSharedHandle(osHandle handle) { sharingInfo.sharedHandle = handle; }

    MemoryAllocation(AllocationType allocationType, void *cpuPtrIn, uint64_t gpuAddress, uint64_t baseAddress, size_t sizeIn,
                     MemoryPool::Type pool, bool multiOsContextCapable)
        : GraphicsAllocation(allocationType, cpuPtrIn, gpuAddress, baseAddress, sizeIn, pool, multiOsContextCapable),
          id(0), uncacheable(false) {}

    MemoryAllocation(AllocationType allocationType, void *cpuPtrIn, size_t sizeIn, osHandle sharedHandleIn, MemoryPool::Type pool, bool multiOsContextCapable)
        : GraphicsAllocation(allocationType, cpuPtrIn, sizeIn, sharedHandleIn, pool, multiOsContextCapable),
          id(0), uncacheable(false) {}

    MemoryAllocation(AllocationType allocationType, void *driverAllocatedCpuPointer, void *pMem, uint64_t gpuAddress, size_t memSize,
                     uint64_t count, MemoryPool::Type pool, bool multiOsContextCapable, bool uncacheable, bool flushL3Required)
        : GraphicsAllocation(allocationType, pMem, gpuAddress, 0u, memSize, pool, multiOsContextCapable),
@@ -40,12 +47,7 @@ class OsAgnosticMemoryManager : public MemoryManager {
    using MemoryManager::allocateGraphicsMemory;

    OsAgnosticMemoryManager(ExecutionEnvironment &executionEnvironment) : OsAgnosticMemoryManager(false, executionEnvironment) {}

    OsAgnosticMemoryManager(bool aubUsage, ExecutionEnvironment &executionEnvironment) : MemoryManager(executionEnvironment) {
        allocator32Bit.reset(create32BitAllocator(aubUsage));
        gfxPartition.init(platformDevices[0]->capabilityTable.gpuAddressSpace);
    }

    OsAgnosticMemoryManager(bool aubUsage, ExecutionEnvironment &executionEnvironment);
    ~OsAgnosticMemoryManager() override;
    GraphicsAllocation *createGraphicsAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool requireSpecificBitness) override;
    GraphicsAllocation *createGraphicsAllocationFromNTHandle(void *handle) override { return nullptr; }
@@ -61,11 +63,10 @@ class OsAgnosticMemoryManager : public MemoryManager {
    uint64_t getMaxApplicationAddress() override;
    uint64_t getInternalHeapBaseAddress() override;
    uint64_t getExternalHeapBaseAddress() override;
    void setForce32BitAllocations(bool newValue) override;

    void turnOnFakingBigAllocations();

    Allocator32bit *create32BitAllocator(bool enableLocalMemory);

    void *reserveCpuAddressRange(size_t size) override;
    void releaseReservedCpuAddressRange(void *reserved, size_t size) override;

@@ -80,6 +81,8 @@ class OsAgnosticMemoryManager : public MemoryManager {
    void unlockResourceImpl(GraphicsAllocation &graphicsAllocation) override {}
    GraphicsAllocation *allocate32BitGraphicsMemoryImpl(const AllocationData &allocationData) override;
    GraphicsAllocation *allocateGraphicsMemoryInDevicePool(const AllocationData &allocationData, AllocationStatus &status) override;
    MemoryAllocation *createMemoryAllocation(GraphicsAllocation::AllocationType allocationType, void *driverAllocatedCpuPointer, void *pMem, uint64_t gpuAddress, size_t memSize,
                                             uint64_t count, MemoryPool::Type pool, bool multiOsContextCapable, bool uncacheable, bool flushL3Required, bool requireSpecificBitness);

  private:
    unsigned long long counter = 0;
@@ -18,6 +18,7 @@ set(RUNTIME_SRCS_OS_INTERFACE_BASE
  ${CMAKE_CURRENT_SOURCE_DIR}/os_inc_base.h
  ${CMAKE_CURRENT_SOURCE_DIR}/os_interface.h
  ${CMAKE_CURRENT_SOURCE_DIR}/os_library.h
  ${CMAKE_CURRENT_SOURCE_DIR}/os_memory.h
  ${CMAKE_CURRENT_SOURCE_DIR}/os_thread.h
  ${CMAKE_CURRENT_SOURCE_DIR}/os_time.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/os_time.h

@@ -45,6 +45,7 @@ set(RUNTIME_SRCS_OS_INTERFACE_LINUX
  ${CMAKE_CURRENT_SOURCE_DIR}/os_interface.h
  ${CMAKE_CURRENT_SOURCE_DIR}/os_library.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/os_library.h
  ${CMAKE_CURRENT_SOURCE_DIR}/os_memory_linux.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/os_thread_linux.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/os_thread_linux.h
  ${CMAKE_CURRENT_SOURCE_DIR}/os_time_linux.cpp
@@ -10,4 +10,5 @@

namespace NEO {
size_t getSizeToMap();
} // namespace NEO
size_t getSizeToReserve();
} // namespace NEO
@@ -19,6 +19,7 @@
#include "runtime/helpers/surface_formats.h"
#include "runtime/memory_manager/host_ptr_manager.h"
#include "runtime/os_interface/32bit_memory.h"
#include "runtime/os_interface/linux/allocator_helper.h"
#include "runtime/os_interface/linux/os_context_linux.h"
#include "runtime/os_interface/linux/os_interface.h"
#include "runtime/os_interface/linux/tiling_mode_helper.h"
@@ -39,7 +40,7 @@ DrmMemoryManager::DrmMemoryManager(gemCloseWorkerMode mode,
      forcePinEnabled(forcePinAllowed),
      validateHostPtrMemory(validateHostPtrMemory) {
    supportsMultiStorageResources = false;
    gfxPartition.init(platformDevices[0]->capabilityTable.gpuAddressSpace);
    gfxPartition.init(platformDevices[0]->capabilityTable.gpuAddressSpace, getSizeToReserve());
    MemoryManager::virtualPaddingAvailable = true;
    if (mode != gemCloseWorkerMode::gemCloseWorkerInactive) {
        gemCloseWorker.reset(new DrmGemCloseWorker(*this));
runtime/os_interface/linux/os_memory_linux.cpp (new file, 23 lines)
@@ -0,0 +1,23 @@
/*
 * Copyright (C) 2019 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "runtime/os_interface/os_memory.h"

#include <fcntl.h>
#include <sys/mman.h>

namespace NEO {

void *OSMemory::reserveCpuAddressRange(size_t sizeToReserve) {
    return mmap(0, sizeToReserve, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_HUGETLB, open("/dev/null", O_RDONLY), 0);
}

void OSMemory::releaseCpuAddressRange(void *reservedCpuAddressRange, size_t reservedSize) {
    munmap(reservedCpuAddressRange, reservedSize);
}

} // namespace NEO
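(Not part of the commit.) The reserve-then-release pattern above can be tried standalone; this sketch mirrors the mmap/munmap calls but uses a plain anonymous mapping instead of the MAP_HUGETLB + /dev/null variant, which is a simplification:

#include <cstddef>
#include <cstdio>
#include <sys/mman.h>

int main() {
    const size_t sizeToReserve = 1ull << 30; // reserve 1 GB of address space, no backing pages
    void *reserved = mmap(nullptr, sizeToReserve, PROT_NONE,
                          MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
    if (reserved == MAP_FAILED) {
        std::perror("mmap");
        return 1;
    }
    std::printf("reserved %zu bytes at %p\n", sizeToReserve, reserved);
    munmap(reserved, sizeToReserve); // release the reservation
    return 0;
}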
runtime/os_interface/os_memory.h (new file, 19 lines)
@@ -0,0 +1,19 @@
/*
 * Copyright (C) 2019 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include <cstddef>

namespace NEO {

struct OSMemory {
  public:
    static void *reserveCpuAddressRange(size_t sizeToReserve);
    static void releaseCpuAddressRange(void *reservedCpuAddressRange, size_t reservedSize);
};

} // namespace NEO
@@ -35,6 +35,7 @@ set(RUNTIME_SRCS_OS_INTERFACE_WINDOWS
  ${CMAKE_CURRENT_SOURCE_DIR}/os_interface.h
  ${CMAKE_CURRENT_SOURCE_DIR}/os_library.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/os_library.h
  ${CMAKE_CURRENT_SOURCE_DIR}/os_memory_win.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/os_socket.h
  ${CMAKE_CURRENT_SOURCE_DIR}/os_thread_win.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/os_thread_win.h
runtime/os_interface/windows/os_memory_win.cpp (new file, 22 lines)
@@ -0,0 +1,22 @@
/*
 * Copyright (C) 2019 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "runtime/os_interface/os_memory.h"

#include <windows.h>

namespace NEO {

void *OSMemory::reserveCpuAddressRange(size_t sizeToReserve) {
    return VirtualAlloc(0, sizeToReserve, MEM_RESERVE, PAGE_READWRITE);
}

void OSMemory::releaseCpuAddressRange(void *reservedCpuAddressRange, size_t /* reservedSize */) {
    VirtualFree(reservedCpuAddressRange, 0, MEM_RELEASE);
}

} // namespace NEO