[19/n] Internal 4GB allocator.

- Allocator now uses uint64_t instead of void*.
- This is required because the allocator must work on 64-bit addresses
in a 32-bit DLL.

Change-Id: Ia715ea7913efc95a2974aff8dff390203d8125a8
This commit is contained in:
Mrozek, Michal
2018-03-27 16:43:47 +02:00
committed by sys_ocldev
parent 2cbb76ac88
commit 3f59acf54a
8 changed files with 267 additions and 280 deletions

View File

@@ -75,7 +75,7 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocate32BitGraphicsMemory(size_t
return nullptr;
}
uint64_t offset = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(ptr) & MemoryConstants::pageMask);
MemoryAllocation *memAlloc = new MemoryAllocation(false, reinterpret_cast<void *>(ptr), Gmm::canonize(reinterpret_cast<uint64_t>(gpuVirtualAddress) + offset), size, counter);
MemoryAllocation *memAlloc = new MemoryAllocation(false, reinterpret_cast<void *>(ptr), Gmm::canonize(gpuVirtualAddress + offset), size, counter);
memAlloc->is32BitAllocation = true;
memAlloc->gpuBaseAddress = Gmm::canonize(allocator32Bit->getBase());
memAlloc->sizeToFree = allocationSize;
@@ -89,11 +89,11 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocate32BitGraphicsMemory(size_t
if (size < 0xfffff000)
ptrAlloc = alignedMallocWrapper(allocationSize, MemoryConstants::allocationAlignment);
void *gpuPointer = allocator32Bit->allocate(allocationSize);
auto gpuAddress = allocator32Bit->allocate(allocationSize);
MemoryAllocation *memoryAllocation = nullptr;
if (ptrAlloc != nullptr) {
memoryAllocation = new MemoryAllocation(true, ptrAlloc, Gmm::canonize(reinterpret_cast<uint64_t>(gpuPointer)), size, counter);
memoryAllocation = new MemoryAllocation(true, ptrAlloc, Gmm::canonize(gpuAddress), size, counter);
memoryAllocation->is32BitAllocation = true;
memoryAllocation->gpuBaseAddress = Gmm::canonize(allocator32Bit->getBase());
memoryAllocation->sizeToFree = allocationSize;
@@ -129,8 +129,8 @@ void OsAgnosticMemoryManager::freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllo
void *ptr = gfxAllocation->getUnderlyingBuffer();
if (gfxAllocation->is32BitAllocation) {
void *gpuPtrToFree = reinterpret_cast<void *>(gfxAllocation->getGpuAddress() & ~MemoryConstants::pageMask);
allocator32Bit->free(gpuPtrToFree, static_cast<MemoryAllocation *>(gfxAllocation)->sizeToFree);
auto gpuAddressToFree = gfxAllocation->getGpuAddress() & ~MemoryConstants::pageMask;
allocator32Bit->free(gpuAddressToFree, static_cast<MemoryAllocation *>(gfxAllocation)->sizeToFree);
}
if (gfxAllocation->cpuPtrAllocated) {
alignedFreeWrapper(ptr);

View File

@@ -38,9 +38,9 @@ class Allocator32bit {
Allocator32bit();
~Allocator32bit();
void *allocate(size_t &size);
uint64_t allocate(size_t &size);
uintptr_t getBase();
int free(void *ptr, size_t size);
int free(uint64_t ptr, size_t size);
protected:
std::unique_ptr<OsInternals> osInternals;

View File

@@ -96,7 +96,7 @@ bool OCLRT::is32BitOsAllocatorAvailable = true;
Allocator32bit::Allocator32bit(uint64_t base, uint64_t size) {
this->base = base;
this->size = size;
heapAllocator = std::unique_ptr<HeapAllocator>(new HeapAllocator((void *)base, size));
heapAllocator = std::unique_ptr<HeapAllocator>(new HeapAllocator(base, size));
}
OCLRT::Allocator32bit::Allocator32bit() : Allocator32bit(new OsInternals) {
@@ -133,7 +133,7 @@ OCLRT::Allocator32bit::Allocator32bit(Allocator32bit::OsInternals *osInternalsIn
base = (uint64_t)ptr;
size = sizeToMap;
heapAllocator = std::unique_ptr<HeapAllocator>(new HeapAllocator(ptr, sizeToMap));
heapAllocator = std::unique_ptr<HeapAllocator>(new HeapAllocator(base, sizeToMap));
} else {
this->osInternals->drmAllocator = new Allocator32bit::OsInternals::Drm32BitAllocator(*this->osInternals);
}
@@ -149,24 +149,24 @@ OCLRT::Allocator32bit::~Allocator32bit() {
}
}
void *OCLRT::Allocator32bit::allocate(size_t &size) {
void *ptr = nullptr;
uint64_t OCLRT::Allocator32bit::allocate(size_t &size) {
uint64_t ptr = 0llu;
if (DebugManager.flags.UseNewHeapAllocator.get()) {
ptr = this->heapAllocator->allocate(size);
} else {
ptr = this->osInternals->drmAllocator->allocate(size);
ptr = reinterpret_cast<uint64_t>(this->osInternals->drmAllocator->allocate(size));
}
return ptr;
}
int Allocator32bit::free(void *ptr, size_t size) {
if ((ptr == MAP_FAILED) || (ptr == nullptr))
int Allocator32bit::free(uint64_t ptr, size_t size) {
if ((ptr == reinterpret_cast<uint64_t>(MAP_FAILED)) || (ptr == 0llu))
return 0;
if (DebugManager.flags.UseNewHeapAllocator.get()) {
this->heapAllocator->free(ptr, size);
} else {
return this->osInternals->drmAllocator->free(ptr, size);
return this->osInternals->drmAllocator->free(reinterpret_cast<void *>(ptr), size);
}
return 0;
}

View File

@@ -128,11 +128,12 @@ uint32_t DrmMemoryManager::unreference(OCLRT::BufferObject *bo, bool synchronous
if (allocatorType == MMAP_ALLOCATOR) {
munmapFunction(address, unmapSize);
} else {
uint64_t graphicsAddress = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(address));
if (allocatorType == BIT32_ALLOCATOR_EXTERNAL) {
allocator32Bit->free(address, unmapSize);
allocator32Bit->free(graphicsAddress, unmapSize);
} else {
UNRECOVERABLE_IF(allocatorType != BIT32_ALLOCATOR_INTERNAL)
internal32bitAllocator->free(address, unmapSize);
internal32bitAllocator->free(graphicsAddress, unmapSize);
}
}
@@ -276,7 +277,7 @@ DrmAllocation *DrmMemoryManager::allocate32BitGraphicsMemory(size_t size, void *
bo->isAllocated = false;
bo->setUnmapSize(realAllocationSize);
bo->address = gpuVirtualAddress;
bo->address = reinterpret_cast<void *>(gpuVirtualAddress);
uintptr_t offset = (uintptr_t)bo->address;
bo->softPin((uint64_t)offset);
bo->setAllocationType(allocationType);
@@ -300,7 +301,7 @@ DrmAllocation *DrmMemoryManager::allocate32BitGraphicsMemory(size_t size, void *
return nullptr;
}
BufferObject *bo = allocUserptr(reinterpret_cast<uintptr_t>(res), alignedAllocationSize, 0, true);
BufferObject *bo = allocUserptr(res, alignedAllocationSize, 0, true);
if (!bo) {
allocatorToUse->free(res, allocationSize);
@@ -312,7 +313,7 @@ DrmAllocation *DrmMemoryManager::allocate32BitGraphicsMemory(size_t size, void *
bo->setAllocationType(allocationType);
auto drmAllocation = new DrmAllocation(bo, res, alignedAllocationSize);
auto drmAllocation = new DrmAllocation(bo, reinterpret_cast<void *>(res), alignedAllocationSize);
drmAllocation->is32BitAllocation = true;
drmAllocation->gpuBaseAddress = allocatorToUse->getBase();
return drmAllocation;
@@ -338,18 +339,18 @@ BufferObject *DrmMemoryManager::findAndReferenceSharedBufferObject(int boHandle)
}
BufferObject *DrmMemoryManager::createSharedBufferObject(int boHandle, size_t size, bool requireSpecificBitness) {
void *gpuRange = nullptr;
uint64_t gpuRange = 0llu;
StorageAllocatorType storageType = UNKNOWN_ALLOCATOR;
if (requireSpecificBitness && this->force32bitAllocations) {
gpuRange = this->allocator32Bit->allocate(size);
storageType = BIT32_ALLOCATOR_EXTERNAL;
} else {
gpuRange = mmapFunction(nullptr, size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
gpuRange = reinterpret_cast<uint64_t>(mmapFunction(nullptr, size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0));
storageType = MMAP_ALLOCATOR;
}
DEBUG_BREAK_IF(gpuRange == MAP_FAILED);
DEBUG_BREAK_IF(gpuRange == reinterpret_cast<uint64_t>(MAP_FAILED));
auto bo = new (std::nothrow) BufferObject(this->drm, boHandle, true);
if (!bo) {
@@ -358,7 +359,7 @@ BufferObject *DrmMemoryManager::createSharedBufferObject(int boHandle, size_t si
bo->size = size;
bo->address = reinterpret_cast<void *>(gpuRange);
bo->softPin(reinterpret_cast<uint64_t>(gpuRange));
bo->softPin(gpuRange);
bo->setUnmapSize(size);
bo->setAllocationType(storageType);
return bo;

View File

@@ -34,7 +34,7 @@ class Allocator32bit::OsInternals {
Allocator32bit::Allocator32bit(uint64_t base, uint64_t size) {
this->base = base;
this->size = size;
heapAllocator = std::unique_ptr<HeapAllocator>(new HeapAllocator((void *)base, size));
heapAllocator = std::unique_ptr<HeapAllocator>(new HeapAllocator(base, size));
}
OCLRT::Allocator32bit::Allocator32bit() {
@@ -43,7 +43,7 @@ OCLRT::Allocator32bit::Allocator32bit() {
osInternals = std::unique_ptr<OsInternals>(new OsInternals);
osInternals.get()->allocatedRange = (void *)((uintptr_t)this->base);
heapAllocator = std::unique_ptr<HeapAllocator>(new HeapAllocator((void *)this->base, sizeToMap));
heapAllocator = std::unique_ptr<HeapAllocator>(new HeapAllocator(this->base, sizeToMap));
}
OCLRT::Allocator32bit::~Allocator32bit() {
@@ -52,13 +52,13 @@ OCLRT::Allocator32bit::~Allocator32bit() {
}
}
void *Allocator32bit::allocate(size_t &size) {
uint64_t Allocator32bit::allocate(size_t &size) {
if (size >= 0xfffff000)
return nullptr;
return 0llu;
return this->heapAllocator->allocate(size);
}
int Allocator32bit::free(void *ptr, size_t size) {
int Allocator32bit::free(uint64_t ptr, size_t size) {
this->heapAllocator->free(ptr, size);
return 0;
}

View File

@@ -33,8 +33,8 @@
namespace OCLRT {
struct HeapChunk {
HeapChunk(void *ptr, size_t size) : ptr(ptr), size(size) {}
void *ptr;
HeapChunk(uint64_t ptr, size_t size) : ptr(ptr), size(size) {}
uint64_t ptr;
size_t size;
};
@@ -42,16 +42,16 @@ bool operator<(const HeapChunk &hc1, const HeapChunk &hc2);
class HeapAllocator {
public:
HeapAllocator(void *address, uint64_t size) : address(address), size(size), availableSize(size), sizeThreshold(defaultSizeThreshold) {
pLeftBound = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(address));
pRightBound = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(address) + (size_t)size);
HeapAllocator(uint64_t address, uint64_t size) : address(address), size(size), availableSize(size), sizeThreshold(defaultSizeThreshold) {
pLeftBound = address;
pRightBound = address + size;
freedChunksBig.reserve(10);
freedChunksSmall.reserve(50);
}
HeapAllocator(void *address, uint64_t size, size_t threshold) : address(address), size(size), availableSize(size), sizeThreshold(threshold) {
pLeftBound = reinterpret_cast<uint64_t>(address);
pRightBound = reinterpret_cast<uint64_t>(address) + size;
HeapAllocator(uint64_t address, uint64_t size, size_t threshold) : address(address), size(size), availableSize(size), sizeThreshold(threshold) {
pLeftBound = address;
pRightBound = address + size;
freedChunksBig.reserve(10);
freedChunksSmall.reserve(50);
}
@@ -59,40 +59,40 @@ class HeapAllocator {
~HeapAllocator() {
}
void *allocate(size_t &sizeToAllocate) {
uint64_t allocate(size_t &sizeToAllocate) {
std::lock_guard<std::mutex> lock(mtx);
sizeToAllocate = alignUp(sizeToAllocate, allocationAlignment);
void *ptrReturn = nullptr;
uint64_t ptrReturn = 0llu;
DBG_LOG(PrintDebugMessages, __FUNCTION__, "Allocator usage == ", this->getUsage());
if (availableSize < sizeToAllocate) {
return nullptr;
return 0llu;
}
std::vector<HeapChunk> &freedChunks = (sizeToAllocate > sizeThreshold) ? freedChunksBig : freedChunksSmall;
size_t sizeOfFreedChunk = 0;
uint32_t defragmentCount = 0;
while (ptrReturn == nullptr) {
while (ptrReturn == 0llu) {
ptrReturn = getFromFreedChunks(sizeToAllocate, freedChunks, sizeOfFreedChunk);
if (ptrReturn == nullptr) {
if (ptrReturn == 0llu) {
if (sizeToAllocate > sizeThreshold) {
if (pLeftBound + sizeToAllocate <= pRightBound) {
ptrReturn = reinterpret_cast<void *>(pLeftBound);
ptrReturn = pLeftBound;
pLeftBound += sizeToAllocate;
}
} else {
if (pRightBound - sizeToAllocate >= pLeftBound) {
pRightBound -= sizeToAllocate;
ptrReturn = reinterpret_cast<void *>(pRightBound);
ptrReturn = pRightBound;
}
}
}
if (ptrReturn != nullptr) {
if (ptrReturn != 0llu) {
if (sizeOfFreedChunk > 0) {
availableSize -= sizeOfFreedChunk;
sizeToAllocate = sizeOfFreedChunk;
@@ -101,7 +101,7 @@ class HeapAllocator {
}
}
if (ptrReturn == nullptr) {
if (ptrReturn == 0llu) {
if (defragmentCount == 1)
break;
defragment();
@@ -112,10 +112,10 @@ class HeapAllocator {
return ptrReturn;
}
void free(void *ptr, size_t size) {
void free(uint64_t ptr, size_t size) {
std::lock_guard<std::mutex> lock(mtx);
uintptr_t ptrIn = reinterpret_cast<uintptr_t>(ptr);
if (ptrIn == 0u)
auto ptrIn = ptr;
if (ptrIn == 0llu)
return;
DBG_LOG(PrintDebugMessages, __FUNCTION__, "Allocator usage == ", this->getUsage());
@@ -151,7 +151,7 @@ class HeapAllocator {
}
protected:
void *address;
uint64_t address;
uint64_t size;
uint64_t availableSize;
uint64_t pLeftBound, pRightBound;
@@ -163,7 +163,7 @@ class HeapAllocator {
std::vector<HeapChunk> freedChunksBig;
std::mutex mtx;
void *getFromFreedChunks(size_t size, std::vector<HeapChunk> &freedChunks, size_t &sizeOfFreedChunk) {
uint64_t getFromFreedChunks(size_t size, std::vector<HeapChunk> &freedChunks, size_t &sizeOfFreedChunk) {
size_t elements = freedChunks.size();
size_t bestFitIndex = -1;
size_t bestFitSize = 0;
@@ -171,7 +171,7 @@ class HeapAllocator {
for (size_t i = 0; i < elements; i++) {
if (freedChunks[i].size == size) {
void *ptr = freedChunks[i].ptr;
auto ptr = freedChunks[i].ptr;
freedChunks.erase(freedChunks.begin() + i);
return ptr;
}
@@ -186,7 +186,7 @@ class HeapAllocator {
if (bestFitSize != 0) {
if (bestFitSize < (size << 1)) {
void *ptr = freedChunks[bestFitIndex].ptr;
auto ptr = freedChunks[bestFitIndex].ptr;
sizeOfFreedChunk = freedChunks[bestFitIndex].size;
freedChunks.erase(freedChunks.begin() + bestFitIndex);
return ptr;
@@ -195,26 +195,26 @@ class HeapAllocator {
DEBUG_BREAK_IF(!((size <= sizeThreshold) || ((size > sizeThreshold) && (sizeDelta > sizeThreshold))));
void *ptr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(freedChunks[bestFitIndex].ptr) + sizeDelta);
auto ptr = freedChunks[bestFitIndex].ptr + sizeDelta;
freedChunks[bestFitIndex].size = sizeDelta;
return ptr;
}
}
return nullptr;
return 0llu;
}
void storeInFreedChunks(void *ptr, size_t size, std::vector<HeapChunk> &freedChunks) {
void storeInFreedChunks(uint64_t ptr, size_t size, std::vector<HeapChunk> &freedChunks) {
size_t elements = freedChunks.size();
uintptr_t pLeft = reinterpret_cast<uintptr_t>(ptr);
uintptr_t pRight = reinterpret_cast<uintptr_t>(ptr) + size;
uint64_t pLeft = ptr;
uint64_t pRight = ptr + size;
bool freedChunkStored = false;
for (size_t i = 0; i < elements; i++) {
if (freedChunks[i].ptr == reinterpret_cast<void *>(pRight)) {
freedChunks[i].ptr = reinterpret_cast<void *>(pLeft);
if (freedChunks[i].ptr == pRight) {
freedChunks[i].ptr = pLeft;
freedChunks[i].size += size;
freedChunkStored = true;
} else if ((reinterpret_cast<uintptr_t>(freedChunks[i].ptr) + freedChunks[i].size) == pLeft) {
} else if ((freedChunks[i].ptr + freedChunks[i].size) == pLeft) {
freedChunks[i].size += size;
freedChunkStored = true;
}
@@ -234,7 +234,7 @@ class HeapAllocator {
size_t maxSizeOfSmallChunks = freedChunksSmall.size();
if (maxSizeOfSmallChunks > 0) {
uintptr_t ptr = reinterpret_cast<uintptr_t>(freedChunksSmall[maxSizeOfSmallChunks - 1].ptr);
auto ptr = freedChunksSmall[maxSizeOfSmallChunks - 1].ptr;
size_t chunkSize = freedChunksSmall[maxSizeOfSmallChunks - 1].size;
if (ptr == pRightBound) {
pRightBound = ptr + chunkSize;
@@ -247,7 +247,7 @@ class HeapAllocator {
size_t maxSizeOfBigChunks = freedChunksBig.size();
if (maxSizeOfBigChunks > 0) {
uintptr_t ptr = reinterpret_cast<uintptr_t>(freedChunksBig[maxSizeOfBigChunks - 1].ptr);
auto ptr = freedChunksBig[maxSizeOfBigChunks - 1].ptr;
size_t chunkSize = freedChunksBig[maxSizeOfBigChunks - 1].size;
if (ptr == (pLeftBound - chunkSize)) {
@@ -263,11 +263,11 @@ class HeapAllocator {
std::sort(freedChunksSmall.rbegin(), freedChunksSmall.rend());
size_t maxSize = freedChunksSmall.size();
for (size_t i = maxSize - 1; i > 0; --i) {
uintptr_t ptr = reinterpret_cast<uintptr_t>(freedChunksSmall[i].ptr);
auto ptr = freedChunksSmall[i].ptr;
size_t chunkSize = freedChunksSmall[i].size;
if (reinterpret_cast<uintptr_t>(freedChunksSmall[i - 1].ptr) == ptr + chunkSize) {
freedChunksSmall[i - 1].ptr = reinterpret_cast<void *>(ptr);
if (freedChunksSmall[i - 1].ptr == ptr + chunkSize) {
freedChunksSmall[i - 1].ptr = ptr;
freedChunksSmall[i - 1].size += chunkSize;
freedChunksSmall.erase(freedChunksSmall.begin() + i);
}
@@ -279,9 +279,9 @@ class HeapAllocator {
size_t maxSize = freedChunksBig.size();
for (size_t i = maxSize - 1; i > 0; --i) {
uintptr_t ptr = reinterpret_cast<uintptr_t>(freedChunksBig[i].ptr);
auto ptr = freedChunksBig[i].ptr;
size_t chunkSize = freedChunksBig[i].size;
if ((reinterpret_cast<uintptr_t>(freedChunksBig[i - 1].ptr) + freedChunksBig[i - 1].size) == ptr) {
if ((freedChunksBig[i - 1].ptr + freedChunksBig[i - 1].size) == ptr) {
freedChunksBig[i - 1].size += chunkSize;
freedChunksBig.erase(freedChunksBig.begin() + i);
}