compute-runtime/runtime/os_interface/linux/drm_memory_manager.cpp

609 lines
22 KiB
C++
Raw Normal View History

/*
* Copyright (c) 2017 - 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/device/device.h"
#include "runtime/helpers/ptr_math.h"
#include "runtime/helpers/options.h"
#include "runtime/os_interface/32bit_memory.h"
#include "runtime/os_interface/linux/drm_allocation.h"
#include "runtime/os_interface/linux/drm_buffer_object.h"
#include "runtime/os_interface/linux/drm_memory_manager.h"
#include "runtime/helpers/surface_formats.h"
#include <cstring>
#include <iostream>
#include "drm/i915_drm.h"
#include "drm/drm.h"
#include "runtime/gmm_helper/gmm_helper.h"
#include "runtime/gmm_helper/resource_info.h"
namespace OCLRT {
// Builds a DRM-backed memory manager operating on `drm`.
//  - mode:                  a DrmGemCloseWorker is created unless this is gemCloseWorkerInactive.
//  - forcePinAllowed:       stored as forcePinEnabled; enables creation of the pin buffer object.
//  - validateHostPtrMemory: also enables the pin buffer object; when set, failing to create
//                           that object is treated as unrecoverable.
DrmMemoryManager::DrmMemoryManager(Drm *drm, gemCloseWorkerMode mode, bool forcePinAllowed, bool validateHostPtrMemory)
    : MemoryManager(false),
      drm(drm),
      pinBB(nullptr),
      forcePinEnabled(forcePinAllowed),
      validateHostPtrMemory(validateHostPtrMemory) {
    MemoryManager::virtualPaddingAvailable = true;

    const bool closeWorkerRequested = (mode != gemCloseWorkerMode::gemCloseWorkerInactive);
    if (closeWorkerRequested) {
        gemCloseWorker.reset(new DrmGemCloseWorker(*this));
    }

    // One page of host memory backs the pin buffer object. It is allocated unconditionally
    // and handed straight back below when no pin buffer object ends up being created.
    auto pinPage = alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize);
    DEBUG_BREAK_IF(pinPage == nullptr);

    const bool pinBBRequested = forcePinEnabled || validateHostPtrMemory;
    if (pinBBRequested) {
        pinBB = allocUserptr(reinterpret_cast<uintptr_t>(pinPage), MemoryConstants::pageSize, 0, true);
    }

    if (pinBB) {
        // Flag the page as allocated by us so unreference() frees it together with the BO.
        pinBB->isAllocated = true;
    } else {
        alignedFree(pinPage);
        DEBUG_BREAK_IF(true);
        UNRECOVERABLE_IF(validateHostPtrMemory);
    }

    internal32bitAllocator.reset(new Allocator32bit);
}
// Runs the common cleanup, closes the GEM close worker when one exists and
// drops the reference this manager holds on the pin buffer object.
DrmMemoryManager::~DrmMemoryManager() {
    applyCommonCleanup();

    if (gemCloseWorker.get() != nullptr) {
        gemCloseWorker->close(false);
    }

    if (pinBB != nullptr) {
        unreference(pinBB);
        pinBB = nullptr;
    }
}
// Forwards `alloc` to the GEM close worker.
// NOTE(review): the worker is only created by the constructor for an active worker mode and
// is dereferenced here without a check - confirm callers only use push() in that mode.
void DrmMemoryManager::push(DrmAllocation *alloc) {
    auto &closeWorker = *gemCloseWorker;
    closeWorker.push(alloc);
}
// Removes `bo` from the list of shared (reused) buffer objects while holding `mtx`.
// A buffer object flagged isReused is expected to be in the list; a miss trips the debug
// break. In builds where that check is compiled out the miss is now ignored instead of
// passing the end iterator to erase(), which is undefined behaviour.
void DrmMemoryManager::eraseSharedBufferObject(OCLRT::BufferObject *bo) {
    std::lock_guard<decltype(mtx)> lock(mtx);

    auto it = std::find(sharingBufferObjects.begin(), sharingBufferObjects.end(), bo);
    //If an object isReused = true, it must be in the vector
    DEBUG_BREAK_IF(it == sharingBufferObjects.end());

    if (it != sharingBufferObjects.end()) {
        sharingBufferObjects.erase(it);
    }
}
// Flags `bo` as reused and appends it to the list of shared buffer objects.
// Both steps happen while `mtx` is held.
void DrmMemoryManager::pushSharedBufferObject(OCLRT::BufferObject *bo) {
    std::lock_guard<decltype(mtx)> guard(mtx);

    bo->isReused = true;
    sharingBufferObjects.push_back(bo);
}
// Drops one reference from `bo` and destroys it when that was the last one.
//  - bo:                 buffer object to release; a null pointer yields uint32_t(-1).
//  - synchronousDestroy: when set, spins until no more than one reference remains before
//                        decrementing.
// Returns the reference count observed before the decrement (1 means this call destroyed
// the buffer object).
uint32_t DrmMemoryManager::unreference(OCLRT::BufferObject *bo, bool synchronousDestroy) {
    if (bo == nullptr) {
        return -1;
    }

    if (synchronousDestroy) {
        // Busy-wait for the other holders to let go.
        while (bo->refCount > 1) {
        }
    }

    uint32_t previousCount = bo->refCount.fetch_sub(1);
    if (previousCount != 1) {
        return previousCount;
    }

    // Last reference gone: release every buffer object kept in this one's residency list.
    for (auto residentBo : *bo->getResidency()) {
        unreference(residentBo);
    }

    // Capture everything needed to release the backing range before the object is deleted.
    auto unmapSize = bo->peekUnmapSize();
    auto address = bo->isAllocated || unmapSize > 0 ? bo->address : nullptr;
    auto allocatorType = bo->peekAllocationType();

    if (bo->isReused) {
        eraseSharedBufferObject(bo);
    }
    bo->close();
    delete bo;

    if (!address) {
        return previousCount;
    }

    if (!unmapSize) {
        // No unmap size recorded: the address came from the aligned allocator.
        alignedFreeWrapper(address);
        return previousCount;
    }

    // Hand the range back to whichever allocator reserved it.
    if (allocatorType == MMAP_ALLOCATOR) {
        munmapFunction(address, unmapSize);
    } else if (allocatorType == BIT32_ALLOCATOR_EXTERNAL) {
        allocator32Bit->free(address, unmapSize);
    } else {
        UNRECOVERABLE_IF(allocatorType != BIT32_ALLOCATOR_INTERNAL);
        internal32bitAllocator->free(address, unmapSize);
    }
    return previousCount;
}
// Wraps the host memory range [address, address + size) in a userptr GEM object and returns
// a BufferObject for it, soft-pinned at `address`.
//  - address / size: host range handed to DRM_IOCTL_I915_GEM_USERPTR.
//  - flags:          userptr flags; truncated to 32 bits before being passed to the ioctl.
//  - softpin:        not read by this function.
// Returns nullptr when the ioctl fails or the BufferObject cannot be allocated. In the latter
// case the GEM handle just created is closed again so it does not leak.
OCLRT::BufferObject *DrmMemoryManager::allocUserptr(uintptr_t address, size_t size, uint64_t flags, bool softpin) {
    drm_i915_gem_userptr userptr = {};
    userptr.user_ptr = address;
    userptr.user_size = size;
    userptr.flags = static_cast<uint32_t>(flags);

    if (this->drm->ioctl(DRM_IOCTL_I915_GEM_USERPTR, &userptr) != 0) {
        return nullptr;
    }

    auto res = new (std::nothrow) BufferObject(this->drm, userptr.handle, false);
    if (!res) {
        DEBUG_BREAK_IF(true);
        // Nothing owns the handle at this point; give it back to the kernel (best effort).
        drm_gem_close close = {};
        close.handle = userptr.handle;
        this->drm->ioctl(DRM_IOCTL_GEM_CLOSE, &close);
        return nullptr;
    }

    res->size = size;
    res->address = reinterpret_cast<void *>(address);
    res->softPin(address);

    return res;
}
// Creates an allocation describing the host range `hostPtr` / `hostPtrSize`. The allocation
// gets no buffer object of its own; it receives a copy of `handleStorage` as its fragments.
DrmAllocation *DrmMemoryManager::createGraphicsAllocation(OsHandleStorage &handleStorage, size_t hostPtrSize, const void *hostPtr) {
    auto cpuPtr = const_cast<void *>(hostPtr);

    auto hostPtrAllocation = new DrmAllocation(nullptr, cpuPtr, hostPtrSize);
    hostPtrAllocation->fragmentsStorage = handleStorage;

    return hostPtrAllocation;
}
// Allocates aligned host memory and wraps it in a userptr buffer object.
//  - size:        requested size; rounded up to MemoryConstants::allocationAlignment, and a
//                 zero size still yields one alignment unit.
//  - alignment:   requested alignment; never less than allocationAlignment.
//  - forcePin:    pins the new buffer object through pinBB when force-pinning is enabled,
//                 a pin buffer object exists and `size` reaches pinThreshold.
//  - uncacheable: not read by this function.
// Returns nullptr when the host allocation or the userptr creation fails.
DrmAllocation *DrmMemoryManager::allocateGraphicsMemory(size_t size, size_t alignment, bool forcePin, bool uncacheable) {
    const size_t minAlignment = MemoryConstants::allocationAlignment;

    const size_t requestedAlignment = std::max(alignment, minAlignment);
    size_t finalAlignment = alignUp(requestedAlignment, minAlignment);

    // When size == 0 allocate allocationAlignment
    // It's needed to prevent overlapping pages with user pointers
    const size_t roundedSize = alignUp(size, minAlignment);
    size_t finalSize = std::max(roundedSize, minAlignment);

    auto hostMemory = alignedMallocWrapper(finalSize, finalAlignment);
    if (!hostMemory) {
        return nullptr;
    }

    BufferObject *bo = allocUserptr(reinterpret_cast<uintptr_t>(hostMemory), finalSize, 0, true);
    if (!bo) {
        alignedFreeWrapper(hostMemory);
        return nullptr;
    }
    bo->isAllocated = true;

    const bool shouldPin = forcePinEnabled && pinBB != nullptr && forcePin && size >= this->pinThreshold;
    if (shouldPin) {
        pinBB->pin(&bo, 1);
    }

    return new DrmAllocation(bo, hostMemory, finalSize);
}
// Creates an allocation for caller-provided host memory via the base MemoryManager and
// optionally pins its buffer object.
// Pinning happens only when host-pointer validation is off, the base call succeeded, a pin
// buffer object exists, force-pinning is enabled and requested, and `size` reaches pinThreshold.
// Returns whatever the base implementation returned (possibly nullptr).
DrmAllocation *DrmMemoryManager::allocateGraphicsMemory(size_t size, const void *ptr, bool forcePin) {
    auto baseAllocation = MemoryManager::allocateGraphicsMemory(size, const_cast<void *>(ptr), forcePin);
    auto allocation = static_cast<DrmAllocation *>(baseAllocation);

    bool forcePinAllowed = allocation != nullptr && pinBB != nullptr && forcePinEnabled && forcePin && size >= this->pinThreshold;
    if (validateHostPtrMemory || !forcePinAllowed) {
        return allocation;
    }

    BufferObject *boArray[] = {allocation->getBO()};
    pinBB->pin(boArray, 1);
    return allocation;
}
// 64kb allocation entry point. This implementation ignores all three arguments
// and always returns nullptr.
DrmAllocation *DrmMemoryManager::allocateGraphicsMemory64kb(size_t size, size_t alignment, bool forcePin) {
return nullptr;
}
// Allocates storage for an image described by `imgInfo` and attaches `gmm` to the result.
//  - When Gmm::allowTiling() rejects the image descriptor, a regular allocation of
//    imgInfo.size with MemoryConstants::preferredAlignment is used.
//  - Otherwise an inaccessible (PROT_NONE) anonymous mapping is reserved as the GPU address
//    range, a GEM object of imgInfo.size is created, soft-pinned at that range and switched
//    to Y tiling with imgInfo.rowPitch as stride.
// Returns nullptr on failure. Every failure path in the tiled branch releases what was
// acquired before it (the reserved range and, where applicable, the GEM handle).
GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryForImage(ImageInfo &imgInfo, Gmm *gmm) {
    if (!Gmm::allowTiling(*imgInfo.imgDesc)) {
        auto alloc = allocateGraphicsMemory(imgInfo.size, MemoryConstants::preferredAlignment);
        if (alloc) {
            alloc->gmm = gmm;
        }
        return alloc;
    }

    auto gpuRange = mmapFunction(nullptr, imgInfo.size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
    if (gpuRange == MAP_FAILED) {
        DEBUG_BREAK_IF(true);
        return nullptr;
    }

    drm_i915_gem_create create = {0, 0, 0};
    create.size = imgInfo.size;

    auto ret = this->drm->ioctl(DRM_IOCTL_I915_GEM_CREATE, &create);
    if (ret != 0) {
        DEBUG_BREAK_IF(true);
        munmapFunction(gpuRange, imgInfo.size);
        return nullptr;
    }

    auto bo = new (std::nothrow) BufferObject(this->drm, create.handle, true);
    if (!bo) {
        // Nothing owns the GEM handle or the reserved range yet; release both (best effort).
        drm_gem_close close = {};
        close.handle = create.handle;
        this->drm->ioctl(DRM_IOCTL_GEM_CLOSE, &close);
        munmapFunction(gpuRange, imgInfo.size);
        return nullptr;
    }
    bo->size = imgInfo.size;
    bo->address = reinterpret_cast<void *>(gpuRange);
    bo->softPin(reinterpret_cast<uint64_t>(gpuRange));

    auto ret2 = bo->setTiling(I915_TILING_Y, static_cast<uint32_t>(imgInfo.rowPitch));
    DEBUG_BREAK_IF(ret2 != true);
    ((void)(ret2));

    // Recording the unmap size and the mmap allocator type lets unreference() unmap the range.
    bo->setUnmapSize(imgInfo.size);

    auto allocation = new DrmAllocation(bo, nullptr, (uint64_t)gpuRange, imgInfo.size);
    bo->setAllocationType(MMAP_ALLOCATOR);
    allocation->gmm = gmm;

    return allocation;
}
// Creates an allocation whose GPU address comes from one of the 32-bit allocators.
//  - size:       requested size in bytes.
//  - ptr:        optional host pointer. When given, its page-aligned range is wrapped in a
//                userptr buffer object that is soft-pinned at a GPU range taken from the
//                allocator. When null, the allocator's range itself backs the buffer object.
//  - memoryType: EXTERNAL_ALLOCATION selects allocator32Bit, anything else the internal allocator.
// If the external allocator runs out of space for a pointer-less request while a device is
// attached that reports a program count of zero, 32-bit addressing is switched off and the
// request is retried through createGraphicsAllocationWithRequiredBitness().
// Returns nullptr on failure.
DrmAllocation *DrmMemoryManager::allocate32BitGraphicsMemory(size_t size, void *ptr, MemoryType memoryType) {
    const bool isExternal = (memoryType == MemoryType::EXTERNAL_ALLOCATION);
    auto allocatorToUse = isExternal ? allocator32Bit.get() : internal32bitAllocator.get();
    auto allocationType = isExternal ? BIT32_ALLOCATOR_EXTERNAL : BIT32_ALLOCATOR_INTERNAL;

    if (ptr != nullptr) {
        auto hostAddress = reinterpret_cast<uintptr_t>(ptr);
        auto allocationSize = alignSizeWholePage(ptr, size);

        // Separate copy handed to the allocator; the same value is later used to free the
        // range and as the unmap size.
        auto realAllocationSize = allocationSize;
        auto gpuVirtualAddress = allocatorToUse->allocate(realAllocationSize);
        if (!gpuVirtualAddress) {
            return nullptr;
        }

        auto alignedUserPointer = (uintptr_t)alignDown(ptr, MemoryConstants::pageSize);
        auto inputPointerOffset = hostAddress - alignedUserPointer;

        BufferObject *userptrBo = allocUserptr(alignedUserPointer, allocationSize, 0, true);
        if (!userptrBo) {
            allocatorToUse->free(gpuVirtualAddress, realAllocationSize);
            return nullptr;
        }

        // The host memory belongs to the caller; only the GPU range is released with the BO.
        userptrBo->isAllocated = false;
        userptrBo->setUnmapSize(realAllocationSize);
        userptrBo->address = gpuVirtualAddress;
        auto gpuRangeStart = reinterpret_cast<uintptr_t>(userptrBo->address);
        userptrBo->softPin(static_cast<uint64_t>(gpuRangeStart));
        userptrBo->setAllocationType(allocationType);

        // The allocation's GPU address keeps the caller's offset within its first page.
        auto hostPtrAllocation = new DrmAllocation(userptrBo, ptr, (uint64_t)ptrOffset(gpuVirtualAddress, inputPointerOffset), allocationSize);
        hostPtrAllocation->is32BitAllocation = true;
        hostPtrAllocation->gpuBaseAddress = allocatorToUse->getBase();
        return hostPtrAllocation;
    }

    size_t alignedAllocationSize = alignUp(size, MemoryConstants::pageSize);
    auto allocationSize = alignedAllocationSize;

    auto res = allocatorToUse->allocate(allocationSize);
    if (!res) {
        const bool canFallBack = isExternal && device && device->getProgramCount() == 0;
        if (!canFallBack) {
            return nullptr;
        }
        this->force32bitAllocations = false;
        device->setForce32BitAddressing(false);
        return static_cast<DrmAllocation *>(createGraphicsAllocationWithRequiredBitness(size, ptr));
    }

    BufferObject *bo = allocUserptr(reinterpret_cast<uintptr_t>(res), alignedAllocationSize, 0, true);
    if (!bo) {
        allocatorToUse->free(res, allocationSize);
        return nullptr;
    }

    bo->isAllocated = true;
    bo->setUnmapSize(allocationSize);
    bo->setAllocationType(allocationType);

    auto drmAllocation = new DrmAllocation(bo, res, alignedAllocationSize);
    drmAllocation->is32BitAllocation = true;
    drmAllocation->gpuBaseAddress = allocatorToUse->getBase();
    return drmAllocation;
}
// Creates an allocation of `allocationSize` bytes from the internal 32-bit heap, optionally
// backed by the host memory at `ptr`. Thin wrapper over allocate32BitGraphicsMemory().
GraphicsAllocation *DrmMemoryManager::createInternalGraphicsAllocation(const void *ptr, size_t allocationSize) {
    auto hostPtr = const_cast<void *>(ptr);
    return allocate32BitGraphicsMemory(allocationSize, hostPtr, MemoryType::INTERNAL_ALLOCATION);
}
// Looks up the shared buffer object whose handle equals `boHandle`. On a hit the object gets
// an additional reference and is returned; otherwise nullptr is returned.
// The search and the reference bump both happen while `mtx` is held.
BufferObject *DrmMemoryManager::findAndReferenceSharedBufferObject(int boHandle) {
    std::lock_guard<decltype(mtx)> guard(mtx);

    for (const auto &entry : sharingBufferObjects) {
        if (entry->handle != boHandle) {
            continue;
        }
        BufferObject *match = entry;
        match->reference();
        return match;
    }
    return nullptr;
}
// Creates a BufferObject for an already imported GEM handle and reserves a GPU address range
// of `size` bytes for it.
//  - boHandle:               GEM handle the buffer object wraps.
//  - size:                   size of the range to reserve; stored as the object's size and unmap size.
//  - requireSpecificBitness: together with force32bitAllocations selects the external 32-bit
//                            allocator; otherwise an inaccessible anonymous mapping is used.
// Returns nullptr when the range cannot be reserved or the BufferObject cannot be allocated.
// In the latter case the reserved range is handed back to the allocator it came from.
BufferObject *DrmMemoryManager::createSharedBufferObject(int boHandle, size_t size, bool requireSpecificBitness) {
    void *gpuRange = nullptr;
    StorageAllocatorType storageType = UNKNOWN_ALLOCATOR;

    if (requireSpecificBitness && this->force32bitAllocations) {
        gpuRange = this->allocator32Bit->allocate(size);
        storageType = BIT32_ALLOCATOR_EXTERNAL;
        // The other callers of the 32-bit allocators treat a null result as "out of space".
        if (gpuRange == nullptr) {
            return nullptr;
        }
    } else {
        gpuRange = mmapFunction(nullptr, size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
        storageType = MMAP_ALLOCATOR;
        if (gpuRange == MAP_FAILED) {
            DEBUG_BREAK_IF(true);
            return nullptr;
        }
    }

    auto bo = new (std::nothrow) BufferObject(this->drm, boHandle, true);
    if (!bo) {
        // Mirror the release logic of unreference() for the range reserved above.
        if (storageType == MMAP_ALLOCATOR) {
            munmapFunction(gpuRange, size);
        } else {
            this->allocator32Bit->free(gpuRange, size);
        }
        return nullptr;
    }

    bo->size = size;
    bo->address = reinterpret_cast<void *>(gpuRange);
    bo->softPin(reinterpret_cast<uint64_t>(gpuRange));
    bo->setUnmapSize(size);
    bo->setAllocationType(storageType);
    return bo;
}
// Imports the object behind the file descriptor `handle` and wraps it in an allocation.
//  - handle:                 file descriptor passed to DRM_IOCTL_PRIME_FD_TO_HANDLE.
//  - requireSpecificBitness: together with force32bitAllocations marks the result as a 32-bit
//                            allocation based at the external 32-bit allocator.
//  - reuseBO:                when set, an already registered buffer object with the same GEM
//                            handle is referenced and reused, and a newly created one is
//                            registered for later reuse.
// Returns nullptr when the import ioctl fails, the object size cannot be queried or the
// buffer object cannot be created.
GraphicsAllocation *DrmMemoryManager::createGraphicsAllocationFromSharedHandle(osHandle handle, bool requireSpecificBitness, bool reuseBO) {
    drm_prime_handle openFd = {0, 0, 0};
    openFd.fd = handle;

    auto ret = this->drm->ioctl(DRM_IOCTL_PRIME_FD_TO_HANDLE, &openFd);
    if (ret != 0) {
        // Without a valid GEM handle there is nothing to wrap.
        DEBUG_BREAK_IF(true);
        return nullptr;
    }

    auto boHandle = openFd.handle;
    BufferObject *bo = nullptr;

    if (reuseBO) {
        bo = findAndReferenceSharedBufferObject(boHandle);
    }

    if (bo == nullptr) {
        // The object's size is taken from the end offset of the file descriptor.
        size_t size = lseekFunction(handle, 0, SEEK_END);
        if (size == static_cast<size_t>(-1)) {
            // lseek reported an error; do not try to reserve a range of that "size".
            return nullptr;
        }

        bo = createSharedBufferObject(boHandle, size, requireSpecificBitness);
        if (!bo) {
            return nullptr;
        }

        if (reuseBO) {
            pushSharedBufferObject(bo);
        }
    }

    auto drmAllocation = new DrmAllocation(bo, bo->address, bo->size, handle);

    if (requireSpecificBitness && this->force32bitAllocations) {
        drmAllocation->is32BitAllocation = true;
        drmAllocation->gpuBaseAddress = allocator32Bit->getBase();
    }
    return drmAllocation;
}
// Creates a second allocation over the host memory of `inputGraphicsAllocation` whose GPU
// address range is `sizeWithPadding` bytes long.
// The page-aligned source range is wrapped in a new userptr buffer object that is soft-pinned
// at a freshly reserved, inaccessible anonymous mapping of `sizeWithPadding` bytes. The
// returned allocation keeps the source pointer's offset within its first page.
// Returns nullptr when the range cannot be reserved or the userptr object cannot be created.
// In the latter case the reserved range is unmapped again.
GraphicsAllocation *DrmMemoryManager::createPaddedAllocation(GraphicsAllocation *inputGraphicsAllocation, size_t sizeWithPadding) {
    void *gpuRange = mmapFunction(nullptr, sizeWithPadding, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
    if (gpuRange == MAP_FAILED) {
        return nullptr;
    }

    auto srcPtr = inputGraphicsAllocation->getUnderlyingBuffer();
    auto srcSize = inputGraphicsAllocation->getUnderlyingBufferSize();
    auto alignedSrcSize = alignUp(srcSize, MemoryConstants::pageSize);
    auto alignedPtr = (uintptr_t)alignDown(srcPtr, MemoryConstants::pageSize);
    auto offset = (uintptr_t)srcPtr - alignedPtr;

    BufferObject *bo = allocUserptr(alignedPtr, alignedSrcSize, 0, true);
    if (!bo) {
        // No buffer object took ownership of the range; release it here.
        munmapFunction(gpuRange, sizeWithPadding);
        return nullptr;
    }

    bo->setAddress(gpuRange);
    bo->softPin(reinterpret_cast<uint64_t>(gpuRange));
    // Recording the unmap size and the mmap allocator type lets unreference() unmap the range.
    bo->setUnmapSize(sizeWithPadding);
    bo->setAllocationType(MMAP_ALLOCATOR);

    return new DrmAllocation(bo, (void *)srcPtr, (uint64_t)ptrOffset(gpuRange, offset), sizeWithPadding);
}
// Destroys `gfxAllocation` and releases what it owns. A null pointer is ignored.
//  - The attached gmm is deleted.
//  - Allocations that carry host-pointer fragments are cleaned up through
//    cleanGraphicsMemoryCreatedFromHostPtr() and deleted.
//  - All other allocations close their shared handle (if any), are deleted, and then their
//    buffer object is waited on and unreferenced.
void DrmMemoryManager::freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation) {
    auto drmAllocation = static_cast<DrmAllocation *>(gfxAllocation);
    if (!drmAllocation) {
        return;
    }

    // Deleting a null gmm is a no-op, so no separate check is needed.
    delete drmAllocation->gmm;

    const bool hasHostPtrFragments = gfxAllocation->fragmentsStorage.fragmentCount != 0;
    if (hasHostPtrFragments) {
        cleanGraphicsMemoryCreatedFromHostPtr(gfxAllocation);
        delete gfxAllocation;
        return;
    }

    // Fetch the buffer object first: the allocation is deleted before the object is released.
    BufferObject *bo = drmAllocation->getBO();

    const auto sharedHandle = gfxAllocation->peekSharedHandle();
    if (sharedHandle != Sharing::nonSharedResource) {
        closeFunction(sharedHandle);
    }

    delete gfxAllocation;

    bo->wait(-1);
    unreference(bo);
}
uint64_t DrmMemoryManager::getSystemSharedMemory() {
uint64_t hostMemorySize = MemoryConstants::pageSize * (uint64_t)(sysconf(_SC_PHYS_PAGES));
drm_i915_gem_context_param getContextParam = {};
getContextParam.param = I915_CONTEXT_PARAM_GTT_SIZE;
auto ret = drm->ioctl(DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &getContextParam);
DEBUG_BREAK_IF(ret != 0);
((void)(ret));
uint64_t gpuMemorySize = getContextParam.value;
return std::min(hostMemorySize, gpuMemorySize);
}
// Returns MemoryConstants::max32BitAppAddress, raised to max64BitAppAddress when `is64bit`
// is non-zero.
uint64_t DrmMemoryManager::getMaxApplicationAddress() {
    const uint64_t max32 = MemoryConstants::max32BitAppAddress;
    const uint64_t max64 = MemoryConstants::max64BitAppAddress;

    // is64bit acts as a 0/1 multiplier on the distance between the two limits.
    return max32 + static_cast<uint64_t>(is64bit) * (max64 - max32);
}
// Returns the base address reported by the internal 32-bit allocator.
uint64_t DrmMemoryManager::getInternalHeapBaseAddress() {
    auto &internalAllocator = *this->internal32bitAllocator;
    return internalAllocator.getBase();
}
// Creates the OS handle, residency data and userptr buffer object for every fragment of
// `handleStorage` that has a size but no OS handle yet, and registers each such fragment
// with hostPtrManager.
// Returns:
//  - Error              when a userptr object cannot be created (that fragment is flagged
//                       freeTheFragment), or when pinning fails with anything but EFAULT;
//  - InvalidHostPointer when host-pointer validation is on and pinning the new buffer objects
//                       reports EFAULT (all fragments created by this call are flagged
//                       freeTheFragment);
//  - Success            otherwise.
MemoryManager::AllocationStatus DrmMemoryManager::populateOsHandles(OsHandleStorage &handleStorage) {
    BufferObject *allocatedBos[max_fragments_count];
    uint32_t indexesOfAllocatedBos[max_fragments_count];
    size_t numberOfBosAllocated = 0;

    for (unsigned int i = 0; i < max_fragments_count; i++) {
        auto &fragment = handleStorage.fragmentStorageData[i];

        // If there is no fragment it means it already exists.
        const bool needsNewHandle = !fragment.osHandleStorage && fragment.fragmentSize;
        if (!needsNewHandle) {
            continue;
        }

        fragment.osHandleStorage = new OsHandle();
        fragment.residency = new ResidencyData();
        fragment.osHandleStorage->bo = allocUserptr(reinterpret_cast<uintptr_t>(fragment.cpuPtr),
                                                    fragment.fragmentSize,
                                                    0,
                                                    true);
        if (!fragment.osHandleStorage->bo) {
            fragment.freeTheFragment = true;
            return AllocationStatus::Error;
        }

        // Remember the object and its fragment index for the validation step below.
        allocatedBos[numberOfBosAllocated] = fragment.osHandleStorage->bo;
        indexesOfAllocatedBos[numberOfBosAllocated] = i;
        numberOfBosAllocated++;

        hostPtrManager.storeFragment(fragment);
    }

    if (!validateHostPtrMemory) {
        return AllocationStatus::Success;
    }

    int result = pinBB->pin(allocatedBos, numberOfBosAllocated);
    if (result == 0) {
        return AllocationStatus::Success;
    }
    if (result != EFAULT) {
        return AllocationStatus::Error;
    }

    for (uint32_t created = 0; created < numberOfBosAllocated; created++) {
        handleStorage.fragmentStorageData[indexesOfAllocatedBos[created]].freeTheFragment = true;
    }
    return AllocationStatus::InvalidHostPointer;
}
// Releases the OS resources of every fragment in `handleStorage` that is flagged
// freeTheFragment: its buffer object (if any) is waited on and synchronously unreferenced,
// then the OS handle and the residency data are deleted.
void DrmMemoryManager::cleanOsHandles(OsHandleStorage &handleStorage) {
    for (unsigned int i = 0; i < max_fragments_count; i++) {
        auto &fragment = handleStorage.fragmentStorageData[i];
        if (!fragment.freeTheFragment) {
            continue;
        }

        if (BufferObject *fragmentBo = fragment.osHandleStorage->bo) {
            fragmentBo->wait(-1);
            auto refCount = unreference(fragmentBo, true);
            // The fragment is expected to hold the last reference at this point.
            DEBUG_BREAK_IF(refCount != 1u);
            ((void)(refCount));
        }

        delete fragment.osHandleStorage;
        delete fragment.residency;
    }
}
// Returns the pin buffer object, or nullptr when none was created.
BufferObject *DrmMemoryManager::getPinBB() const {
    return this->pinBB;
}
void DrmMemoryManager::waitForDeletions() {
if (gemCloseWorker.get()) {
while (!gemCloseWorker->isEmpty())
;
}
MemoryManager::waitForDeletions();
}
// Moves the buffer object behind `graphicsAllocation` to the CPU read domain and, when
// `writeEnable` is set, to the CPU write domain as well.
// Returns false when the allocation has no buffer object or the ioctl fails, true otherwise.
bool DrmMemoryManager::setDomainCpu(GraphicsAllocation &graphicsAllocation, bool writeEnable) {
    DEBUG_BREAK_IF(writeEnable); //unsupported path (for CPU writes call SW_FINISH ioctl in unlockResource)

    auto &drmAllocation = static_cast<DrmAllocation &>(graphicsAllocation);
    auto bo = drmAllocation.getBO();
    if (bo == nullptr) {
        return false;
    }

    // move a buffer object to the CPU read, and possibly write domain, including waiting on flushes to occur
    drm_i915_gem_set_domain domainRequest = {};
    domainRequest.handle = bo->peekHandle();
    domainRequest.read_domains = I915_GEM_DOMAIN_CPU;
    if (writeEnable) {
        domainRequest.write_domain = I915_GEM_DOMAIN_CPU;
    }

    auto ioctlResult = drm->ioctl(DRM_IOCTL_I915_GEM_SET_DOMAIN, &domainRequest);
    return ioctlResult == 0;
}
// Returns a CPU pointer through which `graphicsAllocation` can be read.
//  - Allocations with an underlying host buffer return that buffer.
//  - Otherwise the buffer object is mapped with DRM_IOCTL_I915_GEM_MMAP and the mapping is
//    remembered as the object's locked address.
// In both cases the buffer object is moved to the CPU read domain first; a failure of that
// step only trips the debug break.
// Returns nullptr for a null allocation, a missing buffer object or a failed mapping.
void *DrmMemoryManager::lockResource(GraphicsAllocation *graphicsAllocation) {
    if (!graphicsAllocation) {
        return nullptr;
    }

    auto hostBuffer = graphicsAllocation->getUnderlyingBuffer();
    if (hostBuffer != nullptr) {
        auto domainSet = setDomainCpu(*graphicsAllocation, false);
        DEBUG_BREAK_IF(!domainSet);
        (void)domainSet;
        return hostBuffer;
    }

    auto bo = static_cast<DrmAllocation *>(graphicsAllocation)->getBO();
    if (!bo) {
        return nullptr;
    }

    drm_i915_gem_mmap mapRequest = {};
    mapRequest.handle = bo->peekHandle();
    mapRequest.size = bo->peekSize();
    const bool mapped = (drm->ioctl(DRM_IOCTL_I915_GEM_MMAP, &mapRequest) == 0);
    if (!mapped) {
        return nullptr;
    }

    bo->setLockedAddress(reinterpret_cast<void *>(mapRequest.addr_ptr));

    auto domainSet = setDomainCpu(*graphicsAllocation, false);
    DEBUG_BREAK_IF(!domainSet);
    (void)domainSet;

    return bo->peekLockedAddress();
}
// Undoes lockResource() for allocations without an underlying host buffer: unmaps the
// buffer object's locked address over the object's size and clears the stored address.
// Null allocations, allocations with a host buffer and allocations without a buffer object
// are left untouched.
void DrmMemoryManager::unlockResource(GraphicsAllocation *graphicsAllocation) {
    if (graphicsAllocation == nullptr || graphicsAllocation->getUnderlyingBuffer() != nullptr) {
        return;
    }

    auto bo = static_cast<DrmAllocation *>(graphicsAllocation)->getBO();
    if (!bo) {
        return;
    }

    auto lockedAddress = bo->peekLockedAddress();
    munmapFunction(lockedAddress, bo->peekSize());
    bo->setLockedAddress(nullptr);
}
} // namespace OCLRT