Reorganization of directory structure [3/n]

Change-Id: If3dfa3f6007f8810a6a1ae1a4f0c7da38544648d
This commit is contained in:
kamdiedrich
2020-02-23 21:00:51 +01:00
committed by sys_ocldev
parent e177b4fc0f
commit e072275ae6
711 changed files with 94 additions and 94 deletions

View File

@@ -0,0 +1,47 @@
#
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
set(NEO_CORE_MEMORY_MANAGER
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/allocations_list.h
${CMAKE_CURRENT_SOURCE_DIR}/allocation_properties.h
${CMAKE_CURRENT_SOURCE_DIR}/deferrable_allocation_deletion.h
${CMAKE_CURRENT_SOURCE_DIR}/deferrable_allocation_deletion.cpp
${CMAKE_CURRENT_SOURCE_DIR}/deferrable_deletion.h
${CMAKE_CURRENT_SOURCE_DIR}/deferred_deleter.cpp
${CMAKE_CURRENT_SOURCE_DIR}/deferred_deleter.h
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/engine_limits.h
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/storage_info.cpp
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/storage_info.h
${CMAKE_CURRENT_SOURCE_DIR}/eviction_status.h
${CMAKE_CURRENT_SOURCE_DIR}/gfx_partition.cpp
${CMAKE_CURRENT_SOURCE_DIR}/${BRANCH_DIR_SUFFIX}/gfx_partition_init_additional_range.cpp
${CMAKE_CURRENT_SOURCE_DIR}/gfx_partition.h
${CMAKE_CURRENT_SOURCE_DIR}/graphics_allocation.cpp
${CMAKE_CURRENT_SOURCE_DIR}/graphics_allocation.h
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/graphics_allocation_extra.cpp
${CMAKE_CURRENT_SOURCE_DIR}/host_ptr_defines.h
${CMAKE_CURRENT_SOURCE_DIR}/host_ptr_manager.cpp
${CMAKE_CURRENT_SOURCE_DIR}/host_ptr_manager.h
${CMAKE_CURRENT_SOURCE_DIR}/internal_allocation_storage.cpp
${CMAKE_CURRENT_SOURCE_DIR}/internal_allocation_storage.h
${CMAKE_CURRENT_SOURCE_DIR}/local_memory_usage.cpp
${CMAKE_CURRENT_SOURCE_DIR}/local_memory_usage.h
${CMAKE_CURRENT_SOURCE_DIR}/memory_constants.h
${CMAKE_CURRENT_SOURCE_DIR}/memory_manager.cpp
${CMAKE_CURRENT_SOURCE_DIR}/memory_manager.h
${CMAKE_CURRENT_SOURCE_DIR}/memory_operations_handler.h
${CMAKE_CURRENT_SOURCE_DIR}/memory_operations_status.h
${CMAKE_CURRENT_SOURCE_DIR}/memory_pool.h
${CMAKE_CURRENT_SOURCE_DIR}/residency.cpp
${CMAKE_CURRENT_SOURCE_DIR}/residency.h
${CMAKE_CURRENT_SOURCE_DIR}/residency_container.h
${CMAKE_CURRENT_SOURCE_DIR}/surface.h
${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_manager.cpp
${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_manager.h
)
set_property(GLOBAL PROPERTY NEO_CORE_MEMORY_MANAGER ${NEO_CORE_MEMORY_MANAGER})

View File

@@ -0,0 +1,76 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "device/sub_device.h"
#include "memory_manager/graphics_allocation.h"
namespace NEO {
struct ImageInfo;
struct AllocationProperties {
union {
struct {
uint32_t allocateMemory : 1;
uint32_t flushL3RequiredForRead : 1;
uint32_t flushL3RequiredForWrite : 1;
uint32_t forcePin : 1;
uint32_t uncacheable : 1;
uint32_t multiOsContextCapable : 1;
uint32_t readOnlyMultiStorage : 1;
uint32_t shareable : 1;
uint32_t resource48Bit : 1;
uint32_t reserved : 23;
} flags;
uint32_t allFlags = 0;
};
static_assert(sizeof(AllocationProperties::flags) == sizeof(AllocationProperties::allFlags), "");
const uint32_t rootDeviceIndex;
size_t size = 0;
size_t alignment = 0;
GraphicsAllocation::AllocationType allocationType = GraphicsAllocation::AllocationType::UNKNOWN;
ImageInfo *imgInfo = nullptr;
bool multiStorageResource = false;
DeviceBitfield subDevicesBitfield{};
AllocationProperties(uint32_t rootDeviceIndex, size_t size,
GraphicsAllocation::AllocationType allocationType)
: AllocationProperties(rootDeviceIndex, true, size, allocationType, false) {}
AllocationProperties(uint32_t rootDeviceIndex, bool allocateMemory,
ImageInfo &imgInfo,
GraphicsAllocation::AllocationType allocationType)
: AllocationProperties(rootDeviceIndex, allocateMemory, 0u, allocationType, false) {
this->imgInfo = &imgInfo;
}
AllocationProperties(uint32_t rootDeviceIndex,
bool allocateMemory,
size_t size,
GraphicsAllocation::AllocationType allocationType,
bool isMultiStorageAllocation)
: AllocationProperties(rootDeviceIndex, allocateMemory, size, allocationType, false, isMultiStorageAllocation, {}) {}
AllocationProperties(uint32_t rootDeviceIndexParam,
bool allocateMemoryParam,
size_t sizeParam,
GraphicsAllocation::AllocationType allocationTypeParam,
bool multiOsContextCapableParam,
bool isMultiStorageAllocationParam,
DeviceBitfield subDevicesBitfieldParam)
: rootDeviceIndex(rootDeviceIndexParam),
size(sizeParam),
allocationType(allocationTypeParam),
multiStorageResource(isMultiStorageAllocationParam),
subDevicesBitfield(subDevicesBitfieldParam) {
allFlags = 0;
flags.flushL3RequiredForRead = 1;
flags.flushL3RequiredForWrite = 1;
flags.allocateMemory = allocateMemoryParam;
flags.multiOsContextCapable = multiOsContextCapableParam;
}
};
} // namespace NEO
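
Below is a minimal usage sketch, not part of this commit, showing how the packed flags above are meant to be driven; it assumes only the struct in this file plus the AllocationType enum and MemoryConstants pulled in through graphics_allocation.h:

#include "memory_manager/allocation_properties.h"

// Hypothetical example (not in the diff): properties for a 64 KB buffer on root device 0.
NEO::AllocationProperties makeBufferProperties() {
    NEO::AllocationProperties properties(0u, MemoryConstants::pageSize64k,
                                         NEO::GraphicsAllocation::AllocationType::BUFFER);
    // The delegating constructors already set both L3-flush bits; individual bits
    // and the aliased allFlags word stay in sync through the anonymous union.
    properties.flags.uncacheable = 1;
    properties.flags.forcePin = 1;
    return properties;
}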

View File

@@ -0,0 +1,23 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "memory_manager/graphics_allocation.h"
#include <mutex>
namespace NEO {
class CommandStreamReceiver;
class AllocationsList : public IDList<GraphicsAllocation, true, true> {
public:
std::unique_ptr<GraphicsAllocation> detachAllocation(size_t requiredMinimalSize, CommandStreamReceiver &commandStreamReceiver, GraphicsAllocation::AllocationType allocationType);
private:
GraphicsAllocation *detachAllocationImpl(GraphicsAllocation *, void *);
};
} // namespace NEO

View File

@@ -0,0 +1,41 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "memory_manager/deferrable_allocation_deletion.h"
#include "command_stream/command_stream_receiver.h"
#include "helpers/engine_control.h"
#include "memory_manager/memory_manager.h"
#include "os_interface/os_context.h"
namespace NEO {
DeferrableAllocationDeletion::DeferrableAllocationDeletion(MemoryManager &memoryManager, GraphicsAllocation &graphicsAllocation) : memoryManager(memoryManager),
graphicsAllocation(graphicsAllocation) {}
bool DeferrableAllocationDeletion::apply() {
if (graphicsAllocation.isUsed()) {
bool isStillUsed = false;
for (auto &engine : memoryManager.getRegisteredEngines()) {
auto contextId = engine.osContext->getContextId();
if (graphicsAllocation.isUsedByOsContext(contextId)) {
auto currentContextTaskCount = *engine.commandStreamReceiver->getTagAddress();
if (graphicsAllocation.getTaskCount(contextId) <= currentContextTaskCount) {
graphicsAllocation.releaseUsageInOsContext(contextId);
} else {
isStillUsed = true;
engine.commandStreamReceiver->flushBatchedSubmissions();
}
}
}
if (isStillUsed) {
return false;
}
}
memoryManager.freeGraphicsMemory(&graphicsAllocation);
return true;
}
} // namespace NEO

View File

@@ -0,0 +1,25 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "memory_manager/deferrable_deletion.h"
namespace NEO {
class GraphicsAllocation;
class MemoryManager;
class DeferrableAllocationDeletion : public DeferrableDeletion {
public:
DeferrableAllocationDeletion(MemoryManager &memoryManager, GraphicsAllocation &graphicsAllocation);
bool apply() override;
protected:
MemoryManager &memoryManager;
GraphicsAllocation &graphicsAllocation;
};
} // namespace NEO

View File

@@ -0,0 +1,18 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "utilities/idlist.h"
namespace NEO {
class DeferrableDeletion : public IDNode<DeferrableDeletion> {
public:
template <typename... Args>
static DeferrableDeletion *create(Args... args);
virtual bool apply() = 0;
};
} // namespace NEO

View File

@@ -0,0 +1,127 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "memory_manager/deferred_deleter.h"
#include "memory_manager/deferrable_deletion.h"
#include "os_interface/os_thread.h"
namespace NEO {
DeferredDeleter::DeferredDeleter() {
doWorkInBackground = false;
elementsToRelease = 0;
}
void DeferredDeleter::stop() {
// Called with threadMutex acquired
if (worker != nullptr) {
// Working thread was created so we can safely stop it
std::unique_lock<std::mutex> lock(queueMutex);
// Make sure that working thread really started
while (!doWorkInBackground) {
lock.unlock();
lock.lock();
}
// Signal working thread to finish its job
doWorkInBackground = false;
lock.unlock();
condition.notify_one();
// Wait for the worker thread to exit
worker->join();
// Delete working thread
worker.reset();
}
drain(false);
}
void DeferredDeleter::safeStop() {
std::lock_guard<std::mutex> lock(threadMutex);
stop();
}
DeferredDeleter::~DeferredDeleter() {
safeStop();
}
void DeferredDeleter::deferDeletion(DeferrableDeletion *deletion) {
std::unique_lock<std::mutex> lock(queueMutex);
elementsToRelease++;
queue.pushTailOne(*deletion);
lock.unlock();
condition.notify_one();
}
void DeferredDeleter::addClient() {
std::lock_guard<std::mutex> lock(threadMutex);
++numClients;
ensureThread();
}
void DeferredDeleter::removeClient() {
std::lock_guard<std::mutex> lock(threadMutex);
--numClients;
if (numClients == 0) {
stop();
}
}
void DeferredDeleter::ensureThread() {
if (worker != nullptr) {
return;
}
worker = Thread::create(run, reinterpret_cast<void *>(this));
}
bool DeferredDeleter::areElementsReleased() {
return elementsToRelease == 0;
}
bool DeferredDeleter::shouldStop() {
return !doWorkInBackground;
}
void *DeferredDeleter::run(void *arg) {
auto self = reinterpret_cast<DeferredDeleter *>(arg);
std::unique_lock<std::mutex> lock(self->queueMutex);
// Mark that working thread really started
self->doWorkInBackground = true;
do {
if (self->queue.peekIsEmpty()) {
// Wait for signal that some items are ready to be deleted
self->condition.wait(lock);
}
lock.unlock();
// Delete items placed into deferred delete queue
self->clearQueue();
lock.lock();
// Check whether working thread should be stopped
} while (!self->shouldStop());
lock.unlock();
return nullptr;
}
void DeferredDeleter::drain(bool blocking) {
clearQueue();
if (blocking) {
while (!areElementsReleased())
;
}
}
void DeferredDeleter::clearQueue() {
do {
auto deletion = queue.removeFrontOne();
if (deletion) {
if (deletion->apply()) {
elementsToRelease--;
} else {
queue.pushTailOne(*deletion.release());
}
}
} while (!queue.peekIsEmpty());
}
} // namespace NEO

View File

@@ -0,0 +1,54 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "utilities/idlist.h"
#include <atomic>
#include <condition_variable>
#include <mutex>
#include <utility>
namespace NEO {
class DeferrableDeletion;
class Thread;
class DeferredDeleter {
public:
DeferredDeleter();
virtual ~DeferredDeleter();
DeferredDeleter(const DeferredDeleter &) = delete;
DeferredDeleter &operator=(const DeferredDeleter &) = delete;
MOCKABLE_VIRTUAL void deferDeletion(DeferrableDeletion *deletion);
MOCKABLE_VIRTUAL void addClient();
MOCKABLE_VIRTUAL void removeClient();
MOCKABLE_VIRTUAL void drain(bool blocking);
protected:
void stop();
void safeStop();
void ensureThread();
MOCKABLE_VIRTUAL void clearQueue();
MOCKABLE_VIRTUAL bool areElementsReleased();
MOCKABLE_VIRTUAL bool shouldStop();
static void *run(void *);
std::atomic<bool> doWorkInBackground;
std::atomic<int> elementsToRelease;
std::unique_ptr<Thread> worker;
int32_t numClients = 0;
IDList<DeferrableDeletion, true> queue;
std::mutex queueMutex;
std::mutex threadMutex;
std::condition_variable condition;
};
} // namespace NEO
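
A hedged lifecycle sketch, not part of this commit, tying the deleter to the DeferrableAllocationDeletion added earlier in this change; the MemoryManager and GraphicsAllocation instances are assumed to come from the caller:

#include "memory_manager/deferrable_allocation_deletion.h"
#include "memory_manager/deferred_deleter.h"

// Hypothetical example: hand an allocation to the background deleter thread.
void deleteAsynchronously(NEO::DeferredDeleter &deleter,
                          NEO::MemoryManager &memoryManager,
                          NEO::GraphicsAllocation &allocation) {
    deleter.addClient(); // first client spawns the worker thread (ensureThread)
    deleter.deferDeletion(new NEO::DeferrableAllocationDeletion(memoryManager, allocation));
    deleter.drain(true); // optionally block until apply() succeeded for all queued items
    deleter.removeClient(); // last client signals the worker and joins it (stop)
}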

View File

@@ -0,0 +1,17 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstdint>
namespace NEO {
namespace EngineLimits {
constexpr uint32_t maxHandleCount = 1u;
}; // namespace EngineLimits
} // namespace NEO

View File

@@ -0,0 +1,15 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "memory_manager/memory_manager.h"
namespace NEO {
StorageInfo MemoryManager::createStorageInfoFromProperties(const AllocationProperties &properties) {
return {};
}
uint32_t StorageInfo::getNumHandles() const { return 1u; }
} // namespace NEO

View File

@@ -0,0 +1,15 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstdint>
namespace NEO {
struct StorageInfo {
uint32_t getNumHandles() const;
uint32_t getMemoryBanks() const { return 0u; }
};
} // namespace NEO

View File

@@ -0,0 +1,20 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstdint>
namespace NEO {
enum class EvictionStatus : uint32_t {
SUCCESS = 0,
FAILED,
NOT_APPLIED,
UNKNOWN
};
} // namespace NEO

View File

@@ -0,0 +1,148 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "memory_manager/gfx_partition.h"
#include "helpers/aligned_memory.h"
namespace NEO {
const std::array<HeapIndex, 4> GfxPartition::heap32Names{{HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY,
HeapIndex::HEAP_INTERNAL,
HeapIndex::HEAP_EXTERNAL_DEVICE_MEMORY,
HeapIndex::HEAP_EXTERNAL}};
const std::array<HeapIndex, 7> GfxPartition::heapNonSvmNames{{HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY,
HeapIndex::HEAP_INTERNAL,
HeapIndex::HEAP_EXTERNAL_DEVICE_MEMORY,
HeapIndex::HEAP_EXTERNAL,
HeapIndex::HEAP_STANDARD,
HeapIndex::HEAP_STANDARD64KB,
HeapIndex::HEAP_EXTENDED}};
GfxPartition::GfxPartition() : osMemory(OSMemory::create()) {}
GfxPartition::~GfxPartition() {
if (reservedCpuAddressRange) {
osMemory->releaseCpuAddressRange(reservedCpuAddressRange, reservedCpuAddressRangeSize);
}
}
void GfxPartition::Heap::init(uint64_t base, uint64_t size) {
this->base = base;
this->size = size;
// Exclude very first and very last 64K from GPU address range allocation
if (size > 2 * GfxPartition::heapGranularity) {
size -= 2 * GfxPartition::heapGranularity;
}
alloc = std::make_unique<HeapAllocator>(base + GfxPartition::heapGranularity, size);
}
void GfxPartition::freeGpuAddressRange(uint64_t ptr, size_t size) {
for (auto heapName : GfxPartition::heapNonSvmNames) {
auto &heap = getHeap(heapName);
if ((ptr > heap.getBase()) && ((ptr + size) < heap.getLimit())) {
heap.free(ptr, size);
break;
}
}
}
void GfxPartition::init(uint64_t gpuAddressSpace, size_t cpuAddressRangeSizeToReserve, uint32_t rootDeviceIndex, size_t numRootDevices) {
/*
* I. 64-bit builds:
*
* 1) 48-bit Full Range SVM gfx layout:
*
* SVM H0 H1 H2 H3 STANDARD STANDARD64K
* |__________________________________|____|____|____|____|________________|______________|
* | | | | | | | |
* | gfxBase gfxTop
* 0x0 0x0000800000000000 0x0000FFFFFFFFFFFF
*
*
* 2) 47-bit Full Range SVM gfx layout:
*
* gfxSize = 2^47 / 4 = 0x200000000000
* ________________________________________________
* / \
* SVM / H0 H1 H2 H3 STANDARD STANDARD64K \ SVM
* |________________|____|____|____|____|________________|______________|_______________|
* | | | | | | | | |
* | gfxBase gfxTop |
* 0x0 reserveCpuAddressRange(gfxSize) 0x00007FFFFFFFFFFF
* \_____________________________________ SVM _________________________________________/
*
*
*
* 3) Limited Range gfx layout (no SVM):
*
* H0 H1 H2 H3 STANDARD STANDARD64K
* |____|____|____|____|____________________|__________________|
* | | | | | | |
* gfxBase gfxTop
* 0x0 0xFFF...FFF < 47 bit
*
*
* II. 32-bit builds:
*
* 1) 32-bit Full Range SVM gfx layout:
*
* SVM H0 H1 H2 H3 STANDARD STANDARD64K
* |_______|____|____|____|____|________________|______________|
* | | | | | | | |
* | gfxBase gfxTop
* 0x0 0x100000000 gpuAddressSpace
*/
uint64_t gfxTop = gpuAddressSpace + 1;
uint64_t gfxBase = 0x0ull;
const uint64_t gfxHeap32Size = 4 * MemoryConstants::gigaByte;
if (is32bit) {
gfxBase = maxNBitValue(32) + 1;
heapInit(HeapIndex::HEAP_SVM, 0ull, gfxBase);
} else {
if (gpuAddressSpace == maxNBitValue(48)) {
gfxBase = maxNBitValue(48 - 1) + 1;
heapInit(HeapIndex::HEAP_SVM, 0ull, gfxBase);
} else if (gpuAddressSpace == maxNBitValue(47)) {
reservedCpuAddressRangeSize = cpuAddressRangeSizeToReserve;
UNRECOVERABLE_IF(reservedCpuAddressRangeSize == 0);
reservedCpuAddressRange = osMemory->reserveCpuAddressRange(reservedCpuAddressRangeSize);
UNRECOVERABLE_IF(reservedCpuAddressRange == nullptr);
UNRECOVERABLE_IF(!isAligned<GfxPartition::heapGranularity>(reservedCpuAddressRange));
gfxBase = reinterpret_cast<uint64_t>(reservedCpuAddressRange);
gfxTop = gfxBase + reservedCpuAddressRangeSize;
heapInit(HeapIndex::HEAP_SVM, 0ull, gpuAddressSpace + 1);
} else if (gpuAddressSpace < maxNBitValue(47)) {
gfxBase = 0ull;
heapInit(HeapIndex::HEAP_SVM, 0ull, 0ull);
} else {
initAdditionalRange(gpuAddressSpace, gfxBase, gfxTop, rootDeviceIndex, numRootDevices);
}
}
for (auto heap : GfxPartition::heap32Names) {
heapInit(heap, gfxBase, gfxHeap32Size);
gfxBase += gfxHeap32Size;
}
uint64_t gfxStandardSize = alignDown((gfxTop - gfxBase) >> 1, heapGranularity);
heapInit(HeapIndex::HEAP_STANDARD, gfxBase, gfxStandardSize);
gfxBase += gfxStandardSize;
// Split HEAP_STANDARD64K among root devices
auto gfxStandard64KBSize = alignDown(gfxStandardSize / numRootDevices, GfxPartition::heapGranularity);
heapInit(HeapIndex::HEAP_STANDARD64KB, gfxBase + rootDeviceIndex * gfxStandard64KBSize, gfxStandard64KBSize);
}
} // namespace NEO
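
A short sketch, not part of this commit, of how the 48-bit full-range layout drawn above (case I.1) is consumed; the values outside this file are illustrative:

#include "memory_manager/gfx_partition.h"

// Hypothetical example: single root device, 48-bit GPU address space.
uint64_t allocateStandardGpuVa() {
    NEO::GfxPartition partition;
    partition.init(maxNBitValue(48), 0u, 0u, 1u); // no CPU range reservation on this path
    size_t size = MemoryConstants::pageSize64k;   // passed by reference, may be rounded up
    return partition.heapAllocate(NEO::HeapIndex::HEAP_STANDARD, size);
}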

View File

@@ -0,0 +1,100 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "memory_manager/memory_constants.h"
#include "os_interface/os_memory.h"
#include "utilities/heap_allocator.h"
#include <array>
namespace NEO {
enum class HeapIndex : uint32_t {
HEAP_INTERNAL_DEVICE_MEMORY = 0u,
HEAP_INTERNAL = 1u,
HEAP_EXTERNAL_DEVICE_MEMORY = 2u,
HEAP_EXTERNAL = 3u,
HEAP_STANDARD,
HEAP_STANDARD64KB,
HEAP_SVM,
HEAP_EXTENDED,
// Please put new heap indexes above this line
TOTAL_HEAPS
};
class GfxPartition {
public:
GfxPartition();
MOCKABLE_VIRTUAL ~GfxPartition();
void init(uint64_t gpuAddressSpace, size_t cpuAddressRangeSizeToReserve, uint32_t rootDeviceIndex, size_t numRootDevices);
void heapInit(HeapIndex heapIndex, uint64_t base, uint64_t size) {
getHeap(heapIndex).init(base, size);
}
uint64_t heapAllocate(HeapIndex heapIndex, size_t &size) {
return getHeap(heapIndex).allocate(size);
}
void heapFree(HeapIndex heapIndex, uint64_t ptr, size_t size) {
getHeap(heapIndex).free(ptr, size);
}
MOCKABLE_VIRTUAL void freeGpuAddressRange(uint64_t ptr, size_t size);
uint64_t getHeapBase(HeapIndex heapIndex) {
return getHeap(heapIndex).getBase();
}
uint64_t getHeapLimit(HeapIndex heapIndex) {
return getHeap(heapIndex).getLimit();
}
uint64_t getHeapMinimalAddress(HeapIndex heapIndex) {
return getHeapBase(heapIndex) + heapGranularity;
}
bool isLimitedRange() { return getHeap(HeapIndex::HEAP_SVM).getSize() == 0ull; }
static const uint64_t heapGranularity = MemoryConstants::pageSize64k;
static const std::array<HeapIndex, 4> heap32Names;
static const std::array<HeapIndex, 7> heapNonSvmNames;
protected:
void initAdditionalRange(uint64_t gpuAddressSpace, uint64_t &gfxBase, uint64_t &gfxTop, uint32_t rootDeviceIndex, size_t numRootDevices);
class Heap {
public:
Heap() = default;
void init(uint64_t base, uint64_t size);
uint64_t getBase() const { return base; }
uint64_t getSize() const { return size; }
uint64_t getLimit() const { return size ? base + size - 1 : 0; }
uint64_t allocate(size_t &size) { return alloc->allocate(size); }
void free(uint64_t ptr, size_t size) { alloc->free(ptr, size); }
protected:
uint64_t base = 0, size = 0;
std::unique_ptr<HeapAllocator> alloc;
};
Heap &getHeap(HeapIndex heapIndex) {
return heaps[static_cast<uint32_t>(heapIndex)];
}
std::array<Heap, static_cast<uint32_t>(HeapIndex::TOTAL_HEAPS)> heaps;
void *reservedCpuAddressRange = nullptr;
size_t reservedCpuAddressRangeSize = 0;
std::unique_ptr<OSMemory> osMemory;
};
} // namespace NEO

View File

@@ -0,0 +1,16 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "memory_manager/gfx_partition.h"
namespace NEO {
void GfxPartition::initAdditionalRange(uint64_t gpuAddressSpace, uint64_t &gfxBase, uint64_t &gfxTop, uint32_t rootDeviceIndex, size_t numRootDevices) {
UNRECOVERABLE_IF("Invalid GPU Address Range!");
}
} // namespace NEO

View File

@@ -0,0 +1,73 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "graphics_allocation.h"
#include "helpers/aligned_memory.h"
#include "memory_manager/memory_manager.h"
#include "opencl/source/utilities/logger.h"
namespace NEO {
void GraphicsAllocation::setAllocationType(AllocationType allocationType) {
this->allocationType = allocationType;
FileLoggerInstance().logAllocation(this);
}
GraphicsAllocation::GraphicsAllocation(uint32_t rootDeviceIndex, AllocationType allocationType, void *cpuPtrIn, uint64_t gpuAddress, uint64_t baseAddress,
size_t sizeIn, MemoryPool::Type pool)
: rootDeviceIndex(rootDeviceIndex),
gpuBaseAddress(baseAddress),
gpuAddress(gpuAddress),
size(sizeIn),
cpuPtr(cpuPtrIn),
memoryPool(pool),
allocationType(allocationType),
usageInfos(MemoryManager::maxOsContextCount) {
}
GraphicsAllocation::GraphicsAllocation(uint32_t rootDeviceIndex, AllocationType allocationType, void *cpuPtrIn, size_t sizeIn, osHandle sharedHandleIn,
MemoryPool::Type pool)
: rootDeviceIndex(rootDeviceIndex),
gpuAddress(castToUint64(cpuPtrIn)),
size(sizeIn),
cpuPtr(cpuPtrIn),
memoryPool(pool),
allocationType(allocationType),
usageInfos(MemoryManager::maxOsContextCount) {
sharingInfo.sharedHandle = sharedHandleIn;
}
GraphicsAllocation::~GraphicsAllocation() = default;
void GraphicsAllocation::updateTaskCount(uint32_t newTaskCount, uint32_t contextId) {
if (usageInfos[contextId].taskCount == objectNotUsed) {
registeredContextsNum++;
}
if (newTaskCount == objectNotUsed) {
registeredContextsNum--;
}
usageInfos[contextId].taskCount = newTaskCount;
}
std::string GraphicsAllocation::getAllocationInfoString() const {
return "";
}
uint32_t GraphicsAllocation::getUsedPageSize() const {
switch (this->memoryPool) {
case MemoryPool::System64KBPages:
case MemoryPool::System64KBPagesWith32BitGpuAddressing:
case MemoryPool::LocalMemory:
return MemoryConstants::pageSize64k;
default:
return MemoryConstants::pageSize;
}
}
constexpr uint32_t GraphicsAllocation::objectNotUsed;
constexpr uint32_t GraphicsAllocation::objectNotResident;
} // namespace NEO

View File

@@ -0,0 +1,293 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "helpers/debug_helpers.h"
#include "helpers/ptr_math.h"
#include "memory_manager/host_ptr_defines.h"
#include "memory_manager/memory_constants.h"
#include "memory_manager/memory_pool.h"
#include "utilities/idlist.h"
#include "utilities/stackvec.h"
#include "engine_limits.h"
#include "storage_info.h"
#include <array>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <mutex>
#include <vector>
namespace NEO {
using osHandle = unsigned int;
inline osHandle toOsHandle(const void *handle) {
return static_cast<osHandle>(castToUint64(handle));
}
enum class HeapIndex : uint32_t;
namespace Sharing {
constexpr auto nonSharedResource = 0u;
}
class Gmm;
class MemoryManager;
class GraphicsAllocation : public IDNode<GraphicsAllocation> {
public:
enum class AllocationType {
UNKNOWN = 0,
BUFFER,
BUFFER_COMPRESSED,
BUFFER_HOST_MEMORY,
COMMAND_BUFFER,
CONSTANT_SURFACE,
DEVICE_QUEUE_BUFFER,
EXTERNAL_HOST_PTR,
FILL_PATTERN,
GLOBAL_SURFACE,
IMAGE,
INDIRECT_OBJECT_HEAP,
INSTRUCTION_HEAP,
INTERNAL_HEAP,
INTERNAL_HOST_MEMORY,
KERNEL_ISA,
LINEAR_STREAM,
MAP_ALLOCATION,
MCS,
PIPE,
PREEMPTION,
PRINTF_SURFACE,
PRIVATE_SURFACE,
PROFILING_TAG_BUFFER,
SCRATCH_SURFACE,
SHARED_BUFFER,
SHARED_CONTEXT_IMAGE,
SHARED_IMAGE,
SHARED_RESOURCE_COPY,
SURFACE_STATE_HEAP,
SVM_CPU,
SVM_GPU,
SVM_ZERO_COPY,
TAG_BUFFER,
GLOBAL_FENCE,
TIMESTAMP_PACKET_TAG_BUFFER,
WRITE_COMBINED,
RING_BUFFER,
SEMAPHORE_BUFFER
};
~GraphicsAllocation() override;
GraphicsAllocation &operator=(const GraphicsAllocation &) = delete;
GraphicsAllocation(const GraphicsAllocation &) = delete;
GraphicsAllocation(uint32_t rootDeviceIndex, AllocationType allocationType, void *cpuPtrIn, uint64_t gpuAddress, uint64_t baseAddress, size_t sizeIn, MemoryPool::Type pool);
GraphicsAllocation(uint32_t rootDeviceIndex, AllocationType allocationType, void *cpuPtrIn, size_t sizeIn, osHandle sharedHandleIn, MemoryPool::Type pool);
uint32_t getRootDeviceIndex() const { return rootDeviceIndex; }
void *getUnderlyingBuffer() const { return cpuPtr; }
void *getDriverAllocatedCpuPtr() const { return driverAllocatedCpuPointer; }
void setDriverAllocatedCpuPtr(void *allocatedCpuPtr) { driverAllocatedCpuPointer = allocatedCpuPtr; }
void setCpuPtrAndGpuAddress(void *cpuPtr, uint64_t gpuAddress) {
this->cpuPtr = cpuPtr;
this->gpuAddress = gpuAddress;
}
size_t getUnderlyingBufferSize() const { return size; }
void setSize(size_t size) { this->size = size; }
uint64_t getAllocationOffset() const {
return allocationOffset;
}
void setAllocationOffset(uint64_t offset) {
allocationOffset = offset;
}
uint64_t getGpuBaseAddress() const {
return gpuBaseAddress;
}
void setGpuBaseAddress(uint64_t baseAddress) {
gpuBaseAddress = baseAddress;
}
uint64_t getGpuAddress() const {
DEBUG_BREAK_IF(gpuAddress < gpuBaseAddress);
return gpuAddress + allocationOffset;
}
uint64_t getGpuAddressToPatch() const {
DEBUG_BREAK_IF(gpuAddress < gpuBaseAddress);
return gpuAddress + allocationOffset - gpuBaseAddress;
}
void lock(void *ptr) { lockedPtr = ptr; }
void unlock() { lockedPtr = nullptr; }
bool isLocked() const { return lockedPtr != nullptr; }
void *getLockedPtr() const { return lockedPtr; }
bool isCoherent() const { return allocationInfo.flags.coherent; }
void setCoherent(bool coherentIn) { allocationInfo.flags.coherent = coherentIn; }
void setEvictable(bool evictable) { allocationInfo.flags.evictable = evictable; }
bool peekEvictable() const { return allocationInfo.flags.evictable; }
bool isFlushL3Required() const { return allocationInfo.flags.flushL3Required; }
void setFlushL3Required(bool flushL3Required) { allocationInfo.flags.flushL3Required = flushL3Required; }
bool is32BitAllocation() const { return allocationInfo.flags.is32BitAllocation; }
void set32BitAllocation(bool is32BitAllocation) { allocationInfo.flags.is32BitAllocation = is32BitAllocation; }
void setAubWritable(bool writable, uint32_t banks);
bool isAubWritable(uint32_t banks) const;
void setTbxWritable(bool writable, uint32_t banks);
bool isTbxWritable(uint32_t banks) const;
void setAllocDumpable(bool dumpable) { aubInfo.allocDumpable = dumpable; }
bool isAllocDumpable() const { return aubInfo.allocDumpable; }
bool isMemObjectsAllocationWithWritableFlags() const { return aubInfo.memObjectsAllocationWithWritableFlags; }
void setMemObjectsAllocationWithWritableFlags(bool newValue) { aubInfo.memObjectsAllocationWithWritableFlags = newValue; }
void incReuseCount() { sharingInfo.reuseCount++; }
void decReuseCount() { sharingInfo.reuseCount--; }
uint32_t peekReuseCount() const { return sharingInfo.reuseCount; }
osHandle peekSharedHandle() const { return sharingInfo.sharedHandle; }
void setAllocationType(AllocationType allocationType);
AllocationType getAllocationType() const { return allocationType; }
MemoryPool::Type getMemoryPool() const { return memoryPool; }
bool isUsed() const { return registeredContextsNum > 0; }
bool isUsedByManyOsContexts() const { return registeredContextsNum > 1u; }
bool isUsedByOsContext(uint32_t contextId) const { return objectNotUsed != getTaskCount(contextId); }
void updateTaskCount(uint32_t newTaskCount, uint32_t contextId);
uint32_t getTaskCount(uint32_t contextId) const { return usageInfos[contextId].taskCount; }
void releaseUsageInOsContext(uint32_t contextId) { updateTaskCount(objectNotUsed, contextId); }
uint32_t getInspectionId(uint32_t contextId) const { return usageInfos[contextId].inspectionId; }
void setInspectionId(uint32_t newInspectionId, uint32_t contextId) { usageInfos[contextId].inspectionId = newInspectionId; }
bool isResident(uint32_t contextId) const { return GraphicsAllocation::objectNotResident != getResidencyTaskCount(contextId); }
void updateResidencyTaskCount(uint32_t newTaskCount, uint32_t contextId) { usageInfos[contextId].residencyTaskCount = newTaskCount; }
uint32_t getResidencyTaskCount(uint32_t contextId) const { return usageInfos[contextId].residencyTaskCount; }
void releaseResidencyInOsContext(uint32_t contextId) { updateResidencyTaskCount(objectNotResident, contextId); }
bool isResidencyTaskCountBelow(uint32_t taskCount, uint32_t contextId) const { return !isResident(contextId) || getResidencyTaskCount(contextId) < taskCount; }
virtual std::string getAllocationInfoString() const;
virtual uint64_t peekInternalHandle(MemoryManager *memoryManager) { return 0llu; }
static bool isCpuAccessRequired(AllocationType allocationType) {
return allocationType == AllocationType::COMMAND_BUFFER ||
allocationType == AllocationType::CONSTANT_SURFACE ||
allocationType == AllocationType::GLOBAL_SURFACE ||
allocationType == AllocationType::INTERNAL_HEAP ||
allocationType == AllocationType::LINEAR_STREAM ||
allocationType == AllocationType::PIPE ||
allocationType == AllocationType::PRINTF_SURFACE ||
allocationType == AllocationType::TIMESTAMP_PACKET_TAG_BUFFER ||
allocationType == AllocationType::RING_BUFFER ||
allocationType == AllocationType::SEMAPHORE_BUFFER;
}
void *getReservedAddressPtr() const {
return this->reservedAddressRangeInfo.addressPtr;
}
size_t getReservedAddressSize() const {
return this->reservedAddressRangeInfo.rangeSize;
}
void setReservedAddressRange(void *reserveAddress, size_t size) {
this->reservedAddressRangeInfo.addressPtr = reserveAddress;
this->reservedAddressRangeInfo.rangeSize = size;
}
Gmm *getDefaultGmm() const {
return getGmm(0u);
}
Gmm *getGmm(uint32_t handleId) const {
return gmms[handleId];
}
void setDefaultGmm(Gmm *gmm) {
setGmm(gmm, 0u);
}
void setGmm(Gmm *gmm, uint32_t handleId) {
gmms[handleId] = gmm;
}
uint32_t getNumHandles() const { return storageInfo.getNumHandles(); }
uint32_t getUsedPageSize() const;
OsHandleStorage fragmentsStorage;
StorageInfo storageInfo = {};
static constexpr uint32_t defaultBank = 0b1u;
static constexpr uint32_t allBanks = 0xffffffff;
constexpr static uint32_t objectNotResident = std::numeric_limits<uint32_t>::max();
constexpr static uint32_t objectNotUsed = std::numeric_limits<uint32_t>::max();
protected:
struct UsageInfo {
uint32_t taskCount = objectNotUsed;
uint32_t residencyTaskCount = objectNotResident;
uint32_t inspectionId = 0u;
};
struct AubInfo {
uint32_t aubWritable = std::numeric_limits<uint32_t>::max();
uint32_t tbxWritable = std::numeric_limits<uint32_t>::max();
bool allocDumpable = false;
bool memObjectsAllocationWithWritableFlags = false;
};
struct SharingInfo {
uint32_t reuseCount = 0;
osHandle sharedHandle = Sharing::nonSharedResource;
};
struct AllocationInfo {
union {
struct {
uint32_t coherent : 1;
uint32_t evictable : 1;
uint32_t flushL3Required : 1;
uint32_t is32BitAllocation : 1;
uint32_t reserved : 28;
} flags;
uint32_t allFlags = 0u;
};
static_assert(sizeof(AllocationInfo::flags) == sizeof(AllocationInfo::allFlags), "");
AllocationInfo() {
flags.coherent = false;
flags.evictable = true;
flags.flushL3Required = true;
flags.is32BitAllocation = false;
}
};
struct ReservedAddressRange {
void *addressPtr = nullptr;
size_t rangeSize = 0;
};
friend class SubmissionAggregator;
const uint32_t rootDeviceIndex;
AllocationInfo allocationInfo;
AubInfo aubInfo;
SharingInfo sharingInfo;
ReservedAddressRange reservedAddressRangeInfo;
uint64_t allocationOffset = 0u;
uint64_t gpuBaseAddress = 0;
uint64_t gpuAddress = 0;
void *driverAllocatedCpuPointer = nullptr;
size_t size = 0;
void *cpuPtr = nullptr;
void *lockedPtr = nullptr;
MemoryPool::Type memoryPool = MemoryPool::MemoryNull;
AllocationType allocationType = AllocationType::UNKNOWN;
StackVec<UsageInfo, 32> usageInfos;
std::atomic<uint32_t> registeredContextsNum{0};
std::array<Gmm *, EngineLimits::maxHandleCount> gmms{};
};
} // namespace NEO
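
As a worked example of the address arithmetic above (values illustrative, not from the commit): with gpuBaseAddress = 0x0000800000000000, gpuAddress = 0x0000800000010000 and allocationOffset = 0x40, getGpuAddress() yields 0x0000800000010040, while getGpuAddressToPatch() subtracts the base and yields 0x10040, i.e. the heap-relative offset used when commands are patched relative to gpuBaseAddress.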

View File

@@ -0,0 +1,17 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "memory_manager/graphics_allocation.h"
namespace NEO {
void GraphicsAllocation::setAubWritable(bool writable, uint32_t banks) { aubInfo.aubWritable = writable; }
bool GraphicsAllocation::isAubWritable(uint32_t banks) const { return (aubInfo.aubWritable != 0); }
void GraphicsAllocation::setTbxWritable(bool writable, uint32_t banks) { aubInfo.tbxWritable = writable; }
bool GraphicsAllocation::isTbxWritable(uint32_t banks) const { return (aubInfo.tbxWritable != 0); }
} // namespace NEO

View File

@@ -0,0 +1,76 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cinttypes>
#include <cstdlib>
namespace NEO {
struct OsHandle;
struct ResidencyData;
using OsGraphicsHandle = OsHandle;
constexpr int maxFragmentsCount = 3;
enum class FragmentPosition {
NONE = 0,
LEADING,
MIDDLE,
TRAILING
};
enum OverlapStatus {
FRAGMENT_NOT_OVERLAPING_WITH_ANY_OTHER = 0,
FRAGMENT_WITHIN_STORED_FRAGMENT,
FRAGMENT_WITH_EXACT_SIZE_AS_STORED_FRAGMENT,
FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT,
FRAGMENT_NOT_CHECKED
};
enum RequirementsStatus {
SUCCESS = 0,
FATAL
};
struct PartialAllocation {
FragmentPosition fragmentPosition = FragmentPosition::NONE;
const void *allocationPtr = nullptr;
size_t allocationSize = 0u;
};
struct AllocationRequirements {
PartialAllocation allocationFragments[maxFragmentsCount];
uint64_t totalRequiredSize = 0u;
uint32_t requiredFragmentsCount = 0u;
};
struct FragmentStorage {
const void *fragmentCpuPointer = nullptr;
size_t fragmentSize = 0;
int refCount = 0;
OsHandle *osInternalStorage = nullptr;
ResidencyData *residency = nullptr;
bool driverAllocation = false;
};
struct AllocationStorageData {
OsHandle *osHandleStorage = nullptr;
size_t fragmentSize = 0;
const void *cpuPtr = nullptr;
bool freeTheFragment = false;
ResidencyData *residency = nullptr;
};
struct OsHandleStorage {
AllocationStorageData fragmentStorageData[maxFragmentsCount];
uint32_t fragmentCount = 0;
};
} // namespace NEO

View File

@@ -0,0 +1,287 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "memory_manager/host_ptr_manager.h"
#include "memory_manager/memory_manager.h"
using namespace NEO;
HostPtrFragmentsContainer::iterator HostPtrManager::findElement(const void *ptr) {
auto nextElement = partialAllocations.lower_bound(ptr);
auto element = nextElement;
if (element != partialAllocations.end()) {
auto &storedFragment = element->second;
if (storedFragment.fragmentCpuPointer <= ptr) {
return element;
} else if (element != partialAllocations.begin()) {
element--;
auto &storedFragment = element->second;
auto storedEndAddress = (uintptr_t)storedFragment.fragmentCpuPointer + storedFragment.fragmentSize;
if (storedFragment.fragmentSize == 0) {
storedEndAddress++;
}
if ((uintptr_t)ptr < (uintptr_t)storedEndAddress) {
return element;
}
}
} else if (element != partialAllocations.begin()) {
element--;
auto &storedFragment = element->second;
auto storedEndAddress = (uintptr_t)storedFragment.fragmentCpuPointer + storedFragment.fragmentSize;
if (storedFragment.fragmentSize == 0) {
storedEndAddress++;
}
if ((uintptr_t)ptr < (uintptr_t)storedEndAddress) {
return element;
}
}
return partialAllocations.end();
}
AllocationRequirements HostPtrManager::getAllocationRequirements(const void *inputPtr, size_t size) {
AllocationRequirements requiredAllocations;
auto allocationCount = 0;
auto wholeAllocationSize = alignSizeWholePage(inputPtr, size);
auto alignedStartAddress = alignDown(inputPtr, MemoryConstants::pageSize);
bool leadingNeeded = false;
if (alignedStartAddress != inputPtr) {
leadingNeeded = true;
requiredAllocations.allocationFragments[allocationCount].allocationPtr = alignedStartAddress;
requiredAllocations.allocationFragments[allocationCount].fragmentPosition = FragmentPosition::LEADING;
requiredAllocations.allocationFragments[allocationCount].allocationSize = MemoryConstants::pageSize;
allocationCount++;
}
auto endAddress = ptrOffset(inputPtr, size);
auto alignedEndAddress = alignDown(endAddress, MemoryConstants::pageSize);
bool trailingNeeded = false;
if (alignedEndAddress != endAddress && alignedEndAddress != alignedStartAddress) {
trailingNeeded = true;
}
auto middleSize = wholeAllocationSize - (trailingNeeded + leadingNeeded) * MemoryConstants::pageSize;
if (middleSize) {
requiredAllocations.allocationFragments[allocationCount].allocationPtr = alignUp(inputPtr, MemoryConstants::pageSize);
requiredAllocations.allocationFragments[allocationCount].fragmentPosition = FragmentPosition::MIDDLE;
requiredAllocations.allocationFragments[allocationCount].allocationSize = middleSize;
allocationCount++;
}
if (trailingNeeded) {
requiredAllocations.allocationFragments[allocationCount].allocationPtr = alignedEndAddress;
requiredAllocations.allocationFragments[allocationCount].fragmentPosition = FragmentPosition::TRAILING;
requiredAllocations.allocationFragments[allocationCount].allocationSize = MemoryConstants::pageSize;
allocationCount++;
}
requiredAllocations.totalRequiredSize = wholeAllocationSize;
requiredAllocations.requiredFragmentsCount = allocationCount;
return requiredAllocations;
}
OsHandleStorage HostPtrManager::populateAlreadyAllocatedFragments(AllocationRequirements &requirements) {
OsHandleStorage handleStorage;
for (unsigned int i = 0; i < requirements.requiredFragmentsCount; i++) {
OverlapStatus overlapStatus = OverlapStatus::FRAGMENT_NOT_CHECKED;
FragmentStorage *fragmentStorage = getFragmentAndCheckForOverlaps(const_cast<void *>(requirements.allocationFragments[i].allocationPtr), requirements.allocationFragments[i].allocationSize, overlapStatus);
if (overlapStatus == OverlapStatus::FRAGMENT_WITHIN_STORED_FRAGMENT) {
UNRECOVERABLE_IF(fragmentStorage == nullptr);
fragmentStorage->refCount++;
handleStorage.fragmentStorageData[i].osHandleStorage = fragmentStorage->osInternalStorage;
handleStorage.fragmentStorageData[i].cpuPtr = requirements.allocationFragments[i].allocationPtr;
handleStorage.fragmentStorageData[i].fragmentSize = requirements.allocationFragments[i].allocationSize;
handleStorage.fragmentStorageData[i].residency = fragmentStorage->residency;
} else if (overlapStatus != OverlapStatus::FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT) {
if (fragmentStorage != nullptr) {
DEBUG_BREAK_IF(overlapStatus != OverlapStatus::FRAGMENT_WITH_EXACT_SIZE_AS_STORED_FRAGMENT);
fragmentStorage->refCount++;
handleStorage.fragmentStorageData[i].osHandleStorage = fragmentStorage->osInternalStorage;
handleStorage.fragmentStorageData[i].residency = fragmentStorage->residency;
} else {
DEBUG_BREAK_IF(overlapStatus != OverlapStatus::FRAGMENT_NOT_OVERLAPING_WITH_ANY_OTHER);
}
handleStorage.fragmentStorageData[i].cpuPtr = requirements.allocationFragments[i].allocationPtr;
handleStorage.fragmentStorageData[i].fragmentSize = requirements.allocationFragments[i].allocationSize;
} else {
//abort whole application instead of silently passing.
abortExecution();
return handleStorage;
}
}
handleStorage.fragmentCount = requirements.requiredFragmentsCount;
return handleStorage;
}
void HostPtrManager::storeFragment(FragmentStorage &fragment) {
std::lock_guard<decltype(allocationsMutex)> lock(allocationsMutex);
auto element = findElement(fragment.fragmentCpuPointer);
if (element != partialAllocations.end()) {
element->second.refCount++;
} else {
fragment.refCount++;
partialAllocations.insert(std::pair<const void *, FragmentStorage>(fragment.fragmentCpuPointer, fragment));
}
}
void HostPtrManager::storeFragment(AllocationStorageData &storageData) {
FragmentStorage fragment;
fragment.fragmentCpuPointer = const_cast<void *>(storageData.cpuPtr);
fragment.fragmentSize = storageData.fragmentSize;
fragment.osInternalStorage = storageData.osHandleStorage;
fragment.residency = storageData.residency;
storeFragment(fragment);
}
std::unique_lock<std::recursive_mutex> HostPtrManager::obtainOwnership() {
return std::unique_lock<std::recursive_mutex>(allocationsMutex);
}
void HostPtrManager::releaseHandleStorage(OsHandleStorage &fragments) {
for (int i = 0; i < maxFragmentsCount; i++) {
if (fragments.fragmentStorageData[i].fragmentSize || fragments.fragmentStorageData[i].cpuPtr) {
fragments.fragmentStorageData[i].freeTheFragment = releaseHostPtr(fragments.fragmentStorageData[i].cpuPtr);
}
}
}
bool HostPtrManager::releaseHostPtr(const void *ptr) {
std::lock_guard<decltype(allocationsMutex)> lock(allocationsMutex);
bool fragmentReadyToBeReleased = false;
auto element = findElement(ptr);
DEBUG_BREAK_IF(element == partialAllocations.end());
element->second.refCount--;
if (element->second.refCount <= 0) {
fragmentReadyToBeReleased = true;
partialAllocations.erase(element);
}
return fragmentReadyToBeReleased;
}
FragmentStorage *HostPtrManager::getFragment(const void *inputPtr) {
std::lock_guard<decltype(allocationsMutex)> lock(allocationsMutex);
auto element = findElement(inputPtr);
if (element != partialAllocations.end()) {
return &element->second;
}
return nullptr;
}
//for given inputs see if any allocation overlaps
FragmentStorage *HostPtrManager::getFragmentAndCheckForOverlaps(const void *inPtr, size_t size, OverlapStatus &overlappingStatus) {
std::lock_guard<decltype(allocationsMutex)> lock(allocationsMutex);
void *inputPtr = const_cast<void *>(inPtr);
auto nextElement = partialAllocations.lower_bound(inputPtr);
auto element = nextElement;
overlappingStatus = OverlapStatus::FRAGMENT_NOT_OVERLAPING_WITH_ANY_OTHER;
if (element != partialAllocations.begin()) {
element--;
}
if (element != partialAllocations.end()) {
auto &storedFragment = element->second;
if (storedFragment.fragmentCpuPointer == inputPtr && storedFragment.fragmentSize == size) {
overlappingStatus = OverlapStatus::FRAGMENT_WITH_EXACT_SIZE_AS_STORED_FRAGMENT;
return &element->second;
}
auto storedEndAddress = (uintptr_t)storedFragment.fragmentCpuPointer + storedFragment.fragmentSize;
auto inputEndAddress = (uintptr_t)inputPtr + size;
if (inputPtr >= storedFragment.fragmentCpuPointer && (uintptr_t)inputPtr < (uintptr_t)storedEndAddress) {
if (inputEndAddress <= storedEndAddress) {
overlappingStatus = OverlapStatus::FRAGMENT_WITHIN_STORED_FRAGMENT;
return &element->second;
} else {
overlappingStatus = OverlapStatus::FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT;
return nullptr;
}
}
//next fragment doesn't have to be after the inputPtr
if (nextElement != partialAllocations.end()) {
auto &storedNextElement = nextElement->second;
auto storedNextEndAddress = (uintptr_t)storedNextElement.fragmentCpuPointer + storedNextElement.fragmentSize;
auto storedNextStartAddress = (uintptr_t)storedNextElement.fragmentCpuPointer;
//check if this allocation is after the inputPtr
if ((uintptr_t)inputPtr < storedNextStartAddress) {
if (inputEndAddress > storedNextStartAddress) {
overlappingStatus = OverlapStatus::FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT;
return nullptr;
}
} else if (inputEndAddress > storedNextEndAddress) {
overlappingStatus = OverlapStatus::FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT;
return nullptr;
} else {
DEBUG_BREAK_IF((uintptr_t)inputPtr != storedNextStartAddress);
if (inputEndAddress < storedNextEndAddress) {
overlappingStatus = OverlapStatus::FRAGMENT_WITHIN_STORED_FRAGMENT;
} else {
DEBUG_BREAK_IF(inputEndAddress != storedNextEndAddress);
overlappingStatus = OverlapStatus::FRAGMENT_WITH_EXACT_SIZE_AS_STORED_FRAGMENT;
}
return &nextElement->second;
}
}
}
return nullptr;
}
OsHandleStorage HostPtrManager::prepareOsStorageForAllocation(MemoryManager &memoryManager, size_t size, const void *ptr, uint32_t rootDeviceIndex) {
std::lock_guard<decltype(allocationsMutex)> lock(allocationsMutex);
auto requirements = HostPtrManager::getAllocationRequirements(ptr, size);
UNRECOVERABLE_IF(checkAllocationsForOverlapping(memoryManager, &requirements) == RequirementsStatus::FATAL);
auto osStorage = populateAlreadyAllocatedFragments(requirements);
if (osStorage.fragmentCount > 0) {
if (memoryManager.populateOsHandles(osStorage, rootDeviceIndex) != MemoryManager::AllocationStatus::Success) {
memoryManager.cleanOsHandles(osStorage, rootDeviceIndex);
osStorage.fragmentCount = 0;
}
}
return osStorage;
}
RequirementsStatus HostPtrManager::checkAllocationsForOverlapping(MemoryManager &memoryManager, AllocationRequirements *requirements) {
UNRECOVERABLE_IF(requirements == nullptr);
RequirementsStatus status = RequirementsStatus::SUCCESS;
for (unsigned int i = 0; i < requirements->requiredFragmentsCount; i++) {
OverlapStatus overlapStatus = OverlapStatus::FRAGMENT_NOT_CHECKED;
getFragmentAndCheckForOverlaps(requirements->allocationFragments[i].allocationPtr, requirements->allocationFragments[i].allocationSize, overlapStatus);
if (overlapStatus == OverlapStatus::FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT) {
// clean temporary allocations
memoryManager.cleanTemporaryAllocationListOnAllEngines(false);
// check overlapping again
getFragmentAndCheckForOverlaps(requirements->allocationFragments[i].allocationPtr, requirements->allocationFragments[i].allocationSize, overlapStatus);
if (overlapStatus == OverlapStatus::FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT) {
// Wait for completion
memoryManager.cleanTemporaryAllocationListOnAllEngines(true);
// check overlapping last time
getFragmentAndCheckForOverlaps(requirements->allocationFragments[i].allocationPtr, requirements->allocationFragments[i].allocationSize, overlapStatus);
if (overlapStatus == OverlapStatus::FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT) {
status = RequirementsStatus::FATAL;
break;
}
}
}
}
return status;
}
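
As a worked example of getAllocationRequirements above (pointer values illustrative): with pageSize = 0x1000, inputPtr = 0x1100 and size = 0x2100, the end address 0x3200 is not page aligned, so the request is split into a LEADING page at 0x1000, a MIDDLE fragment of 0x1000 bytes at 0x2000 and a TRAILING page at 0x3000; totalRequiredSize is 0x3000 and requiredFragmentsCount is 3. populateAlreadyAllocatedFragments then reuses any of these fragments that are already tracked, bumping their refCount instead of mapping them again.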

View File

@@ -0,0 +1,38 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "memory_manager/host_ptr_defines.h"
#include <map>
#include <mutex>
namespace NEO {
using HostPtrFragmentsContainer = std::map<const void *, FragmentStorage>;
class MemoryManager;
class HostPtrManager {
public:
FragmentStorage *getFragment(const void *inputPtr);
OsHandleStorage prepareOsStorageForAllocation(MemoryManager &memoryManager, size_t size, const void *ptr, uint32_t rootDeviceIndex);
void releaseHandleStorage(OsHandleStorage &fragments);
bool releaseHostPtr(const void *ptr);
void storeFragment(AllocationStorageData &storageData);
void storeFragment(FragmentStorage &fragment);
std::unique_lock<std::recursive_mutex> obtainOwnership();
protected:
static AllocationRequirements getAllocationRequirements(const void *inputPtr, size_t size);
OsHandleStorage populateAlreadyAllocatedFragments(AllocationRequirements &requirements);
FragmentStorage *getFragmentAndCheckForOverlaps(const void *inputPtr, size_t size, OverlapStatus &overlappingStatus);
RequirementsStatus checkAllocationsForOverlapping(MemoryManager &memoryManager, AllocationRequirements *requirements);
HostPtrFragmentsContainer::iterator findElement(const void *ptr);
HostPtrFragmentsContainer partialAllocations;
std::recursive_mutex allocationsMutex;
};
} // namespace NEO

View File

@@ -0,0 +1,102 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "memory_manager/internal_allocation_storage.h"
#include "command_stream/command_stream_receiver.h"
#include "memory_manager/host_ptr_manager.h"
#include "memory_manager/memory_manager.h"
#include "os_interface/os_context.h"
namespace NEO {
InternalAllocationStorage::InternalAllocationStorage(CommandStreamReceiver &commandStreamReceiver) : commandStreamReceiver(commandStreamReceiver){};
void InternalAllocationStorage::storeAllocation(std::unique_ptr<GraphicsAllocation> gfxAllocation, uint32_t allocationUsage) {
uint32_t taskCount = gfxAllocation->getTaskCount(commandStreamReceiver.getOsContext().getContextId());
if (allocationUsage == REUSABLE_ALLOCATION) {
taskCount = commandStreamReceiver.peekTaskCount();
}
storeAllocationWithTaskCount(std::move(gfxAllocation), allocationUsage, taskCount);
}
void InternalAllocationStorage::storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation> gfxAllocation, uint32_t allocationUsage, uint32_t taskCount) {
if (allocationUsage == REUSABLE_ALLOCATION) {
if (DebugManager.flags.DisableResourceRecycling.get()) {
commandStreamReceiver.getMemoryManager()->freeGraphicsMemory(gfxAllocation.release());
return;
}
}
auto &allocationsList = (allocationUsage == TEMPORARY_ALLOCATION) ? temporaryAllocations : allocationsForReuse;
gfxAllocation->updateTaskCount(taskCount, commandStreamReceiver.getOsContext().getContextId());
allocationsList.pushTailOne(*gfxAllocation.release());
}
void InternalAllocationStorage::cleanAllocationList(uint32_t waitTaskCount, uint32_t allocationUsage) {
freeAllocationsList(waitTaskCount, (allocationUsage == TEMPORARY_ALLOCATION) ? temporaryAllocations : allocationsForReuse);
}
void InternalAllocationStorage::freeAllocationsList(uint32_t waitTaskCount, AllocationsList &allocationsList) {
auto memoryManager = commandStreamReceiver.getMemoryManager();
auto lock = memoryManager->getHostPtrManager()->obtainOwnership();
GraphicsAllocation *curr = allocationsList.detachNodes();
IDList<GraphicsAllocation, false, true> allocationsLeft;
while (curr != nullptr) {
auto *next = curr->next;
if (curr->getTaskCount(commandStreamReceiver.getOsContext().getContextId()) <= waitTaskCount) {
memoryManager->freeGraphicsMemory(curr);
} else {
allocationsLeft.pushTailOne(*curr);
}
curr = next;
}
if (allocationsLeft.peekIsEmpty() == false) {
allocationsList.splice(*allocationsLeft.detachNodes());
}
}
std::unique_ptr<GraphicsAllocation> InternalAllocationStorage::obtainReusableAllocation(size_t requiredSize, GraphicsAllocation::AllocationType allocationType) {
auto allocation = allocationsForReuse.detachAllocation(requiredSize, commandStreamReceiver, allocationType);
return allocation;
}
struct ReusableAllocationRequirements {
size_t requiredMinimalSize;
volatile uint32_t *csrTagAddress;
GraphicsAllocation::AllocationType allocationType;
uint32_t contextId;
};
std::unique_ptr<GraphicsAllocation> AllocationsList::detachAllocation(size_t requiredMinimalSize, CommandStreamReceiver &commandStreamReceiver, GraphicsAllocation::AllocationType allocationType) {
ReusableAllocationRequirements req;
req.requiredMinimalSize = requiredMinimalSize;
req.csrTagAddress = commandStreamReceiver.getTagAddress();
req.allocationType = allocationType;
req.contextId = commandStreamReceiver.getOsContext().getContextId();
GraphicsAllocation *a = nullptr;
GraphicsAllocation *retAlloc = processLocked<AllocationsList, &AllocationsList::detachAllocationImpl>(a, static_cast<void *>(&req));
return std::unique_ptr<GraphicsAllocation>(retAlloc);
}
GraphicsAllocation *AllocationsList::detachAllocationImpl(GraphicsAllocation *, void *data) {
ReusableAllocationRequirements *req = static_cast<ReusableAllocationRequirements *>(data);
auto *curr = head;
while (curr != nullptr) {
auto currentTagValue = *req->csrTagAddress;
if ((req->allocationType == curr->getAllocationType()) &&
(curr->getUnderlyingBufferSize() >= req->requiredMinimalSize) &&
(currentTagValue >= curr->getTaskCount(req->contextId))) {
return removeOneImpl(curr, nullptr);
}
curr = curr->next;
}
return nullptr;
}
} // namespace NEO

View File

@@ -0,0 +1,32 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "memory_manager/allocations_list.h"
namespace NEO {
class CommandStreamReceiver;
class InternalAllocationStorage {
public:
MOCKABLE_VIRTUAL ~InternalAllocationStorage() = default;
InternalAllocationStorage(CommandStreamReceiver &commandStreamReceiver);
MOCKABLE_VIRTUAL void cleanAllocationList(uint32_t waitTaskCount, uint32_t allocationUsage);
void storeAllocation(std::unique_ptr<GraphicsAllocation> gfxAllocation, uint32_t allocationUsage);
void storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation> gfxAllocation, uint32_t allocationUsage, uint32_t taskCount);
std::unique_ptr<GraphicsAllocation> obtainReusableAllocation(size_t requiredSize, GraphicsAllocation::AllocationType allocationType);
AllocationsList &getTemporaryAllocations() { return temporaryAllocations; }
AllocationsList &getAllocationsForReuse() { return allocationsForReuse; }
protected:
void freeAllocationsList(uint32_t waitTaskCount, AllocationsList &allocationsList);
CommandStreamReceiver &commandStreamReceiver;
AllocationsList temporaryAllocations;
AllocationsList allocationsForReuse;
};
} // namespace NEO
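
A hedged reuse sketch, not part of this commit, written as if inside namespace NEO; the REUSABLE_ALLOCATION usage constant is referenced from elsewhere in the codebase, as in internal_allocation_storage.cpp above:

// Hypothetical example: park an allocation for reuse and try to pull it back.
std::unique_ptr<GraphicsAllocation> recycle(InternalAllocationStorage &storage,
                                            std::unique_ptr<GraphicsAllocation> allocation) {
    // Recorded with the CSR's current task count; reusable once the tag catches up.
    storage.storeAllocation(std::move(allocation), REUSABLE_ALLOCATION);
    // Returns an empty pointer if nothing on the reuse list matches the type,
    // size and completed-task-count requirements yet.
    return storage.obtainReusableAllocation(4096, GraphicsAllocation::AllocationType::BUFFER);
}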

View File

@@ -0,0 +1,52 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "memory_manager/local_memory_usage.h"
#include <algorithm>
#include <bitset>
#include <iterator>
namespace NEO {
LocalMemoryUsageBankSelector::LocalMemoryUsageBankSelector(uint32_t banksCount) : banksCount(banksCount) {
UNRECOVERABLE_IF(banksCount == 0);
memorySizes.reset(new std::atomic<uint64_t>[banksCount]);
for (uint32_t i = 0; i < banksCount; i++) {
memorySizes[i] = 0;
}
}
uint32_t LocalMemoryUsageBankSelector::getLeastOccupiedBank() {
auto leastOccupiedBankIterator = std::min_element(memorySizes.get(), memorySizes.get() + banksCount);
return static_cast<uint32_t>(std::distance(memorySizes.get(), leastOccupiedBankIterator));
}
void LocalMemoryUsageBankSelector::freeOnBank(uint32_t bankIndex, uint64_t allocationSize) {
UNRECOVERABLE_IF(bankIndex >= banksCount);
memorySizes[bankIndex] -= allocationSize;
}
void LocalMemoryUsageBankSelector::reserveOnBank(uint32_t bankIndex, uint64_t allocationSize) {
UNRECOVERABLE_IF(bankIndex >= banksCount);
memorySizes[bankIndex] += allocationSize;
}
void LocalMemoryUsageBankSelector::updateUsageInfo(uint32_t memoryBanks, uint64_t allocationSize, bool reserve) {
auto banks = std::bitset<32>(memoryBanks);
for (uint32_t bankIndex = 0; bankIndex < banks.size() && bankIndex < banksCount; bankIndex++) {
if (banks.test(bankIndex)) {
if (reserve) {
reserveOnBank(bankIndex, allocationSize);
} else {
freeOnBank(bankIndex, allocationSize);
}
}
}
}
} // namespace NEO

View File

@@ -0,0 +1,41 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "helpers/debug_helpers.h"
#include "helpers/non_copyable_or_moveable.h"
#include <atomic>
#include <memory>
namespace NEO {
class LocalMemoryUsageBankSelector : public NonCopyableOrMovableClass {
public:
LocalMemoryUsageBankSelector() = delete;
LocalMemoryUsageBankSelector(uint32_t banksCount);
uint32_t getLeastOccupiedBank();
void reserveOnBanks(uint32_t memoryBanks, uint64_t allocationSize) {
updateUsageInfo(memoryBanks, allocationSize, true);
}
void freeOnBanks(uint32_t memoryBanks, uint64_t allocationSize) {
updateUsageInfo(memoryBanks, allocationSize, false);
}
uint64_t getOccupiedMemorySizeForBank(uint32_t bankIndex) {
UNRECOVERABLE_IF(bankIndex >= banksCount);
return memorySizes[bankIndex].load();
}
protected:
uint32_t banksCount = 0;
std::unique_ptr<std::atomic<uint64_t>[]> memorySizes = nullptr;
void updateUsageInfo(uint32_t memoryBanks, uint64_t allocationSize, bool reserve);
void freeOnBank(uint32_t bankIndex, uint64_t allocationSize);
void reserveOnBank(uint32_t bankIndex, uint64_t allocationSize);
};
} // namespace NEO
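
A small sketch, not part of this commit, of the bank-selection flow; encoding the chosen bank as a single-bit mask is an assumption about how callers build memoryBanks:

#include "memory_manager/local_memory_usage.h"

// Hypothetical example: pick the emptiest bank, then account for the allocation on it.
uint32_t placeAllocation(NEO::LocalMemoryUsageBankSelector &selector, uint64_t allocationSize) {
    uint32_t bank = selector.getLeastOccupiedBank();
    selector.reserveOnBanks(1u << bank, allocationSize); // memoryBanks is a bitmask of banks
    return bank;
}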

View File

@@ -0,0 +1,60 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstddef>
#include <cstdint>
#include <limits>
constexpr bool is32bit = (sizeof(void *) == 4);
constexpr bool is64bit = (sizeof(void *) == 8);
constexpr uint64_t maxNBitValue(uint64_t n) {
return ((1ULL << n) - 1);
}
static_assert(maxNBitValue(8) == std::numeric_limits<uint8_t>::max(), "");
static_assert(maxNBitValue(16) == std::numeric_limits<uint16_t>::max(), "");
static_assert(maxNBitValue(32) == std::numeric_limits<uint32_t>::max(), "");
namespace MemoryConstants {
constexpr uint64_t zoneHigh = ~(uint64_t)0xFFFFFFFF;
constexpr uint64_t kiloByte = 1024;
constexpr uint64_t kiloByteShiftSize = 10;
constexpr uint64_t megaByte = 1024 * kiloByte;
constexpr uint64_t gigaByte = 1024 * megaByte;
constexpr size_t minBufferAlignment = 4;
constexpr size_t cacheLineSize = 64;
constexpr size_t pageSize = 4 * kiloByte;
constexpr size_t pageSize64k = 64 * kiloByte;
constexpr size_t preferredAlignment = pageSize; // alignment preferred for performance reasons, i.e. internal allocations
constexpr size_t allocationAlignment = pageSize; // alignment required to gratify incoming pointer, i.e. passed host_ptr
constexpr size_t slmWindowAlignment = 128 * kiloByte;
constexpr size_t slmWindowSize = 64 * kiloByte;
constexpr uintptr_t pageMask = (pageSize - 1);
constexpr uintptr_t page64kMask = (pageSize64k - 1);
constexpr uint64_t max32BitAppAddress = maxNBitValue(31);
constexpr uint64_t max64BitAppAddress = maxNBitValue(47);
constexpr uint32_t sizeOf4GBinPageEntities = (MemoryConstants::gigaByte * 4 - MemoryConstants::pageSize) / MemoryConstants::pageSize;
constexpr uint64_t max32BitAddress = maxNBitValue(32);
constexpr uint64_t max36BitAddress = (maxNBitValue(36));
constexpr uint64_t max48BitAddress = maxNBitValue(48);
constexpr uintptr_t page4kEntryMask = std::numeric_limits<uintptr_t>::max() & ~MemoryConstants::pageMask;
constexpr uintptr_t page64kEntryMask = std::numeric_limits<uintptr_t>::max() & ~MemoryConstants::page64kMask;
constexpr int GfxAddressBits = is64bit ? 48 : 32;
constexpr uint64_t maxSvmAddress = is64bit ? maxNBitValue(47) : maxNBitValue(32);
} // namespace MemoryConstants
namespace BlitterConstants {
constexpr uint64_t maxBlitWidth = 0x7FC0; // 0x7FFF rounded down to the cache-line size
constexpr uint64_t maxBlitHeight = 0x7FFF;
enum class BlitDirection : uint32_t {
BufferToHostPtr,
HostPtrToBuffer,
BufferToBuffer
};
} // namespace BlitterConstants
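The header above is pure bit arithmetic, so a tiny standalone sketch (same formulas, nothing NEO-specific assumed) shows how maxNBitValue and the page masks are typically combined to align addresses to a 4 KB page:

#include <cassert>
#include <cstdint>

constexpr uint64_t maxNBitValue(uint64_t n) { return (1ULL << n) - 1; }

constexpr uint64_t pageSize = 4 * 1024;
constexpr uint64_t pageMask = pageSize - 1;

// Round an address down or up to a 4 KB page boundary.
constexpr uint64_t alignDownToPage(uint64_t address) { return address & ~pageMask; }
constexpr uint64_t alignUpToPage(uint64_t address) { return (address + pageMask) & ~pageMask; }

int main() {
    static_assert(maxNBitValue(32) == 0xFFFFFFFFULL, "32-bit max");
    assert(alignDownToPage(0x1234) == 0x1000);
    assert(alignUpToPage(0x1234) == 0x2000);
    // max48BitAddress in the header is simply maxNBitValue(48).
    assert(maxNBitValue(48) == 0xFFFFFFFFFFFFULL);
    return 0;
}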

View File

@@ -0,0 +1,577 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "memory_manager/memory_manager.h"
#include "command_stream/command_stream_receiver.h"
#include "debug_settings/debug_settings_manager.h"
#include "execution_environment/root_device_environment.h"
#include "gmm_helper/gmm.h"
#include "gmm_helper/gmm_helper.h"
#include "gmm_helper/page_table_mngr.h"
#include "gmm_helper/resource_info.h"
#include "helpers/aligned_memory.h"
#include "helpers/basic_math.h"
#include "helpers/hw_helper.h"
#include "helpers/hw_info.h"
#include "helpers/string.h"
#include "helpers/surface_format_info.h"
#include "memory_manager/deferrable_allocation_deletion.h"
#include "memory_manager/deferred_deleter.h"
#include "memory_manager/host_ptr_manager.h"
#include "memory_manager/internal_allocation_storage.h"
#include "os_interface/os_context.h"
#include "os_interface/os_interface.h"
#include "utilities/compiler_support.h"
#include "utilities/stackvec.h"
#include <algorithm>
namespace NEO {
uint32_t MemoryManager::maxOsContextCount = 0u;
MemoryManager::MemoryManager(ExecutionEnvironment &executionEnvironment) : executionEnvironment(executionEnvironment), hostPtrManager(std::make_unique<HostPtrManager>()),
multiContextResourceDestructor(std::make_unique<DeferredDeleter>()) {
localMemoryUsageBankSelector.reset(new LocalMemoryUsageBankSelector(getBanksCount()));
bool anyLocalMemorySupported = false;
for (uint32_t rootDeviceIndex = 0; rootDeviceIndex < executionEnvironment.rootDeviceEnvironments.size(); ++rootDeviceIndex) {
auto hwInfo = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo();
this->localMemorySupported.push_back(HwHelper::get(hwInfo->platform.eRenderCoreFamily).getEnableLocalMemory(*hwInfo));
this->enable64kbpages.push_back(OSInterface::osEnabled64kbPages && hwInfo->capabilityTable.ftr64KBpages);
if (DebugManager.flags.Enable64kbpages.get() > -1) {
this->enable64kbpages[rootDeviceIndex] = DebugManager.flags.Enable64kbpages.get() != 0;
}
gfxPartitions.push_back(std::make_unique<GfxPartition>());
anyLocalMemorySupported |= this->localMemorySupported[rootDeviceIndex];
}
if (anyLocalMemorySupported) {
pageFaultManager = PageFaultManager::create();
}
}
MemoryManager::~MemoryManager() {
for (auto &engine : registeredEngines) {
engine.osContext->decRefInternal();
}
if (reservedMemory) {
MemoryManager::alignedFreeWrapper(reservedMemory);
}
}
void *MemoryManager::allocateSystemMemory(size_t size, size_t alignment) {
// Establish a minimum alignment of 16 bytes.
constexpr size_t minAlignment = 16;
alignment = std::max(alignment, minAlignment);
auto restrictions = getAlignedMallocRestrictions();
void *ptr = alignedMallocWrapper(size, alignment);
if (restrictions == nullptr || restrictions->minAddress == 0) {
return ptr;
}
if (restrictions->minAddress > reinterpret_cast<uintptr_t>(ptr) && ptr != nullptr) {
StackVec<void *, 100> invalidMemVector;
invalidMemVector.push_back(ptr);
do {
ptr = alignedMallocWrapper(size, alignment);
if (restrictions->minAddress > reinterpret_cast<uintptr_t>(ptr) && ptr != nullptr) {
invalidMemVector.push_back(ptr);
} else {
break;
}
} while (1);
for (auto &it : invalidMemVector) {
alignedFreeWrapper(it);
}
}
return ptr;
}
GraphicsAllocation *MemoryManager::allocateGraphicsMemoryWithHostPtr(const AllocationData &allocationData) {
if (deferredDeleter) {
deferredDeleter->drain(true);
}
GraphicsAllocation *graphicsAllocation = nullptr;
auto osStorage = hostPtrManager->prepareOsStorageForAllocation(*this, allocationData.size, allocationData.hostPtr, allocationData.rootDeviceIndex);
if (osStorage.fragmentCount > 0) {
graphicsAllocation = createGraphicsAllocation(osStorage, allocationData);
}
return graphicsAllocation;
}
GraphicsAllocation *MemoryManager::allocateGraphicsMemoryForImageFromHostPtr(const AllocationData &allocationData) {
bool copyRequired = isCopyRequired(*allocationData.imgInfo, allocationData.hostPtr);
if (allocationData.hostPtr && !copyRequired) {
return allocateGraphicsMemoryWithHostPtr(allocationData);
}
return nullptr;
}
void MemoryManager::cleanGraphicsMemoryCreatedFromHostPtr(GraphicsAllocation *graphicsAllocation) {
hostPtrManager->releaseHandleStorage(graphicsAllocation->fragmentsStorage);
cleanOsHandles(graphicsAllocation->fragmentsStorage, graphicsAllocation->getRootDeviceIndex());
}
GraphicsAllocation *MemoryManager::createGraphicsAllocationWithPadding(GraphicsAllocation *inputGraphicsAllocation, size_t sizeWithPadding) {
return createPaddedAllocation(inputGraphicsAllocation, sizeWithPadding);
}
GraphicsAllocation *MemoryManager::createPaddedAllocation(GraphicsAllocation *inputGraphicsAllocation, size_t sizeWithPadding) {
return allocateGraphicsMemoryWithProperties({inputGraphicsAllocation->getRootDeviceIndex(), sizeWithPadding, GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY});
}
void MemoryManager::freeSystemMemory(void *ptr) {
::alignedFree(ptr);
}
void MemoryManager::freeGraphicsMemory(GraphicsAllocation *gfxAllocation) {
if (!gfxAllocation) {
return;
}
const bool hasFragments = gfxAllocation->fragmentsStorage.fragmentCount != 0;
const bool isLocked = gfxAllocation->isLocked();
DEBUG_BREAK_IF(hasFragments && isLocked);
if (!hasFragments) {
handleFenceCompletion(gfxAllocation);
}
if (isLocked) {
freeAssociatedResourceImpl(*gfxAllocation);
}
localMemoryUsageBankSelector->freeOnBanks(gfxAllocation->storageInfo.getMemoryBanks(), gfxAllocation->getUnderlyingBufferSize());
freeGraphicsMemoryImpl(gfxAllocation);
}
// If the allocation is not in use, destroy it in place.
// If it is still in use, hand it to a temporary allocation list that is cleaned on blocking calls.
void MemoryManager::checkGpuUsageAndDestroyGraphicsAllocations(GraphicsAllocation *gfxAllocation) {
if (gfxAllocation->isUsed()) {
if (gfxAllocation->isUsedByManyOsContexts()) {
multiContextResourceDestructor->deferDeletion(new DeferrableAllocationDeletion{*this, *gfxAllocation});
multiContextResourceDestructor->drain(false);
return;
}
for (auto &engine : getRegisteredEngines()) {
auto osContextId = engine.osContext->getContextId();
auto allocationTaskCount = gfxAllocation->getTaskCount(osContextId);
if (gfxAllocation->isUsedByOsContext(osContextId) &&
allocationTaskCount > *engine.commandStreamReceiver->getTagAddress()) {
engine.commandStreamReceiver->getInternalAllocationStorage()->storeAllocation(std::unique_ptr<GraphicsAllocation>(gfxAllocation),
TEMPORARY_ALLOCATION);
return;
}
}
}
freeGraphicsMemory(gfxAllocation);
}
void MemoryManager::waitForDeletions() {
if (deferredDeleter) {
deferredDeleter->drain(false);
}
deferredDeleter.reset(nullptr);
}
bool MemoryManager::isAsyncDeleterEnabled() const {
return asyncDeleterEnabled;
}
bool MemoryManager::isLocalMemorySupported(uint32_t rootDeviceIndex) const {
return localMemorySupported[rootDeviceIndex];
}
bool MemoryManager::peek64kbPagesEnabled(uint32_t rootDeviceIndex) const {
return enable64kbpages[rootDeviceIndex];
}
bool MemoryManager::isMemoryBudgetExhausted() const {
return false;
}
OsContext *MemoryManager::createAndRegisterOsContext(CommandStreamReceiver *commandStreamReceiver, aub_stream::EngineType engineType,
DeviceBitfield deviceBitfield, PreemptionMode preemptionMode, bool lowPriority) {
auto contextId = ++latestContextId;
auto osContext = OsContext::create(peekExecutionEnvironment().rootDeviceEnvironments[commandStreamReceiver->getRootDeviceIndex()]->osInterface.get(), contextId, deviceBitfield, engineType, preemptionMode, lowPriority);
UNRECOVERABLE_IF(!osContext->isInitialized());
osContext->incRefInternal();
registeredEngines.emplace_back(commandStreamReceiver, osContext);
return osContext;
}
bool MemoryManager::getAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const void *hostPtr, const StorageInfo &storageInfo) {
UNRECOVERABLE_IF(hostPtr == nullptr && !properties.flags.allocateMemory);
UNRECOVERABLE_IF(properties.allocationType == GraphicsAllocation::AllocationType::UNKNOWN);
bool allow64KbPages = false;
bool allow32Bit = false;
bool forcePin = properties.flags.forcePin;
bool mayRequireL3Flush = false;
switch (properties.allocationType) {
case GraphicsAllocation::AllocationType::BUFFER:
case GraphicsAllocation::AllocationType::BUFFER_COMPRESSED:
case GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY:
case GraphicsAllocation::AllocationType::CONSTANT_SURFACE:
case GraphicsAllocation::AllocationType::GLOBAL_SURFACE:
case GraphicsAllocation::AllocationType::PIPE:
case GraphicsAllocation::AllocationType::PRINTF_SURFACE:
case GraphicsAllocation::AllocationType::PRIVATE_SURFACE:
case GraphicsAllocation::AllocationType::SCRATCH_SURFACE:
case GraphicsAllocation::AllocationType::WRITE_COMBINED:
allow64KbPages = true;
allow32Bit = true;
default:
break;
}
switch (properties.allocationType) {
case GraphicsAllocation::AllocationType::SVM_GPU:
case GraphicsAllocation::AllocationType::SVM_ZERO_COPY:
allow64KbPages = true;
default:
break;
}
switch (properties.allocationType) {
case GraphicsAllocation::AllocationType::BUFFER:
case GraphicsAllocation::AllocationType::BUFFER_COMPRESSED:
case GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY:
case GraphicsAllocation::AllocationType::WRITE_COMBINED:
forcePin = true;
default:
break;
}
switch (properties.allocationType) {
case GraphicsAllocation::AllocationType::BUFFER:
case GraphicsAllocation::AllocationType::BUFFER_COMPRESSED:
case GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY:
case GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR:
case GraphicsAllocation::AllocationType::GLOBAL_SURFACE:
case GraphicsAllocation::AllocationType::IMAGE:
case GraphicsAllocation::AllocationType::MAP_ALLOCATION:
case GraphicsAllocation::AllocationType::PIPE:
case GraphicsAllocation::AllocationType::SHARED_BUFFER:
case GraphicsAllocation::AllocationType::SHARED_IMAGE:
case GraphicsAllocation::AllocationType::SHARED_RESOURCE_COPY:
case GraphicsAllocation::AllocationType::SVM_CPU:
case GraphicsAllocation::AllocationType::SVM_GPU:
case GraphicsAllocation::AllocationType::SVM_ZERO_COPY:
case GraphicsAllocation::AllocationType::WRITE_COMBINED:
mayRequireL3Flush = true;
default:
break;
}
switch (properties.allocationType) {
case GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY:
case GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER:
case GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR:
case GraphicsAllocation::AllocationType::FILL_PATTERN:
case GraphicsAllocation::AllocationType::MAP_ALLOCATION:
case GraphicsAllocation::AllocationType::MCS:
case GraphicsAllocation::AllocationType::PREEMPTION:
case GraphicsAllocation::AllocationType::PROFILING_TAG_BUFFER:
case GraphicsAllocation::AllocationType::SHARED_CONTEXT_IMAGE:
case GraphicsAllocation::AllocationType::SVM_CPU:
case GraphicsAllocation::AllocationType::SVM_ZERO_COPY:
case GraphicsAllocation::AllocationType::TAG_BUFFER:
case GraphicsAllocation::AllocationType::GLOBAL_FENCE:
case GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY:
allocationData.flags.useSystemMemory = true;
default:
break;
}
allocationData.flags.shareable = properties.flags.shareable;
allocationData.flags.requiresCpuAccess = GraphicsAllocation::isCpuAccessRequired(properties.allocationType);
allocationData.flags.allocateMemory = properties.flags.allocateMemory;
allocationData.flags.allow32Bit = allow32Bit;
allocationData.flags.allow64kbPages = allow64KbPages;
allocationData.flags.forcePin = forcePin;
allocationData.flags.uncacheable = properties.flags.uncacheable;
allocationData.flags.flushL3 =
(mayRequireL3Flush ? properties.flags.flushL3RequiredForRead | properties.flags.flushL3RequiredForWrite : 0u);
allocationData.flags.preferRenderCompressed = GraphicsAllocation::AllocationType::BUFFER_COMPRESSED == properties.allocationType;
allocationData.flags.multiOsContextCapable = properties.flags.multiOsContextCapable;
allocationData.hostPtr = hostPtr;
allocationData.size = properties.size;
allocationData.type = properties.allocationType;
allocationData.storageInfo = storageInfo;
allocationData.alignment = properties.alignment ? properties.alignment : MemoryConstants::preferredAlignment;
allocationData.imgInfo = properties.imgInfo;
if (allocationData.flags.allocateMemory) {
allocationData.hostPtr = nullptr;
}
allocationData.rootDeviceIndex = properties.rootDeviceIndex;
allocationData.flags.resource48Bit = properties.flags.resource48Bit;
return true;
}
GraphicsAllocation *MemoryManager::allocateGraphicsMemoryInPreferredPool(const AllocationProperties &properties, const void *hostPtr) {
AllocationData allocationData;
getAllocationData(allocationData, properties, hostPtr, createStorageInfoFromProperties(properties));
overrideAllocationData(allocationData, properties);
AllocationStatus status = AllocationStatus::Error;
GraphicsAllocation *allocation = allocateGraphicsMemoryInDevicePool(allocationData, status);
if (allocation) {
localMemoryUsageBankSelector->reserveOnBanks(allocationData.storageInfo.getMemoryBanks(), allocation->getUnderlyingBufferSize());
}
if (!allocation && status == AllocationStatus::RetryInNonDevicePool) {
allocation = allocateGraphicsMemory(allocationData);
}
FileLoggerInstance().logAllocation(allocation);
return allocation;
}
bool MemoryManager::mapAuxGpuVA(GraphicsAllocation *graphicsAllocation) {
auto index = graphicsAllocation->getRootDeviceIndex();
if (executionEnvironment.rootDeviceEnvironments[index]->pageTableManager.get()) {
return executionEnvironment.rootDeviceEnvironments[index]->pageTableManager->updateAuxTable(graphicsAllocation->getGpuAddress(), graphicsAllocation->getDefaultGmm(), true);
}
return false;
}
GraphicsAllocation *MemoryManager::allocateGraphicsMemory(const AllocationData &allocationData) {
if (allocationData.type == GraphicsAllocation::AllocationType::IMAGE || allocationData.type == GraphicsAllocation::AllocationType::SHARED_RESOURCE_COPY) {
UNRECOVERABLE_IF(allocationData.imgInfo == nullptr);
return allocateGraphicsMemoryForImage(allocationData);
}
if (allocationData.flags.shareable) {
return allocateShareableMemory(allocationData);
}
if (useNonSvmHostPtrAlloc(allocationData.type, allocationData.rootDeviceIndex)) {
auto allocation = allocateGraphicsMemoryForNonSvmHostPtr(allocationData);
if (allocation) {
allocation->setFlushL3Required(allocationData.flags.flushL3);
}
return allocation;
}
if (useInternal32BitAllocator(allocationData.type) ||
(force32bitAllocations && allocationData.flags.allow32Bit && is64bit)) {
return allocate32BitGraphicsMemoryImpl(allocationData);
}
if (allocationData.hostPtr) {
return allocateGraphicsMemoryWithHostPtr(allocationData);
}
if (peek64kbPagesEnabled(allocationData.rootDeviceIndex) && allocationData.flags.allow64kbPages) {
return allocateGraphicsMemory64kb(allocationData);
}
return allocateGraphicsMemoryWithAlignment(allocationData);
}
GraphicsAllocation *MemoryManager::allocateGraphicsMemoryForImage(const AllocationData &allocationData) {
auto gmm = std::make_unique<Gmm>(executionEnvironment.getGmmClientContext(), *allocationData.imgInfo, allocationData.storageInfo);
// AllocationData needs to be reconfigured for System Memory paths
AllocationData allocationDataWithSize = allocationData;
allocationDataWithSize.size = allocationData.imgInfo->size;
auto hostPtrAllocation = allocateGraphicsMemoryForImageFromHostPtr(allocationDataWithSize);
if (hostPtrAllocation) {
hostPtrAllocation->setDefaultGmm(gmm.release());
return hostPtrAllocation;
}
return allocateGraphicsMemoryForImageImpl(allocationDataWithSize, std::move(gmm));
}
EngineControlContainer &MemoryManager::getRegisteredEngines() {
return registeredEngines;
}
EngineControl *MemoryManager::getRegisteredEngineForCsr(CommandStreamReceiver *commandStreamReceiver) {
EngineControl *engineCtrl = nullptr;
for (auto &engine : registeredEngines) {
if (engine.commandStreamReceiver == commandStreamReceiver) {
engineCtrl = &engine;
break;
}
}
return engineCtrl;
}
void MemoryManager::unregisterEngineForCsr(CommandStreamReceiver *commandStreamReceiver) {
auto numRegisteredEngines = registeredEngines.size();
for (auto i = 0u; i < numRegisteredEngines; i++) {
if (registeredEngines[i].commandStreamReceiver == commandStreamReceiver) {
registeredEngines[i].osContext->decRefInternal();
std::swap(registeredEngines[i], registeredEngines[numRegisteredEngines - 1]);
registeredEngines.pop_back();
return;
}
}
}
void *MemoryManager::lockResource(GraphicsAllocation *graphicsAllocation) {
if (!graphicsAllocation) {
return nullptr;
}
if (graphicsAllocation->isLocked()) {
return graphicsAllocation->getLockedPtr();
}
auto retVal = lockResourceImpl(*graphicsAllocation);
graphicsAllocation->lock(retVal);
return retVal;
}
void MemoryManager::unlockResource(GraphicsAllocation *graphicsAllocation) {
if (!graphicsAllocation) {
return;
}
DEBUG_BREAK_IF(!graphicsAllocation->isLocked());
unlockResourceImpl(*graphicsAllocation);
graphicsAllocation->unlock();
}
HeapIndex MemoryManager::selectHeap(const GraphicsAllocation *allocation, bool hasPointer, bool isFullRangeSVM) {
if (allocation) {
if (useInternal32BitAllocator(allocation->getAllocationType())) {
return HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY;
}
if (allocation->is32BitAllocation()) {
return HeapIndex::HEAP_EXTERNAL;
}
}
if (isFullRangeSVM) {
if (hasPointer) {
return HeapIndex::HEAP_SVM;
}
if (allocation && allocation->getDefaultGmm()->gmmResourceInfo->is64KBPageSuitable()) {
return HeapIndex::HEAP_STANDARD64KB;
}
return HeapIndex::HEAP_STANDARD;
}
// Limited range allocation goes to STANDARD heap
return HeapIndex::HEAP_STANDARD;
}
bool MemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, const void *memoryToCopy, size_t sizeToCopy) {
if (!graphicsAllocation->getUnderlyingBuffer()) {
return false;
}
memcpy_s(graphicsAllocation->getUnderlyingBuffer(), graphicsAllocation->getUnderlyingBufferSize(), memoryToCopy, sizeToCopy);
return true;
}
void MemoryManager::waitForEnginesCompletion(GraphicsAllocation &graphicsAllocation) {
for (auto &engine : getRegisteredEngines()) {
auto osContextId = engine.osContext->getContextId();
auto allocationTaskCount = graphicsAllocation.getTaskCount(osContextId);
if (graphicsAllocation.isUsedByOsContext(osContextId) &&
allocationTaskCount > *engine.commandStreamReceiver->getTagAddress()) {
engine.commandStreamReceiver->waitForCompletionWithTimeout(false, TimeoutControls::maxTimeout, allocationTaskCount);
}
}
}
void MemoryManager::cleanTemporaryAllocationListOnAllEngines(bool waitForCompletion) {
for (auto &engine : getRegisteredEngines()) {
auto csr = engine.commandStreamReceiver;
if (waitForCompletion) {
csr->waitForCompletionWithTimeout(false, 0, csr->peekLatestSentTaskCount());
}
csr->getInternalAllocationStorage()->cleanAllocationList(*csr->getTagAddress(), AllocationUsage::TEMPORARY_ALLOCATION);
}
}
void *MemoryManager::getReservedMemory(size_t size, size_t alignment) {
static std::mutex mutex;
std::lock_guard<std::mutex> lock(mutex);
if (!reservedMemory) {
reservedMemory = allocateSystemMemory(size, alignment);
}
return reservedMemory;
}
bool MemoryManager::isHostPointerTrackingEnabled(uint32_t rootDeviceIndex) {
if (DebugManager.flags.EnableHostPtrTracking.get() != -1) {
return !!DebugManager.flags.EnableHostPtrTracking.get();
}
return (peekExecutionEnvironment().rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->capabilityTable.hostPtrTrackingEnabled | is32bit);
}
bool MemoryManager::isCopyRequired(ImageInfo &imgInfo, const void *hostPtr) {
if (!hostPtr) {
return false;
}
size_t imageWidth = imgInfo.imgDesc.imageWidth;
size_t imageHeight = 1;
size_t imageDepth = 1;
size_t imageCount = 1;
switch (imgInfo.imgDesc.imageType) {
case ImageType::Image3D:
imageDepth = imgInfo.imgDesc.imageDepth;
CPP_ATTRIBUTE_FALLTHROUGH;
case ImageType::Image2D:
case ImageType::Image2DArray:
imageHeight = imgInfo.imgDesc.imageHeight;
break;
default:
break;
}
auto hostPtrRowPitch = imgInfo.imgDesc.imageRowPitch ? imgInfo.imgDesc.imageRowPitch : imageWidth * imgInfo.surfaceFormat->ImageElementSizeInBytes;
auto hostPtrSlicePitch = imgInfo.imgDesc.imageSlicePitch ? imgInfo.imgDesc.imageSlicePitch : hostPtrRowPitch * imgInfo.imgDesc.imageHeight;
size_t pointerPassedSize = hostPtrRowPitch * imageHeight * imageDepth * imageCount;
auto alignedSizePassedPointer = alignSizeWholePage(const_cast<void *>(hostPtr), pointerPassedSize);
auto alignedSizeRequiredForAllocation = alignSizeWholePage(const_cast<void *>(hostPtr), imgInfo.size);
// A copy is needed when the passed pointer does not cover enough memory, the row/slice pitches differ, the pointer is not cacheline-aligned, or the image storage is not linear.
bool copyRequired = (alignedSizeRequiredForAllocation > alignedSizePassedPointer) |
(imgInfo.rowPitch != hostPtrRowPitch) |
(imgInfo.slicePitch != hostPtrSlicePitch) |
((reinterpret_cast<uintptr_t>(hostPtr) & (MemoryConstants::cacheLineSize - 1)) != 0) |
!imgInfo.linearStorage;
return copyRequired;
}
void MemoryManager::overrideAllocationData(AllocationData &allocationData, const AllocationProperties &properties) {
int32_t directRingPlacement = DebugManager.flags.DirectSubmissionBufferPlacement.get();
if (properties.allocationType == GraphicsAllocation::AllocationType::RING_BUFFER &&
directRingPlacement != -1) {
if (directRingPlacement == 0) {
allocationData.flags.requiresCpuAccess = true;
allocationData.flags.useSystemMemory = false;
} else {
allocationData.flags.requiresCpuAccess = false;
allocationData.flags.useSystemMemory = true;
}
}
int32_t directSemaphorePlacement = DebugManager.flags.DirectSubmissionSemaphorePlacement.get();
if (properties.allocationType == GraphicsAllocation::AllocationType::SEMAPHORE_BUFFER &&
directSemaphorePlacement != -1) {
if (directSemaphorePlacement == 0) {
allocationData.flags.requiresCpuAccess = true;
allocationData.flags.useSystemMemory = false;
} else {
allocationData.flags.requiresCpuAccess = false;
allocationData.flags.useSystemMemory = true;
}
}
}
} // namespace NEO
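One part of memory_manager.cpp worth seeing in miniature is the fallback in allocateGraphicsMemoryInPreferredPool: the device pool is tried first, and the generic path runs only when the device pool explicitly returns RetryInNonDevicePool. The sketch below uses hypothetical stand-in types (Allocation, allocateInDevicePool, allocateInSystemMemory) purely to illustrate that control flow; it is not the NEO implementation.

#include <cstdio>

enum class AllocationStatus { Success, Error, RetryInNonDevicePool };

struct Allocation { /* stand-in for GraphicsAllocation */ };

// Stand-ins for the two allocation paths used by the real memory manager.
Allocation *allocateInDevicePool(AllocationStatus &status) {
    status = AllocationStatus::RetryInNonDevicePool; // pretend local memory is unavailable
    return nullptr;
}
Allocation *allocateInSystemMemory() { return new Allocation(); }

Allocation *allocateInPreferredPool() {
    AllocationStatus status = AllocationStatus::Error;
    Allocation *allocation = allocateInDevicePool(status);
    // Fall back to system memory only when the device pool explicitly asks for a retry.
    if (!allocation && status == AllocationStatus::RetryInNonDevicePool) {
        allocation = allocateInSystemMemory();
    }
    return allocation;
}

int main() {
    Allocation *allocation = allocateInPreferredPool();
    std::printf("allocation %s\n", allocation ? "succeeded via fallback" : "failed");
    delete allocation;
    return 0;
}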

View File

@@ -0,0 +1,258 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "command_stream/preemption_mode.h"
#include "helpers/aligned_memory.h"
#include "helpers/bit_helpers.h"
#include "helpers/common_types.h"
#include "helpers/engine_control.h"
#include "memory_manager/allocation_properties.h"
#include "memory_manager/gfx_partition.h"
#include "memory_manager/graphics_allocation.h"
#include "memory_manager/host_ptr_defines.h"
#include "memory_manager/local_memory_usage.h"
#include "page_fault_manager/cpu_page_fault_manager.h"
#include "engine_node.h"
#include <bitset>
#include <cstdint>
#include <mutex>
#include <vector>
namespace NEO {
class DeferredDeleter;
class ExecutionEnvironment;
class Gmm;
class HostPtrManager;
class OsContext;
enum AllocationUsage {
TEMPORARY_ALLOCATION,
REUSABLE_ALLOCATION
};
struct AlignedMallocRestrictions {
uintptr_t minAddress;
};
constexpr size_t paddingBufferSize = 2 * MemoryConstants::megaByte;
class MemoryManager {
public:
enum AllocationStatus {
Success = 0,
Error,
InvalidHostPointer,
RetryInNonDevicePool
};
MemoryManager(ExecutionEnvironment &executionEnvironment);
virtual ~MemoryManager();
MOCKABLE_VIRTUAL void *allocateSystemMemory(size_t size, size_t alignment);
virtual void addAllocationToHostPtrManager(GraphicsAllocation *memory) = 0;
virtual void removeAllocationFromHostPtrManager(GraphicsAllocation *memory) = 0;
MOCKABLE_VIRTUAL GraphicsAllocation *allocateGraphicsMemoryWithProperties(const AllocationProperties &properties) {
return allocateGraphicsMemoryInPreferredPool(properties, nullptr);
}
MOCKABLE_VIRTUAL GraphicsAllocation *allocateGraphicsMemoryWithProperties(const AllocationProperties &properties, const void *ptr) {
return allocateGraphicsMemoryInPreferredPool(properties, ptr);
}
MOCKABLE_VIRTUAL GraphicsAllocation *allocateGraphicsMemoryInPreferredPool(const AllocationProperties &properties, const void *hostPtr);
virtual GraphicsAllocation *createGraphicsAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool requireSpecificBitness) = 0;
virtual GraphicsAllocation *createGraphicsAllocationFromNTHandle(void *handle, uint32_t rootDeviceIndex) = 0;
virtual bool mapAuxGpuVA(GraphicsAllocation *graphicsAllocation);
void *lockResource(GraphicsAllocation *graphicsAllocation);
void unlockResource(GraphicsAllocation *graphicsAllocation);
void cleanGraphicsMemoryCreatedFromHostPtr(GraphicsAllocation *);
GraphicsAllocation *createGraphicsAllocationWithPadding(GraphicsAllocation *inputGraphicsAllocation, size_t sizeWithPadding);
virtual GraphicsAllocation *createPaddedAllocation(GraphicsAllocation *inputGraphicsAllocation, size_t sizeWithPadding);
virtual AllocationStatus populateOsHandles(OsHandleStorage &handleStorage, uint32_t rootDeviceIndex) = 0;
virtual void cleanOsHandles(OsHandleStorage &handleStorage, uint32_t rootDeviceIndex) = 0;
void freeSystemMemory(void *ptr);
virtual void freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation) = 0;
MOCKABLE_VIRTUAL void freeGraphicsMemory(GraphicsAllocation *gfxAllocation);
virtual void handleFenceCompletion(GraphicsAllocation *allocation){};
void checkGpuUsageAndDestroyGraphicsAllocations(GraphicsAllocation *gfxAllocation);
virtual uint64_t getSystemSharedMemory(uint32_t rootDeviceIndex) = 0;
virtual uint64_t getLocalMemorySize(uint32_t rootDeviceIndex) = 0;
uint64_t getMaxApplicationAddress() { return is64bit ? MemoryConstants::max64BitAppAddress : MemoryConstants::max32BitAppAddress; };
uint64_t getInternalHeapBaseAddress(uint32_t rootDeviceIndex) { return getGfxPartition(rootDeviceIndex)->getHeapBase(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY); }
uint64_t getExternalHeapBaseAddress(uint32_t rootDeviceIndex) { return getGfxPartition(rootDeviceIndex)->getHeapBase(HeapIndex::HEAP_EXTERNAL); }
bool isLimitedRange(uint32_t rootDeviceIndex) { return getGfxPartition(rootDeviceIndex)->isLimitedRange(); }
bool peek64kbPagesEnabled(uint32_t rootDeviceIndex) const;
bool peekForce32BitAllocations() const { return force32bitAllocations; }
void setForce32BitAllocations(bool newValue) { force32bitAllocations = newValue; }
bool peekVirtualPaddingSupport() const { return virtualPaddingAvailable; }
void setVirtualPaddingSupport(bool virtualPaddingSupport) { virtualPaddingAvailable = virtualPaddingSupport; }
DeferredDeleter *getDeferredDeleter() const {
return deferredDeleter.get();
}
PageFaultManager *getPageFaultManager() const {
return pageFaultManager.get();
}
void waitForDeletions();
void waitForEnginesCompletion(GraphicsAllocation &graphicsAllocation);
void cleanTemporaryAllocationListOnAllEngines(bool waitForCompletion);
bool isAsyncDeleterEnabled() const;
bool isLocalMemorySupported(uint32_t rootDeviceIndex) const;
virtual bool isMemoryBudgetExhausted() const;
virtual AlignedMallocRestrictions *getAlignedMallocRestrictions() {
return nullptr;
}
MOCKABLE_VIRTUAL void *alignedMallocWrapper(size_t bytes, size_t alignment) {
return ::alignedMalloc(bytes, alignment);
}
MOCKABLE_VIRTUAL void alignedFreeWrapper(void *ptr) {
::alignedFree(ptr);
}
MOCKABLE_VIRTUAL bool isHostPointerTrackingEnabled(uint32_t rootDeviceIndex);
void setForceNonSvmForExternalHostPtr(bool mode) {
forceNonSvmForExternalHostPtr = mode;
}
const ExecutionEnvironment &peekExecutionEnvironment() const { return executionEnvironment; }
OsContext *createAndRegisterOsContext(CommandStreamReceiver *commandStreamReceiver, aub_stream::EngineType engineType,
DeviceBitfield deviceBitfield, PreemptionMode preemptionMode, bool lowPriority);
uint32_t getRegisteredEnginesCount() const { return static_cast<uint32_t>(registeredEngines.size()); }
EngineControlContainer &getRegisteredEngines();
EngineControl *getRegisteredEngineForCsr(CommandStreamReceiver *commandStreamReceiver);
void unregisterEngineForCsr(CommandStreamReceiver *commandStreamReceiver);
HostPtrManager *getHostPtrManager() const { return hostPtrManager.get(); }
void setDefaultEngineIndex(uint32_t index) { defaultEngineIndex = index; }
virtual bool copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, const void *memoryToCopy, size_t sizeToCopy);
static HeapIndex selectHeap(const GraphicsAllocation *allocation, bool hasPointer, bool isFullRangeSVM);
static std::unique_ptr<MemoryManager> createMemoryManager(ExecutionEnvironment &executionEnvironment);
virtual void *reserveCpuAddressRange(size_t size, uint32_t rootDeviceIndex) { return nullptr; };
virtual void releaseReservedCpuAddressRange(void *reserved, size_t size, uint32_t rootDeviceIndex){};
void *getReservedMemory(size_t size, size_t alignment);
GfxPartition *getGfxPartition(uint32_t rootDeviceIndex) { return gfxPartitions.at(rootDeviceIndex).get(); }
static uint32_t maxOsContextCount;
virtual void commonCleanup(){};
protected:
struct AllocationData {
union {
struct {
uint32_t allocateMemory : 1;
uint32_t allow64kbPages : 1;
uint32_t allow32Bit : 1;
uint32_t useSystemMemory : 1;
uint32_t forcePin : 1;
uint32_t uncacheable : 1;
uint32_t flushL3 : 1;
uint32_t preferRenderCompressed : 1;
uint32_t multiOsContextCapable : 1;
uint32_t requiresCpuAccess : 1;
uint32_t shareable : 1;
uint32_t resource48Bit : 1;
uint32_t reserved : 20;
} flags;
uint32_t allFlags = 0;
};
static_assert(sizeof(AllocationData::flags) == sizeof(AllocationData::allFlags), "");
GraphicsAllocation::AllocationType type = GraphicsAllocation::AllocationType::UNKNOWN;
const void *hostPtr = nullptr;
size_t size = 0;
size_t alignment = 0;
StorageInfo storageInfo = {};
ImageInfo *imgInfo = nullptr;
uint32_t rootDeviceIndex = 0;
};
static bool getAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const void *hostPtr, const StorageInfo &storageInfo);
static void overrideAllocationData(AllocationData &allocationData, const AllocationProperties &properties);
static bool useInternal32BitAllocator(GraphicsAllocation::AllocationType allocationType) {
return allocationType == GraphicsAllocation::AllocationType::KERNEL_ISA ||
allocationType == GraphicsAllocation::AllocationType::INTERNAL_HEAP;
}
static bool isCopyRequired(ImageInfo &imgInfo, const void *hostPtr);
bool useNonSvmHostPtrAlloc(GraphicsAllocation::AllocationType allocationType, uint32_t rootDeviceIndex) {
bool isExternalHostPtrAlloc = (allocationType == GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR);
bool isMapAlloc = (allocationType == GraphicsAllocation::AllocationType::MAP_ALLOCATION);
if (forceNonSvmForExternalHostPtr && isExternalHostPtrAlloc) {
return true;
}
bool isNonSvmPtrCapable = ((!peekExecutionEnvironment().isFullRangeSvm() || !isHostPointerTrackingEnabled(rootDeviceIndex)) & !is32bit);
return isNonSvmPtrCapable && (isExternalHostPtrAlloc || isMapAlloc);
}
StorageInfo createStorageInfoFromProperties(const AllocationProperties &properties);
virtual GraphicsAllocation *createGraphicsAllocation(OsHandleStorage &handleStorage, const AllocationData &allocationData) = 0;
virtual GraphicsAllocation *allocateGraphicsMemoryForNonSvmHostPtr(const AllocationData &allocationData) = 0;
GraphicsAllocation *allocateGraphicsMemory(const AllocationData &allocationData);
virtual GraphicsAllocation *allocateGraphicsMemoryWithHostPtr(const AllocationData &allocationData);
virtual GraphicsAllocation *allocateGraphicsMemoryWithAlignment(const AllocationData &allocationData) = 0;
virtual GraphicsAllocation *allocateGraphicsMemory64kb(const AllocationData &allocationData) = 0;
virtual GraphicsAllocation *allocate32BitGraphicsMemoryImpl(const AllocationData &allocationData) = 0;
virtual GraphicsAllocation *allocateGraphicsMemoryInDevicePool(const AllocationData &allocationData, AllocationStatus &status) = 0;
GraphicsAllocation *allocateGraphicsMemoryForImageFromHostPtr(const AllocationData &allocationData);
MOCKABLE_VIRTUAL GraphicsAllocation *allocateGraphicsMemoryForImage(const AllocationData &allocationData);
virtual GraphicsAllocation *allocateGraphicsMemoryForImageImpl(const AllocationData &allocationData, std::unique_ptr<Gmm> gmm) = 0;
virtual GraphicsAllocation *allocateShareableMemory(const AllocationData &allocationData) = 0;
virtual void *lockResourceImpl(GraphicsAllocation &graphicsAllocation) = 0;
virtual void unlockResourceImpl(GraphicsAllocation &graphicsAllocation) = 0;
virtual void freeAssociatedResourceImpl(GraphicsAllocation &graphicsAllocation) { return unlockResourceImpl(graphicsAllocation); };
uint32_t getBanksCount();
bool forceNonSvmForExternalHostPtr = false;
bool force32bitAllocations = false;
bool virtualPaddingAvailable = false;
std::unique_ptr<DeferredDeleter> deferredDeleter;
bool asyncDeleterEnabled = false;
std::vector<bool> enable64kbpages;
std::vector<bool> localMemorySupported;
bool supportsMultiStorageResources = true;
ExecutionEnvironment &executionEnvironment;
EngineControlContainer registeredEngines;
std::unique_ptr<HostPtrManager> hostPtrManager;
uint32_t latestContextId = std::numeric_limits<uint32_t>::max();
uint32_t defaultEngineIndex = 0;
std::unique_ptr<DeferredDeleter> multiContextResourceDestructor;
std::vector<std::unique_ptr<GfxPartition>> gfxPartitions;
std::unique_ptr<LocalMemoryUsageBankSelector> localMemoryUsageBankSelector;
void *reservedMemory = nullptr;
std::unique_ptr<PageFaultManager> pageFaultManager;
};
std::unique_ptr<DeferredDeleter> createDeferredDeleter();
} // namespace NEO
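For orientation, a hedged usage sketch of the interface above (it assumes the NEO headers and an already constructed MemoryManager; the helper name allocateUseAndFree is made up): allocate through AllocationProperties, optionally lock the allocation for CPU access, then free it.

#include "memory_manager/memory_manager.h"

namespace NEO {
// Hypothetical helper, not part of the commit: exercises the public MemoryManager API.
void allocateUseAndFree(MemoryManager &memoryManager, uint32_t rootDeviceIndex) {
    AllocationProperties properties{rootDeviceIndex, MemoryConstants::pageSize,
                                    GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY};
    GraphicsAllocation *allocation = memoryManager.allocateGraphicsMemoryWithProperties(properties);
    if (!allocation) {
        return;
    }
    if (void *cpuPtr = memoryManager.lockResource(allocation)) {
        // ... fill cpuPtr ...
        memoryManager.unlockResource(allocation);
    }
    memoryManager.freeGraphicsMemory(allocation);
}
} // namespace NEO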

View File

@@ -0,0 +1,25 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "memory_manager/memory_operations_status.h"
#include "utilities/arrayref.h"
namespace NEO {
class GraphicsAllocation;
class MemoryOperationsHandler {
public:
MemoryOperationsHandler() = default;
virtual ~MemoryOperationsHandler() = default;
virtual MemoryOperationsStatus makeResident(ArrayRef<GraphicsAllocation *> gfxAllocations) = 0;
virtual MemoryOperationsStatus evict(GraphicsAllocation &gfxAllocation) = 0;
virtual MemoryOperationsStatus isResident(GraphicsAllocation &gfxAllocation) = 0;
};
} // namespace NEO

View File

@@ -0,0 +1,22 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstdint>
namespace NEO {
enum class MemoryOperationsStatus : uint32_t {
SUCCESS = 0,
FAILED,
MEMORY_NOT_FOUND,
OUT_OF_MEMORY,
UNSUPPORTED,
DEVICE_UNINITIALIZED,
};
} // namespace NEO

View File

@@ -0,0 +1,30 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "helpers/extendable_enum.h"
namespace MemoryPool {
struct Type : ExtendableEnum {
constexpr Type(uint32_t val) : ExtendableEnum(val) {}
};
constexpr Type MemoryNull{0};
constexpr Type System4KBPages{1};
constexpr Type System64KBPages{2};
constexpr Type System4KBPagesWith32BitGpuAddressing{3};
constexpr Type System64KBPagesWith32BitGpuAddressing{4};
constexpr Type SystemCpuInaccessible{5};
constexpr Type LocalMemory{6};
inline bool isSystemMemoryPool(Type pool) {
return pool == System4KBPages ||
pool == System64KBPages ||
pool == System4KBPagesWith32BitGpuAddressing ||
pool == System64KBPagesWith32BitGpuAddressing;
}
} // namespace MemoryPool
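A small hedged usage sketch of the pool type above (the helper name is hypothetical; in NEO the pool value would normally come from an allocation, which is an assumption here):

#include "memory_manager/memory_pool.h"

// Hypothetical helper: treat every CPU-visible (system) pool the same way.
bool isCpuVisiblePool(MemoryPool::Type pool) {
    return MemoryPool::isSystemMemoryPool(pool);
}
// isCpuVisiblePool(MemoryPool::System64KBPages)       -> true
// isCpuVisiblePool(MemoryPool::SystemCpuInaccessible) -> false
// isCpuVisiblePool(MemoryPool::LocalMemory)           -> false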

View File

@@ -0,0 +1,18 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "memory_manager/residency.h"
using namespace NEO;
void ResidencyData::updateCompletionData(uint64_t newFenceValue, uint32_t contextId) {
lastFenceValues[contextId] = newFenceValue;
}
uint64_t ResidencyData::getFenceValueForContextId(uint32_t contextId) {
return lastFenceValues[contextId];
}

View File

@@ -0,0 +1,26 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "memory_manager/memory_manager.h"
#include "utilities/stackvec.h"
#include <vector>
namespace NEO {
struct ResidencyData {
ResidencyData() : lastFenceValues(static_cast<size_t>(MemoryManager::maxOsContextCount)) {}
std::vector<bool> resident = std::vector<bool>(MemoryManager::maxOsContextCount, 0);
void updateCompletionData(uint64_t newFenceValue, uint32_t contextId);
uint64_t getFenceValueForContextId(uint32_t contextId);
protected:
StackVec<uint64_t, 32> lastFenceValues;
};
} // namespace NEO

View File

@@ -0,0 +1,17 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <utility>
#include <vector>
namespace NEO {
class GraphicsAllocation;
using ResidencyContainer = std::vector<GraphicsAllocation *>;
using AllocationView = std::pair<uint64_t /*address*/, size_t /*size*/>;
} // namespace NEO

View File

@@ -0,0 +1,110 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "command_stream/command_stream_receiver.h"
#include "helpers/cache_policy.h"
#include "memory_manager/graphics_allocation.h"
namespace NEO {
class Surface {
public:
Surface(bool isCoherent = false) : IsCoherent(isCoherent) {}
virtual ~Surface() = default;
virtual void makeResident(CommandStreamReceiver &csr) = 0;
virtual Surface *duplicate() = 0;
virtual bool allowsL3Caching() { return true; }
bool IsCoherent;
};
class NullSurface : public Surface {
public:
NullSurface(){};
~NullSurface() override = default;
void makeResident(CommandStreamReceiver &csr) override{};
Surface *duplicate() override { return new NullSurface(); };
};
class HostPtrSurface : public Surface {
public:
HostPtrSurface(void *ptr, size_t size) : memoryPointer(ptr), surfaceSize(size) {
UNRECOVERABLE_IF(!ptr);
gfxAllocation = nullptr;
}
HostPtrSurface(void *ptr, size_t size, bool copyAllowed) : HostPtrSurface(ptr, size) {
isPtrCopyAllowed = copyAllowed;
}
HostPtrSurface(void *ptr, size_t size, GraphicsAllocation *allocation) : memoryPointer(ptr), surfaceSize(size), gfxAllocation(allocation) {
DEBUG_BREAK_IF(!ptr);
}
~HostPtrSurface() override = default;
void makeResident(CommandStreamReceiver &csr) override {
DEBUG_BREAK_IF(!gfxAllocation);
csr.makeResidentHostPtrAllocation(gfxAllocation);
}
Surface *duplicate() override {
return new HostPtrSurface(this->memoryPointer, this->surfaceSize, this->gfxAllocation);
};
void *getMemoryPointer() const {
return memoryPointer;
}
size_t getSurfaceSize() const {
return surfaceSize;
}
void setAllocation(GraphicsAllocation *allocation) {
this->gfxAllocation = allocation;
}
GraphicsAllocation *getAllocation() {
return gfxAllocation;
}
bool peekIsPtrCopyAllowed() {
return isPtrCopyAllowed;
}
bool allowsL3Caching() override {
return isL3Capable(*gfxAllocation);
}
protected:
void *memoryPointer;
size_t surfaceSize;
GraphicsAllocation *gfxAllocation;
bool isPtrCopyAllowed = false;
};
class GeneralSurface : public Surface {
public:
GeneralSurface() : Surface(false) {
gfxAllocation = nullptr;
}
GeneralSurface(GraphicsAllocation *gfxAlloc) : Surface(gfxAlloc->isCoherent()) {
gfxAllocation = gfxAlloc;
};
~GeneralSurface() override = default;
void makeResident(CommandStreamReceiver &csr) override {
csr.makeResident(*gfxAllocation);
};
Surface *duplicate() override { return new GeneralSurface(gfxAllocation); };
void setGraphicsAllocation(GraphicsAllocation *newAllocation) {
gfxAllocation = newAllocation;
IsCoherent = newAllocation->isCoherent();
}
protected:
GraphicsAllocation *gfxAllocation;
};
} // namespace NEO

View File

@@ -0,0 +1,285 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "memory_manager/unified_memory_manager.h"
#include "command_stream/command_stream_receiver.h"
#include "helpers/aligned_memory.h"
#include "memory_manager/memory_manager.h"
#include "opencl/source/mem_obj/mem_obj_helper.h"
namespace NEO {
void SVMAllocsManager::MapBasedAllocationTracker::insert(SvmAllocationData allocationsPair) {
allocations.insert(std::make_pair(reinterpret_cast<void *>(allocationsPair.gpuAllocation->getGpuAddress()), allocationsPair));
}
void SVMAllocsManager::MapBasedAllocationTracker::remove(SvmAllocationData allocationsPair) {
SvmAllocationContainer::iterator iter;
iter = allocations.find(reinterpret_cast<void *>(allocationsPair.gpuAllocation->getGpuAddress()));
allocations.erase(iter);
}
SvmAllocationData *SVMAllocsManager::MapBasedAllocationTracker::get(const void *ptr) {
SvmAllocationContainer::iterator Iter, End;
SvmAllocationData *svmAllocData;
if (ptr == nullptr)
return nullptr;
End = allocations.end();
Iter = allocations.lower_bound(ptr);
if (((Iter != End) && (Iter->first != ptr)) ||
(Iter == End)) {
if (Iter == allocations.begin()) {
Iter = End;
} else {
Iter--;
}
}
if (Iter != End) {
svmAllocData = &Iter->second;
char *charPtr = reinterpret_cast<char *>(svmAllocData->gpuAllocation->getGpuAddress());
if (ptr < (charPtr + svmAllocData->size)) {
return svmAllocData;
}
}
return nullptr;
}
void SVMAllocsManager::MapOperationsTracker::insert(SvmMapOperation mapOperation) {
operations.insert(std::make_pair(mapOperation.regionSvmPtr, mapOperation));
}
void SVMAllocsManager::MapOperationsTracker::remove(const void *regionPtr) {
SvmMapOperationsContainer::iterator iter;
iter = operations.find(regionPtr);
operations.erase(iter);
}
SvmMapOperation *SVMAllocsManager::MapOperationsTracker::get(const void *regionPtr) {
SvmMapOperationsContainer::iterator iter;
iter = operations.find(regionPtr);
if (iter == operations.end()) {
return nullptr;
}
return &iter->second;
}
void SVMAllocsManager::makeInternalAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t requestedTypesMask) {
std::unique_lock<SpinLock> lock(mtx);
for (auto &allocation : this->SVMAllocs.allocations) {
if (allocation.second.memoryType & requestedTypesMask) {
commandStreamReceiver.makeResident(*allocation.second.gpuAllocation);
}
}
}
SVMAllocsManager::SVMAllocsManager(MemoryManager *memoryManager) : memoryManager(memoryManager) {
}
void *SVMAllocsManager::createSVMAlloc(uint32_t rootDeviceIndex, size_t size, const SvmAllocationProperties svmProperties) {
if (size == 0)
return nullptr;
std::unique_lock<SpinLock> lock(mtx);
if (!memoryManager->isLocalMemorySupported(rootDeviceIndex)) {
return createZeroCopySvmAllocation(rootDeviceIndex, size, svmProperties);
} else {
return createUnifiedAllocationWithDeviceStorage(rootDeviceIndex, size, svmProperties, {});
}
}
void *SVMAllocsManager::createUnifiedMemoryAllocation(uint32_t rootDeviceIndex, size_t size, const UnifiedMemoryProperties &memoryProperties) {
size_t alignedSize = alignUp<size_t>(size, MemoryConstants::pageSize64k);
GraphicsAllocation::AllocationType allocationType = GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY;
if (memoryProperties.memoryType == InternalMemoryType::DEVICE_UNIFIED_MEMORY) {
if (memoryProperties.allocationFlags.allocFlags.allocWriteCombined) {
allocationType = GraphicsAllocation::AllocationType::WRITE_COMBINED;
} else {
allocationType = GraphicsAllocation::AllocationType::BUFFER;
}
}
AllocationProperties unifiedMemoryProperties{rootDeviceIndex,
true,
alignedSize,
allocationType,
memoryProperties.subdeviceBitfield.count() > 1,
memoryProperties.subdeviceBitfield.count() > 1,
memoryProperties.subdeviceBitfield};
unifiedMemoryProperties.flags.shareable = memoryProperties.allocationFlags.flags.shareable;
GraphicsAllocation *unifiedMemoryAllocation = memoryManager->allocateGraphicsMemoryWithProperties(unifiedMemoryProperties);
if (!unifiedMemoryAllocation) {
return nullptr;
}
SvmAllocationData allocData;
allocData.gpuAllocation = unifiedMemoryAllocation;
allocData.cpuAllocation = nullptr;
allocData.size = size;
allocData.memoryType = memoryProperties.memoryType;
allocData.allocationFlagsProperty = memoryProperties.allocationFlags;
allocData.device = memoryProperties.device;
std::unique_lock<SpinLock> lock(mtx);
this->SVMAllocs.insert(allocData);
return reinterpret_cast<void *>(unifiedMemoryAllocation->getGpuAddress());
}
void *SVMAllocsManager::createSharedUnifiedMemoryAllocation(uint32_t rootDeviceIndex, size_t size, const UnifiedMemoryProperties &memoryProperties, void *cmdQ) {
auto supportDualStorageSharedMemory = memoryManager->isLocalMemorySupported(rootDeviceIndex);
if (DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.get() != -1) {
supportDualStorageSharedMemory = !!DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.get();
}
if (supportDualStorageSharedMemory) {
auto unifiedMemoryPointer = createUnifiedAllocationWithDeviceStorage(rootDeviceIndex, size, {}, memoryProperties);
if (!unifiedMemoryPointer) {
return nullptr;
}
auto unifiedMemoryAllocation = this->getSVMAlloc(unifiedMemoryPointer);
unifiedMemoryAllocation->memoryType = memoryProperties.memoryType;
unifiedMemoryAllocation->allocationFlagsProperty = memoryProperties.allocationFlags;
UNRECOVERABLE_IF(cmdQ == nullptr);
auto pageFaultManager = this->memoryManager->getPageFaultManager();
pageFaultManager->insertAllocation(unifiedMemoryPointer, size, this, cmdQ);
return unifiedMemoryPointer;
}
return createUnifiedMemoryAllocation(rootDeviceIndex, size, memoryProperties);
}
SvmAllocationData *SVMAllocsManager::getSVMAlloc(const void *ptr) {
std::unique_lock<SpinLock> lock(mtx);
return SVMAllocs.get(ptr);
}
bool SVMAllocsManager::freeSVMAlloc(void *ptr, bool blocking) {
SvmAllocationData *svmData = getSVMAlloc(ptr);
if (svmData) {
if (blocking) {
if (svmData->cpuAllocation) {
this->memoryManager->waitForEnginesCompletion(*svmData->cpuAllocation);
}
this->memoryManager->waitForEnginesCompletion(*svmData->gpuAllocation);
}
auto pageFaultManager = this->memoryManager->getPageFaultManager();
if (pageFaultManager) {
pageFaultManager->removeAllocation(ptr);
}
std::unique_lock<SpinLock> lock(mtx);
if (svmData->gpuAllocation->getAllocationType() == GraphicsAllocation::AllocationType::SVM_ZERO_COPY) {
freeZeroCopySvmAllocation(svmData);
} else {
freeSvmAllocationWithDeviceStorage(svmData);
}
return true;
}
return false;
}
void *SVMAllocsManager::createZeroCopySvmAllocation(uint32_t rootDeviceIndex, size_t size, const SvmAllocationProperties &svmProperties) {
AllocationProperties properties{rootDeviceIndex, true, size, GraphicsAllocation::AllocationType::SVM_ZERO_COPY, false};
MemoryPropertiesParser::fillCachePolicyInProperties(properties, false, svmProperties.readOnly, false);
GraphicsAllocation *allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties);
if (!allocation) {
return nullptr;
}
allocation->setMemObjectsAllocationWithWritableFlags(!svmProperties.readOnly && !svmProperties.hostPtrReadOnly);
allocation->setCoherent(svmProperties.coherent);
SvmAllocationData allocData;
allocData.gpuAllocation = allocation;
allocData.size = size;
this->SVMAllocs.insert(allocData);
return allocation->getUnderlyingBuffer();
}
void *SVMAllocsManager::createUnifiedAllocationWithDeviceStorage(uint32_t rootDeviceIndex, size_t size, const SvmAllocationProperties &svmProperties, const UnifiedMemoryProperties &unifiedMemoryProperties) {
size_t alignedSize = alignUp<size_t>(size, 2 * MemoryConstants::megaByte);
AllocationProperties cpuProperties{rootDeviceIndex, true, alignedSize, GraphicsAllocation::AllocationType::SVM_CPU, false};
cpuProperties.alignment = 2 * MemoryConstants::megaByte;
MemoryPropertiesParser::fillCachePolicyInProperties(cpuProperties, false, svmProperties.readOnly, false);
GraphicsAllocation *allocationCpu = memoryManager->allocateGraphicsMemoryWithProperties(cpuProperties);
if (!allocationCpu) {
return nullptr;
}
allocationCpu->setMemObjectsAllocationWithWritableFlags(!svmProperties.readOnly && !svmProperties.hostPtrReadOnly);
allocationCpu->setCoherent(svmProperties.coherent);
void *svmPtr = allocationCpu->getUnderlyingBuffer();
AllocationProperties gpuProperties{rootDeviceIndex,
false,
alignedSize,
GraphicsAllocation::AllocationType::SVM_GPU,
unifiedMemoryProperties.subdeviceBitfield.count() > 1,
false,
unifiedMemoryProperties.subdeviceBitfield};
gpuProperties.alignment = 2 * MemoryConstants::megaByte;
MemoryPropertiesParser::fillCachePolicyInProperties(gpuProperties, false, svmProperties.readOnly, false);
GraphicsAllocation *allocationGpu = memoryManager->allocateGraphicsMemoryWithProperties(gpuProperties, svmPtr);
if (!allocationGpu) {
memoryManager->freeGraphicsMemory(allocationCpu);
return nullptr;
}
allocationGpu->setMemObjectsAllocationWithWritableFlags(!svmProperties.readOnly && !svmProperties.hostPtrReadOnly);
allocationGpu->setCoherent(svmProperties.coherent);
SvmAllocationData allocData;
allocData.gpuAllocation = allocationGpu;
allocData.cpuAllocation = allocationCpu;
allocData.device = unifiedMemoryProperties.device;
allocData.size = size;
this->SVMAllocs.insert(allocData);
return svmPtr;
}
void SVMAllocsManager::freeZeroCopySvmAllocation(SvmAllocationData *svmData) {
GraphicsAllocation *gpuAllocation = svmData->gpuAllocation;
SVMAllocs.remove(*svmData);
memoryManager->freeGraphicsMemory(gpuAllocation);
}
void SVMAllocsManager::freeSvmAllocationWithDeviceStorage(SvmAllocationData *svmData) {
GraphicsAllocation *gpuAllocation = svmData->gpuAllocation;
GraphicsAllocation *cpuAllocation = svmData->cpuAllocation;
SVMAllocs.remove(*svmData);
memoryManager->freeGraphicsMemory(gpuAllocation);
memoryManager->freeGraphicsMemory(cpuAllocation);
}
SvmMapOperation *SVMAllocsManager::getSvmMapOperation(const void *ptr) {
std::unique_lock<SpinLock> lock(mtx);
return svmMapOperations.get(ptr);
}
void SVMAllocsManager::insertSvmMapOperation(void *regionSvmPtr, size_t regionSize, void *baseSvmPtr, size_t offset, bool readOnlyMap) {
SvmMapOperation svmMapOperation;
svmMapOperation.regionSvmPtr = regionSvmPtr;
svmMapOperation.baseSvmPtr = baseSvmPtr;
svmMapOperation.offset = offset;
svmMapOperation.regionSize = regionSize;
svmMapOperation.readOnlyMap = readOnlyMap;
std::unique_lock<SpinLock> lock(mtx);
svmMapOperations.insert(svmMapOperation);
}
void SVMAllocsManager::removeSvmMapOperation(const void *regionSvmPtr) {
std::unique_lock<SpinLock> lock(mtx);
svmMapOperations.remove(regionSvmPtr);
}
} // namespace NEO

View File

@@ -0,0 +1,110 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "helpers/common_types.h"
#include "unified_memory/unified_memory.h"
#include "utilities/spinlock.h"
#include "memory_properties_flags.h"
#include <cstdint>
#include <map>
#include <mutex>
namespace NEO {
class CommandStreamReceiver;
class GraphicsAllocation;
class MemoryManager;
struct SvmAllocationData {
GraphicsAllocation *cpuAllocation = nullptr;
GraphicsAllocation *gpuAllocation = nullptr;
size_t size = 0;
InternalMemoryType memoryType = InternalMemoryType::SVM;
MemoryPropertiesFlags allocationFlagsProperty;
void *device = nullptr;
};
struct SvmMapOperation {
void *regionSvmPtr = nullptr;
size_t regionSize = 0;
void *baseSvmPtr = nullptr;
size_t offset = 0;
bool readOnlyMap = false;
};
class SVMAllocsManager {
public:
class MapBasedAllocationTracker {
friend class SVMAllocsManager;
public:
using SvmAllocationContainer = std::map<const void *, SvmAllocationData>;
void insert(SvmAllocationData);
void remove(SvmAllocationData);
SvmAllocationData *get(const void *);
size_t getNumAllocs() const { return allocations.size(); };
protected:
SvmAllocationContainer allocations;
};
struct MapOperationsTracker {
using SvmMapOperationsContainer = std::map<const void *, SvmMapOperation>;
void insert(SvmMapOperation);
void remove(const void *);
SvmMapOperation *get(const void *);
size_t getNumMapOperations() const { return operations.size(); };
protected:
SvmMapOperationsContainer operations;
};
struct SvmAllocationProperties {
bool coherent = false;
bool hostPtrReadOnly = false;
bool readOnly = false;
};
struct UnifiedMemoryProperties {
UnifiedMemoryProperties() = default;
UnifiedMemoryProperties(InternalMemoryType memoryType) : memoryType(memoryType){};
InternalMemoryType memoryType = InternalMemoryType::NOT_SPECIFIED;
MemoryPropertiesFlags allocationFlags;
void *device = nullptr;
DeviceBitfield subdeviceBitfield;
};
SVMAllocsManager(MemoryManager *memoryManager);
void *createSVMAlloc(uint32_t rootDeviceIndex, size_t size, const SvmAllocationProperties svmProperties);
void *createUnifiedMemoryAllocation(uint32_t rootDeviceIndex, size_t size, const UnifiedMemoryProperties &svmProperties);
void *createSharedUnifiedMemoryAllocation(uint32_t rootDeviceIndex, size_t size, const UnifiedMemoryProperties &svmProperties, void *cmdQ);
SvmAllocationData *getSVMAlloc(const void *ptr);
bool freeSVMAlloc(void *ptr, bool blocking);
bool freeSVMAlloc(void *ptr) { return freeSVMAlloc(ptr, false); }
size_t getNumAllocs() const { return SVMAllocs.getNumAllocs(); }
MapBasedAllocationTracker *getSVMAllocs() { return &SVMAllocs; }
void insertSvmMapOperation(void *regionSvmPtr, size_t regionSize, void *baseSvmPtr, size_t offset, bool readOnlyMap);
void removeSvmMapOperation(const void *regionSvmPtr);
SvmMapOperation *getSvmMapOperation(const void *regionPtr);
void makeInternalAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t requestedTypesMask);
void *createUnifiedAllocationWithDeviceStorage(uint32_t rootDeviceIndex, size_t size, const SvmAllocationProperties &svmProperties, const UnifiedMemoryProperties &unifiedMemoryProperties);
void freeSvmAllocationWithDeviceStorage(SvmAllocationData *svmData);
protected:
void *createZeroCopySvmAllocation(uint32_t rootDeviceIndex, size_t size, const SvmAllocationProperties &svmProperties);
void freeZeroCopySvmAllocation(SvmAllocationData *svmData);
MapBasedAllocationTracker SVMAllocs;
MapOperationsTracker svmMapOperations;
MemoryManager *memoryManager;
SpinLock mtx;
};
} // namespace NEO
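Closing with a hedged usage sketch of the SVM manager declared above (assumes the NEO headers and an existing MemoryManager instance; the helper svmRoundTrip is made up and not part of the commit): create an SVM allocation, look up its bookkeeping entry by pointer, then free it with the non-blocking variant.

#include "memory_manager/unified_memory_manager.h"

namespace NEO {
// Hypothetical helper exercising the SVMAllocsManager interface.
void svmRoundTrip(MemoryManager &memoryManager, uint32_t rootDeviceIndex) {
    SVMAllocsManager svmManager(&memoryManager);
    SVMAllocsManager::SvmAllocationProperties svmProperties{}; // not coherent, not read-only
    void *ptr = svmManager.createSVMAlloc(rootDeviceIndex, 4096, svmProperties);
    if (!ptr) {
        return;
    }
    SvmAllocationData *data = svmManager.getSVMAlloc(ptr); // bookkeeping entry for the pointer
    (void)data;
    svmManager.freeSVMAlloc(ptr); // non-blocking variant
}
} // namespace NEO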