Mirror of https://github.com/intel/compute-runtime.git (synced 2025-12-25 05:24:02 +08:00)
Reorganization directory structure [3/n]
Change-Id: If3dfa3f6007f8810a6a1ae1a4f0c7da38544648d
47 shared/source/memory_manager/CMakeLists.txt (new file)
@@ -0,0 +1,47 @@
#
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#

set(NEO_CORE_MEMORY_MANAGER
    ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
    ${CMAKE_CURRENT_SOURCE_DIR}/allocations_list.h
    ${CMAKE_CURRENT_SOURCE_DIR}/allocation_properties.h
    ${CMAKE_CURRENT_SOURCE_DIR}/deferrable_allocation_deletion.h
    ${CMAKE_CURRENT_SOURCE_DIR}/deferrable_allocation_deletion.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/deferrable_deletion.h
    ${CMAKE_CURRENT_SOURCE_DIR}/deferred_deleter.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/deferred_deleter.h
    ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/engine_limits.h
    ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/storage_info.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/storage_info.h
    ${CMAKE_CURRENT_SOURCE_DIR}/eviction_status.h
    ${CMAKE_CURRENT_SOURCE_DIR}/gfx_partition.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/${BRANCH_DIR_SUFFIX}/gfx_partition_init_additional_range.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/gfx_partition.h
    ${CMAKE_CURRENT_SOURCE_DIR}/graphics_allocation.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/graphics_allocation.h
    ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/graphics_allocation_extra.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/host_ptr_defines.h
    ${CMAKE_CURRENT_SOURCE_DIR}/host_ptr_manager.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/host_ptr_manager.h
    ${CMAKE_CURRENT_SOURCE_DIR}/internal_allocation_storage.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/internal_allocation_storage.h
    ${CMAKE_CURRENT_SOURCE_DIR}/local_memory_usage.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/local_memory_usage.h
    ${CMAKE_CURRENT_SOURCE_DIR}/memory_constants.h
    ${CMAKE_CURRENT_SOURCE_DIR}/memory_manager.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/memory_manager.h
    ${CMAKE_CURRENT_SOURCE_DIR}/memory_operations_handler.h
    ${CMAKE_CURRENT_SOURCE_DIR}/memory_operations_status.h
    ${CMAKE_CURRENT_SOURCE_DIR}/memory_pool.h
    ${CMAKE_CURRENT_SOURCE_DIR}/residency.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/residency.h
    ${CMAKE_CURRENT_SOURCE_DIR}/residency_container.h
    ${CMAKE_CURRENT_SOURCE_DIR}/surface.h
    ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_manager.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_manager.h
)

set_property(GLOBAL PROPERTY NEO_CORE_MEMORY_MANAGER ${NEO_CORE_MEMORY_MANAGER})

76 shared/source/memory_manager/allocation_properties.h (new file)
@@ -0,0 +1,76 @@
/*
 * Copyright (C) 2019-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include "device/sub_device.h"
#include "memory_manager/graphics_allocation.h"

namespace NEO {
struct ImageInfo;
struct AllocationProperties {
    union {
        struct {
            uint32_t allocateMemory : 1;
            uint32_t flushL3RequiredForRead : 1;
            uint32_t flushL3RequiredForWrite : 1;
            uint32_t forcePin : 1;
            uint32_t uncacheable : 1;
            uint32_t multiOsContextCapable : 1;
            uint32_t readOnlyMultiStorage : 1;
            uint32_t shareable : 1;
            uint32_t resource48Bit : 1;
            uint32_t reserved : 23;
        } flags;
        uint32_t allFlags = 0;
    };
    static_assert(sizeof(AllocationProperties::flags) == sizeof(AllocationProperties::allFlags), "");
    const uint32_t rootDeviceIndex;
    size_t size = 0;
    size_t alignment = 0;
    GraphicsAllocation::AllocationType allocationType = GraphicsAllocation::AllocationType::UNKNOWN;
    ImageInfo *imgInfo = nullptr;
    bool multiStorageResource = false;
    DeviceBitfield subDevicesBitfield{};

    AllocationProperties(uint32_t rootDeviceIndex, size_t size,
                         GraphicsAllocation::AllocationType allocationType)
        : AllocationProperties(rootDeviceIndex, true, size, allocationType, false) {}

    AllocationProperties(uint32_t rootDeviceIndex, bool allocateMemory,
                         ImageInfo &imgInfo,
                         GraphicsAllocation::AllocationType allocationType)
        : AllocationProperties(rootDeviceIndex, allocateMemory, 0u, allocationType, false) {
        this->imgInfo = &imgInfo;
    }

    AllocationProperties(uint32_t rootDeviceIndex,
                         bool allocateMemory,
                         size_t size,
                         GraphicsAllocation::AllocationType allocationType,
                         bool isMultiStorageAllocation)
        : AllocationProperties(rootDeviceIndex, allocateMemory, size, allocationType, false, isMultiStorageAllocation, {}) {}

    AllocationProperties(uint32_t rootDeviceIndexParam,
                         bool allocateMemoryParam,
                         size_t sizeParam,
                         GraphicsAllocation::AllocationType allocationTypeParam,
                         bool multiOsContextCapableParam,
                         bool isMultiStorageAllocationParam,
                         DeviceBitfield subDevicesBitfieldParam)
        : rootDeviceIndex(rootDeviceIndexParam),
          size(sizeParam),
          allocationType(allocationTypeParam),
          multiStorageResource(isMultiStorageAllocationParam),
          subDevicesBitfield(subDevicesBitfieldParam) {
        allFlags = 0;
        flags.flushL3RequiredForRead = 1;
        flags.flushL3RequiredForWrite = 1;
        flags.allocateMemory = allocateMemoryParam;
        flags.multiOsContextCapable = multiOsContextCapableParam;
    }
};
} // namespace NEO

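Note (illustrative, not part of the commit): a minimal sketch of how the AllocationProperties flags union above is meant to be used; the size and allocation type are assumed example values.

    #include "memory_manager/allocation_properties.h"

    void allocationPropertiesExample() {
        using namespace NEO;
        // Three-argument constructor: 4 KB for root device 0. It delegates to the full
        // constructor, which zeroes allFlags and then sets allocateMemory and both L3-flush bits.
        AllocationProperties properties(0u, 4096u, GraphicsAllocation::AllocationType::BUFFER);
        properties.flags.forcePin = 1;      // set a single bit through the bitfield view
        uint32_t raw = properties.allFlags; // the union exposes the same bits as one word
        (void)raw;
    }
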
23 shared/source/memory_manager/allocations_list.h (new file)
@@ -0,0 +1,23 @@
/*
 * Copyright (C) 2018-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include "memory_manager/graphics_allocation.h"

#include <mutex>

namespace NEO {
class CommandStreamReceiver;

class AllocationsList : public IDList<GraphicsAllocation, true, true> {
  public:
    std::unique_ptr<GraphicsAllocation> detachAllocation(size_t requiredMinimalSize, CommandStreamReceiver &commandStreamReceiver, GraphicsAllocation::AllocationType allocationType);

  private:
    GraphicsAllocation *detachAllocationImpl(GraphicsAllocation *, void *);
};
} // namespace NEO

41 shared/source/memory_manager/deferrable_allocation_deletion.cpp (new file)
@@ -0,0 +1,41 @@
/*
 * Copyright (C) 2018-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "memory_manager/deferrable_allocation_deletion.h"

#include "command_stream/command_stream_receiver.h"
#include "helpers/engine_control.h"
#include "memory_manager/memory_manager.h"
#include "os_interface/os_context.h"

namespace NEO {

DeferrableAllocationDeletion::DeferrableAllocationDeletion(MemoryManager &memoryManager, GraphicsAllocation &graphicsAllocation) : memoryManager(memoryManager),
                                                                                                                                   graphicsAllocation(graphicsAllocation) {}
bool DeferrableAllocationDeletion::apply() {
    if (graphicsAllocation.isUsed()) {
        bool isStillUsed = false;
        for (auto &engine : memoryManager.getRegisteredEngines()) {
            auto contextId = engine.osContext->getContextId();
            if (graphicsAllocation.isUsedByOsContext(contextId)) {
                auto currentContextTaskCount = *engine.commandStreamReceiver->getTagAddress();
                if (graphicsAllocation.getTaskCount(contextId) <= currentContextTaskCount) {
                    graphicsAllocation.releaseUsageInOsContext(contextId);
                } else {
                    isStillUsed = true;
                    engine.commandStreamReceiver->flushBatchedSubmissions();
                }
            }
        }
        if (isStillUsed) {
            return false;
        }
    }
    memoryManager.freeGraphicsMemory(&graphicsAllocation);
    return true;
}
} // namespace NEO

25 shared/source/memory_manager/deferrable_allocation_deletion.h (new file)
@@ -0,0 +1,25 @@
/*
 * Copyright (C) 2018-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include "memory_manager/deferrable_deletion.h"

namespace NEO {

class GraphicsAllocation;
class MemoryManager;

class DeferrableAllocationDeletion : public DeferrableDeletion {
  public:
    DeferrableAllocationDeletion(MemoryManager &memoryManager, GraphicsAllocation &graphicsAllocation);
    bool apply() override;

  protected:
    MemoryManager &memoryManager;
    GraphicsAllocation &graphicsAllocation;
};
} // namespace NEO

18 shared/source/memory_manager/deferrable_deletion.h (new file)
@@ -0,0 +1,18 @@
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include "utilities/idlist.h"

namespace NEO {
class DeferrableDeletion : public IDNode<DeferrableDeletion> {
  public:
    template <typename... Args>
    static DeferrableDeletion *create(Args... args);
    virtual bool apply() = 0;
};
} // namespace NEO

127 shared/source/memory_manager/deferred_deleter.cpp (new file)
@@ -0,0 +1,127 @@
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "memory_manager/deferred_deleter.h"

#include "memory_manager/deferrable_deletion.h"
#include "os_interface/os_thread.h"

namespace NEO {
DeferredDeleter::DeferredDeleter() {
    doWorkInBackground = false;
    elementsToRelease = 0;
}

void DeferredDeleter::stop() {
    // Called with threadMutex acquired
    if (worker != nullptr) {
        // Working thread was created so we can safely stop it
        std::unique_lock<std::mutex> lock(queueMutex);
        // Make sure that working thread really started
        while (!doWorkInBackground) {
            lock.unlock();
            lock.lock();
        }
        // Signal working thread to finish its job
        doWorkInBackground = false;
        lock.unlock();
        condition.notify_one();
        // Wait for the working job to exit
        worker->join();
        // Delete working thread
        worker.reset();
    }
    drain(false);
}

void DeferredDeleter::safeStop() {
    std::lock_guard<std::mutex> lock(threadMutex);
    stop();
}

DeferredDeleter::~DeferredDeleter() {
    safeStop();
}

void DeferredDeleter::deferDeletion(DeferrableDeletion *deletion) {
    std::unique_lock<std::mutex> lock(queueMutex);
    elementsToRelease++;
    queue.pushTailOne(*deletion);
    lock.unlock();
    condition.notify_one();
}

void DeferredDeleter::addClient() {
    std::lock_guard<std::mutex> lock(threadMutex);
    ++numClients;
    ensureThread();
}

void DeferredDeleter::removeClient() {
    std::lock_guard<std::mutex> lock(threadMutex);
    --numClients;
    if (numClients == 0) {
        stop();
    }
}

void DeferredDeleter::ensureThread() {
    if (worker != nullptr) {
        return;
    }
    worker = Thread::create(run, reinterpret_cast<void *>(this));
}

bool DeferredDeleter::areElementsReleased() {
    return elementsToRelease == 0;
}

bool DeferredDeleter::shouldStop() {
    return !doWorkInBackground;
}

void *DeferredDeleter::run(void *arg) {
    auto self = reinterpret_cast<DeferredDeleter *>(arg);
    std::unique_lock<std::mutex> lock(self->queueMutex);
    // Mark that working thread really started
    self->doWorkInBackground = true;
    do {
        if (self->queue.peekIsEmpty()) {
            // Wait for signal that some items are ready to be deleted
            self->condition.wait(lock);
        }
        lock.unlock();
        // Delete items placed into deferred delete queue
        self->clearQueue();
        lock.lock();
        // Check whether working thread should be stopped
    } while (!self->shouldStop());
    lock.unlock();
    return nullptr;
}

void DeferredDeleter::drain(bool blocking) {
    clearQueue();
    if (blocking) {
        while (!areElementsReleased())
            ;
    }
}

void DeferredDeleter::clearQueue() {
    do {
        auto deletion = queue.removeFrontOne();
        if (deletion) {
            if (deletion->apply()) {
                elementsToRelease--;
            } else {
                queue.pushTailOne(*deletion.release());
            }
        }
    } while (!queue.peekIsEmpty());
}
} // namespace NEO

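Note (illustrative, not part of the commit): the client protocol the DeferredDeleter above expects. Here memoryManager and allocation stand in for objects that already exist in the caller, and DeferrableDeletion::create is assumed to forward its arguments to DeferrableAllocationDeletion.

    NEO::DeferredDeleter deleter;
    deleter.addClient();             // first client lazily starts the worker thread
    auto *deletion = NEO::DeferrableDeletion::create(memoryManager, allocation);
    deleter.deferDeletion(deletion); // queued; apply() frees it once the GPU task count catches up
    deleter.removeClient();          // last client stops the worker and drains the queue
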
54 shared/source/memory_manager/deferred_deleter.h (new file)
@@ -0,0 +1,54 @@
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include "utilities/idlist.h"

#include <atomic>
#include <condition_variable>
#include <mutex>
#include <utility>

namespace NEO {
class DeferrableDeletion;
class Thread;
class DeferredDeleter {
  public:
    DeferredDeleter();
    virtual ~DeferredDeleter();

    DeferredDeleter(const DeferredDeleter &) = delete;
    DeferredDeleter &operator=(const DeferredDeleter &) = delete;

    MOCKABLE_VIRTUAL void deferDeletion(DeferrableDeletion *deletion);

    MOCKABLE_VIRTUAL void addClient();

    MOCKABLE_VIRTUAL void removeClient();

    MOCKABLE_VIRTUAL void drain(bool blocking);

  protected:
    void stop();
    void safeStop();
    void ensureThread();
    MOCKABLE_VIRTUAL void clearQueue();
    MOCKABLE_VIRTUAL bool areElementsReleased();
    MOCKABLE_VIRTUAL bool shouldStop();

    static void *run(void *);

    std::atomic<bool> doWorkInBackground;
    std::atomic<int> elementsToRelease;
    std::unique_ptr<Thread> worker;
    int32_t numClients = 0;
    IDList<DeferrableDeletion, true> queue;
    std::mutex queueMutex;
    std::mutex threadMutex;
    std::condition_variable condition;
};
} // namespace NEO

17 shared/source/memory_manager/definitions/engine_limits.h (new file)
@@ -0,0 +1,17 @@
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include <cstdint>

namespace NEO {
namespace EngineLimits {

constexpr uint32_t maxHandleCount = 1u;

}; // namespace EngineLimits
} // namespace NEO

15 shared/source/memory_manager/definitions/storage_info.cpp (new file)
@@ -0,0 +1,15 @@
/*
 * Copyright (C) 2019-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "memory_manager/memory_manager.h"

namespace NEO {
StorageInfo MemoryManager::createStorageInfoFromProperties(const AllocationProperties &properties) {
    return {};
}
uint32_t StorageInfo::getNumHandles() const { return 1u; }
} // namespace NEO

15 shared/source/memory_manager/definitions/storage_info.h (new file)
@@ -0,0 +1,15 @@
/*
 * Copyright (C) 2019-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include <cstdint>
namespace NEO {
struct StorageInfo {
    uint32_t getNumHandles() const;
    uint32_t getMemoryBanks() const { return 0u; }
};
} // namespace NEO

20 shared/source/memory_manager/eviction_status.h (new file)
@@ -0,0 +1,20 @@
/*
 * Copyright (C) 2019-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include <cstdint>

namespace NEO {

enum class EvictionStatus : uint32_t {
    SUCCESS = 0,
    FAILED,
    NOT_APPLIED,
    UNKNOWN
};

} // namespace NEO

148 shared/source/memory_manager/gfx_partition.cpp (new file)
@@ -0,0 +1,148 @@
/*
 * Copyright (C) 2019-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "memory_manager/gfx_partition.h"

#include "helpers/aligned_memory.h"

namespace NEO {

const std::array<HeapIndex, 4> GfxPartition::heap32Names{{HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY,
                                                          HeapIndex::HEAP_INTERNAL,
                                                          HeapIndex::HEAP_EXTERNAL_DEVICE_MEMORY,
                                                          HeapIndex::HEAP_EXTERNAL}};

const std::array<HeapIndex, 7> GfxPartition::heapNonSvmNames{{HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY,
                                                              HeapIndex::HEAP_INTERNAL,
                                                              HeapIndex::HEAP_EXTERNAL_DEVICE_MEMORY,
                                                              HeapIndex::HEAP_EXTERNAL,
                                                              HeapIndex::HEAP_STANDARD,
                                                              HeapIndex::HEAP_STANDARD64KB,
                                                              HeapIndex::HEAP_EXTENDED}};

GfxPartition::GfxPartition() : osMemory(OSMemory::create()) {}

GfxPartition::~GfxPartition() {
    if (reservedCpuAddressRange) {
        osMemory->releaseCpuAddressRange(reservedCpuAddressRange, reservedCpuAddressRangeSize);
    }
}

void GfxPartition::Heap::init(uint64_t base, uint64_t size) {
    this->base = base;
    this->size = size;

    // Exclude very first and very last 64K from GPU address range allocation
    if (size > 2 * GfxPartition::heapGranularity) {
        size -= 2 * GfxPartition::heapGranularity;
    }

    alloc = std::make_unique<HeapAllocator>(base + GfxPartition::heapGranularity, size);
}

void GfxPartition::freeGpuAddressRange(uint64_t ptr, size_t size) {
    for (auto heapName : GfxPartition::heapNonSvmNames) {
        auto &heap = getHeap(heapName);
        if ((ptr > heap.getBase()) && ((ptr + size) < heap.getLimit())) {
            heap.free(ptr, size);
            break;
        }
    }
}

void GfxPartition::init(uint64_t gpuAddressSpace, size_t cpuAddressRangeSizeToReserve, uint32_t rootDeviceIndex, size_t numRootDevices) {

    /*
     * I. 64-bit builds:
     *
     * 1) 48-bit Full Range SVM gfx layout:
     *
     *                  SVM                    H0   H1   H2   H3      STANDARD       STANDARD64K
     *   |__________________________________|____|____|____|____|________________|______________|
     *   |                                  |    |    |    |    |                |              |
     *   |                               gfxBase                                              gfxTop
     *  0x0                         0x0000800000000000                                  0x0000FFFFFFFFFFFF
     *
     *
     * 2) 47-bit Full Range SVM gfx layout:
     *
     *                          gfxSize = 2^47 / 4 = 0x200000000000
     *                    ________________________________________________
     *                   /                                                \
     *       SVM        / H0   H1   H2   H3      STANDARD     STANDARD64K \       SVM
     *   |______________|____|____|____|____|________________|____________|_______________|
     *   |              |    |    |    |    |                |            |               |
     *   |           gfxBase                                            gfxTop            |
     *  0x0   reserveCpuAddressRange(gfxSize)                               0x00007FFFFFFFFFFF
     *   \___________________________________ SVM _________________________________________/
     *
     *
     *
     * 3) Limited Range gfx layout (no SVM):
     *
     *     H0   H1   H2   H3        STANDARD          STANDARD64K
     *   |____|____|____|____|____________________|__________________|
     *   |    |    |    |    |                    |                  |
     * gfxBase                                                    gfxTop
     *  0x0                                               0xFFF...FFF < 47 bit
     *
     *
     * II. 32-bit builds:
     *
     * 1) 32-bit Full Range SVM gfx layout:
     *
     *      SVM    H0   H1   H2   H3      STANDARD      STANDARD64K
     *   |_______|____|____|____|____|________________|______________|
     *   |       |    |    |    |    |                |              |
     *   |     gfxBase                                             gfxTop
     *  0x0  0x100000000                                      gpuAddressSpace
     */

    uint64_t gfxTop = gpuAddressSpace + 1;
    uint64_t gfxBase = 0x0ull;
    const uint64_t gfxHeap32Size = 4 * MemoryConstants::gigaByte;

    if (is32bit) {
        gfxBase = maxNBitValue(32) + 1;
        heapInit(HeapIndex::HEAP_SVM, 0ull, gfxBase);
    } else {
        if (gpuAddressSpace == maxNBitValue(48)) {
            gfxBase = maxNBitValue(48 - 1) + 1;
            heapInit(HeapIndex::HEAP_SVM, 0ull, gfxBase);
        } else if (gpuAddressSpace == maxNBitValue(47)) {
            reservedCpuAddressRangeSize = cpuAddressRangeSizeToReserve;
            UNRECOVERABLE_IF(reservedCpuAddressRangeSize == 0);
            reservedCpuAddressRange = osMemory->reserveCpuAddressRange(reservedCpuAddressRangeSize);
            UNRECOVERABLE_IF(reservedCpuAddressRange == nullptr);
            UNRECOVERABLE_IF(!isAligned<GfxPartition::heapGranularity>(reservedCpuAddressRange));
            gfxBase = reinterpret_cast<uint64_t>(reservedCpuAddressRange);
            gfxTop = gfxBase + reservedCpuAddressRangeSize;
            heapInit(HeapIndex::HEAP_SVM, 0ull, gpuAddressSpace + 1);
        } else if (gpuAddressSpace < maxNBitValue(47)) {
            gfxBase = 0ull;
            heapInit(HeapIndex::HEAP_SVM, 0ull, 0ull);
        } else {
            initAdditionalRange(gpuAddressSpace, gfxBase, gfxTop, rootDeviceIndex, numRootDevices);
        }
    }

    for (auto heap : GfxPartition::heap32Names) {
        heapInit(heap, gfxBase, gfxHeap32Size);
        gfxBase += gfxHeap32Size;
    }

    uint64_t gfxStandardSize = alignDown((gfxTop - gfxBase) >> 1, heapGranularity);

    heapInit(HeapIndex::HEAP_STANDARD, gfxBase, gfxStandardSize);
    gfxBase += gfxStandardSize;

    // Split HEAP_STANDARD64K among root devices
    auto gfxStandard64KBSize = alignDown(gfxStandardSize / numRootDevices, GfxPartition::heapGranularity);
    heapInit(HeapIndex::HEAP_STANDARD64KB, gfxBase + rootDeviceIndex * gfxStandard64KBSize, gfxStandard64KBSize);
}

} // namespace NEO

100 shared/source/memory_manager/gfx_partition.h (new file)
@@ -0,0 +1,100 @@
/*
 * Copyright (C) 2019-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include "memory_manager/memory_constants.h"
#include "os_interface/os_memory.h"
#include "utilities/heap_allocator.h"

#include <array>

namespace NEO {

enum class HeapIndex : uint32_t {
    HEAP_INTERNAL_DEVICE_MEMORY = 0u,
    HEAP_INTERNAL = 1u,
    HEAP_EXTERNAL_DEVICE_MEMORY = 2u,
    HEAP_EXTERNAL = 3u,
    HEAP_STANDARD,
    HEAP_STANDARD64KB,
    HEAP_SVM,
    HEAP_EXTENDED,

    // Please put new heap indexes above this line
    TOTAL_HEAPS
};

class GfxPartition {
  public:
    GfxPartition();
    MOCKABLE_VIRTUAL ~GfxPartition();

    void init(uint64_t gpuAddressSpace, size_t cpuAddressRangeSizeToReserve, uint32_t rootDeviceIndex, size_t numRootDevices);

    void heapInit(HeapIndex heapIndex, uint64_t base, uint64_t size) {
        getHeap(heapIndex).init(base, size);
    }

    uint64_t heapAllocate(HeapIndex heapIndex, size_t &size) {
        return getHeap(heapIndex).allocate(size);
    }

    void heapFree(HeapIndex heapIndex, uint64_t ptr, size_t size) {
        getHeap(heapIndex).free(ptr, size);
    }

    MOCKABLE_VIRTUAL void freeGpuAddressRange(uint64_t ptr, size_t size);

    uint64_t getHeapBase(HeapIndex heapIndex) {
        return getHeap(heapIndex).getBase();
    }

    uint64_t getHeapLimit(HeapIndex heapIndex) {
        return getHeap(heapIndex).getLimit();
    }

    uint64_t getHeapMinimalAddress(HeapIndex heapIndex) {
        return getHeapBase(heapIndex) + heapGranularity;
    }

    bool isLimitedRange() { return getHeap(HeapIndex::HEAP_SVM).getSize() == 0ull; }

    static const uint64_t heapGranularity = MemoryConstants::pageSize64k;

    static const std::array<HeapIndex, 4> heap32Names;
    static const std::array<HeapIndex, 7> heapNonSvmNames;

  protected:
    void initAdditionalRange(uint64_t gpuAddressSpace, uint64_t &gfxBase, uint64_t &gfxTop, uint32_t rootDeviceIndex, size_t numRootDevices);

    class Heap {
      public:
        Heap() = default;
        void init(uint64_t base, uint64_t size);
        uint64_t getBase() const { return base; }
        uint64_t getSize() const { return size; }
        uint64_t getLimit() const { return size ? base + size - 1 : 0; }
        uint64_t allocate(size_t &size) { return alloc->allocate(size); }
        void free(uint64_t ptr, size_t size) { alloc->free(ptr, size); }

      protected:
        uint64_t base = 0, size = 0;
        std::unique_ptr<HeapAllocator> alloc;
    };

    Heap &getHeap(HeapIndex heapIndex) {
        return heaps[static_cast<uint32_t>(heapIndex)];
    }

    std::array<Heap, static_cast<uint32_t>(HeapIndex::TOTAL_HEAPS)> heaps;

    void *reservedCpuAddressRange = nullptr;
    size_t reservedCpuAddressRangeSize = 0;
    std::unique_ptr<OSMemory> osMemory;
};

} // namespace NEO

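Note (illustrative, not part of the commit): a sketch of the GfxPartition call sequence implied by the header above, assuming a 48-bit GPU address space, a single root device, and the maxNBitValue/MemoryConstants helpers from the included headers.

    NEO::GfxPartition partition;
    partition.init(maxNBitValue(48), 0u, 0u, 1u); // the CPU reserve size only matters in the 47-bit case

    size_t size = MemoryConstants::pageSize64k;
    uint64_t gpuVa = partition.heapAllocate(NEO::HeapIndex::HEAP_STANDARD, size); // size may be rounded up
    // ... bind and use gpuVa ...
    partition.heapFree(NEO::HeapIndex::HEAP_STANDARD, gpuVa, size);
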
16 shared/source/memory_manager/gfx_partition_init_additional_range.cpp (new file)
@@ -0,0 +1,16 @@
/*
 * Copyright (C) 2019-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "memory_manager/gfx_partition.h"

namespace NEO {

void GfxPartition::initAdditionalRange(uint64_t gpuAddressSpace, uint64_t &gfxBase, uint64_t &gfxTop, uint32_t rootDeviceIndex, size_t numRootDevices) {
    UNRECOVERABLE_IF("Invalid GPU Address Range!");
}

} // namespace NEO

73 shared/source/memory_manager/graphics_allocation.cpp (new file)
@@ -0,0 +1,73 @@
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "graphics_allocation.h"

#include "helpers/aligned_memory.h"
#include "memory_manager/memory_manager.h"
#include "opencl/source/utilities/logger.h"

namespace NEO {
void GraphicsAllocation::setAllocationType(AllocationType allocationType) {
    this->allocationType = allocationType;
    FileLoggerInstance().logAllocation(this);
}

GraphicsAllocation::GraphicsAllocation(uint32_t rootDeviceIndex, AllocationType allocationType, void *cpuPtrIn, uint64_t gpuAddress, uint64_t baseAddress,
                                       size_t sizeIn, MemoryPool::Type pool)
    : rootDeviceIndex(rootDeviceIndex),
      gpuBaseAddress(baseAddress),
      gpuAddress(gpuAddress),
      size(sizeIn),
      cpuPtr(cpuPtrIn),
      memoryPool(pool),
      allocationType(allocationType),
      usageInfos(MemoryManager::maxOsContextCount) {
}

GraphicsAllocation::GraphicsAllocation(uint32_t rootDeviceIndex, AllocationType allocationType, void *cpuPtrIn, size_t sizeIn, osHandle sharedHandleIn,
                                       MemoryPool::Type pool)
    : rootDeviceIndex(rootDeviceIndex),
      gpuAddress(castToUint64(cpuPtrIn)),
      size(sizeIn),
      cpuPtr(cpuPtrIn),
      memoryPool(pool),
      allocationType(allocationType),
      usageInfos(MemoryManager::maxOsContextCount) {
    sharingInfo.sharedHandle = sharedHandleIn;
}

GraphicsAllocation::~GraphicsAllocation() = default;

void GraphicsAllocation::updateTaskCount(uint32_t newTaskCount, uint32_t contextId) {
    if (usageInfos[contextId].taskCount == objectNotUsed) {
        registeredContextsNum++;
    }
    if (newTaskCount == objectNotUsed) {
        registeredContextsNum--;
    }
    usageInfos[contextId].taskCount = newTaskCount;
}

std::string GraphicsAllocation::getAllocationInfoString() const {
    return "";
}

uint32_t GraphicsAllocation::getUsedPageSize() const {
    switch (this->memoryPool) {
    case MemoryPool::System64KBPages:
    case MemoryPool::System64KBPagesWith32BitGpuAddressing:
    case MemoryPool::LocalMemory:
        return MemoryConstants::pageSize64k;
    default:
        return MemoryConstants::pageSize;
    }
}

constexpr uint32_t GraphicsAllocation::objectNotUsed;
constexpr uint32_t GraphicsAllocation::objectNotResident;
} // namespace NEO

293 shared/source/memory_manager/graphics_allocation.h (new file)
@@ -0,0 +1,293 @@
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

#include "helpers/debug_helpers.h"
#include "helpers/ptr_math.h"
#include "memory_manager/host_ptr_defines.h"
#include "memory_manager/memory_constants.h"
#include "memory_manager/memory_pool.h"
#include "utilities/idlist.h"
#include "utilities/stackvec.h"

#include "engine_limits.h"
#include "storage_info.h"

#include <array>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <mutex>
#include <vector>

namespace NEO {

using osHandle = unsigned int;
inline osHandle toOsHandle(const void *handle) {
    return static_cast<osHandle>(castToUint64(handle));
}

enum class HeapIndex : uint32_t;

namespace Sharing {
constexpr auto nonSharedResource = 0u;
}

class Gmm;
class MemoryManager;

class GraphicsAllocation : public IDNode<GraphicsAllocation> {
  public:
    enum class AllocationType {
        UNKNOWN = 0,
        BUFFER,
        BUFFER_COMPRESSED,
        BUFFER_HOST_MEMORY,
        COMMAND_BUFFER,
        CONSTANT_SURFACE,
        DEVICE_QUEUE_BUFFER,
        EXTERNAL_HOST_PTR,
        FILL_PATTERN,
        GLOBAL_SURFACE,
        IMAGE,
        INDIRECT_OBJECT_HEAP,
        INSTRUCTION_HEAP,
        INTERNAL_HEAP,
        INTERNAL_HOST_MEMORY,
        KERNEL_ISA,
        LINEAR_STREAM,
        MAP_ALLOCATION,
        MCS,
        PIPE,
        PREEMPTION,
        PRINTF_SURFACE,
        PRIVATE_SURFACE,
        PROFILING_TAG_BUFFER,
        SCRATCH_SURFACE,
        SHARED_BUFFER,
        SHARED_CONTEXT_IMAGE,
        SHARED_IMAGE,
        SHARED_RESOURCE_COPY,
        SURFACE_STATE_HEAP,
        SVM_CPU,
        SVM_GPU,
        SVM_ZERO_COPY,
        TAG_BUFFER,
        GLOBAL_FENCE,
        TIMESTAMP_PACKET_TAG_BUFFER,
        WRITE_COMBINED,
        RING_BUFFER,
        SEMAPHORE_BUFFER
    };

    ~GraphicsAllocation() override;
    GraphicsAllocation &operator=(const GraphicsAllocation &) = delete;
    GraphicsAllocation(const GraphicsAllocation &) = delete;

    GraphicsAllocation(uint32_t rootDeviceIndex, AllocationType allocationType, void *cpuPtrIn, uint64_t gpuAddress, uint64_t baseAddress, size_t sizeIn, MemoryPool::Type pool);

    GraphicsAllocation(uint32_t rootDeviceIndex, AllocationType allocationType, void *cpuPtrIn, size_t sizeIn, osHandle sharedHandleIn, MemoryPool::Type pool);

    uint32_t getRootDeviceIndex() const { return rootDeviceIndex; }
    void *getUnderlyingBuffer() const { return cpuPtr; }
    void *getDriverAllocatedCpuPtr() const { return driverAllocatedCpuPointer; }
    void setDriverAllocatedCpuPtr(void *allocatedCpuPtr) { driverAllocatedCpuPointer = allocatedCpuPtr; }

    void setCpuPtrAndGpuAddress(void *cpuPtr, uint64_t gpuAddress) {
        this->cpuPtr = cpuPtr;
        this->gpuAddress = gpuAddress;
    }
    size_t getUnderlyingBufferSize() const { return size; }
    void setSize(size_t size) { this->size = size; }

    uint64_t getAllocationOffset() const {
        return allocationOffset;
    }
    void setAllocationOffset(uint64_t offset) {
        allocationOffset = offset;
    }

    uint64_t getGpuBaseAddress() const {
        return gpuBaseAddress;
    }
    void setGpuBaseAddress(uint64_t baseAddress) {
        gpuBaseAddress = baseAddress;
    }
    uint64_t getGpuAddress() const {
        DEBUG_BREAK_IF(gpuAddress < gpuBaseAddress);
        return gpuAddress + allocationOffset;
    }
    uint64_t getGpuAddressToPatch() const {
        DEBUG_BREAK_IF(gpuAddress < gpuBaseAddress);
        return gpuAddress + allocationOffset - gpuBaseAddress;
    }

    void lock(void *ptr) { lockedPtr = ptr; }
    void unlock() { lockedPtr = nullptr; }
    bool isLocked() const { return lockedPtr != nullptr; }
    void *getLockedPtr() const { return lockedPtr; }

    bool isCoherent() const { return allocationInfo.flags.coherent; }
    void setCoherent(bool coherentIn) { allocationInfo.flags.coherent = coherentIn; }
    void setEvictable(bool evictable) { allocationInfo.flags.evictable = evictable; }
    bool peekEvictable() const { return allocationInfo.flags.evictable; }
    bool isFlushL3Required() const { return allocationInfo.flags.flushL3Required; }
    void setFlushL3Required(bool flushL3Required) { allocationInfo.flags.flushL3Required = flushL3Required; }
    bool is32BitAllocation() const { return allocationInfo.flags.is32BitAllocation; }
    void set32BitAllocation(bool is32BitAllocation) { allocationInfo.flags.is32BitAllocation = is32BitAllocation; }

    void setAubWritable(bool writable, uint32_t banks);
    bool isAubWritable(uint32_t banks) const;
    void setTbxWritable(bool writable, uint32_t banks);
    bool isTbxWritable(uint32_t banks) const;
    void setAllocDumpable(bool dumpable) { aubInfo.allocDumpable = dumpable; }
    bool isAllocDumpable() const { return aubInfo.allocDumpable; }
    bool isMemObjectsAllocationWithWritableFlags() const { return aubInfo.memObjectsAllocationWithWritableFlags; }
    void setMemObjectsAllocationWithWritableFlags(bool newValue) { aubInfo.memObjectsAllocationWithWritableFlags = newValue; }

    void incReuseCount() { sharingInfo.reuseCount++; }
    void decReuseCount() { sharingInfo.reuseCount--; }
    uint32_t peekReuseCount() const { return sharingInfo.reuseCount; }
    osHandle peekSharedHandle() const { return sharingInfo.sharedHandle; }

    void setAllocationType(AllocationType allocationType);
    AllocationType getAllocationType() const { return allocationType; }

    MemoryPool::Type getMemoryPool() const { return memoryPool; }

    bool isUsed() const { return registeredContextsNum > 0; }
    bool isUsedByManyOsContexts() const { return registeredContextsNum > 1u; }
    bool isUsedByOsContext(uint32_t contextId) const { return objectNotUsed != getTaskCount(contextId); }
    void updateTaskCount(uint32_t newTaskCount, uint32_t contextId);
    uint32_t getTaskCount(uint32_t contextId) const { return usageInfos[contextId].taskCount; }
    void releaseUsageInOsContext(uint32_t contextId) { updateTaskCount(objectNotUsed, contextId); }
    uint32_t getInspectionId(uint32_t contextId) const { return usageInfos[contextId].inspectionId; }
    void setInspectionId(uint32_t newInspectionId, uint32_t contextId) { usageInfos[contextId].inspectionId = newInspectionId; }

    bool isResident(uint32_t contextId) const { return GraphicsAllocation::objectNotResident != getResidencyTaskCount(contextId); }
    void updateResidencyTaskCount(uint32_t newTaskCount, uint32_t contextId) { usageInfos[contextId].residencyTaskCount = newTaskCount; }
    uint32_t getResidencyTaskCount(uint32_t contextId) const { return usageInfos[contextId].residencyTaskCount; }
    void releaseResidencyInOsContext(uint32_t contextId) { updateResidencyTaskCount(objectNotResident, contextId); }
    bool isResidencyTaskCountBelow(uint32_t taskCount, uint32_t contextId) const { return !isResident(contextId) || getResidencyTaskCount(contextId) < taskCount; }

    virtual std::string getAllocationInfoString() const;
    virtual uint64_t peekInternalHandle(MemoryManager *memoryManager) { return 0llu; }

    static bool isCpuAccessRequired(AllocationType allocationType) {
        return allocationType == AllocationType::COMMAND_BUFFER ||
               allocationType == AllocationType::CONSTANT_SURFACE ||
               allocationType == AllocationType::GLOBAL_SURFACE ||
               allocationType == AllocationType::INTERNAL_HEAP ||
               allocationType == AllocationType::LINEAR_STREAM ||
               allocationType == AllocationType::PIPE ||
               allocationType == AllocationType::PRINTF_SURFACE ||
               allocationType == AllocationType::TIMESTAMP_PACKET_TAG_BUFFER ||
               allocationType == AllocationType::RING_BUFFER ||
               allocationType == AllocationType::SEMAPHORE_BUFFER;
    }
    void *getReservedAddressPtr() const {
        return this->reservedAddressRangeInfo.addressPtr;
    }
    size_t getReservedAddressSize() const {
        return this->reservedAddressRangeInfo.rangeSize;
    }
    void setReservedAddressRange(void *reserveAddress, size_t size) {
        this->reservedAddressRangeInfo.addressPtr = reserveAddress;
        this->reservedAddressRangeInfo.rangeSize = size;
    }

    Gmm *getDefaultGmm() const {
        return getGmm(0u);
    }
    Gmm *getGmm(uint32_t handleId) const {
        return gmms[handleId];
    }
    void setDefaultGmm(Gmm *gmm) {
        setGmm(gmm, 0u);
    }
    void setGmm(Gmm *gmm, uint32_t handleId) {
        gmms[handleId] = gmm;
    }

    uint32_t getNumHandles() const { return storageInfo.getNumHandles(); }
    uint32_t getUsedPageSize() const;

    OsHandleStorage fragmentsStorage;
    StorageInfo storageInfo = {};

    static constexpr uint32_t defaultBank = 0b1u;
    static constexpr uint32_t allBanks = 0xffffffff;
    constexpr static uint32_t objectNotResident = std::numeric_limits<uint32_t>::max();
    constexpr static uint32_t objectNotUsed = std::numeric_limits<uint32_t>::max();

  protected:
    struct UsageInfo {
        uint32_t taskCount = objectNotUsed;
        uint32_t residencyTaskCount = objectNotResident;
        uint32_t inspectionId = 0u;
    };
    struct AubInfo {
        uint32_t aubWritable = std::numeric_limits<uint32_t>::max();
        uint32_t tbxWritable = std::numeric_limits<uint32_t>::max();
        bool allocDumpable = false;
        bool memObjectsAllocationWithWritableFlags = false;
    };
    struct SharingInfo {
        uint32_t reuseCount = 0;
        osHandle sharedHandle = Sharing::nonSharedResource;
    };
    struct AllocationInfo {
        union {
            struct {
                uint32_t coherent : 1;
                uint32_t evictable : 1;
                uint32_t flushL3Required : 1;
                uint32_t is32BitAllocation : 1;
                uint32_t reserved : 28;
            } flags;
            uint32_t allFlags = 0u;
        };
        static_assert(sizeof(AllocationInfo::flags) == sizeof(AllocationInfo::allFlags), "");
        AllocationInfo() {
            flags.coherent = false;
            flags.evictable = true;
            flags.flushL3Required = true;
            flags.is32BitAllocation = false;
        }
    };

    struct ReservedAddressRange {
        void *addressPtr = nullptr;
        size_t rangeSize = 0;
    };

    friend class SubmissionAggregator;

    const uint32_t rootDeviceIndex;
    AllocationInfo allocationInfo;
    AubInfo aubInfo;
    SharingInfo sharingInfo;
    ReservedAddressRange reservedAddressRangeInfo;

    uint64_t allocationOffset = 0u;
    uint64_t gpuBaseAddress = 0;
    uint64_t gpuAddress = 0;
    void *driverAllocatedCpuPointer = nullptr;
    size_t size = 0;
    void *cpuPtr = nullptr;
    void *lockedPtr = nullptr;

    MemoryPool::Type memoryPool = MemoryPool::MemoryNull;
    AllocationType allocationType = AllocationType::UNKNOWN;

    StackVec<UsageInfo, 32> usageInfos;
    std::atomic<uint32_t> registeredContextsNum{0};
    std::array<Gmm *, EngineLimits::maxHandleCount> gmms{};
};
} // namespace NEO

17 shared/source/memory_manager/graphics_allocation_extra.cpp (new file)
@@ -0,0 +1,17 @@
/*
 * Copyright (C) 2019-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "memory_manager/graphics_allocation.h"

namespace NEO {

void GraphicsAllocation::setAubWritable(bool writable, uint32_t banks) { aubInfo.aubWritable = writable; }
bool GraphicsAllocation::isAubWritable(uint32_t banks) const { return (aubInfo.aubWritable != 0); }
void GraphicsAllocation::setTbxWritable(bool writable, uint32_t banks) { aubInfo.tbxWritable = writable; }
bool GraphicsAllocation::isTbxWritable(uint32_t banks) const { return (aubInfo.tbxWritable != 0); }

} // namespace NEO

76 shared/source/memory_manager/host_ptr_defines.h (new file)
@@ -0,0 +1,76 @@
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

#include <cinttypes>
#include <cstdlib>

namespace NEO {

struct OsHandle;
struct ResidencyData;

using OsGraphicsHandle = OsHandle;

constexpr int maxFragmentsCount = 3;

enum class FragmentPosition {
    NONE = 0,
    LEADING,
    MIDDLE,
    TRAILING
};

enum OverlapStatus {
    FRAGMENT_NOT_OVERLAPING_WITH_ANY_OTHER = 0,
    FRAGMENT_WITHIN_STORED_FRAGMENT,
    FRAGMENT_WITH_EXACT_SIZE_AS_STORED_FRAGMENT,
    FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT,
    FRAGMENT_NOT_CHECKED
};

enum RequirementsStatus {
    SUCCESS = 0,
    FATAL
};

struct PartialAllocation {
    FragmentPosition fragmentPosition = FragmentPosition::NONE;
    const void *allocationPtr = nullptr;
    size_t allocationSize = 0u;
};

struct AllocationRequirements {
    PartialAllocation allocationFragments[maxFragmentsCount];
    uint64_t totalRequiredSize = 0u;
    uint32_t requiredFragmentsCount = 0u;
};

struct FragmentStorage {
    const void *fragmentCpuPointer = nullptr;
    size_t fragmentSize = 0;
    int refCount = 0;
    OsHandle *osInternalStorage = nullptr;
    ResidencyData *residency = nullptr;
    bool driverAllocation = false;
};

struct AllocationStorageData {
    OsHandle *osHandleStorage = nullptr;
    size_t fragmentSize = 0;
    const void *cpuPtr = nullptr;
    bool freeTheFragment = false;
    ResidencyData *residency = nullptr;
};

struct OsHandleStorage {
    AllocationStorageData fragmentStorageData[maxFragmentsCount];
    uint32_t fragmentCount = 0;
};

} // namespace NEO

287 shared/source/memory_manager/host_ptr_manager.cpp (new file)
@@ -0,0 +1,287 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "memory_manager/host_ptr_manager.h"
|
||||
|
||||
#include "memory_manager/memory_manager.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
HostPtrFragmentsContainer::iterator HostPtrManager::findElement(const void *ptr) {
|
||||
auto nextElement = partialAllocations.lower_bound(ptr);
|
||||
auto element = nextElement;
|
||||
if (element != partialAllocations.end()) {
|
||||
auto &storedFragment = element->second;
|
||||
if (storedFragment.fragmentCpuPointer <= ptr) {
|
||||
return element;
|
||||
} else if (element != partialAllocations.begin()) {
|
||||
element--;
|
||||
auto &storedFragment = element->second;
|
||||
auto storedEndAddress = (uintptr_t)storedFragment.fragmentCpuPointer + storedFragment.fragmentSize;
|
||||
if (storedFragment.fragmentSize == 0) {
|
||||
storedEndAddress++;
|
||||
}
|
||||
if ((uintptr_t)ptr < (uintptr_t)storedEndAddress) {
|
||||
return element;
|
||||
}
|
||||
}
|
||||
} else if (element != partialAllocations.begin()) {
|
||||
element--;
|
||||
auto &storedFragment = element->second;
|
||||
auto storedEndAddress = (uintptr_t)storedFragment.fragmentCpuPointer + storedFragment.fragmentSize;
|
||||
if (storedFragment.fragmentSize == 0) {
|
||||
storedEndAddress++;
|
||||
}
|
||||
if ((uintptr_t)ptr < (uintptr_t)storedEndAddress) {
|
||||
return element;
|
||||
}
|
||||
}
|
||||
return partialAllocations.end();
|
||||
}
|
||||
|
||||
AllocationRequirements HostPtrManager::getAllocationRequirements(const void *inputPtr, size_t size) {
|
||||
AllocationRequirements requiredAllocations;
|
||||
|
||||
auto allocationCount = 0;
|
||||
auto wholeAllocationSize = alignSizeWholePage(inputPtr, size);
|
||||
|
||||
auto alignedStartAddress = alignDown(inputPtr, MemoryConstants::pageSize);
|
||||
bool leadingNeeded = false;
|
||||
|
||||
if (alignedStartAddress != inputPtr) {
|
||||
leadingNeeded = true;
|
||||
requiredAllocations.allocationFragments[allocationCount].allocationPtr = alignedStartAddress;
|
||||
requiredAllocations.allocationFragments[allocationCount].fragmentPosition = FragmentPosition::LEADING;
|
||||
requiredAllocations.allocationFragments[allocationCount].allocationSize = MemoryConstants::pageSize;
|
||||
allocationCount++;
|
||||
}
|
||||
|
||||
auto endAddress = ptrOffset(inputPtr, size);
|
||||
auto alignedEndAddress = alignDown(endAddress, MemoryConstants::pageSize);
|
||||
bool trailingNeeded = false;
|
||||
|
||||
if (alignedEndAddress != endAddress && alignedEndAddress != alignedStartAddress) {
|
||||
trailingNeeded = true;
|
||||
}
|
||||
|
||||
auto middleSize = wholeAllocationSize - (trailingNeeded + leadingNeeded) * MemoryConstants::pageSize;
|
||||
if (middleSize) {
|
||||
requiredAllocations.allocationFragments[allocationCount].allocationPtr = alignUp(inputPtr, MemoryConstants::pageSize);
|
||||
requiredAllocations.allocationFragments[allocationCount].fragmentPosition = FragmentPosition::MIDDLE;
|
||||
requiredAllocations.allocationFragments[allocationCount].allocationSize = middleSize;
|
||||
allocationCount++;
|
||||
}
|
||||
|
||||
if (trailingNeeded) {
|
||||
requiredAllocations.allocationFragments[allocationCount].allocationPtr = alignedEndAddress;
|
||||
requiredAllocations.allocationFragments[allocationCount].fragmentPosition = FragmentPosition::TRAILING;
|
||||
requiredAllocations.allocationFragments[allocationCount].allocationSize = MemoryConstants::pageSize;
|
||||
allocationCount++;
|
||||
}
|
||||
|
||||
requiredAllocations.totalRequiredSize = wholeAllocationSize;
|
||||
requiredAllocations.requiredFragmentsCount = allocationCount;
|
||||
|
||||
return requiredAllocations;
|
||||
}
|
||||
|
||||
OsHandleStorage HostPtrManager::populateAlreadyAllocatedFragments(AllocationRequirements &requirements) {
|
||||
OsHandleStorage handleStorage;
|
||||
for (unsigned int i = 0; i < requirements.requiredFragmentsCount; i++) {
|
||||
OverlapStatus overlapStatus = OverlapStatus::FRAGMENT_NOT_CHECKED;
|
||||
FragmentStorage *fragmentStorage = getFragmentAndCheckForOverlaps(const_cast<void *>(requirements.allocationFragments[i].allocationPtr), requirements.allocationFragments[i].allocationSize, overlapStatus);
|
||||
if (overlapStatus == OverlapStatus::FRAGMENT_WITHIN_STORED_FRAGMENT) {
|
||||
UNRECOVERABLE_IF(fragmentStorage == nullptr);
|
||||
fragmentStorage->refCount++;
|
||||
handleStorage.fragmentStorageData[i].osHandleStorage = fragmentStorage->osInternalStorage;
|
||||
handleStorage.fragmentStorageData[i].cpuPtr = requirements.allocationFragments[i].allocationPtr;
|
||||
handleStorage.fragmentStorageData[i].fragmentSize = requirements.allocationFragments[i].allocationSize;
|
||||
handleStorage.fragmentStorageData[i].residency = fragmentStorage->residency;
|
||||
} else if (overlapStatus != OverlapStatus::FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT) {
|
||||
if (fragmentStorage != nullptr) {
|
||||
DEBUG_BREAK_IF(overlapStatus != OverlapStatus::FRAGMENT_WITH_EXACT_SIZE_AS_STORED_FRAGMENT);
|
||||
fragmentStorage->refCount++;
|
||||
handleStorage.fragmentStorageData[i].osHandleStorage = fragmentStorage->osInternalStorage;
|
||||
handleStorage.fragmentStorageData[i].residency = fragmentStorage->residency;
|
||||
} else {
|
||||
DEBUG_BREAK_IF(overlapStatus != OverlapStatus::FRAGMENT_NOT_OVERLAPING_WITH_ANY_OTHER);
|
||||
}
|
||||
handleStorage.fragmentStorageData[i].cpuPtr = requirements.allocationFragments[i].allocationPtr;
|
||||
handleStorage.fragmentStorageData[i].fragmentSize = requirements.allocationFragments[i].allocationSize;
|
||||
} else {
|
||||
//abort whole application instead of silently passing.
|
||||
abortExecution();
|
||||
return handleStorage;
|
||||
}
|
||||
}
|
||||
handleStorage.fragmentCount = requirements.requiredFragmentsCount;
|
||||
return handleStorage;
|
||||
}
|
||||
|
||||
void HostPtrManager::storeFragment(FragmentStorage &fragment) {
|
||||
std::lock_guard<decltype(allocationsMutex)> lock(allocationsMutex);
|
||||
auto element = findElement(fragment.fragmentCpuPointer);
|
||||
if (element != partialAllocations.end()) {
|
||||
element->second.refCount++;
|
||||
} else {
|
||||
fragment.refCount++;
|
||||
partialAllocations.insert(std::pair<const void *, FragmentStorage>(fragment.fragmentCpuPointer, fragment));
|
||||
}
|
||||
}
|
||||
|
||||
void HostPtrManager::storeFragment(AllocationStorageData &storageData) {
|
||||
FragmentStorage fragment;
|
||||
fragment.fragmentCpuPointer = const_cast<void *>(storageData.cpuPtr);
|
||||
fragment.fragmentSize = storageData.fragmentSize;
|
||||
fragment.osInternalStorage = storageData.osHandleStorage;
|
||||
fragment.residency = storageData.residency;
|
||||
storeFragment(fragment);
|
||||
}
|
||||
|
||||
std::unique_lock<std::recursive_mutex> HostPtrManager::obtainOwnership() {
|
||||
return std::unique_lock<std::recursive_mutex>(allocationsMutex);
|
||||
}
|
||||
|
||||
void HostPtrManager::releaseHandleStorage(OsHandleStorage &fragments) {
|
||||
for (int i = 0; i < maxFragmentsCount; i++) {
|
||||
if (fragments.fragmentStorageData[i].fragmentSize || fragments.fragmentStorageData[i].cpuPtr) {
|
||||
fragments.fragmentStorageData[i].freeTheFragment = releaseHostPtr(fragments.fragmentStorageData[i].cpuPtr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool HostPtrManager::releaseHostPtr(const void *ptr) {
|
||||
std::lock_guard<decltype(allocationsMutex)> lock(allocationsMutex);
|
||||
bool fragmentReadyToBeReleased = false;
|
||||
|
||||
auto element = findElement(ptr);
|
||||
|
||||
DEBUG_BREAK_IF(element == partialAllocations.end());
|
||||
|
||||
element->second.refCount--;
|
||||
if (element->second.refCount <= 0) {
|
||||
fragmentReadyToBeReleased = true;
|
||||
partialAllocations.erase(element);
|
||||
}
|
||||
|
||||
return fragmentReadyToBeReleased;
|
||||
}
|
||||
|
||||
FragmentStorage *HostPtrManager::getFragment(const void *inputPtr) {
|
||||
std::lock_guard<decltype(allocationsMutex)> lock(allocationsMutex);
|
||||
auto element = findElement(inputPtr);
|
||||
if (element != partialAllocations.end()) {
|
||||
return &element->second;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
//for given inputs see if any allocation overlaps
|
||||
FragmentStorage *HostPtrManager::getFragmentAndCheckForOverlaps(const void *inPtr, size_t size, OverlapStatus &overlappingStatus) {
|
||||
std::lock_guard<decltype(allocationsMutex)> lock(allocationsMutex);
|
||||
void *inputPtr = const_cast<void *>(inPtr);
|
||||
auto nextElement = partialAllocations.lower_bound(inputPtr);
|
||||
auto element = nextElement;
|
||||
overlappingStatus = OverlapStatus::FRAGMENT_NOT_OVERLAPING_WITH_ANY_OTHER;
|
||||
|
||||
if (element != partialAllocations.begin()) {
|
||||
element--;
|
||||
}
|
||||
|
||||
if (element != partialAllocations.end()) {
|
||||
auto &storedFragment = element->second;
|
||||
        if (storedFragment.fragmentCpuPointer == inputPtr && storedFragment.fragmentSize == size) {
            overlappingStatus = OverlapStatus::FRAGMENT_WITH_EXACT_SIZE_AS_STORED_FRAGMENT;
            return &element->second;
        }

        auto storedEndAddress = (uintptr_t)storedFragment.fragmentCpuPointer + storedFragment.fragmentSize;
        auto inputEndAddress = (uintptr_t)inputPtr + size;

        if (inputPtr >= storedFragment.fragmentCpuPointer && (uintptr_t)inputPtr < (uintptr_t)storedEndAddress) {
            if (inputEndAddress <= storedEndAddress) {
                overlappingStatus = OverlapStatus::FRAGMENT_WITHIN_STORED_FRAGMENT;
                return &element->second;
            } else {
                overlappingStatus = OverlapStatus::FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT;
                return nullptr;
            }
        }
        //next fragment doesn't have to be after the inputPtr
        if (nextElement != partialAllocations.end()) {
            auto &storedNextElement = nextElement->second;
            auto storedNextEndAddress = (uintptr_t)storedNextElement.fragmentCpuPointer + storedNextElement.fragmentSize;
            auto storedNextStartAddress = (uintptr_t)storedNextElement.fragmentCpuPointer;
            //check if this allocation is after the inputPtr
            if ((uintptr_t)inputPtr < storedNextStartAddress) {
                if (inputEndAddress > storedNextStartAddress) {
                    overlappingStatus = OverlapStatus::FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT;
                    return nullptr;
                }
            } else if (inputEndAddress > storedNextEndAddress) {
                overlappingStatus = OverlapStatus::FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT;
                return nullptr;
            } else {
                DEBUG_BREAK_IF((uintptr_t)inputPtr != storedNextStartAddress);
                if (inputEndAddress < storedNextEndAddress) {
                    overlappingStatus = OverlapStatus::FRAGMENT_WITHIN_STORED_FRAGMENT;
                } else {
                    DEBUG_BREAK_IF(inputEndAddress != storedNextEndAddress);
                    overlappingStatus = OverlapStatus::FRAGMENT_WITH_EXACT_SIZE_AS_STORED_FRAGMENT;
                }
                return &nextElement->second;
            }
        }
    }
    return nullptr;
}

OsHandleStorage HostPtrManager::prepareOsStorageForAllocation(MemoryManager &memoryManager, size_t size, const void *ptr, uint32_t rootDeviceIndex) {
    std::lock_guard<decltype(allocationsMutex)> lock(allocationsMutex);
    auto requirements = HostPtrManager::getAllocationRequirements(ptr, size);
    UNRECOVERABLE_IF(checkAllocationsForOverlapping(memoryManager, &requirements) == RequirementsStatus::FATAL);
    auto osStorage = populateAlreadyAllocatedFragments(requirements);
    if (osStorage.fragmentCount > 0) {
        if (memoryManager.populateOsHandles(osStorage, rootDeviceIndex) != MemoryManager::AllocationStatus::Success) {
            memoryManager.cleanOsHandles(osStorage, rootDeviceIndex);
            osStorage.fragmentCount = 0;
        }
    }
    return osStorage;
}

RequirementsStatus HostPtrManager::checkAllocationsForOverlapping(MemoryManager &memoryManager, AllocationRequirements *requirements) {
    UNRECOVERABLE_IF(requirements == nullptr);

    RequirementsStatus status = RequirementsStatus::SUCCESS;

    for (unsigned int i = 0; i < requirements->requiredFragmentsCount; i++) {
        OverlapStatus overlapStatus = OverlapStatus::FRAGMENT_NOT_CHECKED;

        getFragmentAndCheckForOverlaps(requirements->allocationFragments[i].allocationPtr, requirements->allocationFragments[i].allocationSize, overlapStatus);
        if (overlapStatus == OverlapStatus::FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT) {
            // clean temporary allocations
            memoryManager.cleanTemporaryAllocationListOnAllEngines(false);

            // check overlapping again
            getFragmentAndCheckForOverlaps(requirements->allocationFragments[i].allocationPtr, requirements->allocationFragments[i].allocationSize, overlapStatus);
            if (overlapStatus == OverlapStatus::FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT) {

                // Wait for completion
                memoryManager.cleanTemporaryAllocationListOnAllEngines(true);

                // check overlapping last time
                getFragmentAndCheckForOverlaps(requirements->allocationFragments[i].allocationPtr, requirements->allocationFragments[i].allocationSize, overlapStatus);
                if (overlapStatus == OverlapStatus::FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT) {
                    status = RequirementsStatus::FATAL;
                    break;
                }
            }
        }
    }
    return status;
}
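The overlap checks above compare the half-open ranges [inputPtr, inputPtr + size) and [fragmentPtr, fragmentPtr + fragmentSize). A minimal standalone sketch of that classification, using plain integers instead of the FragmentStorage bookkeeping (illustrative only):

#include <cassert>
#include <cstdint>

enum class Overlap { None, Exact, Within, BiggerThanStored };

// Classifies [inputStart, inputStart + inputSize) against a stored fragment
// [storedStart, storedStart + storedSize); mirrors the decisions made in
// getFragmentAndCheckForOverlaps for a single stored fragment.
Overlap classify(uintptr_t inputStart, size_t inputSize, uintptr_t storedStart, size_t storedSize) {
    const uintptr_t inputEnd = inputStart + inputSize;
    const uintptr_t storedEnd = storedStart + storedSize;
    if (inputStart == storedStart && inputSize == storedSize) {
        return Overlap::Exact;
    }
    if (inputStart >= storedStart && inputStart < storedEnd) {
        return inputEnd <= storedEnd ? Overlap::Within : Overlap::BiggerThanStored;
    }
    if (inputStart < storedStart && inputEnd > storedStart) {
        return Overlap::BiggerThanStored;
    }
    return Overlap::None;
}

int main() {
    assert(classify(0x1000, 0x1000, 0x1000, 0x1000) == Overlap::Exact);
    assert(classify(0x1200, 0x0400, 0x1000, 0x1000) == Overlap::Within);
    assert(classify(0x1800, 0x1000, 0x1000, 0x1000) == Overlap::BiggerThanStored);
    assert(classify(0x3000, 0x1000, 0x1000, 0x1000) == Overlap::None);
    return 0;
}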
38
shared/source/memory_manager/host_ptr_manager.h
Normal file
@@ -0,0 +1,38 @@
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include "memory_manager/host_ptr_defines.h"

#include <map>
#include <mutex>

namespace NEO {

using HostPtrFragmentsContainer = std::map<const void *, FragmentStorage>;
class MemoryManager;
class HostPtrManager {
  public:
    FragmentStorage *getFragment(const void *inputPtr);
    OsHandleStorage prepareOsStorageForAllocation(MemoryManager &memoryManager, size_t size, const void *ptr, uint32_t rootDeviceIndex);
    void releaseHandleStorage(OsHandleStorage &fragments);
    bool releaseHostPtr(const void *ptr);
    void storeFragment(AllocationStorageData &storageData);
    void storeFragment(FragmentStorage &fragment);
    std::unique_lock<std::recursive_mutex> obtainOwnership();

  protected:
    static AllocationRequirements getAllocationRequirements(const void *inputPtr, size_t size);
    OsHandleStorage populateAlreadyAllocatedFragments(AllocationRequirements &requirements);
    FragmentStorage *getFragmentAndCheckForOverlaps(const void *inputPtr, size_t size, OverlapStatus &overlappingStatus);
    RequirementsStatus checkAllocationsForOverlapping(MemoryManager &memoryManager, AllocationRequirements *requirements);

    HostPtrFragmentsContainer::iterator findElement(const void *ptr);
    HostPtrFragmentsContainer partialAllocations;
    std::recursive_mutex allocationsMutex;
};
} // namespace NEO
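For reference, host-pointer ranges are tracked as page-sized fragments; the sketch below shows the general leading/middle/trailing decomposition, assuming 4KB pages and a hypothetical Fragment struct (it is not the actual getAllocationRequirements implementation):

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Hypothetical page size and fragment struct, for illustration only.
constexpr uintptr_t kPageSize = 4096;

struct Fragment {
    uintptr_t ptr;
    size_t size;
};

// Splits [ptr, ptr + size) into page-aligned leading, middle and trailing
// pieces, the general shape of the fragments a host-pointer manager tracks.
int splitIntoFragments(uintptr_t ptr, size_t size, Fragment out[3]) {
    const uintptr_t start = ptr & ~(kPageSize - 1);
    const uintptr_t end = (ptr + size + kPageSize - 1) & ~(kPageSize - 1);
    int count = 0;
    const uintptr_t leadingEnd = start + kPageSize;
    if (leadingEnd >= end) {
        out[count++] = {start, static_cast<size_t>(end - start)}; // fits in one page
        return count;
    }
    const uintptr_t trailingStart = end - kPageSize;
    out[count++] = {start, kPageSize}; // leading page
    if (trailingStart > leadingEnd) {
        out[count++] = {leadingEnd, static_cast<size_t>(trailingStart - leadingEnd)}; // middle block
    }
    out[count++] = {trailingStart, kPageSize}; // trailing page
    return count;
}

int main() {
    Fragment fragments[3];
    const int count = splitIntoFragments(0x1100, 3 * kPageSize, fragments);
    for (int i = 0; i < count; ++i) {
        std::printf("fragment %d: 0x%zx, %zu bytes\n", i, static_cast<size_t>(fragments[i].ptr), fragments[i].size);
    }
    return 0;
}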
102
shared/source/memory_manager/internal_allocation_storage.cpp
Normal file
@@ -0,0 +1,102 @@
/*
 * Copyright (C) 2018-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "memory_manager/internal_allocation_storage.h"

#include "command_stream/command_stream_receiver.h"
#include "memory_manager/host_ptr_manager.h"
#include "memory_manager/memory_manager.h"
#include "os_interface/os_context.h"

namespace NEO {
InternalAllocationStorage::InternalAllocationStorage(CommandStreamReceiver &commandStreamReceiver) : commandStreamReceiver(commandStreamReceiver){};
void InternalAllocationStorage::storeAllocation(std::unique_ptr<GraphicsAllocation> gfxAllocation, uint32_t allocationUsage) {
    uint32_t taskCount = gfxAllocation->getTaskCount(commandStreamReceiver.getOsContext().getContextId());

    if (allocationUsage == REUSABLE_ALLOCATION) {
        taskCount = commandStreamReceiver.peekTaskCount();
    }

    storeAllocationWithTaskCount(std::move(gfxAllocation), allocationUsage, taskCount);
}
void InternalAllocationStorage::storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation> gfxAllocation, uint32_t allocationUsage, uint32_t taskCount) {
    if (allocationUsage == REUSABLE_ALLOCATION) {
        if (DebugManager.flags.DisableResourceRecycling.get()) {
            commandStreamReceiver.getMemoryManager()->freeGraphicsMemory(gfxAllocation.release());
            return;
        }
    }
    auto &allocationsList = (allocationUsage == TEMPORARY_ALLOCATION) ? temporaryAllocations : allocationsForReuse;
    gfxAllocation->updateTaskCount(taskCount, commandStreamReceiver.getOsContext().getContextId());
    allocationsList.pushTailOne(*gfxAllocation.release());
}

void InternalAllocationStorage::cleanAllocationList(uint32_t waitTaskCount, uint32_t allocationUsage) {
    freeAllocationsList(waitTaskCount, (allocationUsage == TEMPORARY_ALLOCATION) ? temporaryAllocations : allocationsForReuse);
}

void InternalAllocationStorage::freeAllocationsList(uint32_t waitTaskCount, AllocationsList &allocationsList) {
    auto memoryManager = commandStreamReceiver.getMemoryManager();
    auto lock = memoryManager->getHostPtrManager()->obtainOwnership();

    GraphicsAllocation *curr = allocationsList.detachNodes();

    IDList<GraphicsAllocation, false, true> allocationsLeft;
    while (curr != nullptr) {
        auto *next = curr->next;
        if (curr->getTaskCount(commandStreamReceiver.getOsContext().getContextId()) <= waitTaskCount) {
            memoryManager->freeGraphicsMemory(curr);
        } else {
            allocationsLeft.pushTailOne(*curr);
        }
        curr = next;
    }

    if (allocationsLeft.peekIsEmpty() == false) {
        allocationsList.splice(*allocationsLeft.detachNodes());
    }
}

std::unique_ptr<GraphicsAllocation> InternalAllocationStorage::obtainReusableAllocation(size_t requiredSize, GraphicsAllocation::AllocationType allocationType) {
    auto allocation = allocationsForReuse.detachAllocation(requiredSize, commandStreamReceiver, allocationType);
    return allocation;
}

struct ReusableAllocationRequirements {
    size_t requiredMinimalSize;
    volatile uint32_t *csrTagAddress;
    GraphicsAllocation::AllocationType allocationType;
    uint32_t contextId;
};

std::unique_ptr<GraphicsAllocation> AllocationsList::detachAllocation(size_t requiredMinimalSize, CommandStreamReceiver &commandStreamReceiver, GraphicsAllocation::AllocationType allocationType) {
    ReusableAllocationRequirements req;
    req.requiredMinimalSize = requiredMinimalSize;
    req.csrTagAddress = commandStreamReceiver.getTagAddress();
    req.allocationType = allocationType;
    req.contextId = commandStreamReceiver.getOsContext().getContextId();
    GraphicsAllocation *a = nullptr;
    GraphicsAllocation *retAlloc = processLocked<AllocationsList, &AllocationsList::detachAllocationImpl>(a, static_cast<void *>(&req));
    return std::unique_ptr<GraphicsAllocation>(retAlloc);
}

GraphicsAllocation *AllocationsList::detachAllocationImpl(GraphicsAllocation *, void *data) {
    ReusableAllocationRequirements *req = static_cast<ReusableAllocationRequirements *>(data);
    auto *curr = head;
    while (curr != nullptr) {
        auto currentTagValue = *req->csrTagAddress;
        if ((req->allocationType == curr->getAllocationType()) &&
            (curr->getUnderlyingBufferSize() >= req->requiredMinimalSize) &&
            (currentTagValue >= curr->getTaskCount(req->contextId))) {
            return removeOneImpl(curr, nullptr);
        }
        curr = curr->next;
    }
    return nullptr;
}

} // namespace NEO
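The reuse predicate in detachAllocationImpl reduces to three checks; a self-contained sketch with a simplified stand-in for GraphicsAllocation (illustrative only):

#include <cassert>
#include <cstddef>
#include <cstdint>

// Simplified stand-in for the fields detachAllocationImpl looks at.
struct CandidateAllocation {
    uint32_t allocationType;
    size_t underlyingBufferSize;
    uint32_t taskCount; // task count recorded for the CSR's context
};

// Mirrors the condition used while walking the reuse list: same type,
// big enough, and already completed on the GPU (tag >= taskCount).
bool isReusable(const CandidateAllocation &candidate, uint32_t requestedType,
                size_t requiredMinimalSize, uint32_t currentTagValue) {
    return candidate.allocationType == requestedType &&
           candidate.underlyingBufferSize >= requiredMinimalSize &&
           currentTagValue >= candidate.taskCount;
}

int main() {
    CandidateAllocation candidate{1u, 64 * 1024, 7u};
    assert(isReusable(candidate, 1u, 4096, 9u));  // completed and large enough
    assert(!isReusable(candidate, 1u, 4096, 5u)); // GPU has not reached taskCount yet
    assert(!isReusable(candidate, 2u, 4096, 9u)); // different allocation type
    return 0;
}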
32
shared/source/memory_manager/internal_allocation_storage.h
Normal file
@@ -0,0 +1,32 @@
/*
 * Copyright (C) 2018-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include "memory_manager/allocations_list.h"

namespace NEO {
class CommandStreamReceiver;

class InternalAllocationStorage {
  public:
    MOCKABLE_VIRTUAL ~InternalAllocationStorage() = default;
    InternalAllocationStorage(CommandStreamReceiver &commandStreamReceiver);
    MOCKABLE_VIRTUAL void cleanAllocationList(uint32_t waitTaskCount, uint32_t allocationUsage);
    void storeAllocation(std::unique_ptr<GraphicsAllocation> gfxAllocation, uint32_t allocationUsage);
    void storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation> gfxAllocation, uint32_t allocationUsage, uint32_t taskCount);
    std::unique_ptr<GraphicsAllocation> obtainReusableAllocation(size_t requiredSize, GraphicsAllocation::AllocationType allocationType);
    AllocationsList &getTemporaryAllocations() { return temporaryAllocations; }
    AllocationsList &getAllocationsForReuse() { return allocationsForReuse; }

  protected:
    void freeAllocationsList(uint32_t waitTaskCount, AllocationsList &allocationsList);
    CommandStreamReceiver &commandStreamReceiver;

    AllocationsList temporaryAllocations;
    AllocationsList allocationsForReuse;
};
} // namespace NEO
52
shared/source/memory_manager/local_memory_usage.cpp
Normal file
@@ -0,0 +1,52 @@
/*
 * Copyright (C) 2019-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "memory_manager/local_memory_usage.h"

#include <algorithm>
#include <bitset>
#include <iterator>

namespace NEO {

LocalMemoryUsageBankSelector::LocalMemoryUsageBankSelector(uint32_t banksCount) : banksCount(banksCount) {
    UNRECOVERABLE_IF(banksCount == 0);

    memorySizes.reset(new std::atomic<uint64_t>[banksCount]);
    for (uint32_t i = 0; i < banksCount; i++) {
        memorySizes[i] = 0;
    }
}

uint32_t LocalMemoryUsageBankSelector::getLeastOccupiedBank() {
    auto leastOccupiedBankIterator = std::min_element(memorySizes.get(), memorySizes.get() + banksCount);
    return static_cast<uint32_t>(std::distance(memorySizes.get(), leastOccupiedBankIterator));
}

void LocalMemoryUsageBankSelector::freeOnBank(uint32_t bankIndex, uint64_t allocationSize) {
    UNRECOVERABLE_IF(bankIndex >= banksCount);
    memorySizes[bankIndex] -= allocationSize;
}
void LocalMemoryUsageBankSelector::reserveOnBank(uint32_t bankIndex, uint64_t allocationSize) {
    UNRECOVERABLE_IF(bankIndex >= banksCount);
    memorySizes[bankIndex] += allocationSize;
}

void LocalMemoryUsageBankSelector::updateUsageInfo(uint32_t memoryBanks, uint64_t allocationSize, bool reserve) {
    auto banks = std::bitset<32>(memoryBanks);
    for (uint32_t bankIndex = 0; bankIndex < banks.size() && bankIndex < banksCount; bankIndex++) {
        if (banks.test(bankIndex)) {
            if (reserve) {
                reserveOnBank(bankIndex, allocationSize);
            } else {
                freeOnBank(bankIndex, allocationSize);
            }
        }
    }
}

} // namespace NEO
41
shared/source/memory_manager/local_memory_usage.h
Normal file
@@ -0,0 +1,41 @@
/*
 * Copyright (C) 2019-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

#include "helpers/debug_helpers.h"
#include "helpers/non_copyable_or_moveable.h"

#include <atomic>
#include <memory>

namespace NEO {
class LocalMemoryUsageBankSelector : public NonCopyableOrMovableClass {
  public:
    LocalMemoryUsageBankSelector() = delete;
    LocalMemoryUsageBankSelector(uint32_t banksCount);
    uint32_t getLeastOccupiedBank();
    void reserveOnBanks(uint32_t memoryBanks, uint64_t allocationSize) {
        updateUsageInfo(memoryBanks, allocationSize, true);
    }
    void freeOnBanks(uint32_t memoryBanks, uint64_t allocationSize) {
        updateUsageInfo(memoryBanks, allocationSize, false);
    }

    uint64_t getOccupiedMemorySizeForBank(uint32_t bankIndex) {
        UNRECOVERABLE_IF(bankIndex >= banksCount);
        return memorySizes[bankIndex].load();
    }

  protected:
    uint32_t banksCount = 0;
    std::unique_ptr<std::atomic<uint64_t>[]> memorySizes = nullptr;
    void updateUsageInfo(uint32_t memoryBanks, uint64_t allocationSize, bool reserve);
    void freeOnBank(uint32_t bankIndex, uint64_t allocationSize);
    void reserveOnBank(uint32_t bankIndex, uint64_t allocationSize);
};
} // namespace NEO
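A short usage sketch of the selector (illustrative; assumes the header above is on the include path and a hypothetical 4-bank device):

#include "memory_manager/local_memory_usage.h"

#include <cstdio>

int main() {
    NEO::LocalMemoryUsageBankSelector selector(4);

    // Bank indices come from bit positions in the memoryBanks mask,
    // so 0b0011 reserves on banks 0 and 1.
    selector.reserveOnBanks(0b0011u, 2 * 1024 * 1024);

    // Banks 2 and 3 are still empty, so one of them is returned here.
    uint32_t bank = selector.getLeastOccupiedBank();
    std::printf("least occupied bank: %u (%llu bytes used)\n", bank,
                static_cast<unsigned long long>(selector.getOccupiedMemorySizeForBank(bank)));

    selector.freeOnBanks(0b0011u, 2 * 1024 * 1024);
    return 0;
}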
60
shared/source/memory_manager/memory_constants.h
Normal file
@@ -0,0 +1,60 @@
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include <cstddef>
#include <cstdint>
#include <limits>

constexpr bool is32bit = (sizeof(void *) == 4);
constexpr bool is64bit = (sizeof(void *) == 8);

constexpr uint64_t maxNBitValue(uint64_t n) {
    return ((1ULL << n) - 1);
}
static_assert(maxNBitValue(8) == std::numeric_limits<uint8_t>::max(), "");
static_assert(maxNBitValue(16) == std::numeric_limits<uint16_t>::max(), "");
static_assert(maxNBitValue(32) == std::numeric_limits<uint32_t>::max(), "");

namespace MemoryConstants {
constexpr uint64_t zoneHigh = ~(uint64_t)0xFFFFFFFF;
constexpr uint64_t kiloByte = 1024;
constexpr uint64_t kiloByteShiftSize = 10;
constexpr uint64_t megaByte = 1024 * kiloByte;
constexpr uint64_t gigaByte = 1024 * megaByte;
constexpr size_t minBufferAlignment = 4;
constexpr size_t cacheLineSize = 64;
constexpr size_t pageSize = 4 * kiloByte;
constexpr size_t pageSize64k = 64 * kiloByte;
constexpr size_t preferredAlignment = pageSize;  // alignment preferred for performance reasons, i.e. internal allocations
constexpr size_t allocationAlignment = pageSize; // alignment required to gratify incoming pointer, i.e. passed host_ptr
constexpr size_t slmWindowAlignment = 128 * kiloByte;
constexpr size_t slmWindowSize = 64 * kiloByte;
constexpr uintptr_t pageMask = (pageSize - 1);
constexpr uintptr_t page64kMask = (pageSize64k - 1);
constexpr uint64_t max32BitAppAddress = maxNBitValue(31);
constexpr uint64_t max64BitAppAddress = maxNBitValue(47);
constexpr uint32_t sizeOf4GBinPageEntities = (MemoryConstants::gigaByte * 4 - MemoryConstants::pageSize) / MemoryConstants::pageSize;
constexpr uint64_t max32BitAddress = maxNBitValue(32);
constexpr uint64_t max36BitAddress = (maxNBitValue(36));
constexpr uint64_t max48BitAddress = maxNBitValue(48);
constexpr uintptr_t page4kEntryMask = std::numeric_limits<uintptr_t>::max() & ~MemoryConstants::pageMask;
constexpr uintptr_t page64kEntryMask = std::numeric_limits<uintptr_t>::max() & ~MemoryConstants::page64kMask;
constexpr int GfxAddressBits = is64bit ? 48 : 32;
constexpr uint64_t maxSvmAddress = is64bit ? maxNBitValue(47) : maxNBitValue(32);

} // namespace MemoryConstants

namespace BlitterConstants {
constexpr uint64_t maxBlitWidth = 0x7FC0; // 0x7FFF aligned to cacheline size
constexpr uint64_t maxBlitHeight = 0x7FFF;
enum class BlitDirection : uint32_t {
    BufferToHostPtr,
    HostPtrToBuffer,
    BufferToBuffer
};
} // namespace BlitterConstants
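A small sketch of how these constants are typically combined for page alignment (illustrative; assumes the header above is on the include path):

#include "memory_manager/memory_constants.h"

#include <cassert>

int main() {
    // maxNBitValue(n) is an n-bit all-ones mask; a 4KB page mask is the low 12 bits.
    static_assert(maxNBitValue(12) == MemoryConstants::pageMask, "4KB page mask is the low 12 bits");

    // Rounding a size up to a whole number of 4KB pages with the masks above.
    size_t size = 10000;
    size_t alignedSize = (size + MemoryConstants::pageMask) & MemoryConstants::page4kEntryMask;
    assert(alignedSize == 3 * MemoryConstants::pageSize);
    return 0;
}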
577
shared/source/memory_manager/memory_manager.cpp
Normal file
@@ -0,0 +1,577 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "memory_manager/memory_manager.h"
|
||||
|
||||
#include "command_stream/command_stream_receiver.h"
|
||||
#include "debug_settings/debug_settings_manager.h"
|
||||
#include "execution_environment/root_device_environment.h"
|
||||
#include "gmm_helper/gmm.h"
|
||||
#include "gmm_helper/gmm_helper.h"
|
||||
#include "gmm_helper/page_table_mngr.h"
|
||||
#include "gmm_helper/resource_info.h"
|
||||
#include "helpers/aligned_memory.h"
|
||||
#include "helpers/basic_math.h"
|
||||
#include "helpers/hw_helper.h"
|
||||
#include "helpers/hw_info.h"
|
||||
#include "helpers/string.h"
|
||||
#include "helpers/surface_format_info.h"
|
||||
#include "memory_manager/deferrable_allocation_deletion.h"
|
||||
#include "memory_manager/deferred_deleter.h"
|
||||
#include "memory_manager/host_ptr_manager.h"
|
||||
#include "memory_manager/internal_allocation_storage.h"
|
||||
#include "os_interface/os_context.h"
|
||||
#include "os_interface/os_interface.h"
|
||||
#include "utilities/compiler_support.h"
|
||||
#include "utilities/stackvec.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace NEO {
|
||||
uint32_t MemoryManager::maxOsContextCount = 0u;
|
||||
|
||||
MemoryManager::MemoryManager(ExecutionEnvironment &executionEnvironment) : executionEnvironment(executionEnvironment), hostPtrManager(std::make_unique<HostPtrManager>()),
|
||||
multiContextResourceDestructor(std::make_unique<DeferredDeleter>()) {
|
||||
|
||||
localMemoryUsageBankSelector.reset(new LocalMemoryUsageBankSelector(getBanksCount()));
|
||||
|
||||
bool anyLocalMemorySupported = false;
|
||||
|
||||
for (uint32_t rootDeviceIndex = 0; rootDeviceIndex < executionEnvironment.rootDeviceEnvironments.size(); ++rootDeviceIndex) {
|
||||
auto hwInfo = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo();
|
||||
this->localMemorySupported.push_back(HwHelper::get(hwInfo->platform.eRenderCoreFamily).getEnableLocalMemory(*hwInfo));
|
||||
this->enable64kbpages.push_back(OSInterface::osEnabled64kbPages && hwInfo->capabilityTable.ftr64KBpages);
|
||||
if (DebugManager.flags.Enable64kbpages.get() > -1) {
|
||||
this->enable64kbpages[rootDeviceIndex] = DebugManager.flags.Enable64kbpages.get() != 0;
|
||||
}
|
||||
|
||||
gfxPartitions.push_back(std::make_unique<GfxPartition>());
|
||||
|
||||
anyLocalMemorySupported |= this->localMemorySupported[rootDeviceIndex];
|
||||
}
|
||||
|
||||
if (anyLocalMemorySupported) {
|
||||
pageFaultManager = PageFaultManager::create();
|
||||
}
|
||||
}
|
||||
|
||||
MemoryManager::~MemoryManager() {
|
||||
for (auto &engine : registeredEngines) {
|
||||
engine.osContext->decRefInternal();
|
||||
}
|
||||
if (reservedMemory) {
|
||||
MemoryManager::alignedFreeWrapper(reservedMemory);
|
||||
}
|
||||
}
|
||||
|
||||
void *MemoryManager::allocateSystemMemory(size_t size, size_t alignment) {
|
||||
// Establish a minimum alignment of 16bytes.
|
||||
constexpr size_t minAlignment = 16;
|
||||
alignment = std::max(alignment, minAlignment);
|
||||
auto restrictions = getAlignedMallocRestrictions();
|
||||
void *ptr = alignedMallocWrapper(size, alignment);
|
||||
|
||||
if (restrictions == nullptr || restrictions->minAddress == 0) {
|
||||
return ptr;
|
||||
}
|
||||
|
||||
if (restrictions->minAddress > reinterpret_cast<uintptr_t>(ptr) && ptr != nullptr) {
|
||||
StackVec<void *, 100> invalidMemVector;
|
||||
invalidMemVector.push_back(ptr);
|
||||
do {
|
||||
ptr = alignedMallocWrapper(size, alignment);
|
||||
if (restrictions->minAddress > reinterpret_cast<uintptr_t>(ptr) && ptr != nullptr) {
|
||||
invalidMemVector.push_back(ptr);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} while (1);
|
||||
for (auto &it : invalidMemVector) {
|
||||
alignedFreeWrapper(it);
|
||||
}
|
||||
}
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
GraphicsAllocation *MemoryManager::allocateGraphicsMemoryWithHostPtr(const AllocationData &allocationData) {
|
||||
if (deferredDeleter) {
|
||||
deferredDeleter->drain(true);
|
||||
}
|
||||
GraphicsAllocation *graphicsAllocation = nullptr;
|
||||
auto osStorage = hostPtrManager->prepareOsStorageForAllocation(*this, allocationData.size, allocationData.hostPtr, allocationData.rootDeviceIndex);
|
||||
if (osStorage.fragmentCount > 0) {
|
||||
graphicsAllocation = createGraphicsAllocation(osStorage, allocationData);
|
||||
}
|
||||
return graphicsAllocation;
|
||||
}
|
||||
|
||||
GraphicsAllocation *MemoryManager::allocateGraphicsMemoryForImageFromHostPtr(const AllocationData &allocationData) {
|
||||
bool copyRequired = isCopyRequired(*allocationData.imgInfo, allocationData.hostPtr);
|
||||
|
||||
if (allocationData.hostPtr && !copyRequired) {
|
||||
return allocateGraphicsMemoryWithHostPtr(allocationData);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void MemoryManager::cleanGraphicsMemoryCreatedFromHostPtr(GraphicsAllocation *graphicsAllocation) {
|
||||
hostPtrManager->releaseHandleStorage(graphicsAllocation->fragmentsStorage);
|
||||
cleanOsHandles(graphicsAllocation->fragmentsStorage, graphicsAllocation->getRootDeviceIndex());
|
||||
}
|
||||
|
||||
GraphicsAllocation *MemoryManager::createGraphicsAllocationWithPadding(GraphicsAllocation *inputGraphicsAllocation, size_t sizeWithPadding) {
|
||||
return createPaddedAllocation(inputGraphicsAllocation, sizeWithPadding);
|
||||
}
|
||||
|
||||
GraphicsAllocation *MemoryManager::createPaddedAllocation(GraphicsAllocation *inputGraphicsAllocation, size_t sizeWithPadding) {
|
||||
return allocateGraphicsMemoryWithProperties({inputGraphicsAllocation->getRootDeviceIndex(), sizeWithPadding, GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY});
|
||||
}
|
||||
|
||||
void MemoryManager::freeSystemMemory(void *ptr) {
|
||||
::alignedFree(ptr);
|
||||
}
|
||||
|
||||
void MemoryManager::freeGraphicsMemory(GraphicsAllocation *gfxAllocation) {
|
||||
if (!gfxAllocation) {
|
||||
return;
|
||||
}
|
||||
|
||||
const bool hasFragments = gfxAllocation->fragmentsStorage.fragmentCount != 0;
|
||||
const bool isLocked = gfxAllocation->isLocked();
|
||||
DEBUG_BREAK_IF(hasFragments && isLocked);
|
||||
|
||||
if (!hasFragments) {
|
||||
handleFenceCompletion(gfxAllocation);
|
||||
}
|
||||
if (isLocked) {
|
||||
freeAssociatedResourceImpl(*gfxAllocation);
|
||||
}
|
||||
|
||||
localMemoryUsageBankSelector->freeOnBanks(gfxAllocation->storageInfo.getMemoryBanks(), gfxAllocation->getUnderlyingBufferSize());
|
||||
freeGraphicsMemoryImpl(gfxAllocation);
|
||||
}
|
||||
//if not in use destroy in place
|
||||
//if in use pass to temporary allocation list that is cleaned on blocking calls
|
||||
void MemoryManager::checkGpuUsageAndDestroyGraphicsAllocations(GraphicsAllocation *gfxAllocation) {
|
||||
if (gfxAllocation->isUsed()) {
|
||||
if (gfxAllocation->isUsedByManyOsContexts()) {
|
||||
multiContextResourceDestructor->deferDeletion(new DeferrableAllocationDeletion{*this, *gfxAllocation});
|
||||
multiContextResourceDestructor->drain(false);
|
||||
return;
|
||||
}
|
||||
for (auto &engine : getRegisteredEngines()) {
|
||||
auto osContextId = engine.osContext->getContextId();
|
||||
auto allocationTaskCount = gfxAllocation->getTaskCount(osContextId);
|
||||
if (gfxAllocation->isUsedByOsContext(osContextId) &&
|
||||
allocationTaskCount > *engine.commandStreamReceiver->getTagAddress()) {
|
||||
engine.commandStreamReceiver->getInternalAllocationStorage()->storeAllocation(std::unique_ptr<GraphicsAllocation>(gfxAllocation),
|
||||
TEMPORARY_ALLOCATION);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
freeGraphicsMemory(gfxAllocation);
|
||||
}
|
||||
|
||||
void MemoryManager::waitForDeletions() {
|
||||
if (deferredDeleter) {
|
||||
deferredDeleter->drain(false);
|
||||
}
|
||||
deferredDeleter.reset(nullptr);
|
||||
}
|
||||
bool MemoryManager::isAsyncDeleterEnabled() const {
|
||||
return asyncDeleterEnabled;
|
||||
}
|
||||
|
||||
bool MemoryManager::isLocalMemorySupported(uint32_t rootDeviceIndex) const {
|
||||
return localMemorySupported[rootDeviceIndex];
|
||||
}
|
||||
|
||||
bool MemoryManager::peek64kbPagesEnabled(uint32_t rootDeviceIndex) const {
|
||||
return enable64kbpages[rootDeviceIndex];
|
||||
}
|
||||
|
||||
bool MemoryManager::isMemoryBudgetExhausted() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
OsContext *MemoryManager::createAndRegisterOsContext(CommandStreamReceiver *commandStreamReceiver, aub_stream::EngineType engineType,
|
||||
DeviceBitfield deviceBitfield, PreemptionMode preemptionMode, bool lowPriority) {
|
||||
auto contextId = ++latestContextId;
|
||||
auto osContext = OsContext::create(peekExecutionEnvironment().rootDeviceEnvironments[commandStreamReceiver->getRootDeviceIndex()]->osInterface.get(), contextId, deviceBitfield, engineType, preemptionMode, lowPriority);
|
||||
UNRECOVERABLE_IF(!osContext->isInitialized());
|
||||
osContext->incRefInternal();
|
||||
|
||||
registeredEngines.emplace_back(commandStreamReceiver, osContext);
|
||||
|
||||
return osContext;
|
||||
}
|
||||
|
||||
bool MemoryManager::getAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const void *hostPtr, const StorageInfo &storageInfo) {
|
||||
UNRECOVERABLE_IF(hostPtr == nullptr && !properties.flags.allocateMemory);
|
||||
UNRECOVERABLE_IF(properties.allocationType == GraphicsAllocation::AllocationType::UNKNOWN);
|
||||
|
||||
bool allow64KbPages = false;
|
||||
bool allow32Bit = false;
|
||||
bool forcePin = properties.flags.forcePin;
|
||||
bool mayRequireL3Flush = false;
|
||||
|
||||
switch (properties.allocationType) {
|
||||
case GraphicsAllocation::AllocationType::BUFFER:
|
||||
case GraphicsAllocation::AllocationType::BUFFER_COMPRESSED:
|
||||
case GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY:
|
||||
case GraphicsAllocation::AllocationType::CONSTANT_SURFACE:
|
||||
case GraphicsAllocation::AllocationType::GLOBAL_SURFACE:
|
||||
case GraphicsAllocation::AllocationType::PIPE:
|
||||
case GraphicsAllocation::AllocationType::PRINTF_SURFACE:
|
||||
case GraphicsAllocation::AllocationType::PRIVATE_SURFACE:
|
||||
case GraphicsAllocation::AllocationType::SCRATCH_SURFACE:
|
||||
case GraphicsAllocation::AllocationType::WRITE_COMBINED:
|
||||
allow64KbPages = true;
|
||||
allow32Bit = true;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
switch (properties.allocationType) {
|
||||
case GraphicsAllocation::AllocationType::SVM_GPU:
|
||||
case GraphicsAllocation::AllocationType::SVM_ZERO_COPY:
|
||||
allow64KbPages = true;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
switch (properties.allocationType) {
|
||||
case GraphicsAllocation::AllocationType::BUFFER:
|
||||
case GraphicsAllocation::AllocationType::BUFFER_COMPRESSED:
|
||||
case GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY:
|
||||
case GraphicsAllocation::AllocationType::WRITE_COMBINED:
|
||||
forcePin = true;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
switch (properties.allocationType) {
|
||||
case GraphicsAllocation::AllocationType::BUFFER:
|
||||
case GraphicsAllocation::AllocationType::BUFFER_COMPRESSED:
|
||||
case GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY:
|
||||
case GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR:
|
||||
case GraphicsAllocation::AllocationType::GLOBAL_SURFACE:
|
||||
case GraphicsAllocation::AllocationType::IMAGE:
|
||||
case GraphicsAllocation::AllocationType::MAP_ALLOCATION:
|
||||
case GraphicsAllocation::AllocationType::PIPE:
|
||||
case GraphicsAllocation::AllocationType::SHARED_BUFFER:
|
||||
case GraphicsAllocation::AllocationType::SHARED_IMAGE:
|
||||
case GraphicsAllocation::AllocationType::SHARED_RESOURCE_COPY:
|
||||
case GraphicsAllocation::AllocationType::SVM_CPU:
|
||||
case GraphicsAllocation::AllocationType::SVM_GPU:
|
||||
case GraphicsAllocation::AllocationType::SVM_ZERO_COPY:
|
||||
case GraphicsAllocation::AllocationType::WRITE_COMBINED:
|
||||
mayRequireL3Flush = true;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
switch (properties.allocationType) {
|
||||
case GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY:
|
||||
case GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER:
|
||||
case GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR:
|
||||
case GraphicsAllocation::AllocationType::FILL_PATTERN:
|
||||
case GraphicsAllocation::AllocationType::MAP_ALLOCATION:
|
||||
case GraphicsAllocation::AllocationType::MCS:
|
||||
case GraphicsAllocation::AllocationType::PREEMPTION:
|
||||
case GraphicsAllocation::AllocationType::PROFILING_TAG_BUFFER:
|
||||
case GraphicsAllocation::AllocationType::SHARED_CONTEXT_IMAGE:
|
||||
case GraphicsAllocation::AllocationType::SVM_CPU:
|
||||
case GraphicsAllocation::AllocationType::SVM_ZERO_COPY:
|
||||
case GraphicsAllocation::AllocationType::TAG_BUFFER:
|
||||
case GraphicsAllocation::AllocationType::GLOBAL_FENCE:
|
||||
case GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY:
|
||||
allocationData.flags.useSystemMemory = true;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
allocationData.flags.shareable = properties.flags.shareable;
|
||||
allocationData.flags.requiresCpuAccess = GraphicsAllocation::isCpuAccessRequired(properties.allocationType);
|
||||
allocationData.flags.allocateMemory = properties.flags.allocateMemory;
|
||||
allocationData.flags.allow32Bit = allow32Bit;
|
||||
allocationData.flags.allow64kbPages = allow64KbPages;
|
||||
allocationData.flags.forcePin = forcePin;
|
||||
allocationData.flags.uncacheable = properties.flags.uncacheable;
|
||||
allocationData.flags.flushL3 =
|
||||
(mayRequireL3Flush ? properties.flags.flushL3RequiredForRead | properties.flags.flushL3RequiredForWrite : 0u);
|
||||
allocationData.flags.preferRenderCompressed = GraphicsAllocation::AllocationType::BUFFER_COMPRESSED == properties.allocationType;
|
||||
allocationData.flags.multiOsContextCapable = properties.flags.multiOsContextCapable;
|
||||
|
||||
allocationData.hostPtr = hostPtr;
|
||||
allocationData.size = properties.size;
|
||||
allocationData.type = properties.allocationType;
|
||||
allocationData.storageInfo = storageInfo;
|
||||
allocationData.alignment = properties.alignment ? properties.alignment : MemoryConstants::preferredAlignment;
|
||||
allocationData.imgInfo = properties.imgInfo;
|
||||
|
||||
if (allocationData.flags.allocateMemory) {
|
||||
allocationData.hostPtr = nullptr;
|
||||
}
|
||||
|
||||
allocationData.rootDeviceIndex = properties.rootDeviceIndex;
|
||||
allocationData.flags.resource48Bit = properties.flags.resource48Bit;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
GraphicsAllocation *MemoryManager::allocateGraphicsMemoryInPreferredPool(const AllocationProperties &properties, const void *hostPtr) {
|
||||
AllocationData allocationData;
|
||||
getAllocationData(allocationData, properties, hostPtr, createStorageInfoFromProperties(properties));
|
||||
overrideAllocationData(allocationData, properties);
|
||||
|
||||
AllocationStatus status = AllocationStatus::Error;
|
||||
GraphicsAllocation *allocation = allocateGraphicsMemoryInDevicePool(allocationData, status);
|
||||
if (allocation) {
|
||||
localMemoryUsageBankSelector->reserveOnBanks(allocationData.storageInfo.getMemoryBanks(), allocation->getUnderlyingBufferSize());
|
||||
}
|
||||
if (!allocation && status == AllocationStatus::RetryInNonDevicePool) {
|
||||
allocation = allocateGraphicsMemory(allocationData);
|
||||
}
|
||||
FileLoggerInstance().logAllocation(allocation);
|
||||
return allocation;
|
||||
}
|
||||
|
||||
bool MemoryManager::mapAuxGpuVA(GraphicsAllocation *graphicsAllocation) {
|
||||
auto index = graphicsAllocation->getRootDeviceIndex();
|
||||
if (executionEnvironment.rootDeviceEnvironments[index]->pageTableManager.get()) {
|
||||
return executionEnvironment.rootDeviceEnvironments[index]->pageTableManager->updateAuxTable(graphicsAllocation->getGpuAddress(), graphicsAllocation->getDefaultGmm(), true);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
GraphicsAllocation *MemoryManager::allocateGraphicsMemory(const AllocationData &allocationData) {
|
||||
if (allocationData.type == GraphicsAllocation::AllocationType::IMAGE || allocationData.type == GraphicsAllocation::AllocationType::SHARED_RESOURCE_COPY) {
|
||||
UNRECOVERABLE_IF(allocationData.imgInfo == nullptr);
|
||||
return allocateGraphicsMemoryForImage(allocationData);
|
||||
}
|
||||
if (allocationData.flags.shareable) {
|
||||
return allocateShareableMemory(allocationData);
|
||||
}
|
||||
if (useNonSvmHostPtrAlloc(allocationData.type, allocationData.rootDeviceIndex)) {
|
||||
auto allocation = allocateGraphicsMemoryForNonSvmHostPtr(allocationData);
|
||||
if (allocation) {
|
||||
allocation->setFlushL3Required(allocationData.flags.flushL3);
|
||||
}
|
||||
return allocation;
|
||||
}
|
||||
if (useInternal32BitAllocator(allocationData.type) ||
|
||||
(force32bitAllocations && allocationData.flags.allow32Bit && is64bit)) {
|
||||
return allocate32BitGraphicsMemoryImpl(allocationData);
|
||||
}
|
||||
if (allocationData.hostPtr) {
|
||||
return allocateGraphicsMemoryWithHostPtr(allocationData);
|
||||
}
|
||||
if (peek64kbPagesEnabled(allocationData.rootDeviceIndex) && allocationData.flags.allow64kbPages) {
|
||||
return allocateGraphicsMemory64kb(allocationData);
|
||||
}
|
||||
return allocateGraphicsMemoryWithAlignment(allocationData);
|
||||
}
|
||||
|
||||
GraphicsAllocation *MemoryManager::allocateGraphicsMemoryForImage(const AllocationData &allocationData) {
|
||||
auto gmm = std::make_unique<Gmm>(executionEnvironment.getGmmClientContext(), *allocationData.imgInfo, allocationData.storageInfo);
|
||||
|
||||
// AllocationData needs to be reconfigured for System Memory paths
|
||||
AllocationData allocationDataWithSize = allocationData;
|
||||
allocationDataWithSize.size = allocationData.imgInfo->size;
|
||||
|
||||
auto hostPtrAllocation = allocateGraphicsMemoryForImageFromHostPtr(allocationDataWithSize);
|
||||
|
||||
if (hostPtrAllocation) {
|
||||
hostPtrAllocation->setDefaultGmm(gmm.release());
|
||||
return hostPtrAllocation;
|
||||
}
|
||||
|
||||
return allocateGraphicsMemoryForImageImpl(allocationDataWithSize, std::move(gmm));
|
||||
}
|
||||
|
||||
EngineControlContainer &MemoryManager::getRegisteredEngines() {
|
||||
return registeredEngines;
|
||||
}
|
||||
|
||||
EngineControl *MemoryManager::getRegisteredEngineForCsr(CommandStreamReceiver *commandStreamReceiver) {
|
||||
EngineControl *engineCtrl = nullptr;
|
||||
for (auto &engine : registeredEngines) {
|
||||
if (engine.commandStreamReceiver == commandStreamReceiver) {
|
||||
engineCtrl = &engine;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return engineCtrl;
|
||||
}
|
||||
|
||||
void MemoryManager::unregisterEngineForCsr(CommandStreamReceiver *commandStreamReceiver) {
|
||||
auto numRegisteredEngines = registeredEngines.size();
|
||||
for (auto i = 0u; i < numRegisteredEngines; i++) {
|
||||
if (registeredEngines[i].commandStreamReceiver == commandStreamReceiver) {
|
||||
registeredEngines[i].osContext->decRefInternal();
|
||||
std::swap(registeredEngines[i], registeredEngines[numRegisteredEngines - 1]);
|
||||
registeredEngines.pop_back();
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void *MemoryManager::lockResource(GraphicsAllocation *graphicsAllocation) {
|
||||
if (!graphicsAllocation) {
|
||||
return nullptr;
|
||||
}
|
||||
if (graphicsAllocation->isLocked()) {
|
||||
return graphicsAllocation->getLockedPtr();
|
||||
}
|
||||
auto retVal = lockResourceImpl(*graphicsAllocation);
|
||||
graphicsAllocation->lock(retVal);
|
||||
return retVal;
|
||||
}
|
||||
|
||||
void MemoryManager::unlockResource(GraphicsAllocation *graphicsAllocation) {
|
||||
if (!graphicsAllocation) {
|
||||
return;
|
||||
}
|
||||
DEBUG_BREAK_IF(!graphicsAllocation->isLocked());
|
||||
unlockResourceImpl(*graphicsAllocation);
|
||||
graphicsAllocation->unlock();
|
||||
}
|
||||
|
||||
HeapIndex MemoryManager::selectHeap(const GraphicsAllocation *allocation, bool hasPointer, bool isFullRangeSVM) {
|
||||
if (allocation) {
|
||||
if (useInternal32BitAllocator(allocation->getAllocationType())) {
|
||||
return HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY;
|
||||
}
|
||||
if (allocation->is32BitAllocation()) {
|
||||
return HeapIndex::HEAP_EXTERNAL;
|
||||
}
|
||||
}
|
||||
if (isFullRangeSVM) {
|
||||
if (hasPointer) {
|
||||
return HeapIndex::HEAP_SVM;
|
||||
}
|
||||
if (allocation && allocation->getDefaultGmm()->gmmResourceInfo->is64KBPageSuitable()) {
|
||||
return HeapIndex::HEAP_STANDARD64KB;
|
||||
}
|
||||
return HeapIndex::HEAP_STANDARD;
|
||||
}
|
||||
// Limited range allocation goes to STANDARD heap
|
||||
return HeapIndex::HEAP_STANDARD;
|
||||
}
|
||||
|
||||
bool MemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, const void *memoryToCopy, size_t sizeToCopy) {
|
||||
if (!graphicsAllocation->getUnderlyingBuffer()) {
|
||||
return false;
|
||||
}
|
||||
memcpy_s(graphicsAllocation->getUnderlyingBuffer(), graphicsAllocation->getUnderlyingBufferSize(), memoryToCopy, sizeToCopy);
|
||||
return true;
|
||||
}
|
||||
|
||||
void MemoryManager::waitForEnginesCompletion(GraphicsAllocation &graphicsAllocation) {
|
||||
for (auto &engine : getRegisteredEngines()) {
|
||||
auto osContextId = engine.osContext->getContextId();
|
||||
auto allocationTaskCount = graphicsAllocation.getTaskCount(osContextId);
|
||||
if (graphicsAllocation.isUsedByOsContext(osContextId) &&
|
||||
allocationTaskCount > *engine.commandStreamReceiver->getTagAddress()) {
|
||||
engine.commandStreamReceiver->waitForCompletionWithTimeout(false, TimeoutControls::maxTimeout, allocationTaskCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MemoryManager::cleanTemporaryAllocationListOnAllEngines(bool waitForCompletion) {
|
||||
for (auto &engine : getRegisteredEngines()) {
|
||||
auto csr = engine.commandStreamReceiver;
|
||||
if (waitForCompletion) {
|
||||
csr->waitForCompletionWithTimeout(false, 0, csr->peekLatestSentTaskCount());
|
||||
}
|
||||
csr->getInternalAllocationStorage()->cleanAllocationList(*csr->getTagAddress(), AllocationUsage::TEMPORARY_ALLOCATION);
|
||||
}
|
||||
}
|
||||
|
||||
void *MemoryManager::getReservedMemory(size_t size, size_t alignment) {
|
||||
static std::mutex mutex;
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
if (!reservedMemory) {
|
||||
reservedMemory = allocateSystemMemory(size, alignment);
|
||||
}
|
||||
return reservedMemory;
|
||||
}
|
||||
|
||||
bool MemoryManager::isHostPointerTrackingEnabled(uint32_t rootDeviceIndex) {
|
||||
if (DebugManager.flags.EnableHostPtrTracking.get() != -1) {
|
||||
return !!DebugManager.flags.EnableHostPtrTracking.get();
|
||||
}
|
||||
return (peekExecutionEnvironment().rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->capabilityTable.hostPtrTrackingEnabled | is32bit);
|
||||
}
|
||||
|
||||
bool MemoryManager::isCopyRequired(ImageInfo &imgInfo, const void *hostPtr) {
|
||||
if (!hostPtr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t imageWidth = imgInfo.imgDesc.imageWidth;
|
||||
size_t imageHeight = 1;
|
||||
size_t imageDepth = 1;
|
||||
size_t imageCount = 1;
|
||||
|
||||
switch (imgInfo.imgDesc.imageType) {
|
||||
case ImageType::Image3D:
|
||||
imageDepth = imgInfo.imgDesc.imageDepth;
|
||||
CPP_ATTRIBUTE_FALLTHROUGH;
|
||||
case ImageType::Image2D:
|
||||
case ImageType::Image2DArray:
|
||||
imageHeight = imgInfo.imgDesc.imageHeight;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
auto hostPtrRowPitch = imgInfo.imgDesc.imageRowPitch ? imgInfo.imgDesc.imageRowPitch : imageWidth * imgInfo.surfaceFormat->ImageElementSizeInBytes;
|
||||
auto hostPtrSlicePitch = imgInfo.imgDesc.imageSlicePitch ? imgInfo.imgDesc.imageSlicePitch : hostPtrRowPitch * imgInfo.imgDesc.imageHeight;
|
||||
|
||||
size_t pointerPassedSize = hostPtrRowPitch * imageHeight * imageDepth * imageCount;
|
||||
auto alignedSizePassedPointer = alignSizeWholePage(const_cast<void *>(hostPtr), pointerPassedSize);
|
||||
auto alignedSizeRequiredForAllocation = alignSizeWholePage(const_cast<void *>(hostPtr), imgInfo.size);
|
||||
|
||||
// Passed pointer doesn't have enough memory, copy is needed
|
||||
bool copyRequired = (alignedSizeRequiredForAllocation > alignedSizePassedPointer) |
|
||||
(imgInfo.rowPitch != hostPtrRowPitch) |
|
||||
(imgInfo.slicePitch != hostPtrSlicePitch) |
|
||||
((reinterpret_cast<uintptr_t>(hostPtr) & (MemoryConstants::cacheLineSize - 1)) != 0) |
|
||||
!imgInfo.linearStorage;
|
||||
|
||||
return copyRequired;
|
||||
}
|
||||
|
||||
void MemoryManager::overrideAllocationData(AllocationData &allocationData, const AllocationProperties &properties) {
|
||||
int32_t directRingPlacement = DebugManager.flags.DirectSubmissionBufferPlacement.get();
|
||||
if (properties.allocationType == GraphicsAllocation::AllocationType::RING_BUFFER &&
|
||||
directRingPlacement != -1) {
|
||||
if (directRingPlacement == 0) {
|
||||
allocationData.flags.requiresCpuAccess = true;
|
||||
allocationData.flags.useSystemMemory = false;
|
||||
} else {
|
||||
allocationData.flags.requiresCpuAccess = false;
|
||||
allocationData.flags.useSystemMemory = true;
|
||||
}
|
||||
}
|
||||
int32_t directSemaphorePlacement = DebugManager.flags.DirectSubmissionSemaphorePlacement.get();
|
||||
if (properties.allocationType == GraphicsAllocation::AllocationType::SEMAPHORE_BUFFER &&
|
||||
directSemaphorePlacement != -1) {
|
||||
if (directSemaphorePlacement == 0) {
|
||||
allocationData.flags.requiresCpuAccess = true;
|
||||
allocationData.flags.useSystemMemory = false;
|
||||
} else {
|
||||
allocationData.flags.requiresCpuAccess = false;
|
||||
allocationData.flags.useSystemMemory = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace NEO
|
||||
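A typical call into this allocation path (illustrative sketch; assumes an already-constructed OS-specific MemoryManager, root device index 0, and hypothetical helper names):

#include "memory_manager/memory_manager.h"

using namespace NEO;

GraphicsAllocation *allocateScratchBuffer(MemoryManager &memoryManager) {
    // {rootDeviceIndex, size, allocationType}, the same brace-init form used by createPaddedAllocation above.
    AllocationProperties properties{0u, MemoryConstants::pageSize64k,
                                    GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY};
    return memoryManager.allocateGraphicsMemoryWithProperties(properties);
}

void releaseScratchBuffer(MemoryManager &memoryManager, GraphicsAllocation *allocation) {
    // Returns the allocation to the underlying allocator and updates local memory bank usage.
    memoryManager.freeGraphicsMemory(allocation);
}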
258
shared/source/memory_manager/memory_manager.h
Normal file
@@ -0,0 +1,258 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "command_stream/preemption_mode.h"
|
||||
#include "helpers/aligned_memory.h"
|
||||
#include "helpers/bit_helpers.h"
|
||||
#include "helpers/common_types.h"
|
||||
#include "helpers/engine_control.h"
|
||||
#include "memory_manager/allocation_properties.h"
|
||||
#include "memory_manager/gfx_partition.h"
|
||||
#include "memory_manager/graphics_allocation.h"
|
||||
#include "memory_manager/host_ptr_defines.h"
|
||||
#include "memory_manager/local_memory_usage.h"
|
||||
#include "page_fault_manager/cpu_page_fault_manager.h"
|
||||
|
||||
#include "engine_node.h"
|
||||
|
||||
#include <bitset>
|
||||
#include <cstdint>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
|
||||
namespace NEO {
|
||||
class DeferredDeleter;
|
||||
class ExecutionEnvironment;
|
||||
class Gmm;
|
||||
class HostPtrManager;
|
||||
class OsContext;
|
||||
|
||||
enum AllocationUsage {
|
||||
TEMPORARY_ALLOCATION,
|
||||
REUSABLE_ALLOCATION
|
||||
};
|
||||
|
||||
struct AlignedMallocRestrictions {
|
||||
uintptr_t minAddress;
|
||||
};
|
||||
|
||||
constexpr size_t paddingBufferSize = 2 * MemoryConstants::megaByte;
|
||||
|
||||
class MemoryManager {
|
||||
public:
|
||||
enum AllocationStatus {
|
||||
Success = 0,
|
||||
Error,
|
||||
InvalidHostPointer,
|
||||
RetryInNonDevicePool
|
||||
};
|
||||
|
||||
MemoryManager(ExecutionEnvironment &executionEnvironment);
|
||||
|
||||
virtual ~MemoryManager();
|
||||
MOCKABLE_VIRTUAL void *allocateSystemMemory(size_t size, size_t alignment);
|
||||
|
||||
virtual void addAllocationToHostPtrManager(GraphicsAllocation *memory) = 0;
|
||||
virtual void removeAllocationFromHostPtrManager(GraphicsAllocation *memory) = 0;
|
||||
|
||||
MOCKABLE_VIRTUAL GraphicsAllocation *allocateGraphicsMemoryWithProperties(const AllocationProperties &properties) {
|
||||
return allocateGraphicsMemoryInPreferredPool(properties, nullptr);
|
||||
}
|
||||
|
||||
MOCKABLE_VIRTUAL GraphicsAllocation *allocateGraphicsMemoryWithProperties(const AllocationProperties &properties, const void *ptr) {
|
||||
return allocateGraphicsMemoryInPreferredPool(properties, ptr);
|
||||
}
|
||||
|
||||
MOCKABLE_VIRTUAL GraphicsAllocation *allocateGraphicsMemoryInPreferredPool(const AllocationProperties &properties, const void *hostPtr);
|
||||
|
||||
virtual GraphicsAllocation *createGraphicsAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool requireSpecificBitness) = 0;
|
||||
|
||||
virtual GraphicsAllocation *createGraphicsAllocationFromNTHandle(void *handle, uint32_t rootDeviceIndex) = 0;
|
||||
|
||||
virtual bool mapAuxGpuVA(GraphicsAllocation *graphicsAllocation);
|
||||
|
||||
void *lockResource(GraphicsAllocation *graphicsAllocation);
|
||||
void unlockResource(GraphicsAllocation *graphicsAllocation);
|
||||
|
||||
void cleanGraphicsMemoryCreatedFromHostPtr(GraphicsAllocation *);
|
||||
GraphicsAllocation *createGraphicsAllocationWithPadding(GraphicsAllocation *inputGraphicsAllocation, size_t sizeWithPadding);
|
||||
virtual GraphicsAllocation *createPaddedAllocation(GraphicsAllocation *inputGraphicsAllocation, size_t sizeWithPadding);
|
||||
|
||||
virtual AllocationStatus populateOsHandles(OsHandleStorage &handleStorage, uint32_t rootDeviceIndex) = 0;
|
||||
virtual void cleanOsHandles(OsHandleStorage &handleStorage, uint32_t rootDeviceIndex) = 0;
|
||||
|
||||
void freeSystemMemory(void *ptr);
|
||||
|
||||
virtual void freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation) = 0;
|
||||
MOCKABLE_VIRTUAL void freeGraphicsMemory(GraphicsAllocation *gfxAllocation);
|
||||
virtual void handleFenceCompletion(GraphicsAllocation *allocation){};
|
||||
|
||||
void checkGpuUsageAndDestroyGraphicsAllocations(GraphicsAllocation *gfxAllocation);
|
||||
|
||||
virtual uint64_t getSystemSharedMemory(uint32_t rootDeviceIndex) = 0;
|
||||
virtual uint64_t getLocalMemorySize(uint32_t rootDeviceIndex) = 0;
|
||||
|
||||
uint64_t getMaxApplicationAddress() { return is64bit ? MemoryConstants::max64BitAppAddress : MemoryConstants::max32BitAppAddress; };
|
||||
uint64_t getInternalHeapBaseAddress(uint32_t rootDeviceIndex) { return getGfxPartition(rootDeviceIndex)->getHeapBase(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY); }
|
||||
uint64_t getExternalHeapBaseAddress(uint32_t rootDeviceIndex) { return getGfxPartition(rootDeviceIndex)->getHeapBase(HeapIndex::HEAP_EXTERNAL); }
|
||||
|
||||
bool isLimitedRange(uint32_t rootDeviceIndex) { return getGfxPartition(rootDeviceIndex)->isLimitedRange(); }
|
||||
|
||||
bool peek64kbPagesEnabled(uint32_t rootDeviceIndex) const;
|
||||
bool peekForce32BitAllocations() const { return force32bitAllocations; }
|
||||
void setForce32BitAllocations(bool newValue) { force32bitAllocations = newValue; }
|
||||
|
||||
bool peekVirtualPaddingSupport() const { return virtualPaddingAvailable; }
|
||||
void setVirtualPaddingSupport(bool virtualPaddingSupport) { virtualPaddingAvailable = virtualPaddingSupport; }
|
||||
|
||||
DeferredDeleter *getDeferredDeleter() const {
|
||||
return deferredDeleter.get();
|
||||
}
|
||||
|
||||
PageFaultManager *getPageFaultManager() const {
|
||||
return pageFaultManager.get();
|
||||
}
|
||||
|
||||
void waitForDeletions();
|
||||
void waitForEnginesCompletion(GraphicsAllocation &graphicsAllocation);
|
||||
void cleanTemporaryAllocationListOnAllEngines(bool waitForCompletion);
|
||||
|
||||
bool isAsyncDeleterEnabled() const;
|
||||
bool isLocalMemorySupported(uint32_t rootDeviceIndex) const;
|
||||
virtual bool isMemoryBudgetExhausted() const;
|
||||
|
||||
virtual AlignedMallocRestrictions *getAlignedMallocRestrictions() {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
MOCKABLE_VIRTUAL void *alignedMallocWrapper(size_t bytes, size_t alignment) {
|
||||
return ::alignedMalloc(bytes, alignment);
|
||||
}
|
||||
|
||||
MOCKABLE_VIRTUAL void alignedFreeWrapper(void *ptr) {
|
||||
::alignedFree(ptr);
|
||||
}
|
||||
|
||||
MOCKABLE_VIRTUAL bool isHostPointerTrackingEnabled(uint32_t rootDeviceIndex);
|
||||
|
||||
void setForceNonSvmForExternalHostPtr(bool mode) {
|
||||
forceNonSvmForExternalHostPtr = mode;
|
||||
}
|
||||
|
||||
const ExecutionEnvironment &peekExecutionEnvironment() const { return executionEnvironment; }
|
||||
|
||||
OsContext *createAndRegisterOsContext(CommandStreamReceiver *commandStreamReceiver, aub_stream::EngineType engineType,
|
||||
DeviceBitfield deviceBitfield, PreemptionMode preemptionMode, bool lowPriority);
|
||||
uint32_t getRegisteredEnginesCount() const { return static_cast<uint32_t>(registeredEngines.size()); }
|
||||
EngineControlContainer &getRegisteredEngines();
|
||||
EngineControl *getRegisteredEngineForCsr(CommandStreamReceiver *commandStreamReceiver);
|
||||
void unregisterEngineForCsr(CommandStreamReceiver *commandStreamReceiver);
|
||||
HostPtrManager *getHostPtrManager() const { return hostPtrManager.get(); }
|
||||
void setDefaultEngineIndex(uint32_t index) { defaultEngineIndex = index; }
|
||||
virtual bool copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, const void *memoryToCopy, size_t sizeToCopy);
|
||||
static HeapIndex selectHeap(const GraphicsAllocation *allocation, bool hasPointer, bool isFullRangeSVM);
|
||||
static std::unique_ptr<MemoryManager> createMemoryManager(ExecutionEnvironment &executionEnvironment);
|
||||
virtual void *reserveCpuAddressRange(size_t size, uint32_t rootDeviceIndex) { return nullptr; };
|
||||
virtual void releaseReservedCpuAddressRange(void *reserved, size_t size, uint32_t rootDeviceIndex){};
|
||||
void *getReservedMemory(size_t size, size_t alignment);
|
||||
GfxPartition *getGfxPartition(uint32_t rootDeviceIndex) { return gfxPartitions.at(rootDeviceIndex).get(); }
|
||||
|
||||
static uint32_t maxOsContextCount;
|
||||
virtual void commonCleanup(){};
|
||||
|
||||
protected:
|
||||
struct AllocationData {
|
||||
union {
|
||||
struct {
|
||||
uint32_t allocateMemory : 1;
|
||||
uint32_t allow64kbPages : 1;
|
||||
uint32_t allow32Bit : 1;
|
||||
uint32_t useSystemMemory : 1;
|
||||
uint32_t forcePin : 1;
|
||||
uint32_t uncacheable : 1;
|
||||
uint32_t flushL3 : 1;
|
||||
uint32_t preferRenderCompressed : 1;
|
||||
uint32_t multiOsContextCapable : 1;
|
||||
uint32_t requiresCpuAccess : 1;
|
||||
uint32_t shareable : 1;
|
||||
uint32_t resource48Bit : 1;
|
||||
uint32_t reserved : 20;
|
||||
} flags;
|
||||
uint32_t allFlags = 0;
|
||||
};
|
||||
static_assert(sizeof(AllocationData::flags) == sizeof(AllocationData::allFlags), "");
|
||||
GraphicsAllocation::AllocationType type = GraphicsAllocation::AllocationType::UNKNOWN;
|
||||
const void *hostPtr = nullptr;
|
||||
size_t size = 0;
|
||||
size_t alignment = 0;
|
||||
StorageInfo storageInfo = {};
|
||||
ImageInfo *imgInfo = nullptr;
|
||||
uint32_t rootDeviceIndex = 0;
|
||||
};
|
||||
|
||||
static bool getAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const void *hostPtr, const StorageInfo &storageInfo);
|
||||
static void overrideAllocationData(AllocationData &allocationData, const AllocationProperties &properties);
|
||||
static bool useInternal32BitAllocator(GraphicsAllocation::AllocationType allocationType) {
|
||||
return allocationType == GraphicsAllocation::AllocationType::KERNEL_ISA ||
|
||||
allocationType == GraphicsAllocation::AllocationType::INTERNAL_HEAP;
|
||||
}
|
||||
static bool isCopyRequired(ImageInfo &imgInfo, const void *hostPtr);
|
||||
|
||||
bool useNonSvmHostPtrAlloc(GraphicsAllocation::AllocationType allocationType, uint32_t rootDeviceIndex) {
|
||||
bool isExternalHostPtrAlloc = (allocationType == GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR);
|
||||
bool isMapAlloc = (allocationType == GraphicsAllocation::AllocationType::MAP_ALLOCATION);
|
||||
|
||||
if (forceNonSvmForExternalHostPtr && isExternalHostPtrAlloc) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool isNonSvmPtrCapable = ((!peekExecutionEnvironment().isFullRangeSvm() || !isHostPointerTrackingEnabled(rootDeviceIndex)) & !is32bit);
|
||||
|
||||
return isNonSvmPtrCapable && (isExternalHostPtrAlloc || isMapAlloc);
|
||||
}
|
||||
StorageInfo createStorageInfoFromProperties(const AllocationProperties &properties);
|
||||
|
||||
virtual GraphicsAllocation *createGraphicsAllocation(OsHandleStorage &handleStorage, const AllocationData &allocationData) = 0;
|
||||
virtual GraphicsAllocation *allocateGraphicsMemoryForNonSvmHostPtr(const AllocationData &allocationData) = 0;
|
||||
GraphicsAllocation *allocateGraphicsMemory(const AllocationData &allocationData);
|
||||
virtual GraphicsAllocation *allocateGraphicsMemoryWithHostPtr(const AllocationData &allocationData);
|
||||
virtual GraphicsAllocation *allocateGraphicsMemoryWithAlignment(const AllocationData &allocationData) = 0;
|
||||
virtual GraphicsAllocation *allocateGraphicsMemory64kb(const AllocationData &allocationData) = 0;
|
||||
virtual GraphicsAllocation *allocate32BitGraphicsMemoryImpl(const AllocationData &allocationData) = 0;
|
||||
virtual GraphicsAllocation *allocateGraphicsMemoryInDevicePool(const AllocationData &allocationData, AllocationStatus &status) = 0;
|
||||
GraphicsAllocation *allocateGraphicsMemoryForImageFromHostPtr(const AllocationData &allocationData);
|
||||
MOCKABLE_VIRTUAL GraphicsAllocation *allocateGraphicsMemoryForImage(const AllocationData &allocationData);
|
||||
virtual GraphicsAllocation *allocateGraphicsMemoryForImageImpl(const AllocationData &allocationData, std::unique_ptr<Gmm> gmm) = 0;
|
||||
virtual GraphicsAllocation *allocateShareableMemory(const AllocationData &allocationData) = 0;
|
||||
virtual void *lockResourceImpl(GraphicsAllocation &graphicsAllocation) = 0;
|
||||
virtual void unlockResourceImpl(GraphicsAllocation &graphicsAllocation) = 0;
|
||||
virtual void freeAssociatedResourceImpl(GraphicsAllocation &graphicsAllocation) { return unlockResourceImpl(graphicsAllocation); };
|
||||
uint32_t getBanksCount();
|
||||
|
||||
bool forceNonSvmForExternalHostPtr = false;
|
||||
bool force32bitAllocations = false;
|
||||
bool virtualPaddingAvailable = false;
|
||||
std::unique_ptr<DeferredDeleter> deferredDeleter;
|
||||
bool asyncDeleterEnabled = false;
|
||||
std::vector<bool> enable64kbpages;
|
||||
std::vector<bool> localMemorySupported;
|
||||
bool supportsMultiStorageResources = true;
|
||||
ExecutionEnvironment &executionEnvironment;
|
||||
EngineControlContainer registeredEngines;
|
||||
std::unique_ptr<HostPtrManager> hostPtrManager;
|
||||
uint32_t latestContextId = std::numeric_limits<uint32_t>::max();
|
||||
uint32_t defaultEngineIndex = 0;
|
||||
std::unique_ptr<DeferredDeleter> multiContextResourceDestructor;
|
||||
std::vector<std::unique_ptr<GfxPartition>> gfxPartitions;
|
||||
std::unique_ptr<LocalMemoryUsageBankSelector> localMemoryUsageBankSelector;
|
||||
void *reservedMemory = nullptr;
|
||||
std::unique_ptr<PageFaultManager> pageFaultManager;
|
||||
};
|
||||
|
||||
std::unique_ptr<DeferredDeleter> createDeferredDeleter();
|
||||
} // namespace NEO
|
||||
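The static heap selection helper can be exercised directly; the expected values below follow from the selectHeap implementation earlier in this commit (illustrative sketch):

#include "memory_manager/memory_manager.h"

#include <cassert>

int main() {
    using NEO::HeapIndex;
    using NEO::MemoryManager;

    // Full-range SVM with a host pointer lands in the SVM heap.
    assert(MemoryManager::selectHeap(nullptr, true, true) == HeapIndex::HEAP_SVM);

    // Without full-range SVM, limited-range allocations go to the STANDARD heap.
    assert(MemoryManager::selectHeap(nullptr, false, false) == HeapIndex::HEAP_STANDARD);
    return 0;
}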
25
shared/source/memory_manager/memory_operations_handler.h
Normal file
@@ -0,0 +1,25 @@
/*
 * Copyright (C) 2019-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include "memory_manager/memory_operations_status.h"
#include "utilities/arrayref.h"

namespace NEO {

class GraphicsAllocation;

class MemoryOperationsHandler {
  public:
    MemoryOperationsHandler() = default;
    virtual ~MemoryOperationsHandler() = default;

    virtual MemoryOperationsStatus makeResident(ArrayRef<GraphicsAllocation *> gfxAllocations) = 0;
    virtual MemoryOperationsStatus evict(GraphicsAllocation &gfxAllocation) = 0;
    virtual MemoryOperationsStatus isResident(GraphicsAllocation &gfxAllocation) = 0;
};
} // namespace NEO
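A minimal sketch of an implementation of this interface (hypothetical tracking handler, not one of the driver's OS-specific handlers; assumes ArrayRef is range-iterable):

#include "memory_manager/memory_operations_handler.h"

#include <unordered_set>

namespace NEO {

// Hypothetical handler that only tracks residency in a host-side set.
class TrackingMemoryOperationsHandler : public MemoryOperationsHandler {
  public:
    MemoryOperationsStatus makeResident(ArrayRef<GraphicsAllocation *> gfxAllocations) override {
        for (auto allocation : gfxAllocations) {
            residentAllocations.insert(allocation);
        }
        return MemoryOperationsStatus::SUCCESS;
    }
    MemoryOperationsStatus evict(GraphicsAllocation &gfxAllocation) override {
        return residentAllocations.erase(&gfxAllocation) ? MemoryOperationsStatus::SUCCESS
                                                         : MemoryOperationsStatus::MEMORY_NOT_FOUND;
    }
    MemoryOperationsStatus isResident(GraphicsAllocation &gfxAllocation) override {
        return residentAllocations.count(&gfxAllocation) ? MemoryOperationsStatus::SUCCESS
                                                         : MemoryOperationsStatus::MEMORY_NOT_FOUND;
    }

  protected:
    std::unordered_set<GraphicsAllocation *> residentAllocations;
};

} // namespace NEO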
22
shared/source/memory_manager/memory_operations_status.h
Normal file
22
shared/source/memory_manager/memory_operations_status.h
Normal file
@@ -0,0 +1,22 @@
/*
 * Copyright (C) 2019-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include <cstdint>

namespace NEO {

enum class MemoryOperationsStatus : uint32_t {
    SUCCESS = 0,
    FAILED,
    MEMORY_NOT_FOUND,
    OUT_OF_MEMORY,
    UNSUPPORTED,
    DEVICE_UNINITIALIZED,
};

} // namespace NEO
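For illustration, the enum translates naturally into strings when logging results in tests; this helper is a hypothetical sketch and not part of the tree:

// Hypothetical logging helper built on the enum above.
inline const char *toString(NEO::MemoryOperationsStatus status) {
    switch (status) {
    case NEO::MemoryOperationsStatus::SUCCESS:
        return "SUCCESS";
    case NEO::MemoryOperationsStatus::FAILED:
        return "FAILED";
    case NEO::MemoryOperationsStatus::MEMORY_NOT_FOUND:
        return "MEMORY_NOT_FOUND";
    case NEO::MemoryOperationsStatus::OUT_OF_MEMORY:
        return "OUT_OF_MEMORY";
    case NEO::MemoryOperationsStatus::UNSUPPORTED:
        return "UNSUPPORTED";
    case NEO::MemoryOperationsStatus::DEVICE_UNINITIALIZED:
        return "DEVICE_UNINITIALIZED";
    default:
        return "UNKNOWN";
    }
}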
30
shared/source/memory_manager/memory_pool.h
Normal file
@@ -0,0 +1,30 @@
/*
 * Copyright (C) 2018-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

#include "helpers/extendable_enum.h"

namespace MemoryPool {
struct Type : ExtendableEnum {
    constexpr Type(uint32_t val) : ExtendableEnum(val) {}
};
constexpr Type MemoryNull{0};
constexpr Type System4KBPages{1};
constexpr Type System64KBPages{2};
constexpr Type System4KBPagesWith32BitGpuAddressing{3};
constexpr Type System64KBPagesWith32BitGpuAddressing{4};
constexpr Type SystemCpuInaccessible{5};
constexpr Type LocalMemory{6};

inline bool isSystemMemoryPool(Type pool) {
    return pool == System4KBPages ||
           pool == System64KBPages ||
           pool == System4KBPagesWith32BitGpuAddressing ||
           pool == System64KBPagesWith32BitGpuAddressing;
}
} // namespace MemoryPool
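Because MemoryPool::Type wraps ExtendableEnum rather than a closed enum class, branch-specific code can append pool kinds without editing this header. The constant value and helper below are illustrative assumptions, not part of the change:

// Illustrative only: a branch could introduce an additional pool kind past the ones above.
constexpr MemoryPool::Type HypotheticalBranchPool{7};

// Example check built on isSystemMemoryPool(): system pools are the CPU-visible ones,
// so they are the only candidates for a CPU cache flush in this sketch.
inline bool mayNeedCpuCacheFlush(MemoryPool::Type pool) {
    return MemoryPool::isSystemMemoryPool(pool);
}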
18
shared/source/memory_manager/residency.cpp
Normal file
@@ -0,0 +1,18 @@
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "memory_manager/residency.h"

using namespace NEO;

void ResidencyData::updateCompletionData(uint64_t newFenceValue, uint32_t contextId) {
    lastFenceValues[contextId] = newFenceValue;
}

uint64_t ResidencyData::getFenceValueForContextId(uint32_t contextId) {
    return lastFenceValues[contextId];
}
26
shared/source/memory_manager/residency.h
Normal file
@@ -0,0 +1,26 @@
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include "memory_manager/memory_manager.h"
#include "utilities/stackvec.h"

#include <vector>

namespace NEO {

struct ResidencyData {
    ResidencyData() : lastFenceValues(static_cast<size_t>(MemoryManager::maxOsContextCount)) {}
    std::vector<bool> resident = std::vector<bool>(MemoryManager::maxOsContextCount, 0);

    void updateCompletionData(uint64_t newFenceValue, uint32_t contextId);
    uint64_t getFenceValueForContextId(uint32_t contextId);

  protected:
    StackVec<uint64_t, 32> lastFenceValues;
};
} // namespace NEO
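A short usage sketch of the per-context fence bookkeeping above; the context ids and fence values are made up for illustration:

// Illustrative only: record the last completed fence per OS context and read it back.
void exampleResidencyBookkeeping() {
    NEO::ResidencyData residency;
    residency.updateCompletionData(42u, 0u); // context 0 reached fence 42
    residency.updateCompletionData(17u, 1u); // context 1 reached fence 17
    uint64_t lastFenceOnContext0 = residency.getFenceValueForContextId(0u); // == 42
    (void)lastFenceOnContext0;
}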
17
shared/source/memory_manager/residency_container.h
Normal file
@@ -0,0 +1,17 @@
/*
 * Copyright (C) 2018-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include <utility>
#include <vector>

namespace NEO {
class GraphicsAllocation;
using ResidencyContainer = std::vector<GraphicsAllocation *>;
using AllocationView = std::pair<uint64_t /*address*/, size_t /*size*/>;

} // namespace NEO
110
shared/source/memory_manager/surface.h
Normal file
@@ -0,0 +1,110 @@
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include "command_stream/command_stream_receiver.h"
#include "helpers/cache_policy.h"
#include "memory_manager/graphics_allocation.h"

namespace NEO {

class Surface {
  public:
    Surface(bool isCoherent = false) : IsCoherent(isCoherent) {}
    virtual ~Surface() = default;
    virtual void makeResident(CommandStreamReceiver &csr) = 0;
    virtual Surface *duplicate() = 0;
    virtual bool allowsL3Caching() { return true; }
    bool IsCoherent;
};

class NullSurface : public Surface {
  public:
    NullSurface(){};
    ~NullSurface() override = default;

    void makeResident(CommandStreamReceiver &csr) override{};
    Surface *duplicate() override { return new NullSurface(); };
};

class HostPtrSurface : public Surface {
  public:
    HostPtrSurface(void *ptr, size_t size) : memoryPointer(ptr), surfaceSize(size) {
        UNRECOVERABLE_IF(!ptr);
        gfxAllocation = nullptr;
    }

    HostPtrSurface(void *ptr, size_t size, bool copyAllowed) : HostPtrSurface(ptr, size) {
        isPtrCopyAllowed = copyAllowed;
    }

    HostPtrSurface(void *ptr, size_t size, GraphicsAllocation *allocation) : memoryPointer(ptr), surfaceSize(size), gfxAllocation(allocation) {
        DEBUG_BREAK_IF(!ptr);
    }
    ~HostPtrSurface() override = default;

    void makeResident(CommandStreamReceiver &csr) override {
        DEBUG_BREAK_IF(!gfxAllocation);
        csr.makeResidentHostPtrAllocation(gfxAllocation);
    }
    Surface *duplicate() override {
        return new HostPtrSurface(this->memoryPointer, this->surfaceSize, this->gfxAllocation);
    };

    void *getMemoryPointer() const {
        return memoryPointer;
    }
    size_t getSurfaceSize() const {
        return surfaceSize;
    }

    void setAllocation(GraphicsAllocation *allocation) {
        this->gfxAllocation = allocation;
    }

    GraphicsAllocation *getAllocation() {
        return gfxAllocation;
    }

    bool peekIsPtrCopyAllowed() {
        return isPtrCopyAllowed;
    }

    bool allowsL3Caching() override {
        return isL3Capable(*gfxAllocation);
    }

  protected:
    void *memoryPointer;
    size_t surfaceSize;
    GraphicsAllocation *gfxAllocation;
    bool isPtrCopyAllowed = false;
};

class GeneralSurface : public Surface {
  public:
    GeneralSurface() : Surface(false) {
        gfxAllocation = nullptr;
    }
    GeneralSurface(GraphicsAllocation *gfxAlloc) : Surface(gfxAlloc->isCoherent()) {
        gfxAllocation = gfxAlloc;
    };
    ~GeneralSurface() override = default;

    void makeResident(CommandStreamReceiver &csr) override {
        csr.makeResident(*gfxAllocation);
    };
    Surface *duplicate() override { return new GeneralSurface(gfxAllocation); };
    void setGraphicsAllocation(GraphicsAllocation *newAllocation) {
        gfxAllocation = newAllocation;
        IsCoherent = newAllocation->isCoherent();
    }

  protected:
    GraphicsAllocation *gfxAllocation;
};
} // namespace NEO
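A sketch of how a submission path might use these types; the helper name and the surface collection are assumptions, not part of this change. Each concrete Surface picks the appropriate residency call on the CSR:

// Illustrative helper only: make every surface gathered for a dispatch resident in one pass.
#include <vector>

void makeSurfacesResident(NEO::CommandStreamReceiver &csr,
                          const std::vector<NEO::Surface *> &surfaces) {
    for (auto *surface : surfaces) {
        surface->makeResident(csr); // HostPtrSurface/GeneralSurface dispatch to the right CSR call
    }
}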
285
shared/source/memory_manager/unified_memory_manager.cpp
Normal file
@@ -0,0 +1,285 @@
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "memory_manager/unified_memory_manager.h"

#include "command_stream/command_stream_receiver.h"
#include "helpers/aligned_memory.h"
#include "memory_manager/memory_manager.h"
#include "opencl/source/mem_obj/mem_obj_helper.h"

namespace NEO {

void SVMAllocsManager::MapBasedAllocationTracker::insert(SvmAllocationData allocationsPair) {
    allocations.insert(std::make_pair(reinterpret_cast<void *>(allocationsPair.gpuAllocation->getGpuAddress()), allocationsPair));
}

void SVMAllocsManager::MapBasedAllocationTracker::remove(SvmAllocationData allocationsPair) {
    SvmAllocationContainer::iterator iter;
    iter = allocations.find(reinterpret_cast<void *>(allocationsPair.gpuAllocation->getGpuAddress()));
    allocations.erase(iter);
}

SvmAllocationData *SVMAllocsManager::MapBasedAllocationTracker::get(const void *ptr) {
    SvmAllocationContainer::iterator Iter, End;
    SvmAllocationData *svmAllocData;
    if (ptr == nullptr)
        return nullptr;
    End = allocations.end();
    Iter = allocations.lower_bound(ptr);
    if (((Iter != End) && (Iter->first != ptr)) ||
        (Iter == End)) {
        if (Iter == allocations.begin()) {
            Iter = End;
        } else {
            Iter--;
        }
    }
    if (Iter != End) {
        svmAllocData = &Iter->second;
        char *charPtr = reinterpret_cast<char *>(svmAllocData->gpuAllocation->getGpuAddress());
        if (ptr < (charPtr + svmAllocData->size)) {
            return svmAllocData;
        }
    }
    return nullptr;
}

void SVMAllocsManager::MapOperationsTracker::insert(SvmMapOperation mapOperation) {
    operations.insert(std::make_pair(mapOperation.regionSvmPtr, mapOperation));
}

void SVMAllocsManager::MapOperationsTracker::remove(const void *regionPtr) {
    SvmMapOperationsContainer::iterator iter;
    iter = operations.find(regionPtr);
    operations.erase(iter);
}

SvmMapOperation *SVMAllocsManager::MapOperationsTracker::get(const void *regionPtr) {
    SvmMapOperationsContainer::iterator iter;
    iter = operations.find(regionPtr);
    if (iter == operations.end()) {
        return nullptr;
    }
    return &iter->second;
}

void SVMAllocsManager::makeInternalAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t requestedTypesMask) {
    std::unique_lock<SpinLock> lock(mtx);
    for (auto &allocation : this->SVMAllocs.allocations) {
        if (allocation.second.memoryType & requestedTypesMask) {
            commandStreamReceiver.makeResident(*allocation.second.gpuAllocation);
        }
    }
}

SVMAllocsManager::SVMAllocsManager(MemoryManager *memoryManager) : memoryManager(memoryManager) {
}

void *SVMAllocsManager::createSVMAlloc(uint32_t rootDeviceIndex, size_t size, const SvmAllocationProperties svmProperties) {
    if (size == 0)
        return nullptr;

    std::unique_lock<SpinLock> lock(mtx);
    if (!memoryManager->isLocalMemorySupported(rootDeviceIndex)) {
        return createZeroCopySvmAllocation(rootDeviceIndex, size, svmProperties);
    } else {
        return createUnifiedAllocationWithDeviceStorage(rootDeviceIndex, size, svmProperties, {});
    }
}

void *SVMAllocsManager::createUnifiedMemoryAllocation(uint32_t rootDeviceIndex, size_t size, const UnifiedMemoryProperties &memoryProperties) {
    size_t alignedSize = alignUp<size_t>(size, MemoryConstants::pageSize64k);

    GraphicsAllocation::AllocationType allocationType = GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY;
    if (memoryProperties.memoryType == InternalMemoryType::DEVICE_UNIFIED_MEMORY) {
        if (memoryProperties.allocationFlags.allocFlags.allocWriteCombined) {
            allocationType = GraphicsAllocation::AllocationType::WRITE_COMBINED;
        } else {
            allocationType = GraphicsAllocation::AllocationType::BUFFER;
        }
    }

    AllocationProperties unifiedMemoryProperties{rootDeviceIndex,
                                                 true,
                                                 alignedSize,
                                                 allocationType,
                                                 memoryProperties.subdeviceBitfield.count() > 1,
                                                 memoryProperties.subdeviceBitfield.count() > 1,
                                                 memoryProperties.subdeviceBitfield};
    unifiedMemoryProperties.flags.shareable = memoryProperties.allocationFlags.flags.shareable;

    GraphicsAllocation *unifiedMemoryAllocation = memoryManager->allocateGraphicsMemoryWithProperties(unifiedMemoryProperties);
    if (!unifiedMemoryAllocation) {
        return nullptr;
    }

    SvmAllocationData allocData;
    allocData.gpuAllocation = unifiedMemoryAllocation;
    allocData.cpuAllocation = nullptr;
    allocData.size = size;
    allocData.memoryType = memoryProperties.memoryType;
    allocData.allocationFlagsProperty = memoryProperties.allocationFlags;
    allocData.device = memoryProperties.device;

    std::unique_lock<SpinLock> lock(mtx);
    this->SVMAllocs.insert(allocData);
    return reinterpret_cast<void *>(unifiedMemoryAllocation->getGpuAddress());
}

void *SVMAllocsManager::createSharedUnifiedMemoryAllocation(uint32_t rootDeviceIndex, size_t size, const UnifiedMemoryProperties &memoryProperties, void *cmdQ) {
    auto supportDualStorageSharedMemory = memoryManager->isLocalMemorySupported(rootDeviceIndex);

    if (DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.get() != -1) {
        supportDualStorageSharedMemory = !!DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.get();
    }

    if (supportDualStorageSharedMemory) {
        auto unifiedMemoryPointer = createUnifiedAllocationWithDeviceStorage(rootDeviceIndex, size, {}, memoryProperties);
        if (!unifiedMemoryPointer) {
            return nullptr;
        }
        auto unifiedMemoryAllocation = this->getSVMAlloc(unifiedMemoryPointer);
        unifiedMemoryAllocation->memoryType = memoryProperties.memoryType;
        unifiedMemoryAllocation->allocationFlagsProperty = memoryProperties.allocationFlags;

        UNRECOVERABLE_IF(cmdQ == nullptr);
        auto pageFaultManager = this->memoryManager->getPageFaultManager();
        pageFaultManager->insertAllocation(unifiedMemoryPointer, size, this, cmdQ);

        return unifiedMemoryPointer;
    }
    return createUnifiedMemoryAllocation(rootDeviceIndex, size, memoryProperties);
}

SvmAllocationData *SVMAllocsManager::getSVMAlloc(const void *ptr) {
    std::unique_lock<SpinLock> lock(mtx);
    return SVMAllocs.get(ptr);
}

bool SVMAllocsManager::freeSVMAlloc(void *ptr, bool blocking) {
    SvmAllocationData *svmData = getSVMAlloc(ptr);
    if (svmData) {
        if (blocking) {
            if (svmData->cpuAllocation) {
                this->memoryManager->waitForEnginesCompletion(*svmData->cpuAllocation);
            }
            this->memoryManager->waitForEnginesCompletion(*svmData->gpuAllocation);
        }

        auto pageFaultManager = this->memoryManager->getPageFaultManager();
        if (pageFaultManager) {
            pageFaultManager->removeAllocation(ptr);
        }
        std::unique_lock<SpinLock> lock(mtx);
        if (svmData->gpuAllocation->getAllocationType() == GraphicsAllocation::AllocationType::SVM_ZERO_COPY) {
            freeZeroCopySvmAllocation(svmData);
        } else {
            freeSvmAllocationWithDeviceStorage(svmData);
        }
        return true;
    }
    return false;
}

void *SVMAllocsManager::createZeroCopySvmAllocation(uint32_t rootDeviceIndex, size_t size, const SvmAllocationProperties &svmProperties) {
    AllocationProperties properties{rootDeviceIndex, true, size, GraphicsAllocation::AllocationType::SVM_ZERO_COPY, false};
    MemoryPropertiesParser::fillCachePolicyInProperties(properties, false, svmProperties.readOnly, false);
    GraphicsAllocation *allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties);
    if (!allocation) {
        return nullptr;
    }
    allocation->setMemObjectsAllocationWithWritableFlags(!svmProperties.readOnly && !svmProperties.hostPtrReadOnly);
    allocation->setCoherent(svmProperties.coherent);

    SvmAllocationData allocData;
    allocData.gpuAllocation = allocation;
    allocData.size = size;

    this->SVMAllocs.insert(allocData);
    return allocation->getUnderlyingBuffer();
}

void *SVMAllocsManager::createUnifiedAllocationWithDeviceStorage(uint32_t rootDeviceIndex, size_t size, const SvmAllocationProperties &svmProperties, const UnifiedMemoryProperties &unifiedMemoryProperties) {
    size_t alignedSize = alignUp<size_t>(size, 2 * MemoryConstants::megaByte);
    AllocationProperties cpuProperties{rootDeviceIndex, true, alignedSize, GraphicsAllocation::AllocationType::SVM_CPU, false};
    cpuProperties.alignment = 2 * MemoryConstants::megaByte;
    MemoryPropertiesParser::fillCachePolicyInProperties(cpuProperties, false, svmProperties.readOnly, false);
    GraphicsAllocation *allocationCpu = memoryManager->allocateGraphicsMemoryWithProperties(cpuProperties);
    if (!allocationCpu) {
        return nullptr;
    }
    allocationCpu->setMemObjectsAllocationWithWritableFlags(!svmProperties.readOnly && !svmProperties.hostPtrReadOnly);
    allocationCpu->setCoherent(svmProperties.coherent);
    void *svmPtr = allocationCpu->getUnderlyingBuffer();

    AllocationProperties gpuProperties{rootDeviceIndex,
                                       false,
                                       alignedSize,
                                       GraphicsAllocation::AllocationType::SVM_GPU,
                                       unifiedMemoryProperties.subdeviceBitfield.count() > 1,
                                       false,
                                       unifiedMemoryProperties.subdeviceBitfield};

    gpuProperties.alignment = 2 * MemoryConstants::megaByte;
    MemoryPropertiesParser::fillCachePolicyInProperties(gpuProperties, false, svmProperties.readOnly, false);
    GraphicsAllocation *allocationGpu = memoryManager->allocateGraphicsMemoryWithProperties(gpuProperties, svmPtr);
    if (!allocationGpu) {
        memoryManager->freeGraphicsMemory(allocationCpu);
        return nullptr;
    }
    allocationGpu->setMemObjectsAllocationWithWritableFlags(!svmProperties.readOnly && !svmProperties.hostPtrReadOnly);
    allocationGpu->setCoherent(svmProperties.coherent);

    SvmAllocationData allocData;
    allocData.gpuAllocation = allocationGpu;
    allocData.cpuAllocation = allocationCpu;
    allocData.device = unifiedMemoryProperties.device;
    allocData.size = size;

    this->SVMAllocs.insert(allocData);
    return svmPtr;
}

void SVMAllocsManager::freeZeroCopySvmAllocation(SvmAllocationData *svmData) {
    GraphicsAllocation *gpuAllocation = svmData->gpuAllocation;
    SVMAllocs.remove(*svmData);

    memoryManager->freeGraphicsMemory(gpuAllocation);
}

void SVMAllocsManager::freeSvmAllocationWithDeviceStorage(SvmAllocationData *svmData) {
    GraphicsAllocation *gpuAllocation = svmData->gpuAllocation;
    GraphicsAllocation *cpuAllocation = svmData->cpuAllocation;
    SVMAllocs.remove(*svmData);

    memoryManager->freeGraphicsMemory(gpuAllocation);
    memoryManager->freeGraphicsMemory(cpuAllocation);
}

SvmMapOperation *SVMAllocsManager::getSvmMapOperation(const void *ptr) {
    std::unique_lock<SpinLock> lock(mtx);
    return svmMapOperations.get(ptr);
}

void SVMAllocsManager::insertSvmMapOperation(void *regionSvmPtr, size_t regionSize, void *baseSvmPtr, size_t offset, bool readOnlyMap) {
    SvmMapOperation svmMapOperation;
    svmMapOperation.regionSvmPtr = regionSvmPtr;
    svmMapOperation.baseSvmPtr = baseSvmPtr;
    svmMapOperation.offset = offset;
    svmMapOperation.regionSize = regionSize;
    svmMapOperation.readOnlyMap = readOnlyMap;
    std::unique_lock<SpinLock> lock(mtx);
    svmMapOperations.insert(svmMapOperation);
}

void SVMAllocsManager::removeSvmMapOperation(const void *regionSvmPtr) {
    std::unique_lock<SpinLock> lock(mtx);
    svmMapOperations.remove(regionSvmPtr);
}

} // namespace NEO
110
shared/source/memory_manager/unified_memory_manager.h
Normal file
@@ -0,0 +1,110 @@
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include "helpers/common_types.h"
#include "unified_memory/unified_memory.h"
#include "utilities/spinlock.h"

#include "memory_properties_flags.h"

#include <cstdint>
#include <map>
#include <mutex>

namespace NEO {
class CommandStreamReceiver;
class GraphicsAllocation;
class MemoryManager;

struct SvmAllocationData {
    GraphicsAllocation *cpuAllocation = nullptr;
    GraphicsAllocation *gpuAllocation = nullptr;
    size_t size = 0;
    InternalMemoryType memoryType = InternalMemoryType::SVM;
    MemoryPropertiesFlags allocationFlagsProperty;
    void *device = nullptr;
};

struct SvmMapOperation {
    void *regionSvmPtr = nullptr;
    size_t regionSize = 0;
    void *baseSvmPtr = nullptr;
    size_t offset = 0;
    bool readOnlyMap = false;
};

class SVMAllocsManager {
  public:
    class MapBasedAllocationTracker {
        friend class SVMAllocsManager;

      public:
        using SvmAllocationContainer = std::map<const void *, SvmAllocationData>;
        void insert(SvmAllocationData);
        void remove(SvmAllocationData);
        SvmAllocationData *get(const void *);
        size_t getNumAllocs() const { return allocations.size(); };

      protected:
        SvmAllocationContainer allocations;
    };

    struct MapOperationsTracker {
        using SvmMapOperationsContainer = std::map<const void *, SvmMapOperation>;
        void insert(SvmMapOperation);
        void remove(const void *);
        SvmMapOperation *get(const void *);
        size_t getNumMapOperations() const { return operations.size(); };

      protected:
        SvmMapOperationsContainer operations;
    };

    struct SvmAllocationProperties {
        bool coherent = false;
        bool hostPtrReadOnly = false;
        bool readOnly = false;
    };

    struct UnifiedMemoryProperties {
        UnifiedMemoryProperties() = default;
        UnifiedMemoryProperties(InternalMemoryType memoryType) : memoryType(memoryType){};
        InternalMemoryType memoryType = InternalMemoryType::NOT_SPECIFIED;
        MemoryPropertiesFlags allocationFlags;
        void *device = nullptr;
        DeviceBitfield subdeviceBitfield;
    };

    SVMAllocsManager(MemoryManager *memoryManager);
    void *createSVMAlloc(uint32_t rootDeviceIndex, size_t size, const SvmAllocationProperties svmProperties);
    void *createUnifiedMemoryAllocation(uint32_t rootDeviceIndex, size_t size, const UnifiedMemoryProperties &svmProperties);
    void *createSharedUnifiedMemoryAllocation(uint32_t rootDeviceIndex, size_t size, const UnifiedMemoryProperties &svmProperties, void *cmdQ);
    SvmAllocationData *getSVMAlloc(const void *ptr);
    bool freeSVMAlloc(void *ptr, bool blocking);
    bool freeSVMAlloc(void *ptr) { return freeSVMAlloc(ptr, false); }
    size_t getNumAllocs() const { return SVMAllocs.getNumAllocs(); }
    MapBasedAllocationTracker *getSVMAllocs() { return &SVMAllocs; }

    void insertSvmMapOperation(void *regionSvmPtr, size_t regionSize, void *baseSvmPtr, size_t offset, bool readOnlyMap);
    void removeSvmMapOperation(const void *regionSvmPtr);
    SvmMapOperation *getSvmMapOperation(const void *regionPtr);
    void makeInternalAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t requestedTypesMask);
    void *createUnifiedAllocationWithDeviceStorage(uint32_t rootDeviceIndex, size_t size, const SvmAllocationProperties &svmProperties, const UnifiedMemoryProperties &unifiedMemoryProperties);
    void freeSvmAllocationWithDeviceStorage(SvmAllocationData *svmData);

  protected:
    void *createZeroCopySvmAllocation(uint32_t rootDeviceIndex, size_t size, const SvmAllocationProperties &svmProperties);

    void freeZeroCopySvmAllocation(SvmAllocationData *svmData);

    MapBasedAllocationTracker SVMAllocs;
    MapOperationsTracker svmMapOperations;
    MemoryManager *memoryManager;
    SpinLock mtx;
};
} // namespace NEO
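A usage sketch under stated assumptions: an already-initialized MemoryManager and a valid rootDeviceIndex are provided by the caller, and the size is illustrative. It allocates device unified memory, uses it, then frees it with a blocking wait:

// Illustrative only; not part of this change.
namespace NEO {
void exampleUnifiedMemoryRoundTrip(MemoryManager *memoryManager, uint32_t rootDeviceIndex) {
    SVMAllocsManager svmManager(memoryManager);

    SVMAllocsManager::UnifiedMemoryProperties properties(InternalMemoryType::DEVICE_UNIFIED_MEMORY);
    void *ptr = svmManager.createUnifiedMemoryAllocation(rootDeviceIndex, 4 * MemoryConstants::pageSize64k, properties);
    if (ptr == nullptr) {
        return; // allocation failed
    }

    // ... submit GPU work that reads and writes the allocation ...

    svmManager.freeSVMAlloc(ptr, true /*blocking: wait for engines before releasing storage*/);
}
} // namespace NEO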