mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 06:49:52 +08:00
Graphics Allocation: store task count per context id
Move definition of allocations list method to internal_allocation_storage.cpp.
Change-Id: I4c6038df8fd1b9335e8a74edbab33b78f9293d8f
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
ea2e634f7e
commit
815ae851b7
@@ -11,10 +11,11 @@
|
||||
|
||||
namespace OCLRT {
|
||||
class GraphicsAllocation;
|
||||
class CommandStreamReceiver;
|
||||
|
||||
class AllocationsList : public IDList<GraphicsAllocation, true, true> {
|
||||
public:
|
||||
std::unique_ptr<GraphicsAllocation> detachAllocation(size_t requiredMinimalSize, volatile uint32_t *csrTagAddress, bool internalAllocationRequired);
|
||||
std::unique_ptr<GraphicsAllocation> detachAllocation(size_t requiredMinimalSize, CommandStreamReceiver &commandStreamReceiver, bool internalAllocationRequired);
|
||||
|
||||
private:
|
||||
GraphicsAllocation *detachAllocationImpl(GraphicsAllocation *, void *);
|
||||
|
||||
@@ -5,13 +5,54 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "graphics_allocation.h"
|
||||
#include "runtime/helpers/aligned_memory.h"
|
||||
#include "runtime/memory_manager/graphics_allocation.h"
|
||||
|
||||
bool OCLRT::GraphicsAllocation::isL3Capable() {
|
||||
namespace OCLRT {
|
||||
bool GraphicsAllocation::isL3Capable() {
|
||||
auto ptr = ptrOffset(cpuPtr, static_cast<size_t>(this->allocationOffset));
|
||||
if (alignUp(ptr, MemoryConstants::cacheLineSize) == ptr && alignUp(this->size, MemoryConstants::cacheLineSize) == this->size) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
// Constructs an allocation from an explicit CPU pointer, GPU address, GPU base address and size.
// taskCounts is created with maxOsContextCount entries; initTaskCounts() then resets every
// entry to ObjectNotUsed.
GraphicsAllocation::GraphicsAllocation(void *cpuPtrIn, uint64_t gpuAddress, uint64_t baseAddress, size_t sizeIn)
    : gpuBaseAddress(baseAddress),
      size(sizeIn),
      cpuPtr(cpuPtrIn),
      gpuAddress(gpuAddress), // member initialized from the same-named parameter
      taskCounts(maxOsContextCount) {
    this->initTaskCounts();
}
|
||||
|
||||
// Constructs an allocation that carries a shared handle.
// The GPU address is derived from the CPU pointer through castToUint64().
// taskCounts is created with maxOsContextCount entries; initTaskCounts() then resets every
// entry to ObjectNotUsed.
GraphicsAllocation::GraphicsAllocation(void *cpuPtrIn, size_t sizeIn, osHandle sharedHandleIn)
    : size(sizeIn),
      cpuPtr(cpuPtrIn),
      gpuAddress(castToUint64(cpuPtrIn)),
      sharedHandle(sharedHandleIn),
      taskCounts(maxOsContextCount) {
    this->initTaskCounts();
}
|
||||
// Defaulted destructor, defined out of line in this translation unit.
GraphicsAllocation::~GraphicsAllocation() = default;
|
||||
|
||||
// Reports whether registeredContextsNum is non-zero, i.e. whether at least one context
// is currently counted as having registered a task count for this allocation.
bool GraphicsAllocation::peekWasUsed() const {
    const uint32_t contextsRegistered = registeredContextsNum;
    return contextsRegistered != 0u;
}
|
||||
|
||||
void GraphicsAllocation::updateTaskCount(uint32_t newTaskCount, uint32_t contextId) {
|
||||
UNRECOVERABLE_IF(contextId >= taskCounts.size());
|
||||
if (taskCounts[contextId] == ObjectNotUsed) {
|
||||
registeredContextsNum++;
|
||||
}
|
||||
if (newTaskCount == ObjectNotUsed) {
|
||||
registeredContextsNum--;
|
||||
}
|
||||
taskCounts[contextId] = newTaskCount;
|
||||
}
|
||||
|
||||
// Returns the task count stored for contextId.
// contextId must be a valid index into taskCounts; anything else trips UNRECOVERABLE_IF.
uint32_t GraphicsAllocation::getTaskCount(uint32_t contextId) const {
    UNRECOVERABLE_IF(contextId >= taskCounts.size());

    const uint32_t storedTaskCount = taskCounts[contextId];
    return storedTaskCount;
}
|
||||
void GraphicsAllocation::initTaskCounts() {
|
||||
for (auto i = 0u; i < taskCounts.size(); i++) {
|
||||
taskCounts[i] = ObjectNotUsed;
|
||||
}
|
||||
}
|
||||
} // namespace OCLRT
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include "runtime/memory_manager/memory_pool.h"
|
||||
#include "runtime/memory_manager/residency_container.h"
|
||||
#include "runtime/utilities/idlist.h"
|
||||
#include "runtime/utilities/stackvec.h"
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
@@ -34,19 +35,7 @@ const uint32_t ObjectNotUsed = (uint32_t)-1;
|
||||
class Gmm;
|
||||
|
||||
class GraphicsAllocation : public IDNode<GraphicsAllocation> {
|
||||
protected:
|
||||
size_t size = 0;
|
||||
void *cpuPtr = nullptr;
|
||||
uint64_t gpuAddress = 0;
|
||||
bool coherent = false;
|
||||
osHandle sharedHandle;
|
||||
bool locked = false;
|
||||
uint32_t reuseCount = 0; // GraphicsAllocation can be reused by shared resources
|
||||
bool evictable = true;
|
||||
MemoryPool::Type memoryPool = MemoryPool::MemoryNull;
|
||||
|
||||
public:
|
||||
uint32_t taskCount = ObjectNotUsed;
|
||||
OsHandleStorage fragmentsStorage;
|
||||
bool is32BitAllocation = false;
|
||||
uint64_t gpuBaseAddress = 0;
|
||||
@@ -81,20 +70,13 @@ class GraphicsAllocation : public IDNode<GraphicsAllocation> {
|
||||
SHARED_RESOURCE,
|
||||
};
|
||||
|
||||
virtual ~GraphicsAllocation() = default;
|
||||
virtual ~GraphicsAllocation();
|
||||
GraphicsAllocation &operator=(const GraphicsAllocation &) = delete;
|
||||
GraphicsAllocation(const GraphicsAllocation &) = delete;
|
||||
|
||||
GraphicsAllocation(void *cpuPtrIn, uint64_t gpuAddress, uint64_t baseAddress, size_t sizeIn) : size(sizeIn),
|
||||
cpuPtr(cpuPtrIn),
|
||||
gpuAddress(gpuAddress),
|
||||
sharedHandle(Sharing::nonSharedResource),
|
||||
gpuBaseAddress(baseAddress) {}
|
||||
GraphicsAllocation(void *cpuPtrIn, uint64_t gpuAddress, uint64_t baseAddress, size_t sizeIn);
|
||||
|
||||
GraphicsAllocation(void *cpuPtrIn, size_t sizeIn, osHandle sharedHandleIn) : size(sizeIn),
|
||||
cpuPtr(cpuPtrIn),
|
||||
gpuAddress(castToUint64(cpuPtrIn)),
|
||||
sharedHandle(sharedHandleIn) {}
|
||||
GraphicsAllocation(void *cpuPtrIn, size_t sizeIn, osHandle sharedHandleIn);
|
||||
|
||||
void *getUnderlyingBuffer() const { return cpuPtr; }
|
||||
void setCpuPtrAndGpuAddress(void *cpuPtr, uint64_t gpuAddress) {
|
||||
@@ -141,14 +123,30 @@ class GraphicsAllocation : public IDNode<GraphicsAllocation> {
|
||||
MemoryPool::Type getMemoryPool() {
|
||||
return memoryPool;
|
||||
}
|
||||
bool peekWasUsed() const;
|
||||
void updateTaskCount(uint32_t newTaskCount, uint32_t contextId);
|
||||
uint32_t getTaskCount(uint32_t contextId) const;
|
||||
|
||||
protected:
|
||||
void initTaskCounts();
|
||||
|
||||
//this variable can only be modified from SubmissionAggregator
|
||||
friend class SubmissionAggregator;
|
||||
size_t size = 0;
|
||||
void *cpuPtr = nullptr;
|
||||
uint64_t gpuAddress = 0;
|
||||
bool coherent = false;
|
||||
osHandle sharedHandle = Sharing::nonSharedResource;
|
||||
bool locked = false;
|
||||
uint32_t reuseCount = 0; // GraphicsAllocation can be reused by shared resources
|
||||
bool evictable = true;
|
||||
MemoryPool::Type memoryPool = MemoryPool::MemoryNull;
|
||||
uint32_t inspectionId = 0;
|
||||
AllocationType allocationType = AllocationType::UNKNOWN;
|
||||
bool aubWritable = true;
|
||||
bool allocDumpable = false;
|
||||
bool memObjectsAllocationWithWritableFlags = false;
|
||||
StackVec<uint32_t, maxOsContextCount> taskCounts;
|
||||
std::atomic<uint32_t> registeredContextsNum{0};
|
||||
};
|
||||
} // namespace OCLRT
|
||||
|
||||
@@ -10,9 +10,9 @@
|
||||
#include "runtime/memory_manager/memory_manager.h"
|
||||
|
||||
namespace OCLRT {
|
||||
InternalAllocationStorage::InternalAllocationStorage(CommandStreamReceiver &commandStreamReceiver) : commandStreamReceiver(commandStreamReceiver){};
|
||||
// Binds the storage to the command stream receiver it serves.
// contextId is taken from the receiver's device index and is the index used for
// per-context task count queries and updates on stored allocations.
InternalAllocationStorage::InternalAllocationStorage(CommandStreamReceiver &commandStreamReceiver)
    : commandStreamReceiver(commandStreamReceiver),
      contextId(commandStreamReceiver.getDeviceIndex()) {
}
|
||||
void InternalAllocationStorage::storeAllocation(std::unique_ptr<GraphicsAllocation> gfxAllocation, uint32_t allocationUsage) {
|
||||
uint32_t taskCount = gfxAllocation->taskCount;
|
||||
uint32_t taskCount = gfxAllocation->getTaskCount(contextId);
|
||||
|
||||
if (allocationUsage == REUSABLE_ALLOCATION) {
|
||||
taskCount = commandStreamReceiver.peekTaskCount();
|
||||
@@ -28,7 +28,7 @@ void InternalAllocationStorage::storeAllocationWithTaskCount(std::unique_ptr<Gra
|
||||
}
|
||||
}
|
||||
auto &allocationsList = (allocationUsage == TEMPORARY_ALLOCATION) ? temporaryAllocations : allocationsForReuse;
|
||||
gfxAllocation->taskCount = taskCount;
|
||||
gfxAllocation->updateTaskCount(taskCount, contextId);
|
||||
allocationsList.pushTailOne(*gfxAllocation.release());
|
||||
}
|
||||
|
||||
@@ -43,7 +43,7 @@ void InternalAllocationStorage::freeAllocationsList(uint32_t waitTaskCount, Allo
|
||||
IDList<GraphicsAllocation, false, true> allocationsLeft;
|
||||
while (curr != nullptr) {
|
||||
auto *next = curr->next;
|
||||
if (curr->taskCount <= waitTaskCount) {
|
||||
if (curr->getTaskCount(contextId) <= waitTaskCount) {
|
||||
memoryManager->freeGraphicsMemory(curr);
|
||||
} else {
|
||||
allocationsLeft.pushTailOne(*curr);
|
||||
@@ -57,8 +57,41 @@ void InternalAllocationStorage::freeAllocationsList(uint32_t waitTaskCount, Allo
|
||||
}
|
||||
|
||||
std::unique_ptr<GraphicsAllocation> InternalAllocationStorage::obtainReusableAllocation(size_t requiredSize, bool internalAllocation) {
|
||||
auto allocation = allocationsForReuse.detachAllocation(requiredSize, commandStreamReceiver.getTagAddress(), internalAllocation);
|
||||
auto allocation = allocationsForReuse.detachAllocation(requiredSize, commandStreamReceiver, internalAllocation);
|
||||
return allocation;
|
||||
}
|
||||
|
||||
// Search criteria passed (through a void *) to AllocationsList::detachAllocationImpl.
// Every member has a default so a default-constructed instance never holds
// indeterminate values.
struct ReusableAllocationRequirements {
    // Lower bound for the candidate's underlying buffer size.
    size_t requiredMinimalSize = 0;
    // Location of the tag value that candidates' task counts are compared against.
    volatile uint32_t *csrTagAddress = nullptr;
    // Must equal the candidate's is32BitAllocation flag.
    bool internalAllocationRequired = false;
    // Context whose task count is read from each candidate.
    uint32_t contextId = 0;
};
|
||||
|
||||
std::unique_ptr<GraphicsAllocation> AllocationsList::detachAllocation(size_t requiredMinimalSize, CommandStreamReceiver &commandStreamReceiver, bool internalAllocationRequired) {
|
||||
ReusableAllocationRequirements req;
|
||||
req.requiredMinimalSize = requiredMinimalSize;
|
||||
req.csrTagAddress = commandStreamReceiver.getTagAddress();
|
||||
req.internalAllocationRequired = internalAllocationRequired;
|
||||
req.contextId = commandStreamReceiver.getDeviceIndex();
|
||||
GraphicsAllocation *a = nullptr;
|
||||
GraphicsAllocation *retAlloc = processLocked<AllocationsList, &AllocationsList::detachAllocationImpl>(a, static_cast<void *>(&req));
|
||||
return std::unique_ptr<GraphicsAllocation>(retAlloc);
|
||||
}
|
||||
|
||||
// Walks the list from head and removes the first allocation that satisfies all of:
//  - its is32BitAllocation flag equals internalAllocationRequired,
//  - its underlying buffer is at least requiredMinimalSize,
//  - its task count for the requested context is 0 or below the current tag value.
// data must point at a ReusableAllocationRequirements. Returns nullptr when nothing matches.
GraphicsAllocation *AllocationsList::detachAllocationImpl(GraphicsAllocation *, void *data) {
    auto *requirements = static_cast<ReusableAllocationRequirements *>(data);

    for (auto *candidate = head; candidate != nullptr; candidate = candidate->next) {
        // The tag is read through a volatile pointer once per visited node.
        const uint32_t currentTagValue = *requirements->csrTagAddress;

        if (requirements->internalAllocationRequired != candidate->is32BitAllocation) {
            continue;
        }
        if (candidate->getUnderlyingBufferSize() < requirements->requiredMinimalSize) {
            continue;
        }

        const uint32_t candidateTaskCount = candidate->getTaskCount(requirements->contextId);
        if ((currentTagValue > candidateTaskCount) || (candidateTaskCount == 0)) {
            return removeOneImpl(candidate, nullptr);
        }
    }

    return nullptr;
}
|
||||
|
||||
} // namespace OCLRT
|
||||
|
||||
@@ -27,6 +27,7 @@ class InternalAllocationStorage {
|
||||
protected:
|
||||
void freeAllocationsList(uint32_t waitTaskCount, AllocationsList &allocationsList);
|
||||
CommandStreamReceiver &commandStreamReceiver;
|
||||
const uint32_t contextId;
|
||||
|
||||
AllocationsList temporaryAllocations;
|
||||
AllocationsList allocationsForReuse;
|
||||
|
||||
@@ -27,37 +27,6 @@
|
||||
namespace OCLRT {
|
||||
constexpr size_t TagCount = 512;
|
||||
|
||||
// Search criteria passed (through a void *) to AllocationsList::detachAllocationImpl.
// Every member has a default so a default-constructed instance never holds
// indeterminate values.
struct ReusableAllocationRequirements {
    // Lower bound for the candidate's underlying buffer size.
    size_t requiredMinimalSize = 0;
    // Location of the tag value that candidates' task counts are compared against; may be null.
    volatile uint32_t *csrTagAddress = nullptr;
    // Must equal the candidate's is32BitAllocation flag.
    bool internalAllocationRequired = false;
};
|
||||
|
||||
std::unique_ptr<GraphicsAllocation> AllocationsList::detachAllocation(size_t requiredMinimalSize, volatile uint32_t *csrTagAddress, bool internalAllocationRequired) {
|
||||
ReusableAllocationRequirements req;
|
||||
req.requiredMinimalSize = requiredMinimalSize;
|
||||
req.csrTagAddress = csrTagAddress;
|
||||
req.internalAllocationRequired = internalAllocationRequired;
|
||||
GraphicsAllocation *a = nullptr;
|
||||
GraphicsAllocation *retAlloc = processLocked<AllocationsList, &AllocationsList::detachAllocationImpl>(a, static_cast<void *>(&req));
|
||||
return std::unique_ptr<GraphicsAllocation>(retAlloc);
|
||||
}
|
||||
|
||||
// Walks the list from head and removes the first allocation that satisfies all of:
//  - its is32BitAllocation flag equals internalAllocationRequired,
//  - its underlying buffer is at least requiredMinimalSize,
//  - its taskCount is 0 or below the current tag value.
// When csrTagAddress is null the tag value is taken as the largest uint32_t.
// data must point at a ReusableAllocationRequirements. Returns nullptr when nothing matches.
GraphicsAllocation *AllocationsList::detachAllocationImpl(GraphicsAllocation *, void *data) {
    auto *requirements = static_cast<ReusableAllocationRequirements *>(data);

    for (auto *candidate = head; candidate != nullptr; candidate = candidate->next) {
        // Same value as the original's -1 converted to unsigned; the tag is re-read per node.
        uint32_t currentTagValue = static_cast<uint32_t>(-1);
        if (requirements->csrTagAddress) {
            currentTagValue = *requirements->csrTagAddress;
        }

        if (requirements->internalAllocationRequired != candidate->is32BitAllocation) {
            continue;
        }
        if (candidate->getUnderlyingBufferSize() < requirements->requiredMinimalSize) {
            continue;
        }
        if ((currentTagValue > candidate->taskCount) || (candidate->taskCount == 0)) {
            return removeOneImpl(candidate, nullptr);
        }
    }

    return nullptr;
}
|
||||
|
||||
MemoryManager::MemoryManager(bool enable64kbpages, bool enableLocalMemory,
|
||||
ExecutionEnvironment &executionEnvironment) : allocator32Bit(nullptr), enable64kbpages(enable64kbpages),
|
||||
localMemorySupported(enableLocalMemory),
|
||||
@@ -198,7 +167,7 @@ void MemoryManager::freeGraphicsMemory(GraphicsAllocation *gfxAllocation) {
|
||||
//if not in use destroy in place
|
||||
//if in use pass to temporary allocation list that is cleaned on blocking calls
|
||||
void MemoryManager::checkGpuUsageAndDestroyGraphicsAllocations(GraphicsAllocation *gfxAllocation) {
|
||||
if (gfxAllocation->taskCount == ObjectNotUsed || gfxAllocation->taskCount <= *getCommandStreamReceiver(0)->getTagAddress()) {
|
||||
if (!gfxAllocation->peekWasUsed() || gfxAllocation->getTaskCount(0u) <= *getCommandStreamReceiver(0)->getTagAddress()) {
|
||||
freeGraphicsMemory(gfxAllocation);
|
||||
} else {
|
||||
getCommandStreamReceiver(0)->getInternalAllocationStorage()->storeAllocation(std::unique_ptr<GraphicsAllocation>(gfxAllocation), TEMPORARY_ALLOCATION);
|
||||
|
||||
Reference in New Issue
Block a user