feature: bindless addressing allocator - reuse of bindless slots

- introduce 2 reuse pools in BindlessHeapsHelper
- one pool stores slots available for reuse, the second pool stores released slots
- stateCacheDirty flags keep track of the state cache - when the pools are
switched, the flags are set to indicate that the caches must be flushed after
old slots have been reused for new allocations

Related-To: NEO-7063

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe
2023-08-25 14:28:27 +00:00
committed by Compute-Runtime-Automation
parent edd5523f82
commit 00de13939d
5 changed files with 310 additions and 25 deletions

View File

@@ -14,6 +14,7 @@
#include "shared/source/indirect_heap/indirect_heap.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/os_interface/os_context.h"
namespace NEO {
@@ -61,20 +62,59 @@ GraphicsAllocation *BindlessHeapsHelper::getHeapAllocation(size_t heapSize, size
return this->memManager->allocateGraphicsMemoryWithProperties(properties);
}
void BindlessHeapsHelper::clearStateDirtyForContext(uint32_t osContextId) {
std::lock_guard<std::mutex> autolock(this->mtx);
stateCacheDirtyForContext.reset(osContextId);
}
bool BindlessHeapsHelper::getStateDirtyForContext(uint32_t osContextId) {
std::lock_guard<std::mutex> autolock(this->mtx);
return stateCacheDirtyForContext.test(osContextId);
}
SurfaceStateInHeapInfo BindlessHeapsHelper::allocateSSInHeap(size_t ssSize, GraphicsAllocation *surfaceAllocation, BindlesHeapType heapType) {
auto heap = surfaceStateHeaps[heapType].get();
std::lock_guard<std::mutex> autolock(this->mtx);
if (heapType == BindlesHeapType::GLOBAL_SSH) {
int index = getReusedSshVectorIndex(ssSize);
if (!allocateFromReusePool) {
if ((surfaceStateInHeapVectorReuse[releasePoolIndex][0].size() + surfaceStateInHeapVectorReuse[releasePoolIndex][1].size()) > reuseSlotCountThreshold) {
if (surfaceStateInHeapVectorReuse[index].size()) {
SurfaceStateInHeapInfo surfaceStateFromVector = surfaceStateInHeapVectorReuse[index].back();
surfaceStateInHeapVectorReuse[index].pop_back();
return surfaceStateFromVector;
// invalidate all contexts
stateCacheDirtyForContext.set();
allocateFromReusePool = true;
allocatePoolIndex = releasePoolIndex;
releasePoolIndex = allocatePoolIndex == 0 ? 1 : 0;
}
}
if (allocateFromReusePool) {
int index = getReusedSshVectorIndex(ssSize);
if (surfaceStateInHeapVectorReuse[allocatePoolIndex][index].size()) {
SurfaceStateInHeapInfo surfaceStateFromVector = surfaceStateInHeapVectorReuse[allocatePoolIndex][index].back();
surfaceStateInHeapVectorReuse[allocatePoolIndex][index].pop_back();
if (surfaceStateInHeapVectorReuse[allocatePoolIndex][index].empty()) {
allocateFromReusePool = false;
// copy remaining slots from allocate pool to release pool
int otherSizeIndex = index == 0 ? 1 : 0;
surfaceStateInHeapVectorReuse[releasePoolIndex][otherSizeIndex].insert(surfaceStateInHeapVectorReuse[releasePoolIndex][otherSizeIndex].end(),
surfaceStateInHeapVectorReuse[allocatePoolIndex][otherSizeIndex].begin(),
surfaceStateInHeapVectorReuse[allocatePoolIndex][otherSizeIndex].end());
surfaceStateInHeapVectorReuse[allocatePoolIndex][otherSizeIndex].clear();
}
return surfaceStateFromVector;
}
}
}
void *ptrInHeap = getSpaceInHeap(ssSize, heapType);
SurfaceStateInHeapInfo bindlesInfo = {nullptr, 0, nullptr};
@@ -128,14 +168,13 @@ bool BindlessHeapsHelper::growHeap(BindlesHeapType heapType) {
return true;
}
// Diff hunk: lines of the previous placeSSAllocationInReuseVectorOnFreeMemory(GraphicsAllocation *)
// and of the new releaseSSToReusePool(const SurfaceStateInHeapInfo &) appear interleaved below.
// The new version takes the surface state info directly and, when its heapAllocation is
// non-null, appends it under the helper's mutex to the vector of the current release pool
// selected by getReusedSshVectorIndex(ssSize).
void BindlessHeapsHelper::placeSSAllocationInReuseVectorOnFreeMemory(GraphicsAllocation *gfxAllocation) {
auto ssAllocatedInfo = gfxAllocation->getBindlessInfo();
if (ssAllocatedInfo.heapAllocation != nullptr) {
void BindlessHeapsHelper::releaseSSToReusePool(const SurfaceStateInHeapInfo &surfStateInfo) {
// Infos without a heap allocation are ignored.
if (surfStateInfo.heapAllocation != nullptr) {
std::lock_guard<std::mutex> autolock(this->mtx);
int index = getReusedSshVectorIndex(ssAllocatedInfo.ssSize);
surfaceStateInHeapVectorReuse[index].push_back(std::move(ssAllocatedInfo));
int index = getReusedSshVectorIndex(surfStateInfo.ssSize);
// NOTE(review): surfStateInfo is a const reference, so std::move yields a const rvalue
// and push_back copies the element; the std::move here has no effect.
surfaceStateInHeapVectorReuse[releasePoolIndex][index].push_back(std::move(surfStateInfo));
}
return;
}

View File

@@ -10,6 +10,7 @@
#include "shared/source/helpers/heap_helper.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include <array>
#include <memory>
#include <mutex>
#include <unordered_map>
@@ -18,7 +19,6 @@
namespace NEO {
class IndirectHeap;
class BindlessHeapsHelper {
public:
enum BindlesHeapType {
@@ -41,7 +41,7 @@ class BindlessHeapsHelper {
uint32_t getDefaultBorderColorOffset();
uint32_t getAlphaBorderColorOffset();
IndirectHeap *getHeap(BindlesHeapType heapType);
void placeSSAllocationInReuseVectorOnFreeMemory(GraphicsAllocation *gfxAllocation);
void releaseSSToReusePool(const SurfaceStateInHeapInfo &surfStateInfo);
bool isGlobalDshSupported() {
return globalBindlessDsh;
}
@@ -55,6 +55,8 @@ class BindlessHeapsHelper {
}
return index;
}
bool getStateDirtyForContext(uint32_t osContextId);
void clearStateDirtyForContext(uint32_t osContextId);
protected:
const size_t surfaceStateSize;
@@ -65,7 +67,14 @@ class BindlessHeapsHelper {
std::unique_ptr<IndirectHeap> surfaceStateHeaps[BindlesHeapType::NUM_HEAP_TYPES];
GraphicsAllocation *borderColorStates;
std::vector<GraphicsAllocation *> ssHeapsAllocations;
std::vector<SurfaceStateInHeapInfo> surfaceStateInHeapVectorReuse[2];
size_t reuseSlotCountThreshold = 512;
uint32_t allocatePoolIndex = 0;
uint32_t releasePoolIndex = 0;
bool allocateFromReusePool = false;
std::array<std::vector<SurfaceStateInHeapInfo>, 2> surfaceStateInHeapVectorReuse[2];
std::bitset<64> stateCacheDirtyForContext;
std::mutex mtx;
DeviceBitfield deviceBitfield;
bool globalBindlessDsh = false;

View File

@@ -245,7 +245,7 @@ void MemoryManager::freeGraphicsMemory(GraphicsAllocation *gfxAllocation, bool i
return;
}
if (ApiSpecificConfig::getGlobalBindlessHeapConfiguration() && executionEnvironment.rootDeviceEnvironments[gfxAllocation->getRootDeviceIndex()]->getBindlessHeapsHelper() != nullptr) {
executionEnvironment.rootDeviceEnvironments[gfxAllocation->getRootDeviceIndex()]->getBindlessHeapsHelper()->placeSSAllocationInReuseVectorOnFreeMemory(gfxAllocation);
executionEnvironment.rootDeviceEnvironments[gfxAllocation->getRootDeviceIndex()]->getBindlessHeapsHelper()->releaseSSToReusePool(gfxAllocation->getBindlessInfo());
}
const bool hasFragments = gfxAllocation->fragmentsStorage.fragmentCount != 0;
const bool isLocked = gfxAllocation->isLocked();