feature: in-order host counter allocation pooling

Related-To: NEO-10507

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
Author: Dunajski, Bartosz
Committed by: Compute-Runtime-Automation
Date: 2024-03-05 16:52:05 +00:00
Commit: 2f03c48c7a (parent 4af5851778)

12 changed files with 137 additions and 68 deletions


@@ -21,26 +21,34 @@ NEO::DebuggerL0 *Device::getL0Debugger() {
     return getNEODevice()->getL0Debugger();
 }
-NEO::TagAllocatorBase *Device::getDeviceInOrderCounterAllocator() {
-    if (!deviceInOrderCounterAllocator.get()) {
+template <typename NodeT>
+NEO::TagAllocatorBase *getInOrderCounterAllocator(std::unique_ptr<NEO::TagAllocatorBase> &allocator, std::mutex &inOrderAllocatorMutex, NEO::Device &neoDevice) {
+    if (!allocator.get()) {
         std::unique_lock<std::mutex> lock(inOrderAllocatorMutex);
-        if (!deviceInOrderCounterAllocator.get()) {
-            using NodeT = typename NEO::DeviceAllocNodeType<true>;
-            RootDeviceIndicesContainer rootDeviceIndices = {getRootDeviceIndex()};
-            const size_t maxPartitionCount = getNEODevice()->getDeviceBitfield().count();
+        if (!allocator.get()) {
+            RootDeviceIndicesContainer rootDeviceIndices = {neoDevice.getRootDeviceIndex()};
+            const size_t maxPartitionCount = neoDevice.getDeviceBitfield().count();
             const size_t nodeSize = sizeof(uint64_t) * maxPartitionCount * 2; // Multiplied by 2 to handle 32b overflow
             DEBUG_BREAK_IF(alignUp(nodeSize, MemoryConstants::cacheLineSize) * NodeT::defaultAllocatorTagCount > MemoryConstants::pageSize64k);
-            deviceInOrderCounterAllocator = std::make_unique<NEO::TagAllocator<NodeT>>(rootDeviceIndices, neoDevice->getMemoryManager(), NodeT::defaultAllocatorTagCount,
-                                                                                       MemoryConstants::cacheLineSize, nodeSize, false, neoDevice->getDeviceBitfield());
+            allocator = std::make_unique<NEO::TagAllocator<NodeT>>(rootDeviceIndices, neoDevice.getMemoryManager(), NodeT::defaultAllocatorTagCount,
+                                                                   MemoryConstants::cacheLineSize, nodeSize, false, neoDevice.getDeviceBitfield());
         }
     }
-    return deviceInOrderCounterAllocator.get();
+    return allocator.get();
 }
+
+NEO::TagAllocatorBase *Device::getDeviceInOrderCounterAllocator() {
+    return getInOrderCounterAllocator<NEO::DeviceAllocNodeType<true>>(deviceInOrderCounterAllocator, inOrderAllocatorMutex, *getNEODevice());
+}
+
+NEO::TagAllocatorBase *Device::getHostInOrderCounterAllocator() {
+    return getInOrderCounterAllocator<NEO::DeviceAllocNodeType<false>>(hostInOrderCounterAllocator, inOrderAllocatorMutex, *getNEODevice());
+}
 } // namespace L0
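
Two details of the shared helper are worth spelling out. First, it uses double-checked locking: the allocator pointer is tested once before and once after taking inOrderAllocatorMutex, so only the first caller pays the construction cost. Second, each pooled tag node is small: one 64-bit counter per partition, doubled to absorb 32-bit overflow, and the DEBUG_BREAK_IF guards that a full pool of default-sized nodes still fits in a single 64KB page. The following standalone C++ sketch walks through that arithmetic; the constants and the tag count of 512 are illustrative assumptions, not values taken from this commit.

#include <cassert>
#include <cstddef>
#include <cstdint>

// Illustrative stand-ins; defaultAllocatorTagCount in particular is an assumed placeholder.
constexpr size_t cacheLineSize = 64;
constexpr size_t pageSize64k = 64 * 1024;

constexpr size_t alignUpTo(size_t value, size_t alignment) {
    return (value + alignment - 1) & ~(alignment - 1);
}

int main() {
    const size_t maxPartitionCount = 4;                               // e.g. a 4-tile device bitfield
    const size_t nodeSize = sizeof(uint64_t) * maxPartitionCount * 2; // x2 to handle 32b overflow
    assert(nodeSize == 64); // one cache line per node in this configuration

    // Mirrors the DEBUG_BREAK_IF above: the whole pool of default tag nodes,
    // each padded to a cache line, must fit within one 64KB page.
    const size_t defaultAllocatorTagCount = 512; // assumed value for illustration
    assert(alignUpTo(nodeSize, cacheLineSize) * defaultAllocatorTagCount <= pageSize64k);
    return 0;
}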


@@ -150,10 +150,12 @@ struct Device : _ze_device_handle_t {
     virtual uint32_t getEventMaxPacketCount() const = 0;
     virtual uint32_t getEventMaxKernelCount() const = 0;
     NEO::TagAllocatorBase *getDeviceInOrderCounterAllocator();
+    NEO::TagAllocatorBase *getHostInOrderCounterAllocator();
   protected:
     NEO::Device *neoDevice = nullptr;
     std::unique_ptr<NEO::TagAllocatorBase> deviceInOrderCounterAllocator;
+    std::unique_ptr<NEO::TagAllocatorBase> hostInOrderCounterAllocator;
     std::mutex inOrderAllocatorMutex;
     bool implicitScalingCapable = false;
 };
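
Taken together with the previous hunk, the device now owns two lazily built pools guarded by a single mutex. Below is a minimal, self-contained C++ sketch of that ownership layout; the names are illustrative placeholders, whereas the real members are NEO::TagAllocatorBase pools instantiated with NEO::DeviceAllocNodeType<true> (device) and NEO::DeviceAllocNodeType<false> (host).

#include <memory>
#include <mutex>

struct Pool {}; // stands in for a pooled tag allocator

class DeviceSketch {
  public:
    Pool *devicePool() { return getOrCreate(devicePoolStorage); }
    Pool *hostPool() { return getOrCreate(hostPoolStorage); }

  private:
    // Double-checked lazy construction, as in getInOrderCounterAllocator().
    Pool *getOrCreate(std::unique_ptr<Pool> &slot) {
        if (!slot) {
            std::unique_lock<std::mutex> lock(poolMutex); // one mutex shared by both pools
            if (!slot) {
                slot = std::make_unique<Pool>();
            }
        }
        return slot.get();
    }

    std::unique_ptr<Pool> devicePoolStorage;
    std::unique_ptr<Pool> hostPoolStorage;
    std::mutex poolMutex;
};

As in the commit, both getters share one mutex; since each pool is constructed at most once per device, contention on it is negligible.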


@@ -1468,6 +1468,7 @@ void DeviceImp::releaseResources() {
     builtins.reset();
     cacheReservation.reset();
     deviceInOrderCounterAllocator.reset();
+    hostInOrderCounterAllocator.reset();
     if (allocationsForReuse.get()) {
         allocationsForReuse->freeAllGraphicsAllocations(neoDevice);