performance: share inter-module ISA allocations

Related-To: NEO-10258

Currently each module has its own GA
for kernel ISAs. This change allows new modules to
reuse an existing allocation.

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek
2024-04-29 17:12:50 +00:00
committed by Compute-Runtime-Automation
parent 349a86a8dc
commit 10ed479b16
14 changed files with 414 additions and 35 deletions

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2019-2023 Intel Corporation
# Copyright (C) 2019-2024 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -47,6 +47,8 @@ set(NEO_CORE_UTILITIES
${CMAKE_CURRENT_SOURCE_DIR}/timer_util.h
${CMAKE_CURRENT_SOURCE_DIR}/wait_util.cpp
${CMAKE_CURRENT_SOURCE_DIR}/wait_util.h
${CMAKE_CURRENT_SOURCE_DIR}/isa_pool_allocator.cpp
${CMAKE_CURRENT_SOURCE_DIR}/isa_pool_allocator.h
)
set(NEO_CORE_UTILITIES_WINDOWS

View File

@@ -49,6 +49,8 @@ struct AbstractBuffersPool : public SmallBuffersParams<PoolT>, public NonCopyabl
AbstractBuffersPool(MemoryManager *memoryManager, OnChunkFreeCallback onChunkFreeCallback);
AbstractBuffersPool(AbstractBuffersPool<PoolT, BufferType, BufferParentType> &&bufferPool);
AbstractBuffersPool &operator=(AbstractBuffersPool &&) = delete;
virtual ~AbstractBuffersPool() = default;
void tryFreeFromPoolBuffer(BufferParentType *possiblePoolBuffer, size_t offset, size_t size);
bool isPoolBuffer(const BufferParentType *buffer) const;
void drain();
@@ -79,7 +81,7 @@ class AbstractBuffersAllocator : public SmallBuffersParams<BuffersPoolType> {
using Params::startingOffset;
static_assert(aggregatedSmallBuffersPoolSize > smallBufferThreshold, "Largest allowed buffer needs to fit in pool");
void releaseSmallBufferPool() { this->bufferPools.clear(); }
void releasePools() { this->bufferPools.clear(); }
bool isPoolBuffer(const BufferParentType *buffer) const;
void tryFreeFromPoolBuffer(BufferParentType *possiblePoolBuffer, size_t offset, size_t size);

View File

@@ -0,0 +1,133 @@
/*
* Copyright (C) 2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/utilities/isa_pool_allocator.h"
#include "shared/source/device/device.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/utilities/buffer_pool_allocator.inl"
namespace NEO {
/**
 * @brief Builds a pool backed by a single graphics allocation of storageSize bytes.
 *
 * @param[in] device device whose memory manager, root device index and device bitfield are used
 * @param[in] isBuiltin selects kernelIsaInternal (true) or kernelIsa (false) as the allocation type
 * @param[in] storageSize size requested for the backing allocation and handed to the chunk allocator
 *
 * @note the result of allocateGraphicsMemoryWithProperties is stored without a null check here.
 */
ISAPool::ISAPool(Device *device, bool isBuiltin, size_t storageSize)
    : BaseType(device->getMemoryManager(), nullptr),
      device(device),
      isBuiltin(isBuiltin),
      mtx(std::make_unique<std::mutex>()) {
    // Chunk bookkeeping starts at startingOffset, so a returned offset of 0 can signal failure
    // (see allocateISA).
    this->chunkAllocator.reset(new NEO::HeapAllocator(startingOffset, storageSize, MemoryConstants::pageSize, 0u));

    auto isaType = NEO::AllocationType::kernelIsa;
    if (isBuiltin) {
        isaType = NEO::AllocationType::kernelIsaInternal;
    }

    auto *backingAllocation = memoryManager->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(),
                                                                                   storageSize,
                                                                                   isaType,
                                                                                   device->getDeviceBitfield()});

    // The same allocation is both owned (mainStorage) and exposed through the allocations vector.
    this->stackVec.push_back(backingAllocation);
    this->mainStorage.reset(backingAllocation);
}
/**
 * @brief Move constructor: the base part is moved first, then the ISAPool-specific members.
 *
 * @param[in] pool pool to take over; its mutex and allocations vector are transferred,
 *                 device pointer and builtin flag are copied.
 */
ISAPool::ISAPool(ISAPool &&pool)
    : BaseType(std::move(pool)),
      device(pool.device),
      isBuiltin(pool.isBuiltin),
      stackVec(std::move(pool.stackVec)),
      mtx(std::move(pool.mtx)) {
    // Reading pool.* after BaseType(std::move(pool)) only touches members declared in ISAPool,
    // which the base-class move does not know about.
}
/**
 * @brief Returns the backing graphics allocation to the memory manager, if this pool still holds one.
 */
ISAPool::~ISAPool() {
    if (!mainStorage) {
        // Nothing to free (e.g. storage was handed over to another pool).
        return;
    }
    auto *manager = device->getMemoryManager();
    // release() drops ownership so the smart pointer does not delete what the manager frees.
    manager->freeGraphicsMemory(mainStorage.release());
}
/**
 * @brief Carves a chunk out of this pool and wraps it in a heap-allocated SharedIsaAllocation.
 *
 * @param[in] requestedSize number of bytes to reserve in the pool
 *
 * @return new SharedIsaAllocation (caller owns it), or nullptr when the chunk allocator returns 0
 */
SharedIsaAllocation *ISAPool::allocateISA(size_t requestedSize) const {
    const auto chunkOffset = static_cast<size_t>(this->chunkAllocator->allocate(requestedSize));
    const bool gotChunk = (chunkOffset != 0);
    if (gotChunk) {
        // Chunk offsets are biased by startingOffset; users get the offset relative to the GA.
        const auto offsetInStorage = chunkOffset - startingOffset;
        return new SharedIsaAllocation{this->mainStorage.get(), offsetInStorage, requestedSize, mtx.get()};
    }
    return nullptr;
}
/**
 * @brief Gives read-only access to the vector holding this pool's graphics allocation.
 *
 * @return reference to the vector the constructor filled with the backing allocation
 */
const StackVec<NEO::GraphicsAllocation *, 1> &ISAPool::getAllocationsVector() {
    return this->stackVec;
}
ISAPoolAllocator::ISAPoolAllocator(Device *device) : device(device) {
}
/**
 * @brief Allocates a SharedIsaAllocation for a single user (module or program).
 *
 * Steps, all performed under the allocator mutex:
 *  1. If the requested size exceeds the default pool size for this ISA kind,
 *     a new pool of exactly the requested size is added.
 *  2. Allocation is attempted on the existing pools.
 *  3. On failure, the pools are drained and the allocation is attempted again.
 *  4. On failure, a new default-sized pool is added and the allocation is attempted one last time.
 *
 * @param[in] isBuiltin flag specifying whether the ISA will be used for builtin kernels
 * @param[in] size size requested by the client
 *
 * @return SharedIsaAllocation, or nullptr if the allocation did not succeed
 */
SharedIsaAllocation *ISAPoolAllocator::requestGraphicsAllocationForIsa(bool isBuiltin, size_t size) {
    std::unique_lock<std::mutex> guard{allocatorMtx};

    const bool exceedsDefaultPool = size > getAllocationSize(isBuiltin);
    if (exceedsDefaultPool) {
        addNewBufferPool(ISAPool(device, isBuiltin, size));
    }

    if (auto *fromExistingPools = tryAllocateISA(isBuiltin, size)) {
        return fromExistingPools;
    }

    drain();
    if (auto *afterDrain = tryAllocateISA(isBuiltin, size)) {
        return afterDrain;
    }

    addNewBufferPool(ISAPool(device, isBuiltin, getAllocationSize(isBuiltin)));
    return tryAllocateISA(isBuiltin, size);
}
/**
 * @brief This method releases a SharedIsaAllocation.
 *
 * The chunk described by the allocation (graphics allocation, offset, size) is handed to
 * tryFreeFromPoolBuffer under the allocator mutex, then the SharedIsaAllocation object is deleted.
 *
 * @param[in] sharedIsaAllocation SharedIsaAllocation to free; nullptr is accepted and ignored,
 *                                since requestGraphicsAllocationForIsa may return nullptr.
 *
 * @note actual chunk is not released immediately, it's freed during drain call.
 */
void ISAPoolAllocator::freeSharedIsaAllocation(SharedIsaAllocation *sharedIsaAllocation) {
    if (sharedIsaAllocation == nullptr) {
        // Nothing was allocated, so there is nothing to return to a pool or delete.
        return;
    }
    std::unique_lock lock(allocatorMtx);
    tryFreeFromPoolBuffer(sharedIsaAllocation->getGraphicsAllocation(), sharedIsaAllocation->getOffset(), sharedIsaAllocation->getSize());
    delete sharedIsaAllocation;
}
/**
 * @brief Walks the existing pools in order and returns the first successful allocation.
 *        Pools whose builtin flag differs from the request are skipped.
 *
 * @param[in] isBuiltin flag specifying whether the ISA will be used for builtin kernels
 * @param[in] size size requested by the user
 *
 * @return SharedIsaAllocation, or nullptr if no matching pool could satisfy the request
 */
SharedIsaAllocation *ISAPoolAllocator::tryAllocateISA(bool isBuiltin, size_t size) {
    for (auto &poolBase : this->bufferPools) {
        auto &candidate = static_cast<ISAPool &>(poolBase);
        if (candidate.isBuiltinPool() != isBuiltin) {
            continue;
        }
        if (auto *isa = candidate.allocateISA(size)) {
            return isa;
        }
    }
    return nullptr;
}
} // namespace NEO

View File

@@ -0,0 +1,86 @@
/*
* Copyright (C) 2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/helpers/constants.h"
#include "shared/source/utilities/buffer_pool_allocator.h"
#include <mutex>
namespace NEO {
class GraphicsAllocation;
class Device;
/**
 * @brief Describes one user's slice of a shared graphics allocation:
 *        which allocation, at what offset, how many bytes, and the mutex shared by all of its users.
 *
 * The object does not own the graphics allocation or the mutex; it only refers to them.
 */
class SharedIsaAllocation {
  public:
    /**
     * @param[in] graphicsAllocation shared allocation this slice lives in
     * @param[in] offset offset of the slice
     * @param[in] size size of the slice
     * @param[in] mtx mutex guarding the shared allocation; dereferenced here, so it must not be null
     */
    SharedIsaAllocation(GraphicsAllocation *graphicsAllocation, size_t offset, size_t size, std::mutex *mtx)
        : graphicsAllocation{graphicsAllocation}, offset{offset}, size{size}, mtx{*mtx} {}

    GraphicsAllocation *getGraphicsAllocation() const { return graphicsAllocation; }
    size_t getOffset() const { return offset; }
    size_t getSize() const { return size; }

    /// Locks the shared mutex and returns the owning lock; it unlocks when the returned object dies.
    std::unique_lock<std::mutex> obtainSharedAllocationLock() {
        std::unique_lock<std::mutex> guard{mtx};
        return guard;
    }

  private:
    GraphicsAllocation *graphicsAllocation;
    const size_t offset;
    const size_t size;
    std::mutex &mtx; // This mutex is shared across all users of this GA
};
/**
 * @brief Each shared GA is maintained by a single ISAPool.
 *
 * The pool carries a builtin/user flag, a vector exposing its graphics allocation,
 * and the mutex that is handed to every SharedIsaAllocation created from it.
 */
class ISAPool : public AbstractBuffersPool<ISAPool, GraphicsAllocation> {
    using BaseType = AbstractBuffersPool<ISAPool, GraphicsAllocation>;

  public:
    /// Creates a pool of storageSize bytes for builtin or user ISA on the given device.
    ISAPool(Device *device, bool isBuiltin, size_t storageSize);
    /// Takes over another pool's state.
    ISAPool(ISAPool &&pool);
    ~ISAPool() override;

    /// True when this pool serves builtin kernels.
    bool isBuiltinPool() const { return isBuiltin; }
    /// Reserves requestedSize bytes; returns a new SharedIsaAllocation or nullptr.
    SharedIsaAllocation *allocateISA(size_t requestedSize) const;
    /// Vector holding the pool's graphics allocation.
    const StackVec<GraphicsAllocation *, 1> &getAllocationsVector();

  private:
    // Declaration order is kept as-is: it defines member initialization order.
    Device *device;
    bool isBuiltin;
    StackVec<GraphicsAllocation *, 1> stackVec;
    std::unique_ptr<std::mutex> mtx;
};
/**
 * @brief Hands out SharedIsaAllocation objects carved from ISAPool instances,
 *        creating pools on demand. Public entry points serialize on allocatorMtx.
 */
class ISAPoolAllocator : public AbstractBuffersAllocator<ISAPool, GraphicsAllocation> {
  public:
    ISAPoolAllocator(Device *device);

    /// Returns a shared ISA allocation of the given size, or nullptr on failure.
    SharedIsaAllocation *requestGraphicsAllocationForIsa(bool isBuiltin, size_t size);
    /// Returns the chunk to its pool and deletes the SharedIsaAllocation object.
    void freeSharedIsaAllocation(SharedIsaAllocation *sharedIsaAllocation);

  private:
    /// Tries every pool of the matching kind; nullptr when none can satisfy the request.
    SharedIsaAllocation *tryAllocateISA(bool isBuiltin, size_t size);

    /// Default pool size for the requested ISA kind.
    size_t getAllocationSize(bool isBuiltin) const {
        if (isBuiltin) {
            return buitinAllocationSize;
        }
        return userAllocationSize;
    }

    Device *device;
    size_t userAllocationSize = MemoryConstants::pageSize2M * 2; // default size of a user ISA pool
    size_t buitinAllocationSize = MemoryConstants::pageSize64k;  // default size of a builtin ISA pool
    std::mutex allocatorMtx;
};
} // namespace NEO