mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 14:02:58 +08:00
performance: share inter-module ISA allocations
Related-To: NEO-10258 Currently each module has its own GA for kernel ISAs. This change allows new modules to reuse an existing allocation. Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
349a86a8dc
commit
10ed479b16
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (C) 2019-2023 Intel Corporation
|
||||
# Copyright (C) 2019-2024 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
@@ -47,6 +47,8 @@ set(NEO_CORE_UTILITIES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/timer_util.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/wait_util.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/wait_util.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/isa_pool_allocator.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/isa_pool_allocator.h
|
||||
)
|
||||
|
||||
set(NEO_CORE_UTILITIES_WINDOWS
|
||||
|
||||
@@ -49,6 +49,8 @@ struct AbstractBuffersPool : public SmallBuffersParams<PoolT>, public NonCopyabl
|
||||
AbstractBuffersPool(MemoryManager *memoryManager, OnChunkFreeCallback onChunkFreeCallback);
|
||||
AbstractBuffersPool(AbstractBuffersPool<PoolT, BufferType, BufferParentType> &&bufferPool);
|
||||
AbstractBuffersPool &operator=(AbstractBuffersPool &&) = delete;
|
||||
virtual ~AbstractBuffersPool() = default;
|
||||
|
||||
void tryFreeFromPoolBuffer(BufferParentType *possiblePoolBuffer, size_t offset, size_t size);
|
||||
bool isPoolBuffer(const BufferParentType *buffer) const;
|
||||
void drain();
|
||||
@@ -79,7 +81,7 @@ class AbstractBuffersAllocator : public SmallBuffersParams<BuffersPoolType> {
|
||||
using Params::startingOffset;
|
||||
static_assert(aggregatedSmallBuffersPoolSize > smallBufferThreshold, "Largest allowed buffer needs to fit in pool");
|
||||
|
||||
void releaseSmallBufferPool() { this->bufferPools.clear(); }
|
||||
void releasePools() { this->bufferPools.clear(); }
|
||||
bool isPoolBuffer(const BufferParentType *buffer) const;
|
||||
void tryFreeFromPoolBuffer(BufferParentType *possiblePoolBuffer, size_t offset, size_t size);
|
||||
|
||||
|
||||
133
shared/source/utilities/isa_pool_allocator.cpp
Normal file
133
shared/source/utilities/isa_pool_allocator.cpp
Normal file
@@ -0,0 +1,133 @@
|
||||
/*
|
||||
* Copyright (C) 2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/utilities/isa_pool_allocator.h"
|
||||
|
||||
#include "shared/source/device/device.h"
|
||||
#include "shared/source/memory_manager/allocation_properties.h"
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
#include "shared/source/utilities/buffer_pool_allocator.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
/**
 * @brief Builds a pool backed by a single graphics allocation of storageSize bytes.
 *
 * @param[in] device device whose memory manager supplies the backing allocation
 * @param[in] isBuiltin true selects the kernelIsaInternal allocation type, false selects kernelIsa
 * @param[in] storageSize size requested for the backing allocation and handed to the chunk allocator
 */
ISAPool::ISAPool(Device *device, bool isBuiltin, size_t storageSize)
    : BaseType(device->getMemoryManager(), nullptr), device(device), isBuiltin(isBuiltin) {
    // Chunks are handed out from [startingOffset, ...) with page-size granularity.
    this->chunkAllocator.reset(new NEO::HeapAllocator(startingOffset, storageSize, MemoryConstants::pageSize, 0u));

    NEO::AllocationType isaType = NEO::AllocationType::kernelIsa;
    if (isBuiltin) {
        isaType = NEO::AllocationType::kernelIsaInternal;
    }

    // NOTE(review): the result is stored without a nullptr check - confirm callers tolerate a failed allocation.
    auto *backingStorage = memoryManager->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(),
                                                                                storageSize,
                                                                                isaType,
                                                                                device->getDeviceBitfield()});
    this->mainStorage.reset(backingStorage);
    this->stackVec.push_back(backingStorage);

    // One mutex per pool; it is handed to every SharedIsaAllocation created from this pool.
    this->mtx = std::make_unique<std::mutex>();
}
|
||||
|
||||
/**
 * @brief Move constructor: hands `pool` to the base-class move constructor and then
 * takes over its device pointer, builtin flag, allocations vector and mutex.
 */
ISAPool::ISAPool(ISAPool &&pool)
    : BaseType(std::move(pool)), device(pool.device), isBuiltin(pool.isBuiltin) {
    // The members read here belong to ISAPool itself, not to the base sub-object passed above.
    this->stackVec = std::move(pool.stackVec);
    this->mtx = std::move(pool.mtx);
}
|
||||
|
||||
/**
 * @brief Hands the backing graphics allocation back to the device's memory manager,
 * provided this pool still holds one.
 */
ISAPool::~ISAPool() {
    if (!mainStorage) {
        // Nothing to free when mainStorage is empty.
        return;
    }
    auto *ownedStorage = mainStorage.release();
    device->getMemoryManager()->freeGraphicsMemory(ownedStorage);
}
|
||||
|
||||
/**
 * @brief Reserves a chunk of this pool and wraps it in a new SharedIsaAllocation.
 *
 * @param[in] requestedSize number of bytes to reserve from the chunk allocator
 *
 * @return heap-allocated SharedIsaAllocation referring to the pool's storage, or nullptr
 *         when the chunk allocator returns 0
 *
 * @note the returned object is created with `new`; ISAPoolAllocator::freeSharedIsaAllocation deletes it.
 */
SharedIsaAllocation *ISAPool::allocateISA(size_t requestedSize) const {
    const auto chunkAddress = static_cast<size_t>(this->chunkAllocator->allocate(requestedSize));
    const bool chunkFound = (chunkAddress != 0);
    if (chunkFound) {
        // Chunk addresses start at startingOffset; translate to an offset inside the backing storage.
        const auto offsetInStorage = chunkAddress - startingOffset;
        return new SharedIsaAllocation{this->mainStorage.get(), offsetInStorage, requestedSize, mtx.get()};
    }
    return nullptr;
}
|
||||
|
||||
/**
 * @brief Gives read-only access to the vector that holds this pool's backing graphics allocation.
 */
const StackVec<NEO::GraphicsAllocation *, 1> &ISAPool::getAllocationsVector() {
    const auto &allocations = this->stackVec;
    return allocations;
}
|
||||
|
||||
/**
 * @brief Stores the device used later when new ISA pools are created.
 *        The constructor body itself creates no pool.
 */
ISAPoolAllocator::ISAPoolAllocator(Device *device)
    : device(device) {}
|
||||
|
||||
/**
|
||||
* @brief This method allocates SharedIsaAllocation object for a single user (module or program).
|
||||
* In first step, it checks if requested size for the ISA is higher than default pool size
|
||||
* and creates new ISA pool if it is.
|
||||
* Next, it tries to allocate using existing pools.
|
||||
* If failed, all existing pools are drained and performs allocation again.
|
||||
* If failed, creates another ISA pool and tries to allocate again.
|
||||
*
|
||||
* @param[in] isBuiltin flag specifying whether ISA will be used for builtin kernels
|
||||
* @param[in] size size requested by the client.
|
||||
*
|
||||
* @return returns SharedIsaAllocation or nullptr if allocation didn't succeeded
|
||||
*/
|
||||
SharedIsaAllocation *ISAPoolAllocator::requestGraphicsAllocationForIsa(bool isBuiltin, size_t size) {
|
||||
std::unique_lock lock(allocatorMtx);
|
||||
|
||||
auto maxAllocationSize = getAllocationSize(isBuiltin);
|
||||
|
||||
if (size > maxAllocationSize) {
|
||||
addNewBufferPool(ISAPool(device, isBuiltin, size));
|
||||
}
|
||||
|
||||
auto sharedIsaAllocation = tryAllocateISA(isBuiltin, size);
|
||||
if (sharedIsaAllocation) {
|
||||
return sharedIsaAllocation;
|
||||
}
|
||||
|
||||
drain();
|
||||
|
||||
sharedIsaAllocation = tryAllocateISA(isBuiltin, size);
|
||||
if (sharedIsaAllocation) {
|
||||
return sharedIsaAllocation;
|
||||
}
|
||||
|
||||
addNewBufferPool(ISAPool(device, isBuiltin, getAllocationSize(isBuiltin)));
|
||||
return tryAllocateISA(isBuiltin, size);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief This method releases SharedIsaAllocation.
|
||||
*
|
||||
* @param[in] sharedIsaAllocation SharedIsaAllocation to free.
|
||||
*
|
||||
* @note actual chunk is not released immediately, it's freed during drain call.
|
||||
*/
|
||||
void ISAPoolAllocator::freeSharedIsaAllocation(SharedIsaAllocation *sharedIsaAllocation) {
|
||||
std::unique_lock lock(allocatorMtx);
|
||||
tryFreeFromPoolBuffer(sharedIsaAllocation->getGraphicsAllocation(), sharedIsaAllocation->getOffset(), sharedIsaAllocation->getSize());
|
||||
delete sharedIsaAllocation;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief This method iterates over existing pools and tries to allocate shared isa allocation
|
||||
* on one of them. It will use only pools with correct isa type.
|
||||
*
|
||||
* @param[in] isBuiltin flag specifying whether ISA will be used for builtin kernels
|
||||
* @param[in] size size requested by the user.
|
||||
*
|
||||
* @return returns SharedIsaAllocation or nullptr if allocation didn't succeeded
|
||||
*/
|
||||
SharedIsaAllocation *ISAPoolAllocator::tryAllocateISA(bool isBuiltin, size_t size) {
|
||||
for (auto &isaPoolParent : this->bufferPools) {
|
||||
auto &isaPool = static_cast<ISAPool &>(isaPoolParent);
|
||||
if (isaPool.isBuiltinPool() == isBuiltin) {
|
||||
auto sharedIsaAllocation = isaPool.allocateISA(size);
|
||||
if (sharedIsaAllocation != nullptr) {
|
||||
return sharedIsaAllocation;
|
||||
}
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
86
shared/source/utilities/isa_pool_allocator.h
Normal file
86
shared/source/utilities/isa_pool_allocator.h
Normal file
@@ -0,0 +1,86 @@
|
||||
/*
|
||||
* Copyright (C) 2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/helpers/constants.h"
|
||||
#include "shared/source/utilities/buffer_pool_allocator.h"
|
||||
|
||||
#include <mutex>
|
||||
|
||||
namespace NEO {
|
||||
class GraphicsAllocation;
|
||||
class Device;
|
||||
|
||||
class SharedIsaAllocation {
|
||||
public:
|
||||
SharedIsaAllocation(GraphicsAllocation *graphicsAllocation, size_t offset, size_t size, std::mutex *mtx)
|
||||
: graphicsAllocation(graphicsAllocation), offset(offset), size(size), mtx(*mtx){};
|
||||
|
||||
GraphicsAllocation *getGraphicsAllocation() const {
|
||||
return graphicsAllocation;
|
||||
}
|
||||
|
||||
size_t getOffset() const {
|
||||
return offset;
|
||||
}
|
||||
|
||||
size_t getSize() const {
|
||||
return size;
|
||||
}
|
||||
|
||||
std::unique_lock<std::mutex> obtainSharedAllocationLock() {
|
||||
return std::unique_lock<std::mutex>(mtx);
|
||||
}
|
||||
|
||||
private:
|
||||
GraphicsAllocation *graphicsAllocation;
|
||||
const size_t offset;
|
||||
const size_t size;
|
||||
std::mutex &mtx; // This mutex is shared across all users of this GA
|
||||
};
|
||||
|
||||
// Each shared GA is maintained by single ISAPool
|
||||
class ISAPool : public AbstractBuffersPool<ISAPool, GraphicsAllocation> {
|
||||
using BaseType = AbstractBuffersPool<ISAPool, GraphicsAllocation>;
|
||||
|
||||
public:
|
||||
ISAPool(ISAPool &&pool);
|
||||
ISAPool(Device *device, bool isBuiltin, size_t storageSize);
|
||||
~ISAPool() override;
|
||||
|
||||
SharedIsaAllocation *allocateISA(size_t requestedSize) const;
|
||||
const StackVec<GraphicsAllocation *, 1> &getAllocationsVector();
|
||||
bool isBuiltinPool() const { return isBuiltin; }
|
||||
|
||||
private:
|
||||
Device *device;
|
||||
bool isBuiltin;
|
||||
StackVec<GraphicsAllocation *, 1> stackVec;
|
||||
std::unique_ptr<std::mutex> mtx;
|
||||
};
|
||||
|
||||
/**
 * Hands out SharedIsaAllocation objects carved from ISAPool instances.
 * The public request/free entry points take allocatorMtx before touching the pools.
 */
class ISAPoolAllocator : public AbstractBuffersAllocator<ISAPool, GraphicsAllocation> {
  public:
    ISAPoolAllocator(Device *device);

    // Returns a SharedIsaAllocation of the given size, or nullptr when allocation fails.
    SharedIsaAllocation *requestGraphicsAllocationForIsa(bool isBuiltin, size_t size);

    // Returns the chunk to its pool and deletes the SharedIsaAllocation object.
    void freeSharedIsaAllocation(SharedIsaAllocation *sharedIsaAllocation);

  private:
    SharedIsaAllocation *tryAllocateISA(bool isBuiltin, size_t size);

    // Default pool size for the given kind of ISA.
    size_t getAllocationSize(bool isBuiltin) const {
        if (isBuiltin) {
            return builtinAllocationSize;
        }
        return userAllocationSize;
    }

    Device *device;
    size_t userAllocationSize = MemoryConstants::pageSize2M * 2;
    size_t builtinAllocationSize = MemoryConstants::pageSize64k;
    std::mutex allocatorMtx;
};
|
||||
|
||||
} // namespace NEO
|
||||
Reference in New Issue
Block a user