Mirror of https://github.com/intel/compute-runtime.git (synced 2025-12-27 15:53:13 +08:00)
Refactor buffer pool allocator to support configurable SmallBuffersParams
based on product helper capabilities.

This patch enables setting custom pool parameters instead of using fixed
static values.

For devices with 2MB local memory alignment enabled
(is2MBLocalMemAlignmentEnabled), a larger pool configuration is used:
- Pool size: 16MB (up from 2MB)
- Threshold: 2MB (up from 1MB)
- Alignment: 64KB (unchanged)
- Starting offset: 64KB (unchanged)

This improves memory utilization on devices that support larger memory
alignments while keeping the original parameters for all other devices.

Key changes:
- Moved params from static template to instance member
- Added SmallBuffersParams struct with default/large configs
- Added constructor and setter methods for params configuration

Related-To: NEO-12287
Signed-off-by: Fabian Zwoliński <fabian.zwolinski@intel.com>
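The SmallBuffersParams struct itself lives in the shared buffer pool allocator (this file only includes shared/source/utilities/buffer_pool_allocator.inl), so it is not visible below. As a rough sketch of the configuration the commit describes: the member names aggregatedSmallBuffersPoolSize, chunkAlignment and startingOffset, plus getPreferredBufferPoolParams() and setParams(), are taken from the code in context.cpp; smallBufferThreshold and the getDefaultParams()/getLargePagesParams() helpers are assumed names, and NEO's MemoryConstants and ProductHelper declarations are assumed to be in scope.

// Hedged sketch only, not the shipped header.
struct SmallBuffersParams {
    size_t aggregatedSmallBuffersPoolSize = 2 * MemoryConstants::megaByte; // total size of one pool buffer
    size_t smallBufferThreshold = 1 * MemoryConstants::megaByte;           // largest request served from the pool (assumed name)
    size_t chunkAlignment = MemoryConstants::pageSize64k;                  // 64KB
    size_t startingOffset = MemoryConstants::pageSize64k;                  // 64KB

    static SmallBuffersParams getDefaultParams() { // assumed name
        return {2 * MemoryConstants::megaByte, 1 * MemoryConstants::megaByte,
                MemoryConstants::pageSize64k, MemoryConstants::pageSize64k};
    }

    static SmallBuffersParams getLargePagesParams() { // assumed name
        return {16 * MemoryConstants::megaByte, 2 * MemoryConstants::megaByte,
                MemoryConstants::pageSize64k, MemoryConstants::pageSize64k};
    }

    // Pick the larger configuration when the product enables 2MB local
    // memory alignment, as the commit message states.
    static SmallBuffersParams getPreferredBufferPoolParams(const ProductHelper &productHelper) {
        return productHelper.is2MBLocalMemAlignmentEnabled() ? getLargePagesParams()
                                                             : getDefaultParams();
    }
};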
/*
 * Copyright (C) 2018-2025 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "opencl/source/context/context.h"

#include "shared/source/ail/ail_configuration.h"
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/compiler_interface/compiler_interface.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/device/sub_device.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/get_info.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/memory_manager/deferred_deleter.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/source/utilities/buffer_pool_allocator.inl"
#include "shared/source/utilities/heap_allocator.h"
#include "shared/source/utilities/staging_buffer_manager.h"
#include "shared/source/utilities/tag_allocator.h"

#include "opencl/source/cl_device/cl_device.h"
#include "opencl/source/command_queue/command_queue.h"
#include "opencl/source/execution_environment/cl_execution_environment.h"
#include "opencl/source/gtpin/gtpin_notify.h"
#include "opencl/source/helpers/cl_validators.h"
#include "opencl/source/helpers/get_info_status_mapper.h"
#include "opencl/source/helpers/surface_formats.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/source/mem_obj/image.h"
#include "opencl/source/platform/platform.h"
#include "opencl/source/sharings/sharing.h"
#include "opencl/source/sharings/sharing_factory.h"

#include "d3d_sharing_functions.h"

#include <algorithm>
#include <memory>

namespace NEO {

Context::Context(
    void(CL_CALLBACK *funcNotify)(const char *, const void *, size_t, void *),
    void *data) {
    contextCallback = funcNotify;
    userData = data;
    sharingFunctions.resize(SharingType::MAX_SHARING_VALUE);
}

Context::~Context() {
    gtpinNotifyContextDestroy((cl_context)this);

    if (multiRootDeviceTimestampPacketAllocator.get() != nullptr) {
        multiRootDeviceTimestampPacketAllocator.reset();
    }

    if (smallBufferPoolAllocator.isAggregatedSmallBuffersEnabled(this)) {
        auto &device = this->getDevice(0)->getDevice();
        device.recordPoolsFreed(smallBufferPoolAllocator.getPoolsCount());
        smallBufferPoolAllocator.releasePools();
    }

    cleanupUsmAllocationPools();

    delete[] properties;

    for (auto rootDeviceIndex = 0u; rootDeviceIndex < specialQueues.size(); rootDeviceIndex++) {
        if (specialQueues[rootDeviceIndex]) {
            delete specialQueues[rootDeviceIndex];
        }
    }
    if (svmAllocsManager) {
        this->stagingBufferManager.reset();
        svmAllocsManager->cleanupUSMAllocCaches();
        delete svmAllocsManager;
    }
    if (driverDiagnostics) {
        delete driverDiagnostics;
    }
    if (memoryManager && memoryManager->isAsyncDeleterEnabled()) {
        memoryManager->getDeferredDeleter()->removeClient();
    }
    destructorCallbacks.invoke(this);
    for (auto &device : devices) {
        device->decRefInternal();
    }
}

cl_int Context::setDestructorCallback(void(CL_CALLBACK *funcNotify)(cl_context, void *),
                                      void *userData) {
    std::unique_lock<std::mutex> theLock(mtx);
    destructorCallbacks.add(funcNotify, userData);
    return CL_SUCCESS;
}

cl_int Context::tryGetExistingHostPtrAllocation(const void *ptr,
                                                size_t size,
                                                uint32_t rootDeviceIndex,
                                                GraphicsAllocation *&allocation,
                                                InternalMemoryType &memoryType,
                                                bool &isCpuCopyAllowed) {
    cl_int retVal = tryGetExistingSvmAllocation(ptr, size, rootDeviceIndex, allocation, memoryType, isCpuCopyAllowed);
    if (retVal != CL_SUCCESS || allocation != nullptr) {
        return retVal;
    }

    retVal = tryGetExistingMapAllocation(ptr, size, allocation);
    return retVal;
}

cl_int Context::tryGetExistingSvmAllocation(const void *ptr,
                                            size_t size,
                                            uint32_t rootDeviceIndex,
                                            GraphicsAllocation *&allocation,
                                            InternalMemoryType &memoryType,
                                            bool &isCpuCopyAllowed) {
    if (getSVMAllocsManager()) {
        SvmAllocationData *svmEntry = getSVMAllocsManager()->getSVMAlloc(ptr);
        if (svmEntry) {
            memoryType = svmEntry->memoryType;
            if ((svmEntry->gpuAllocations.getGraphicsAllocation(rootDeviceIndex)->getGpuAddress() + svmEntry->size) < (castToUint64(ptr) + size)) {
                return CL_INVALID_OPERATION;
            }
            allocation = svmEntry->cpuAllocation ? svmEntry->cpuAllocation : svmEntry->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
            if (isCpuCopyAllowed) {
                if (svmEntry->memoryType == InternalMemoryType::deviceUnifiedMemory) {
                    isCpuCopyAllowed = false;
                }
            }
        }
    }
    return CL_SUCCESS;
}

cl_int Context::tryGetExistingMapAllocation(const void *ptr,
                                            size_t size,
                                            GraphicsAllocation *&allocation) {
    if (MapInfo mapInfo = {}; mapOperationsStorage.getInfoForHostPtr(ptr, size, mapInfo)) {
        if (mapInfo.graphicsAllocation) {
            allocation = mapInfo.graphicsAllocation;
        }
    }
    return CL_SUCCESS;
}

const RootDeviceIndicesContainer &Context::getRootDeviceIndices() const {
    return rootDeviceIndices;
}

uint32_t Context::getMaxRootDeviceIndex() const {
    return maxRootDeviceIndex;
}

CommandQueue *Context::getSpecialQueue(uint32_t rootDeviceIndex) {
    if (specialQueues[rootDeviceIndex])
        return specialQueues[rootDeviceIndex];

    static std::mutex mtx;
    std::lock_guard lock(mtx);
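
    // Re-check under the lock: another thread may have created the special
    // queue between the unlocked check above and acquiring the mutex.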
    if (!specialQueues[rootDeviceIndex]) {
        cl_int errcodeRet = CL_SUCCESS;
        auto device = std::find_if(this->getDevices().begin(), this->getDevices().end(), [rootDeviceIndex](const auto &device) {
            return device->getRootDeviceIndex() == rootDeviceIndex;
        });

        auto commandQueue = CommandQueue::create(this, *device, nullptr, true, errcodeRet);
        DEBUG_BREAK_IF(commandQueue == nullptr);
        DEBUG_BREAK_IF(errcodeRet != CL_SUCCESS);
        overrideSpecialQueueAndDecrementRefCount(commandQueue, rootDeviceIndex);
    }

    return specialQueues[rootDeviceIndex];
}

void Context::setSpecialQueue(CommandQueue *commandQueue, uint32_t rootDeviceIndex) {
    specialQueues[rootDeviceIndex] = commandQueue;
}

void Context::overrideSpecialQueueAndDecrementRefCount(CommandQueue *commandQueue, uint32_t rootDeviceIndex) {
    commandQueue->setIsSpecialCommandQueue(true);

    // decrement ref count that special queue added
    this->decRefInternal();

    // above decRefInternal doesn't delete this
    setSpecialQueue(commandQueue, rootDeviceIndex); // NOLINT(clang-analyzer-cplusplus.NewDelete)
}

bool Context::areMultiStorageAllocationsPreferred() {
    return this->contextType != ContextType::CONTEXT_TYPE_SPECIALIZED;
}

bool Context::createImpl(const cl_context_properties *properties,
                         const ClDeviceVector &inputDevices,
                         void(CL_CALLBACK *funcNotify)(const char *, const void *, size_t, void *),
                         void *data, cl_int &errcodeRet) {
    errcodeRet = CL_SUCCESS;

    auto propertiesCurrent = properties;
    bool interopUserSync = false;
    int32_t driverDiagnosticsUsed = -1;
    auto sharingBuilder = sharingFactory.build();

    std::unique_ptr<DriverDiagnostics> driverDiagnostics;
    while (propertiesCurrent && *propertiesCurrent) {
        errcodeRet = CL_SUCCESS;

        auto propertyType = propertiesCurrent[0];
        auto propertyValue = propertiesCurrent[1];
        propertiesCurrent += 2;

        switch (propertyType) {
        case CL_CONTEXT_PLATFORM:
            break;
        case CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL:
            driverDiagnosticsUsed = static_cast<int32_t>(propertyValue);
            break;
        case CL_CONTEXT_INTEROP_USER_SYNC:
            interopUserSync = propertyValue > 0;
            break;
        default:
            if (!sharingBuilder->processProperties(propertyType, propertyValue)) {
                errcodeRet = CL_INVALID_PROPERTY;
                return false;
            }
            break;
        }
    }

    auto numProperties = ptrDiff(propertiesCurrent, properties) / sizeof(cl_context_properties);
    cl_context_properties *propertiesNew = nullptr;

    // copy the user properties if there are any
    if (numProperties) {
        propertiesNew = new cl_context_properties[numProperties + 1];
        memcpy_s(propertiesNew, (numProperties + 1) * sizeof(cl_context_properties), properties, numProperties * sizeof(cl_context_properties));
        propertiesNew[numProperties] = 0;
        numProperties++;
    }

    if (debugManager.flags.PrintDriverDiagnostics.get() != -1) {
        driverDiagnosticsUsed = debugManager.flags.PrintDriverDiagnostics.get();
    }
    if (driverDiagnosticsUsed >= 0) {
        driverDiagnostics.reset(new DriverDiagnostics((cl_diagnostics_verbose_level)driverDiagnosticsUsed));
    }

    this->numProperties = numProperties;
    this->properties = propertiesNew;
    this->setInteropUserSyncEnabled(interopUserSync);

    if (!sharingBuilder->finalizeProperties(*this, errcodeRet)) {
        return false;
    }

    bool containsDeviceWithSubdevices = false;
    for (const auto &device : inputDevices) {
        rootDeviceIndices.pushUnique(device->getRootDeviceIndex());
        containsDeviceWithSubdevices |= device->getNumGenericSubDevices() > 1;
    }

    this->driverDiagnostics = driverDiagnostics.release();
    if (rootDeviceIndices.size() > 1 && containsDeviceWithSubdevices && !debugManager.flags.EnableMultiRootDeviceContexts.get()) {
        DEBUG_BREAK_IF("No support for context with multiple devices with subdevices");
        errcodeRet = CL_OUT_OF_HOST_MEMORY;
        return false;
    }

    devices = inputDevices;
    for (auto &rootDeviceIndex : rootDeviceIndices) {
        DeviceBitfield deviceBitfield{};
        for (const auto &pDevice : devices) {
            if (pDevice->getRootDeviceIndex() == rootDeviceIndex) {
                deviceBitfield |= pDevice->getDeviceBitfield();
            }
            for (auto &engine : pDevice->getDevice().getAllEngines()) {
                engine.commandStreamReceiver->ensureTagAllocationForRootDeviceIndex(rootDeviceIndex);
            }
        }
        deviceBitfields.insert({rootDeviceIndex, deviceBitfield});
    }

    if (devices.size() > 0) {
        maxRootDeviceIndex = *std::max_element(rootDeviceIndices.begin(), rootDeviceIndices.end(), std::less<uint32_t const>());
        specialQueues.resize(maxRootDeviceIndex + 1u);
        auto device = this->getDevice(0);
        this->memoryManager = device->getMemoryManager();
        if (memoryManager->isAsyncDeleterEnabled()) {
            memoryManager->getDeferredDeleter()->addClient();
        }

        bool anySvmSupport = false;
        for (auto &device : devices) {
            device->incRefInternal();
            anySvmSupport |= device->getHardwareInfo().capabilityTable.ftrSvm;
        }

        setupContextType();
        if (anySvmSupport) {
            this->svmAllocsManager = new SVMAllocsManager(this->memoryManager,
                                                          this->areMultiStorageAllocationsPreferred());
            this->svmAllocsManager->initUsmAllocationsCaches(device->getDevice());
            auto requiresWritableStaging = device->getDefaultEngine().commandStreamReceiver->getType() != CommandStreamReceiverType::hardware;
            this->stagingBufferManager = std::make_unique<StagingBufferManager>(svmAllocsManager, rootDeviceIndices, deviceBitfields, requiresWritableStaging);
        }

        smallBufferPoolAllocator.setParams(SmallBuffersParams::getPreferredBufferPoolParams(device->getProductHelper()));
    }

    return true;
}

cl_int Context::getInfo(cl_context_info paramName, size_t paramValueSize,
                        void *paramValue, size_t *paramValueSizeRet) {
    cl_int retVal;
    size_t valueSize = GetInfo::invalidSourceSize;
    const void *pValue = nullptr;
    cl_uint numDevices;
    cl_uint refCount = 0;
    std::vector<cl_device_id> devIDs;
    auto callGetinfo = true;

    switch (paramName) {
    case CL_CONTEXT_DEVICES:
        valueSize = devices.size() * sizeof(cl_device_id);
        devices.toDeviceIDs(devIDs);
        pValue = devIDs.data();
        break;

    case CL_CONTEXT_NUM_DEVICES:
        numDevices = (cl_uint)(devices.size());
        valueSize = sizeof(numDevices);
        pValue = &numDevices;
        break;

    case CL_CONTEXT_PROPERTIES:
        valueSize = this->numProperties * sizeof(cl_context_properties);
        pValue = this->properties;
        if (valueSize == 0) {
            callGetinfo = false;
        }

        break;

    case CL_CONTEXT_REFERENCE_COUNT:
        refCount = static_cast<cl_uint>(this->getReference());
        valueSize = sizeof(refCount);
        pValue = &refCount;
        break;

    default:
        pValue = getOsContextInfo(paramName, &valueSize);
        break;
    }

    GetInfoStatus getInfoStatus = GetInfoStatus::success;
    if (callGetinfo) {
        getInfoStatus = GetInfo::getInfo(paramValue, paramValueSize, pValue, valueSize);
    }

    retVal = changeGetInfoStatusToCLResultType(getInfoStatus);
    GetInfo::setParamValueReturnSize(paramValueSizeRet, valueSize, getInfoStatus);

    return retVal;
}

size_t Context::getNumDevices() const {
    return devices.size();
}

bool Context::containsMultipleSubDevices(uint32_t rootDeviceIndex) const {
    return deviceBitfields.at(rootDeviceIndex).count() > 1;
}

ClDevice *Context::getDevice(size_t deviceOrdinal) const {
    return (ClDevice *)devices[deviceOrdinal];
}

cl_int Context::getSupportedImageFormats(
    Device *device,
    cl_mem_flags flags,
    cl_mem_object_type imageType,
    cl_uint numEntries,
    cl_image_format *imageFormats,
    cl_uint *numImageFormatsReturned) {
    size_t numImageFormats = 0;

    const bool nv12ExtensionEnabled = device->getSpecializedDevice<ClDevice>()->getDeviceInfo().nv12Extension;
    const bool packedYuvExtensionEnabled = device->getSpecializedDevice<ClDevice>()->getDeviceInfo().packedYuvExtension;

    auto appendImageFormats = [&](ArrayRef<const ClSurfaceFormatInfo> formats) {
        if (imageFormats) {
            size_t offset = numImageFormats;
            for (size_t i = 0; i < formats.size() && offset < numEntries; ++i) {
                imageFormats[offset++] = formats[i].oclImageFormat;
            }
        }
        numImageFormats += formats.size();
    };

    if (flags & CL_MEM_READ_ONLY) {
        if (this->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features) {
            appendImageFormats(SurfaceFormats::readOnly20());
        } else {
            appendImageFormats(SurfaceFormats::readOnly12());
        }
        if (Image::isImage2d(imageType) && nv12ExtensionEnabled) {
            appendImageFormats(SurfaceFormats::planarYuv());
        }
        if (Image::isImage2dOr2dArray(imageType)) {
            appendImageFormats(SurfaceFormats::readOnlyDepth());
        }
        if (Image::isImage2d(imageType) && packedYuvExtensionEnabled) {
            appendImageFormats(SurfaceFormats::packedYuv());
        }
    } else if (flags & CL_MEM_WRITE_ONLY) {
        appendImageFormats(SurfaceFormats::writeOnly());
        if (Image::isImage2dOr2dArray(imageType)) {
            appendImageFormats(SurfaceFormats::readWriteDepth());
        }
    } else if (nv12ExtensionEnabled && (flags & CL_MEM_NO_ACCESS_INTEL)) {
        if (this->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features) {
            appendImageFormats(SurfaceFormats::readOnly20());
        } else {
            appendImageFormats(SurfaceFormats::readOnly12());
        }
        if (Image::isImage2d(imageType)) {
            appendImageFormats(SurfaceFormats::planarYuv());
        }
    } else {
        appendImageFormats(SurfaceFormats::readWrite());
        if (Image::isImage2dOr2dArray(imageType)) {
            appendImageFormats(SurfaceFormats::readWriteDepth());
        }
    }
    if (numImageFormatsReturned) {
        *numImageFormatsReturned = static_cast<cl_uint>(numImageFormats);
    }
    return CL_SUCCESS;
}

bool Context::isDeviceAssociated(const ClDevice &clDevice) const {
    for (const auto &pDevice : devices) {
        if (pDevice == &clDevice) {
            return true;
        }
    }
    return false;
}

ClDevice *Context::getSubDeviceByIndex(uint32_t subDeviceIndex) const {

    auto isExpectedSubDevice = [subDeviceIndex](ClDevice *pClDevice) -> bool {
        bool isSubDevice = (pClDevice->getDeviceInfo().parentDevice != nullptr);
        if (isSubDevice == false) {
            return false;
        }

        auto &subDevice = static_cast<SubDevice &>(pClDevice->getDevice());
        return (subDevice.getSubDeviceIndex() == subDeviceIndex);
    };

    auto foundDeviceIterator = std::find_if(devices.begin(), devices.end(), isExpectedSubDevice);
    return (foundDeviceIterator != devices.end() ? *foundDeviceIterator : nullptr);
}

AsyncEventsHandler &Context::getAsyncEventsHandler() const {
    return *static_cast<ClExecutionEnvironment *>(devices[0]->getExecutionEnvironment())->getAsyncEventsHandler();
}

DeviceBitfield Context::getDeviceBitfieldForAllocation(uint32_t rootDeviceIndex) const {
    return deviceBitfields.at(rootDeviceIndex);
}

void Context::setupContextType() {
    if (contextType == ContextType::CONTEXT_TYPE_DEFAULT) {
        if (devices.size() > 1) {
            for (const auto &pDevice : devices) {
                if (!pDevice->getDeviceInfo().parentDevice) {
                    contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE;
                    return;
                }
            }
        }
        if (devices[0]->getDeviceInfo().parentDevice) {
            contextType = ContextType::CONTEXT_TYPE_SPECIALIZED;
        }
    }
}

Platform *Context::getPlatformFromProperties(const cl_context_properties *properties, cl_int &errcode) {
    errcode = CL_SUCCESS;
    auto propertiesCurrent = properties;
    while (propertiesCurrent && *propertiesCurrent) {
        auto propertyType = propertiesCurrent[0];
        auto propertyValue = propertiesCurrent[1];
        propertiesCurrent += 2;
        if (CL_CONTEXT_PLATFORM == propertyType) {
            Platform *pPlatform = nullptr;
            errcode = validateObject(withCastToInternal(reinterpret_cast<cl_platform_id>(propertyValue), &pPlatform));
            return pPlatform;
        }
    }
    return nullptr;
}

bool Context::isSingleDeviceContext() {
    return getNumDevices() == 1 && devices[0]->getNumGenericSubDevices() == 0;
}

void Context::initializeUsmAllocationPools() {
    if (this->usmPoolInitialized) {
        return;
    }
    auto svmMemoryManager = getSVMAllocsManager();
    if (!(svmMemoryManager && this->isSingleDeviceContext())) {
        return;
    }

    TakeOwnershipWrapper<Context> lock(*this);
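    // Re-check after taking ownership of the context: another thread may have
    // finished initializing the pools while this one waited for the lock.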
    if (this->usmPoolInitialized) {
        return;
    }

    auto &productHelper = getDevices()[0]->getProductHelper();
    bool enabled = ApiSpecificConfig::isDeviceUsmPoolingEnabled() && productHelper.isUsmPoolAllocatorSupported();
    size_t poolSize = 2 * MemoryConstants::megaByte;
    if (debugManager.flags.EnableDeviceUsmAllocationPool.get() != -1) {
        enabled = debugManager.flags.EnableDeviceUsmAllocationPool.get() > 0;
        poolSize = debugManager.flags.EnableDeviceUsmAllocationPool.get() * MemoryConstants::megaByte;
    }
    if (enabled) {
        auto subDeviceBitfields = getDeviceBitfields();
        auto &neoDevice = devices[0]->getDevice();
        subDeviceBitfields[neoDevice.getRootDeviceIndex()] = neoDevice.getDeviceBitfield();
        SVMAllocsManager::UnifiedMemoryProperties memoryProperties(InternalMemoryType::deviceUnifiedMemory, MemoryConstants::pageSize2M,
                                                                   getRootDeviceIndices(), subDeviceBitfields);
        memoryProperties.device = &neoDevice;
        usmDeviceMemAllocPool.initialize(svmMemoryManager, memoryProperties, poolSize, 0u, 1 * MemoryConstants::megaByte);
    }

    enabled = ApiSpecificConfig::isHostUsmPoolingEnabled() && productHelper.isUsmPoolAllocatorSupported();
    poolSize = 2 * MemoryConstants::megaByte;
    if (debugManager.flags.EnableHostUsmAllocationPool.get() != -1) {
        enabled = debugManager.flags.EnableHostUsmAllocationPool.get() > 0;
        poolSize = debugManager.flags.EnableHostUsmAllocationPool.get() * MemoryConstants::megaByte;
    }
    if (enabled) {
        auto subDeviceBitfields = getDeviceBitfields();
        auto &neoDevice = devices[0]->getDevice();
        subDeviceBitfields[neoDevice.getRootDeviceIndex()] = neoDevice.getDeviceBitfield();
        SVMAllocsManager::UnifiedMemoryProperties memoryProperties(InternalMemoryType::hostUnifiedMemory, MemoryConstants::pageSize2M,
                                                                   getRootDeviceIndices(), subDeviceBitfields);
        usmHostMemAllocPool.initialize(svmMemoryManager, memoryProperties, poolSize, 0u, 1 * MemoryConstants::megaByte);
    }
    this->usmPoolInitialized = true;
}

void Context::cleanupUsmAllocationPools() {
    usmDeviceMemAllocPool.cleanup();
    usmHostMemAllocPool.cleanup();
}

bool Context::BufferPoolAllocator::isAggregatedSmallBuffersEnabled(Context *context) const {
    bool isSupportedForSingleDeviceContexts = false;
    bool isSupportedForAllContexts = false;
    if (context->getNumDevices() > 0) {
        auto ailConfiguration = context->getDevices()[0]->getRootDeviceEnvironment().getAILConfigurationHelper();
        auto &productHelper = context->getDevices()[0]->getProductHelper();
        isSupportedForSingleDeviceContexts = productHelper.isBufferPoolAllocatorSupported() && (ailConfiguration ? ailConfiguration->isBufferPoolEnabled() : true);
    }
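
    // ExperimentalSmallBufferPoolAllocator overrides the product defaults:
    // 0 disables pooling, 1 enables it for single-device contexts, >= 2 for all contexts.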
    if (debugManager.flags.ExperimentalSmallBufferPoolAllocator.get() != -1) {
        isSupportedForSingleDeviceContexts = debugManager.flags.ExperimentalSmallBufferPoolAllocator.get() >= 1;
        isSupportedForAllContexts = debugManager.flags.ExperimentalSmallBufferPoolAllocator.get() >= 2;
    }

    return isSupportedForAllContexts ||
           (isSupportedForSingleDeviceContexts && context->isSingleDeviceContext());
}

Context::BufferPool::BufferPool(Context *context) : BaseType(context->memoryManager,
                                                             nullptr,
                                                             SmallBuffersParams::getPreferredBufferPoolParams(context->getDevice(0)->getDevice().getProductHelper())) {
    static constexpr cl_mem_flags flags = CL_MEM_UNCOMPRESSED_HINT_INTEL;
    [[maybe_unused]] cl_int errcodeRet{};
    Buffer::AdditionalBufferCreateArgs bufferCreateArgs{};
    bufferCreateArgs.doNotProvidePerformanceHints = true;
    bufferCreateArgs.makeAllocationLockable = true;
    this->mainStorage.reset(Buffer::create(context,
                                           flags,
                                           context->getBufferPoolAllocator().getParams().aggregatedSmallBuffersPoolSize,
                                           nullptr,
                                           bufferCreateArgs,
                                           errcodeRet));
    if (this->mainStorage) {
        this->chunkAllocator.reset(new HeapAllocator(params.startingOffset,
                                                     context->getBufferPoolAllocator().getParams().aggregatedSmallBuffersPoolSize,
                                                     context->getBufferPoolAllocator().getParams().chunkAlignment));
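        // Buffer::create retained the context; drop that extra reference so the
        // pool, which the context itself owns, does not keep the context alive.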
        context->decRefInternal();
    }
}

const StackVec<NEO::GraphicsAllocation *, 1> &Context::BufferPool::getAllocationsVector() {
    return this->mainStorage->getMultiGraphicsAllocation().getGraphicsAllocations();
}

Buffer *Context::BufferPool::allocate(const MemoryProperties &memoryProperties,
                                      cl_mem_flags flags,
                                      cl_mem_flags_intel flagsIntel,
                                      size_t requestedSize,
                                      void *hostPtr,
                                      cl_int &errcodeRet) {
    cl_buffer_region bufferRegion{};
    size_t actualSize = requestedSize;
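    // The chunk allocator hands out offsets starting at params.startingOffset,
    // so a return value of 0 unambiguously signals failure; the offset is
    // subtracted afterwards to get the real position within the pool buffer.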
    bufferRegion.origin = static_cast<size_t>(this->chunkAllocator->allocate(actualSize));
    if (bufferRegion.origin == 0) {
        return nullptr;
    }
    bufferRegion.origin -= params.startingOffset;
    bufferRegion.size = requestedSize;
    auto bufferFromPool = this->mainStorage->createSubBuffer(flags, flagsIntel, &bufferRegion, errcodeRet);
    bufferFromPool->createFunction = this->mainStorage->createFunction;
    bufferFromPool->setSizeInPoolAllocator(actualSize);
    return bufferFromPool;
}

void Context::BufferPoolAllocator::initAggregatedSmallBuffers(Context *context) {
    this->context = context;
    auto &device = context->getDevice(0)->getDevice();
    if (device.requestPoolCreate(1u)) {
        this->addNewBufferPool(Context::BufferPool{this->context});
    }
}

Buffer *Context::BufferPoolAllocator::allocateBufferFromPool(const MemoryProperties &memoryProperties,
                                                             cl_mem_flags flags,
                                                             cl_mem_flags_intel flagsIntel,
                                                             size_t requestedSize,
                                                             void *hostPtr,
                                                             cl_int &errcodeRet) {
    errcodeRet = CL_MEM_OBJECT_ALLOCATION_FAILURE;
    if (this->bufferPools.empty() ||
        !this->isSizeWithinThreshold(requestedSize) ||
        !flagsAllowBufferFromPool(flags, flagsIntel)) {
        return nullptr;
    }

    auto lock = std::unique_lock<std::mutex>(mutex);
    auto bufferFromPool = this->allocateFromPools(memoryProperties, flags, flagsIntel, requestedSize, hostPtr, errcodeRet);
    if (bufferFromPool != nullptr) {
        return bufferFromPool;
    }
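
    // No free chunk in the existing pools: return completed chunks to the
    // pools and retry before attempting to grow by a new pool.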
    this->drain();

    bufferFromPool = this->allocateFromPools(memoryProperties, flags, flagsIntel, requestedSize, hostPtr, errcodeRet);
    if (bufferFromPool != nullptr) {
        for (const auto rootDeviceIndex : this->context->getRootDeviceIndices()) {
            auto cmdQ = this->context->getSpecialQueue(rootDeviceIndex);
            if (cmdQ->getDevice().getProductHelper().isDcFlushMitigated()) {
                auto &csr = cmdQ->getGpgpuCommandStreamReceiver();
                auto lock = csr.obtainUniqueOwnership();
                csr.registerDcFlushForDcMitigation();
                csr.flushTagUpdate();
            }
        }

        return bufferFromPool;
    }

    auto &device = context->getDevice(0)->getDevice();
    if (device.requestPoolCreate(1u)) {
        this->addNewBufferPool(BufferPool{this->context});
        return this->allocateFromPools(memoryProperties, flags, flagsIntel, requestedSize, hostPtr, errcodeRet);
    }
    return nullptr;
}

Buffer *Context::BufferPoolAllocator::allocateFromPools(const MemoryProperties &memoryProperties,
                                                        cl_mem_flags flags,
                                                        cl_mem_flags_intel flagsIntel,
                                                        size_t requestedSize,
                                                        void *hostPtr,
                                                        cl_int &errcodeRet) {
    for (auto &bufferPoolParent : this->bufferPools) {
        auto &bufferPool = static_cast<BufferPool &>(bufferPoolParent);
        auto bufferFromPool = bufferPool.allocate(memoryProperties, flags, flagsIntel, requestedSize, hostPtr, errcodeRet);
        if (bufferFromPool != nullptr) {
            return bufferFromPool;
        }
    }

    return nullptr;
}

TagAllocatorBase *Context::getMultiRootDeviceTimestampPacketAllocator() {
    return multiRootDeviceTimestampPacketAllocator.get();
}

void Context::setMultiRootDeviceTimestampPacketAllocator(std::unique_ptr<TagAllocatorBase> &allocator) {
    multiRootDeviceTimestampPacketAllocator = std::move(allocator);
}

std::unique_lock<std::mutex> Context::obtainOwnershipForMultiRootDeviceAllocator() {
    return std::unique_lock<std::mutex>(multiRootDeviceAllocatorMtx);
}

void Context::setContextAsNonZebin() {
    this->nonZebinContext = true;
}

bool Context::checkIfContextIsNonZebin() const {
    return this->nonZebinContext;
}

StagingBufferManager *Context::getStagingBufferManager() const {
    return this->stagingBufferManager.get();
}

} // namespace NEO