// compute-runtime/opencl/source/context/context.h

/*
* Copyright (C) 2018-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once

#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/string.h"
#include "shared/source/utilities/buffer_pool_allocator.h"
#include "shared/source/utilities/stackvec.h"

#include "opencl/extensions/public/cl_ext_private.h"
#include "opencl/source/cl_device/cl_device_vector.h"
#include "opencl/source/context/context_type.h"
#include "opencl/source/context/driver_diagnostics.h"
#include "opencl/source/gtpin/gtpin_notify.h"
#include "opencl/source/helpers/base_object.h"
#include "opencl/source/helpers/destructor_callbacks.h"
#include "opencl/source/mem_obj/map_operations_handler.h"

#include <cstdint>
#include <limits>
#include <map>
#include <memory>
#include <mutex>
#include <vector>

enum class InternalMemoryType : uint32_t;

namespace NEO {

struct MemoryProperties;
class HeapAllocator;
class AsyncEventsHandler;
class CommandQueue;
class Device;
class Kernel;
class MemoryManager;
class SharingFunctions;
class SVMAllocsManager;
class Program;
class Platform;
class TagAllocatorBase;

template <>
struct OpenCLObjectMapper<_cl_context> {
    typedef class Context DerivedType;
};

class Context : public BaseObject<_cl_context> {
  public:
    using BufferAllocationsVec = StackVec<GraphicsAllocation *, 1>;

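    // Small-buffer pooling: BufferPool sub-allocates small cl_mem buffers from one larger
    // backing allocation, so each small clCreateBuffer call does not pay for a dedicated
    // GPU allocation. The pool and chunk size limits are defined by the shared
    // AbstractBuffersPool/AbstractBuffersAllocator helpers and the Context implementation,
    // not in this header.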
    struct BufferPool : public AbstractBuffersPool<BufferPool, Buffer, MemObj> {
        using BaseType = AbstractBuffersPool<BufferPool, Buffer, MemObj>;

        BufferPool(Context *context);
        Buffer *allocate(const MemoryProperties &memoryProperties,
                         cl_mem_flags flags,
                         cl_mem_flags_intel flagsIntel,
                         size_t requestedSize,
                         void *hostPtr,
                         cl_int &errcodeRet);
        const StackVec<NEO::GraphicsAllocation *, 1> &getAllocationsVector();
    };

    class BufferPoolAllocator : public AbstractBuffersAllocator<BufferPool, Buffer, MemObj> {
      public:
        bool isAggregatedSmallBuffersEnabled(Context *context) const;
        void initAggregatedSmallBuffers(Context *context);
        Buffer *allocateBufferFromPool(const MemoryProperties &memoryProperties,
                                       cl_mem_flags flags,
                                       cl_mem_flags_intel flagsIntel,
                                       size_t requestedSize,
                                       void *hostPtr,
                                       cl_int &errcodeRet);
        bool flagsAllowBufferFromPool(const cl_mem_flags &flags, const cl_mem_flags_intel &flagsIntel) const;

      protected:
        Buffer *allocateFromPools(const MemoryProperties &memoryProperties,
                                  cl_mem_flags flags,
                                  cl_mem_flags_intel flagsIntel,
                                  size_t requestedSize,
                                  void *hostPtr,
                                  cl_int &errcodeRet);

        Context *context{nullptr};
    };

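    // Illustrative sketch only (this call site is not part of this header): a buffer
    // factory would typically consult the pool allocator first and fall back to a
    // dedicated allocation when pooling does not apply, roughly:
    //
    //   auto &poolAllocator = context->getBufferPoolAllocator();
    //   Buffer *buffer = nullptr;
    //   if (poolAllocator.flagsAllowBufferFromPool(flags, flagsIntel)) {
    //       buffer = poolAllocator.allocateBufferFromPool(memoryProperties, flags, flagsIntel,
    //                                                     size, hostPtr, retVal);
    //   }
    //   if (buffer == nullptr) {
    //       // create a regular, non-pooled buffer
    //   }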
    static const cl_ulong objectMagic = 0xA4234321DC002130LL;

    bool createImpl(const cl_context_properties *properties,
                    const ClDeviceVector &devices,
                    void(CL_CALLBACK *pfnNotify)(const char *, const void *, size_t, void *),
                    void *userData, cl_int &errcodeRet);

    template <typename T>
    static T *create(const cl_context_properties *properties,
                     const ClDeviceVector &devices,
                     void(CL_CALLBACK *funcNotify)(const char *, const void *, size_t, void *),
                     void *data, cl_int &errcodeRet) {
        auto pContext = new T(funcNotify, data);
        if (!pContext->createImpl(properties, devices, funcNotify, data, errcodeRet)) {
            delete pContext;
            pContext = nullptr;
        } else {
            auto &bufferPoolAllocator = pContext->getBufferPoolAllocator();
            if (bufferPoolAllocator.isAggregatedSmallBuffersEnabled(pContext)) {
                bufferPoolAllocator.initAggregatedSmallBuffers(pContext);
            }
        }
        gtpinNotifyContextCreate(pContext);
        return pContext;
    }

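    // Illustrative sketch only: a clCreateContext-style entry point would invoke this
    // factory roughly as below; the local variable names are hypothetical, and this
    // assumes ClDeviceVector can be built from the incoming cl_device_id array.
    //
    //   ClDeviceVector deviceVector(deviceIds, numDevices);
    //   cl_int retVal = CL_SUCCESS;
    //   Context *context = Context::create<Context>(properties, deviceVector,
    //                                               pfnNotify, userData, retVal);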
    Context &operator=(const Context &) = delete;
    Context(const Context &) = delete;

    ~Context() override;

    cl_int setDestructorCallback(void(CL_CALLBACK *funcNotify)(cl_context, void *),
                                 void *userData);

    cl_int getInfo(cl_context_info paramName, size_t paramValueSize,
                   void *paramValue, size_t *paramValueSizeRet);

    cl_int getSupportedImageFormats(Device *device, cl_mem_flags flags,
                                    cl_mem_object_type imageType, cl_uint numEntries,
                                    cl_image_format *imageFormats, cl_uint *numImageFormats);

    size_t getNumDevices() const;
    bool containsMultipleSubDevices(uint32_t rootDeviceIndex) const;
    ClDevice *getDevice(size_t deviceOrdinal) const;

    MemoryManager *getMemoryManager() const {
        return memoryManager;
    }

    SVMAllocsManager *getSVMAllocsManager() const {
        return svmAllocsManager;
    }

    auto &getMapOperationsStorage() { return mapOperationsStorage; }

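    // The tryGetExisting* helpers let enqueue paths reuse an allocation that already covers
    // a user pointer (a tracked host-ptr allocation, an SVM allocation, or a mapped region)
    // instead of creating a temporary one; the out-parameters report the allocation found,
    // its memory type, and whether a CPU-side copy through that pointer is still allowed.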
    cl_int tryGetExistingHostPtrAllocation(const void *ptr,
                                           size_t size,
                                           uint32_t rootDeviceIndex,
                                           GraphicsAllocation *&allocation,
                                           InternalMemoryType &memoryType,
                                           bool &isCpuCopyAllowed);
    cl_int tryGetExistingSvmAllocation(const void *ptr,
                                       size_t size,
                                       uint32_t rootDeviceIndex,
                                       GraphicsAllocation *&allocation,
                                       InternalMemoryType &memoryType,
                                       bool &isCpuCopyAllowed);
    cl_int tryGetExistingMapAllocation(const void *ptr,
                                       size_t size,
                                       GraphicsAllocation *&allocation);

    const RootDeviceIndicesContainer &getRootDeviceIndices() const;
    uint32_t getMaxRootDeviceIndex() const;

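    // Special queues: one per root device, held by the context and used internally by the
    // driver rather than handed out to the application; overrideSpecialQueueAndDecrementRefCount
    // replaces the stored queue and, as the name says, drops the reference it held.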
    CommandQueue *getSpecialQueue(uint32_t rootDeviceIndex);
    void setSpecialQueue(CommandQueue *commandQueue, uint32_t rootDeviceIndex);
    void overrideSpecialQueueAndDecrementRefCount(CommandQueue *commandQueue, uint32_t rootDeviceIndex);

    template <typename Sharing>
    Sharing *getSharing();

    template <typename Sharing>
    void registerSharing(Sharing *sharing);

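    // Driver-diagnostics hints: driverDiagnostics is only created when the application opts
    // into driver diagnostics through the context properties (cl_intel_driver_diagnostics),
    // so hints are formatted and delivered through the context notification callback (and
    // optionally printed when the PrintDriverDiagnostics debug flag is set) only in that case.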
    template <typename... Args>
    void providePerformanceHint(cl_diagnostics_verbose_level flags, PerformanceHints performanceHint, Args &&...args) {
        DEBUG_BREAK_IF(contextCallback == nullptr);
        DEBUG_BREAK_IF(driverDiagnostics == nullptr);
        char hint[DriverDiagnostics::maxHintStringSize];
        snprintf_s(hint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::maxHintStringSize,
                   DriverDiagnostics::hintFormat[performanceHint], std::forward<Args>(args)..., 0);
        if (driverDiagnostics->validFlags(flags)) {
            if (contextCallback) {
                contextCallback(hint, &flags, sizeof(flags), userData);
            }
            if (debugManager.flags.PrintDriverDiagnostics.get() != -1) {
                printf("\n%s\n", hint);
            }
        }
    }

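    // Illustrative sketch only: a caller would emit a hint roughly like this; the hint id
    // shown is just an example value from the PerformanceHints enumeration.
    //
    //   if (context->isProvidingPerformanceHints()) {
    //       context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL,
    //                                       DRIVER_CALLS_INTERNAL_CL_FLUSH);
    //   }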
    template <typename... Args>
    void providePerformanceHintForMemoryTransfer(cl_command_type commandType, bool transferRequired, Args &&...args) {
        cl_diagnostics_verbose_level verboseLevel = transferRequired ? CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL
                                                                     : CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL;
        PerformanceHints hint = driverDiagnostics->obtainHintForTransferOperation(commandType, transferRequired);
        providePerformanceHint(verboseLevel, hint, args...);
    }

    cl_bool isProvidingPerformanceHints() const {
        return driverDiagnostics != nullptr;
    }

    bool getInteropUserSyncEnabled() { return interopUserSync; }
    void setInteropUserSyncEnabled(bool enabled) { interopUserSync = enabled; }

    bool areMultiStorageAllocationsPreferred();
    bool isSingleDeviceContext();

    ContextType peekContextType() const { return contextType; }

    bool isDeviceAssociated(const ClDevice &clDevice) const;
    ClDevice *getSubDeviceByIndex(uint32_t subDeviceIndex) const;

    AsyncEventsHandler &getAsyncEventsHandler() const;

    DeviceBitfield getDeviceBitfieldForAllocation(uint32_t rootDeviceIndex) const;

    bool getResolvesRequiredInKernels() const {
        return resolvesRequiredInKernels;
    }
    void setResolvesRequiredInKernels(bool resolves) {
        resolvesRequiredInKernels = resolves;
    }

    const ClDeviceVector &getDevices() const {
        return devices;
    }
    const std::map<uint32_t, DeviceBitfield> &getDeviceBitfields() const { return deviceBitfields; }

    static Platform *getPlatformFromProperties(const cl_context_properties *properties, cl_int &errcode);

    BufferPoolAllocator &getBufferPoolAllocator() {
        return smallBufferPoolAllocator;
    }

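    // The multi-root-device timestamp packet allocator appears to be created lazily; callers
    // take the lock returned by obtainOwnershipForMultiRootDeviceAllocator before installing
    // it with the setter below. The details live in the implementation, not in this header.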
    TagAllocatorBase *getMultiRootDeviceTimestampPacketAllocator();
    std::unique_lock<std::mutex> obtainOwnershipForMultiRootDeviceAllocator();
    void setMultiRootDeviceTimestampPacketAllocator(std::unique_ptr<TagAllocatorBase> &allocator);

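    // Non-zebin tracking: setContextAsNonZebin appears to mark contexts whose program
    // binaries use the legacy (pre-zebin) format, and checkIfContextIsNonZebin exposes
    // that state; the exact consumers are outside this header.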
    void setContextAsNonZebin();
    bool checkIfContextIsNonZebin() const;

  protected:
    struct BuiltInKernel {
        const char *pSource = nullptr;
        Program *pProgram = nullptr;
        std::once_flag programIsInitialized; // guard for creating+building the program
        Kernel *pKernel = nullptr;

        BuiltInKernel() {
        }
    };

    Context(void(CL_CALLBACK *pfnNotify)(const char *, const void *, size_t, void *) = nullptr,
            void *userData = nullptr);

    // OS specific implementation
    void *getOsContextInfo(cl_context_info &paramName, size_t *srcParamSize);

    void setupContextType();

    RootDeviceIndicesContainer rootDeviceIndices;
    std::map<uint32_t, DeviceBitfield> deviceBitfields;

    std::vector<std::unique_ptr<SharingFunctions>> sharingFunctions;
    ClDeviceVector devices;
    ContextDestructorCallbacks destructorCallbacks;

    const cl_context_properties *properties = nullptr;
    size_t numProperties = 0u;
    void(CL_CALLBACK *contextCallback)(const char *, const void *, size_t, void *) = nullptr;
    void *userData = nullptr;

    MemoryManager *memoryManager = nullptr;
    SVMAllocsManager *svmAllocsManager = nullptr;
    MapOperationsStorage mapOperationsStorage = {};
    StackVec<CommandQueue *, 1> specialQueues;
    DriverDiagnostics *driverDiagnostics = nullptr;
    BufferPoolAllocator smallBufferPoolAllocator;

    uint32_t maxRootDeviceIndex = std::numeric_limits<uint32_t>::max();
    cl_bool preferD3dSharedResources = 0u;
    ContextType contextType = ContextType::CONTEXT_TYPE_DEFAULT;

    std::unique_ptr<TagAllocatorBase> multiRootDeviceTimestampPacketAllocator;
    std::mutex multiRootDeviceAllocatorMtx;

    bool interopUserSync = false;
    bool resolvesRequiredInKernels = false;
    bool nonZebinContext = false;
};
} // namespace NEO
} // namespace NEO