/* * Copyright (C) 2018-2024 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/constants.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/unified_memory_pooling.h" #include "shared/source/utilities/buffer_pool_allocator.h" #include "shared/source/utilities/stackvec.h" #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/cl_device/cl_device_vector.h" #include "opencl/source/context/context_type.h" #include "opencl/source/context/driver_diagnostics.h" #include "opencl/source/gtpin/gtpin_notify.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/helpers/destructor_callbacks.h" #include "opencl/source/mem_obj/map_operations_handler.h" #include enum class InternalMemoryType : uint32_t; namespace NEO { struct MemoryProperties; class HeapAllocator; class AsyncEventsHandler; class CommandQueue; class Device; class Kernel; class MemoryManager; class SharingFunctions; class SVMAllocsManager; class Program; class Platform; class TagAllocatorBase; class StagingBufferManager; template <> struct OpenCLObjectMapper<_cl_context> { typedef class Context DerivedType; }; class Context : public BaseObject<_cl_context> { using UsmHostMemAllocPool = UsmMemAllocPool; using UsmDeviceMemAllocPool = UsmMemAllocPool; public: using BufferAllocationsVec = StackVec; struct BufferPool : public AbstractBuffersPool { using BaseType = AbstractBuffersPool; BufferPool(Context *context); Buffer *allocate(const MemoryProperties &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t requestedSize, void *hostPtr, cl_int &errcodeRet); const StackVec &getAllocationsVector(); }; class BufferPoolAllocator : public AbstractBuffersAllocator { public: bool isAggregatedSmallBuffersEnabled(Context *context) const; void initAggregatedSmallBuffers(Context *context); Buffer *allocateBufferFromPool(const MemoryProperties &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t requestedSize, void *hostPtr, cl_int &errcodeRet); bool flagsAllowBufferFromPool(const cl_mem_flags &flags, const cl_mem_flags_intel &flagsIntel) const; protected: Buffer *allocateFromPools(const MemoryProperties &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t requestedSize, void *hostPtr, cl_int &errcodeRet); static inline size_t calculateMaxPoolCount(uint64_t totalMemory, size_t percentOfMemory) { const auto maxPoolCount = static_cast(totalMemory * (percentOfMemory / 100.0) / BufferPoolAllocator::aggregatedSmallBuffersPoolSize); return maxPoolCount ? maxPoolCount : 1u; } Context *context{nullptr}; size_t maxPoolCount{1u}; }; static const cl_ulong objectMagic = 0xA4234321DC002130LL; bool createImpl(const cl_context_properties *properties, const ClDeviceVector &devices, void(CL_CALLBACK *pfnNotify)(const char *, const void *, size_t, void *), void *userData, cl_int &errcodeRet); template static T *create(const cl_context_properties *properties, const ClDeviceVector &devices, void(CL_CALLBACK *funcNotify)(const char *, const void *, size_t, void *), void *data, cl_int &errcodeRet) { auto pContext = new T(funcNotify, data); if (!pContext->createImpl(properties, devices, funcNotify, data, errcodeRet)) { delete pContext; pContext = nullptr; } else { auto &bufferPoolAllocator = pContext->getBufferPoolAllocator(); if (bufferPoolAllocator.isAggregatedSmallBuffersEnabled(pContext)) { bufferPoolAllocator.initAggregatedSmallBuffers(pContext); } } gtpinNotifyContextCreate(pContext); return pContext; } Context &operator=(const Context &) = delete; Context(const Context &) = delete; ~Context() override; cl_int setDestructorCallback(void(CL_CALLBACK *funcNotify)(cl_context, void *), void *userData); cl_int getInfo(cl_context_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_int getSupportedImageFormats(Device *device, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint numEntries, cl_image_format *imageFormats, cl_uint *numImageFormats); size_t getNumDevices() const; bool containsMultipleSubDevices(uint32_t rootDeviceIndex) const; ClDevice *getDevice(size_t deviceOrdinal) const; MemoryManager *getMemoryManager() const { return memoryManager; } SVMAllocsManager *getSVMAllocsManager() const { return svmAllocsManager; } auto &getMapOperationsStorage() { return mapOperationsStorage; } cl_int tryGetExistingHostPtrAllocation(const void *ptr, size_t size, uint32_t rootDeviceIndex, GraphicsAllocation *&allocation, InternalMemoryType &memoryType, bool &isCpuCopyAllowed); cl_int tryGetExistingSvmAllocation(const void *ptr, size_t size, uint32_t rootDeviceIndex, GraphicsAllocation *&allocation, InternalMemoryType &memoryType, bool &isCpuCopyAllowed); cl_int tryGetExistingMapAllocation(const void *ptr, size_t size, GraphicsAllocation *&allocation); const RootDeviceIndicesContainer &getRootDeviceIndices() const; uint32_t getMaxRootDeviceIndex() const; CommandQueue *getSpecialQueue(uint32_t rootDeviceIndex); void setSpecialQueue(CommandQueue *commandQueue, uint32_t rootDeviceIndex); void overrideSpecialQueueAndDecrementRefCount(CommandQueue *commandQueue, uint32_t rootDeviceIndex); template Sharing *getSharing(); template void registerSharing(Sharing *sharing); template void providePerformanceHint(cl_diagnostics_verbose_level flags, PerformanceHints performanceHint, Args &&...args) { DEBUG_BREAK_IF(contextCallback == nullptr); DEBUG_BREAK_IF(driverDiagnostics == nullptr); char hint[DriverDiagnostics::maxHintStringSize]; snprintf_s(hint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[performanceHint], std::forward(args)..., 0); if (driverDiagnostics->validFlags(flags)) { if (contextCallback) { contextCallback(hint, &flags, sizeof(flags), userData); } if (debugManager.flags.PrintDriverDiagnostics.get() != -1) { printf("\n%s\n", hint); } } } template void providePerformanceHintForMemoryTransfer(cl_command_type commandType, bool transferRequired, Args &&...args) { cl_diagnostics_verbose_level verboseLevel = transferRequired ? CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL : CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL; PerformanceHints hint = driverDiagnostics->obtainHintForTransferOperation(commandType, transferRequired); providePerformanceHint(verboseLevel, hint, args...); } cl_bool isProvidingPerformanceHints() const { return driverDiagnostics != nullptr; } bool getInteropUserSyncEnabled() { return interopUserSync; } void setInteropUserSyncEnabled(bool enabled) { interopUserSync = enabled; } bool areMultiStorageAllocationsPreferred(); bool isSingleDeviceContext(); ContextType peekContextType() const { return contextType; } bool isDeviceAssociated(const ClDevice &clDevice) const; ClDevice *getSubDeviceByIndex(uint32_t subDeviceIndex) const; AsyncEventsHandler &getAsyncEventsHandler() const; DeviceBitfield getDeviceBitfieldForAllocation(uint32_t rootDeviceIndex) const; bool getResolvesRequiredInKernels() const { return resolvesRequiredInKernels; } void setResolvesRequiredInKernels(bool resolves) { resolvesRequiredInKernels = resolves; } const ClDeviceVector &getDevices() const { return devices; } const std::map &getDeviceBitfields() const { return deviceBitfields; }; static Platform *getPlatformFromProperties(const cl_context_properties *properties, cl_int &errcode); BufferPoolAllocator &getBufferPoolAllocator() { return smallBufferPoolAllocator; } UsmMemAllocPool &getDeviceMemAllocPool() { return usmDeviceMemAllocPool; } UsmMemAllocPool &getHostMemAllocPool() { return usmHostMemAllocPool; } TagAllocatorBase *getMultiRootDeviceTimestampPacketAllocator(); std::unique_lock obtainOwnershipForMultiRootDeviceAllocator(); void setMultiRootDeviceTimestampPacketAllocator(std::unique_ptr &allocator); void setContextAsNonZebin(); bool checkIfContextIsNonZebin() const; void initializeUsmAllocationPools(); void cleanupUsmAllocationPools(); StagingBufferManager *getStagingBufferManager() const; protected: struct BuiltInKernel { const char *pSource = nullptr; Program *pProgram = nullptr; std::once_flag programIsInitialized; // guard for creating+building the program Kernel *pKernel = nullptr; BuiltInKernel() { } }; Context(void(CL_CALLBACK *pfnNotify)(const char *, const void *, size_t, void *) = nullptr, void *userData = nullptr); // OS specific implementation void *getOsContextInfo(cl_context_info ¶mName, size_t *srcParamSize); void setupContextType(); RootDeviceIndicesContainer rootDeviceIndices; std::map deviceBitfields; std::vector> sharingFunctions; ClDeviceVector devices; ContextDestructorCallbacks destructorCallbacks; const cl_context_properties *properties = nullptr; size_t numProperties = 0u; void(CL_CALLBACK *contextCallback)(const char *, const void *, size_t, void *) = nullptr; void *userData = nullptr; MemoryManager *memoryManager = nullptr; SVMAllocsManager *svmAllocsManager = nullptr; MapOperationsStorage mapOperationsStorage = {}; StackVec specialQueues; DriverDiagnostics *driverDiagnostics = nullptr; BufferPoolAllocator smallBufferPoolAllocator; UsmDeviceMemAllocPool usmDeviceMemAllocPool; UsmHostMemAllocPool usmHostMemAllocPool; uint32_t maxRootDeviceIndex = std::numeric_limits::max(); cl_bool preferD3dSharedResources = 0u; ContextType contextType = ContextType::CONTEXT_TYPE_DEFAULT; std::unique_ptr multiRootDeviceTimestampPacketAllocator; std::mutex multiRootDeviceAllocatorMtx; std::unique_ptr stagingBufferManager; bool interopUserSync = false; bool resolvesRequiredInKernels = false; bool nonZebinContext = false; bool usmPoolInitialized = false; }; } // namespace NEO