/*
 * Copyright (C) 2018-2025 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

#include "shared/source/command_stream/csr_definitions.h"
#include "shared/source/helpers/completion_stamp.h"
#include "shared/source/helpers/engine_node_helper.h"
#include "shared/source/helpers/map_operation_type.h"
#include "shared/source/helpers/timestamp_packet_container.h"
#include "shared/source/indirect_heap/indirect_heap_type.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/sku_info/sku_info_base.h"
#include "shared/source/unified_memory/unified_memory.h"
#include "shared/source/utilities/range.h"

#include "opencl/source/api/cl_types.h"
#include "opencl/source/command_queue/copy_engine_state.h"
#include "opencl/source/helpers/base_object.h"
#include "opencl/source/helpers/enqueue_properties.h"
#include "opencl/source/helpers/properties_helper.h"

#include <cstdint>
#include <optional>

namespace NEO {
class BarrierCommand;
class Buffer;
class ClDevice;
class Context;
class Event;
class EventBuilder;
class FlushStampTracker;
class Image;
class IndirectHeap;
class Kernel;
class LinearStream;
class PerformanceCounters;
class PrintfHandler;
enum class WaitStatus;
struct BuiltinOpParams;
struct CsrSelectionArgs;
struct MultiDispatchInfo;
struct TimestampPacketDependencies;
struct StagingTransferStatus;

enum class QueuePriority {
    low,
    medium,
    high
};

template <>
struct OpenCLObjectMapper<_cl_command_queue> {
    typedef class CommandQueue DerivedType;
};

class CommandQueue : public BaseObject<_cl_command_queue> {
  public:
    static const cl_ulong objectMagic = 0x1234567890987654LL;

    static CommandQueue *create(Context *context, ClDevice *device, const cl_queue_properties *properties, bool internalUsage, cl_int &errcodeRet);

    static cl_int getErrorCodeFromTaskCount(TaskCountType taskCount);

    CommandQueue() = delete;

    CommandQueue(Context *context, ClDevice *device, const cl_queue_properties *properties, bool internalUsage);

    ~CommandQueue() override;

    // API entry points
    virtual cl_int enqueueCopyImage(Image *srcImage, Image *dstImage, const size_t *srcOrigin, const size_t *dstOrigin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0;
    virtual cl_int enqueueFillImage(Image *image, const void *fillColor, const size_t *origin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0;
    virtual cl_int enqueueFillBuffer(Buffer *buffer, const void *pattern, size_t patternSize, size_t offset, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0;
    virtual cl_int enqueueKernel(Kernel *kernel, cl_uint workDim, const size_t *globalWorkOffset, const size_t *globalWorkSize, const size_t *localWorkSize, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0;
    virtual cl_int enqueueBarrierWithWaitList(cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0;

    MOCKABLE_VIRTUAL void *enqueueMapBuffer(Buffer *buffer, cl_bool blockingMap, cl_map_flags mapFlags, size_t offset, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, cl_int &errcodeRet);
    MOCKABLE_VIRTUAL void *enqueueMapImage(Image *image, cl_bool blockingMap, cl_map_flags mapFlags, const size_t *origin, const size_t *region, size_t *imageRowPitch, size_t *imageSlicePitch, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, cl_int &errcodeRet);
    MOCKABLE_VIRTUAL cl_int enqueueUnmapMemObject(MemObj *memObj, void *mappedPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event);

    virtual cl_int enqueueSVMMap(cl_bool blockingMap, cl_map_flags mapFlags, void *svmPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool externalAppCall) = 0;
    virtual cl_int enqueueSVMUnmap(void *svmPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool externalAppCall) = 0;
    virtual cl_int enqueueSVMFree(cl_uint numSvmPointers, void *svmPointers[], void(CL_CALLBACK *pfnFreeFunc)(cl_command_queue queue, cl_uint numSvmPointers, void *svmPointers[], void *userData), void *userData, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0;
    virtual cl_int enqueueSVMMemcpy(cl_bool blockingCopy, void *dstPtr, const void *srcPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, CommandStreamReceiver *csrParam) = 0;
    virtual cl_int enqueueSVMMemFill(void *svmPtr, const void *pattern, size_t patternSize, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0;
    virtual cl_int enqueueMarkerWithWaitList(cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0;
    virtual cl_int enqueueMigrateMemObjects(cl_uint numMemObjects, const cl_mem *memObjects, cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0;
    virtual cl_int enqueueSVMMigrateMem(cl_uint numSvmPointers, const void **svmPointers, const size_t *sizes, const cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0;
    virtual cl_int enqueueCopyBuffer(Buffer *srcBuffer, Buffer *dstBuffer, size_t srcOffset, size_t dstOffset, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0;
    virtual cl_int enqueueReadBuffer(Buffer *buffer, cl_bool blockingRead, size_t offset, size_t size, void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0;
    virtual cl_int enqueueReadBufferImpl(Buffer *buffer, cl_bool blockingRead, size_t offset, size_t size, void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, CommandStreamReceiver &csr) = 0;
    virtual cl_int enqueueReadImage(Image *srcImage, cl_bool blockingRead, const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch, void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0;
    virtual cl_int enqueueReadImageImpl(Image *srcImage, cl_bool blockingRead, const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch, void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, CommandStreamReceiver &csr) = 0;
    virtual cl_int enqueueWriteBuffer(Buffer *buffer, cl_bool blockingWrite, size_t offset, size_t cb, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0;
    virtual cl_int enqueueWriteImage(Image *dstImage, cl_bool blockingWrite, const size_t *origin, const size_t *region, size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0;
    virtual cl_int enqueueWriteBufferImpl(Buffer *buffer, cl_bool blockingWrite, size_t offset, size_t cb, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, CommandStreamReceiver &csr) = 0;
    virtual cl_int enqueueWriteImageImpl(Image *dstImage, cl_bool blockingWrite, const size_t *origin, const size_t *region, size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, CommandStreamReceiver &csr) = 0;
    virtual cl_int enqueueCopyBufferRect(Buffer *srcBuffer, Buffer *dstBuffer, const size_t *srcOrigin, const size_t *dstOrigin, const size_t *region, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0;
    virtual cl_int enqueueWriteBufferRect(Buffer *buffer, cl_bool blockingWrite, const size_t *bufferOrigin, const size_t *hostOrigin, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, const void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0;
    virtual cl_int enqueueReadBufferRect(Buffer *buffer, cl_bool blockingRead, const size_t *bufferOrigin, const size_t *hostOrigin, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0;
    virtual cl_int enqueueCopyBufferToImage(Buffer *srcBuffer, Image *dstImage, size_t srcOffset, const size_t *dstOrigin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0;
    virtual cl_int enqueueCopyImageToBuffer(Image *srcImage, Buffer *dstBuffer, const size_t *srcOrigin, const size_t *region, size_t dstOffset, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0;

    cl_int enqueueAcquireSharedObjects(cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *oclEvent, cl_uint cmdType);
    cl_int enqueueReleaseSharedObjects(cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *oclEvent, cl_uint cmdType);

    MOCKABLE_VIRTUAL void *cpuDataTransferHandler(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &retVal);

    virtual cl_int enqueueResourceBarrier(BarrierCommand *resourceBarrier, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0;

    virtual cl_int finish() = 0;
    virtual cl_int flush() = 0;

    void updateFromCompletionStamp(const CompletionStamp &completionStamp, Event *outEvent);

    virtual bool isCacheFlushCommand(uint32_t commandType) const { return false; }

    cl_int getCommandQueueInfo(cl_command_queue_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet);

    TagAddressType getHwTag() const;
    volatile TagAddressType *getHwTagAddress() const;

    MOCKABLE_VIRTUAL bool isCompleted(TaskCountType gpgpuTaskCount, const Range<CopyEngineState> &bcsStates);

    bool isWaitForTimestampsEnabled() const;
    virtual bool waitForTimestamps(Range<CopyEngineState> copyEnginesToWait, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) = 0;

    MOCKABLE_VIRTUAL bool isQueueBlocked();
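    // The wait helpers below block until the GPGPU engine reaches the given task count / flush stamp and the
    // listed copy engines reach their recorded task counts.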
    MOCKABLE_VIRTUAL WaitStatus waitUntilComplete(TaskCountType gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait);
    MOCKABLE_VIRTUAL WaitStatus waitUntilComplete(TaskCountType gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) {
        return this->waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, true, false);
    }
    MOCKABLE_VIRTUAL WaitStatus waitForAllEngines(bool blockedQueue, PrintfHandler *printfHandler, bool cleanTemporaryAllocationsList);
    MOCKABLE_VIRTUAL WaitStatus waitForAllEngines(bool blockedQueue, PrintfHandler *printfHandler) {
        return this->waitForAllEngines(blockedQueue, printfHandler, true);
    }

    static TaskCountType getTaskLevelFromWaitList(TaskCountType taskLevel, cl_uint numEventsInWaitList, const cl_event *eventWaitList);

    void initializeGpgpu() const;
    void initializeGpgpuInternals() const;
    MOCKABLE_VIRTUAL CommandStreamReceiver &getGpgpuCommandStreamReceiver() const;
    MOCKABLE_VIRTUAL CommandStreamReceiver *getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType);
    CommandStreamReceiver *getBcsForAuxTranslation();
    MOCKABLE_VIRTUAL CommandStreamReceiver &selectCsrForBuiltinOperation(const CsrSelectionArgs &args);
    void constructBcsEngine(bool internalUsage);
    MOCKABLE_VIRTUAL void initializeBcsEngine(bool internalUsage);
    void constructBcsEnginesForSplit();
    void prepareHostPtrSurfaceForSplit(bool split, GraphicsAllocation &allocation);
    CommandStreamReceiver &selectCsrForHostPtrAllocation(bool split, CommandStreamReceiver &csr);
    void releaseMainCopyEngine();
    Device &getDevice() const noexcept;
    ClDevice &getClDevice() const { return *device; }
    Context &getContext() const { return *context; }
    Context *getContextPtr() const { return context; }
    EngineControl &getGpgpuEngine() const {
        this->initializeGpgpu();
        return *gpgpuEngine;
    }

    MOCKABLE_VIRTUAL LinearStream &getCS(size_t minRequiredSize);
    IndirectHeap &getIndirectHeap(IndirectHeapType heapType, size_t minRequiredSize);
    void allocateHeapMemory(IndirectHeapType heapType, size_t minRequiredSize, IndirectHeap *&indirectHeap);
    static bool isTimestampWaitEnabled();
    MOCKABLE_VIRTUAL void releaseIndirectHeap(IndirectHeapType heapType);

    void releaseVirtualEvent();

    cl_command_queue_properties getCommandQueueProperties() const { return commandQueueProperties; }

    bool isProfilingEnabled() const { return !!(this->getCommandQueueProperties() & CL_QUEUE_PROFILING_ENABLE); }

    bool isOOQEnabled() const { return !!(this->getCommandQueueProperties() & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE); }

    bool isPerfCountersEnabled() const { return perfCountersEnabled; }

    PerformanceCounters *getPerfCounters();

    bool setPerfCountersEnabled();

    void setIsSpecialCommandQueue(bool newValue) { this->isSpecialCommandQueue = newValue; }
    bool isSpecial() { return this->isSpecialCommandQueue; }

    QueuePriority getPriority() const { return priority; }

    QueueThrottle getThrottle() const { return throttle; }

    const TimestampPacketContainer *getTimestampPacketContainer() const { return timestampPacketContainer.get(); }

    const std::vector<uint64_t> &getPropertiesVector() const { return propertiesVector; }

    void enqueueBlockedMapUnmapOperation(const cl_event *eventWaitList, size_t numEventsInWaitlist, MapOperationType opType, MemObj *memObj, MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset, bool readOnly, EventBuilder &externalEventBuilder);

    MOCKABLE_VIRTUAL bool setupDebugSurface(Kernel *kernel);
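    // Queue-family capability checks (cl_intel_command_queue_families): verify that this queue's capabilities
    // allow the requested operation, its wait list and its output event.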
    bool validateCapability(cl_command_queue_capabilities_intel capability) const;
    bool validateCapabilitiesForEventWaitList(cl_uint numEventsInWaitList, const cl_event *waitList) const;
    bool validateCapabilityForOperation(cl_command_queue_capabilities_intel capability, cl_uint numEventsInWaitList, const cl_event *waitList, const cl_event *outEvent) const;
    cl_uint getQueueFamilyIndex() const;
    cl_uint getQueueIndexWithinFamily() const { return queueIndexWithinFamily; }
    bool isQueueFamilySelected() const { return queueFamilySelected; }

    bool getRequiresCacheFlushAfterWalker() const { return requiresCacheFlushAfterWalker; }

    template <typename PtrType>
    static PtrType convertAddressWithOffsetToGpuVa(PtrType ptr, InternalMemoryType memoryType, GraphicsAllocation &allocation);

    void updateBcsTaskCount(aub_stream::EngineType bcsEngineType, TaskCountType newBcsTaskCount);
    TaskCountType peekBcsTaskCount(aub_stream::EngineType bcsEngineType) const;

    void updateLatestSentEnqueueType(EnqueueProperties::Operation newEnqueueType) { this->latestSentEnqueueType = newEnqueueType; }
    EnqueueProperties::Operation peekLatestSentEnqueueOperation() { return this->latestSentEnqueueType; }

    void setupBarrierTimestampForBcsEngines(aub_stream::EngineType engineType, TimestampPacketDependencies &timestampPacketDependencies);
    void processBarrierTimestampForBcsEngine(aub_stream::EngineType bcsEngineType, TimestampPacketDependencies &blitDependencies);
    void setLastBcsPacket(aub_stream::EngineType bcsEngineType);
    void fillCsrDependenciesWithLastBcsPackets(CsrDependencies &csrDeps);
    void clearLastBcsPackets();

    void setStallingCommandsOnNextFlush(const bool isStallingCommandsOnNextFlushRequired) {
        stallingCommandsOnNextFlushRequired = isStallingCommandsOnNextFlushRequired;
        if (!isStallingCommandsOnNextFlushRequired) {
            dcFlushRequiredOnStallingCommandsOnNextFlush = false;
        }
    }
    bool isStallingCommandsOnNextFlushRequired() const { return stallingCommandsOnNextFlushRequired; }

    void setDcFlushRequiredOnStallingCommandsOnNextFlush(const bool isDcFlushRequiredOnStallingCommandsOnNextFlush) { dcFlushRequiredOnStallingCommandsOnNextFlush = isDcFlushRequiredOnStallingCommandsOnNextFlush; }
    bool isDcFlushRequiredOnStallingCommandsOnNextFlush() const { return dcFlushRequiredOnStallingCommandsOnNextFlush; }

    // taskCount of last task
    TaskCountType taskCount = 0;
    // current taskLevel. Used for determining if a PIPE_CONTROL is needed.
    TaskCountType taskLevel = 0;

    std::unique_ptr<FlushStampTracker> flushStamp;

    // virtual event that holds last Enqueue information
    Event *virtualEvent = nullptr;

    size_t estimateTimestampPacketNodesCount(const MultiDispatchInfo &dispatchInfo) const;

    uint64_t getSliceCount() const { return sliceCount; }

    TimestampPacketContainer *getDeferredTimestampPackets() const { return deferredTimestampPackets.get(); }

    uint64_t dispatchHints = 0;

    bool isTextureCacheFlushNeeded(uint32_t commandType) const;

    const std::array<CopyEngineState, bcsInfoMaskSize> &peekActiveBcsStates() const { return bcsStates; }

    void handlePostCompletionOperations(bool checkQueueCompletion);

    bool getHeaplessModeEnabled() const { return this->heaplessModeEnabled; }
    bool getHeaplessStateInitEnabled() const { return this->heaplessStateInitEnabled; }
    bool isBcsSplitInitialized() const { return this->bcsSplitInitialized; }
    bool isBcs() const { return isCopyOnly; }

    cl_int enqueueStagingBufferMemcpy(cl_bool blockingCopy, void *dstPtr, const void *srcPtr, size_t size, cl_event *event);
    cl_int enqueueStagingImageTransfer(cl_command_type commandType, Image *dstImage, cl_bool blockingCopy, const size_t *globalOrigin, const size_t *globalRegion, size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, cl_event *event);
    cl_int enqueueStagingBufferTransfer(cl_command_type commandType, Buffer *buffer, cl_bool blockingCopy, size_t offset, size_t size, const void *ptr, cl_event *event);
    bool isValidForStagingBufferCopy(Device &device, void *dstPtr, const void *srcPtr, size_t size, bool hasDependencies);
    bool isValidForStagingTransfer(MemObj *memObj, const void *ptr, size_t size, cl_command_type commandType, bool isBlocking, bool hasDependencies);

    size_t calculateHostPtrSizeForImage(const size_t *region, size_t rowPitch, size_t slicePitch, Image *image) const;

  protected:
    void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
    cl_int enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest);

    void *enqueueMapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
    cl_int enqueueUnmapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest);

    virtual void obtainTaskLevelAndBlockedStatus(TaskCountType &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType){};
    bool isBlockedCommandStreamRequired(uint32_t commandType, const EventsRequest &eventsRequest, bool blockedQueue, bool isMarkerWithProfiling) const;

    MOCKABLE_VIRTUAL void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies, CommandStreamReceiver &csr);
    void storeProperties(const cl_queue_properties *properties);
    void processProperties(const cl_queue_properties *properties);
    void overrideEngine(aub_stream::EngineType engineType, EngineUsage engineUsage);
    bool bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandType, cl_bool blocking, size_t size, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList);
    void providePerformanceHint(TransferProperties &transferProperties);
    bool queueDependenciesClearRequired() const;
    bool blitEnqueueAllowed(const CsrSelectionArgs &args) const;
    MOCKABLE_VIRTUAL bool migrateMultiGraphicsAllocationsIfRequired(const BuiltinOpParams &operationParams, CommandStreamReceiver &csr);
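    // DC (data cache) flush heuristic: results read back to the host (buffer/image reads, SVM map) and
    // printf output may require a flush before the host can safely read them.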
    inline bool shouldFlushDC(uint32_t commandType, PrintfHandler *printfHandler) const {
        return (commandType == CL_COMMAND_READ_BUFFER ||
                commandType == CL_COMMAND_READ_BUFFER_RECT ||
                commandType == CL_COMMAND_READ_IMAGE ||
                commandType == CL_COMMAND_SVM_MAP ||
                printfHandler ||
                isTextureCacheFlushNeeded(commandType));
    }

    MOCKABLE_VIRTUAL bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) const;
    void aubCaptureHook(bool &blocking, bool &clearAllDependencies, const MultiDispatchInfo &multiDispatchInfo);
    void assignDataToOverwrittenBcsNode(TagNodeBase *node);

    void registerGpgpuCsrClient();
    void registerBcsCsrClient(CommandStreamReceiver &bcsCsr);

    void unregisterGpgpuCsrClient();
    void unregisterBcsCsrClient(CommandStreamReceiver &bcsCsr);

    void unregisterGpgpuAndBcsCsrClients();

    cl_int postStagingTransferSync(const StagingTransferStatus &status, cl_event *event, const cl_event profilingEvent, bool isSingleTransfer, bool isBlocking, cl_command_type commandType);
    cl_event *assignEventForStaging(cl_event *userEvent, cl_event *profilingEvent, bool isFirstTransfer, bool isLastTransfer) const;

    Context *context = nullptr;
    ClDevice *device = nullptr;

    mutable EngineControl *gpgpuEngine = nullptr;
    std::array<EngineControl *, bcsInfoMaskSize> bcsEngines = {};
    std::optional<aub_stream::EngineType> bcsQueueEngineType{};
    size_t bcsEngineCount = bcsInfoMaskSize;

    cl_command_queue_properties commandQueueProperties = 0;
    std::vector<uint64_t> propertiesVector;

    cl_command_queue_capabilities_intel queueCapabilities = CL_QUEUE_DEFAULT_CAPABILITIES_INTEL;
    cl_uint queueFamilyIndex = 0;
    cl_uint queueIndexWithinFamily = 0;
    bool queueFamilySelected = false;

    QueuePriority priority = QueuePriority::medium;
    QueueThrottle throttle = QueueThrottle::MEDIUM;
    EnqueueProperties::Operation latestSentEnqueueType = EnqueueProperties::Operation::none;
    uint64_t sliceCount = QueueSliceCount::defaultSliceCount;

    std::array<CopyEngineState, bcsInfoMaskSize> bcsStates = {};

    bool perfCountersEnabled = false;
    bool isInternalUsage = false;
    bool isCopyOnly = false;
    bool bcsAllowed = false;
    bool bcsInitialized = false;
    bool bcsSplitInitialized = false;
    BcsInfoMask splitEngines = EngineHelpers::oddLinkedCopyEnginesMask;
    BcsInfoMask h2dEngines = NEO::EngineHelpers::h2dCopyEngineMask;
    BcsInfoMask d2hEngines = NEO::EngineHelpers::d2hCopyEngineMask;
    size_t minimalSizeForBcsSplit = 16 * MemoryConstants::megaByte;

    LinearStream *commandStream = nullptr;

    bool isSpecialCommandQueue = false;
    bool requiresCacheFlushAfterWalker = false;

    std::unique_ptr<TimestampPacketContainer> deferredTimestampPackets;
    std::unique_ptr<TimestampPacketContainer> deferredMultiRootSyncNodes;
    std::unique_ptr<TimestampPacketContainer> timestampPacketContainer;

    struct BcsTimestampPacketContainers {
        TimestampPacketContainer lastBarrierToWaitFor;
        TimestampPacketContainer lastSignalledPacket;
    };
    std::array<BcsTimestampPacketContainers, bcsInfoMaskSize> bcsTimestampPacketContainers;

    bool stallingCommandsOnNextFlushRequired = false;
    bool dcFlushRequiredOnStallingCommandsOnNextFlush = false;
    bool splitBarrierRequired = false;
    bool gpgpuCsrClientRegistered = false;
    bool heaplessModeEnabled = false;
    bool heaplessStateInitEnabled = false;
    bool isForceStateless = false;
};

static_assert(NEO::NonCopyableAndNonMovable<CommandQueue>);

template <typename PtrType>
PtrType CommandQueue::convertAddressWithOffsetToGpuVa(PtrType ptr, InternalMemoryType memoryType, GraphicsAllocation &allocation) {
    // If this is device or shared USM pointer, it is already a gpuVA and we don't have to do anything.
    // Otherwise, we assume this is a cpuVA and we have to convert to gpuVA, while preserving offset from allocation start.
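    // Illustrative example (hypothetical addresses): for an allocation with cpu base 0x1000 and gpu base 0xF000,
    // a cpuVA of 0x1010 yields offset 0x10 and converts to gpuVA 0xF010.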
    const bool isCpuPtr = (memoryType != InternalMemoryType::deviceUnifiedMemory) && (memoryType != InternalMemoryType::sharedUnifiedMemory);
    if (isCpuPtr) {
        size_t dstOffset = ptrDiff(ptr, allocation.getUnderlyingBuffer());
        ptr = reinterpret_cast<PtrType>(allocation.getGpuAddress() + dstOffset);
    }
    return ptr;
}

using CommandQueueCreateFunc = CommandQueue *(*)(Context *context, ClDevice *device, const cl_queue_properties *properties, bool internalUsage);

} // namespace NEO