/* * Copyright (C) 2018-2024 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/os_interface/os_context.h" #include "shared/source/utilities/tag_allocator.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/event/user_event.h" #include "opencl/source/helpers/queue_helpers.h" namespace NEO { template template void GpgpuWalkerHelper::setSystolicModeEnable(WalkerType *walkerCmd) { } template void GpgpuWalkerHelper::dispatchPerfCountersCommandsStart( CommandQueue &commandQueue, TagNodeBase &hwPerfCounter, LinearStream *commandStream) { const auto pPerformanceCounters = commandQueue.getPerfCounters(); const auto commandBufferType = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType()) ? MetricsLibraryApi::GpuCommandBufferType::Compute : MetricsLibraryApi::GpuCommandBufferType::Render; const uint32_t size = pPerformanceCounters->getGpuCommandsSize(commandBufferType, true); void *pBuffer = commandStream->getSpace(size); pPerformanceCounters->getGpuCommands(commandBufferType, hwPerfCounter, true, size, pBuffer); } template void GpgpuWalkerHelper::dispatchPerfCountersCommandsEnd( CommandQueue &commandQueue, TagNodeBase &hwPerfCounter, LinearStream *commandStream) { const auto pPerformanceCounters = commandQueue.getPerfCounters(); const auto commandBufferType = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType()) ? MetricsLibraryApi::GpuCommandBufferType::Compute : MetricsLibraryApi::GpuCommandBufferType::Render; const uint32_t size = pPerformanceCounters->getGpuCommandsSize(commandBufferType, false); void *pBuffer = commandStream->getSpace(size); pPerformanceCounters->getGpuCommands(commandBufferType, hwPerfCounter, false, size, pBuffer); } template size_t GpgpuWalkerHelper::getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel) { return 0u; } template size_t EnqueueOperation::getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, bool isMarkerWithProfiling, bool eventsInWaitlist, bool resolveDependenciesByPipecontrol, cl_event *outEvent) { size_t expectedSizeCS = 0; auto &gfxCoreHelper = commandQueue.getDevice().getGfxCoreHelper(); auto &commandQueueHw = static_cast &>(commandQueue); auto &rootDeviceEnvironment = commandQueue.getDevice().getRootDeviceEnvironment(); if (blitEnqueue) { size_t expectedSizeCS = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue(); if (commandQueueHw.isCacheFlushForBcsRequired()) { expectedSizeCS += MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, false); } return expectedSizeCS; } for (auto &dispatchInfo : multiDispatchInfo) { expectedSizeCS += EnqueueOperation::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, dispatchInfo.getKernel(), dispatchInfo); size_t kernelObjAuxCount = multiDispatchInfo.getKernelObjsForAuxTranslation() != nullptr ? multiDispatchInfo.getKernelObjsForAuxTranslation()->size() : 0; expectedSizeCS += dispatchInfo.dispatchInitCommands.estimateCommandsSize(kernelObjAuxCount, rootDeviceEnvironment, commandQueueHw.isCacheFlushForBcsRequired()); expectedSizeCS += dispatchInfo.dispatchEpilogueCommands.estimateCommandsSize(kernelObjAuxCount, rootDeviceEnvironment, commandQueueHw.isCacheFlushForBcsRequired()); } auto relaxedOrderingEnabled = commandQueue.getGpgpuCommandStreamReceiver().directSubmissionRelaxedOrderingEnabled(); if (relaxedOrderingEnabled) { expectedSizeCS += 2 * EncodeSetMMIO::sizeREG; } if (commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { // add relaxed ordering cond_bb_start expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSize(csrDeps, relaxedOrderingEnabled); expectedSizeCS += EnqueueOperation::getSizeRequiredForTimestampPacketWrite(); if (resolveDependenciesByPipecontrol) { expectedSizeCS += MemorySynchronizationCommands::getSizeForSingleBarrier(false); } if (isMarkerWithProfiling) { if (!eventsInWaitlist) { expectedSizeCS += commandQueue.getGpgpuCommandStreamReceiver().getCmdsSizeForComputeBarrierCommand(); } expectedSizeCS += 4 * EncodeStoreMMIO::size; } } else if (isMarkerWithProfiling) { expectedSizeCS += 2 * MemorySynchronizationCommands::getSizeForSingleBarrier(false); if (!gfxCoreHelper.useOnlyGlobalTimestamps()) { expectedSizeCS += 2 * EncodeStoreMMIO::size; } } if (multiDispatchInfo.peekMainKernel()) { expectedSizeCS += EnqueueOperation::getSizeForCacheFlushAfterWalkerCommands(*multiDispatchInfo.peekMainKernel(), commandQueue); } if (debugManager.flags.PauseOnEnqueue.get() != -1) { expectedSizeCS += MemorySynchronizationCommands::getSizeForSingleBarrier(false) * 2; expectedSizeCS += NEO::EncodeSemaphore::getSizeMiSemaphoreWait() * 2; } if (debugManager.flags.GpuScratchRegWriteAfterWalker.get() != -1) { expectedSizeCS += sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM); } expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSizeForMultiRootDeviceSyncNodesContainer(csrDeps); if (outEvent) { auto pEvent = castToObjectOrAbort(*outEvent); if ((pEvent->getContext()->getRootDeviceIndices().size() > 1) && (!pEvent->isUserEvent())) { expectedSizeCS += MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, false); } } expectedSizeCS += MemorySynchronizationCommands::getSizeForSingleBarrier(false); if ((CL_COMMAND_BARRIER == eventType) && !commandQueue.isOOQEnabled() && eventsInWaitlist) { expectedSizeCS += EncodeStoreMemory::getStoreDataImmSize(); } return expectedSizeCS; } template size_t EnqueueOperation::getSizeRequiredCS(uint32_t cmdType, bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel, const DispatchInfo &dispatchInfo) { if (isCommandWithoutKernel(cmdType)) { return EnqueueOperation::getSizeRequiredCSNonKernel(reserveProfilingCmdsSpace, reservePerfCounters, commandQueue); } else { return EnqueueOperation::getSizeRequiredCSKernel(reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, pKernel, dispatchInfo); } } template size_t EnqueueOperation::getSizeRequiredCSNonKernel(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue) { size_t size = 0; if (reserveProfilingCmdsSpace) { size += 2 * MemorySynchronizationCommands::getSizeForSingleBarrier(false) + 4 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM); } return size; } } // namespace NEO