2018-10-02 15:09:06 +02:00
|
|
|
/*
|
2025-02-18 17:50:26 +00:00
|
|
|
* Copyright (C) 2018-2025 Intel Corporation
|
2018-10-02 15:09:06 +02:00
|
|
|
*
|
|
|
|
|
* SPDX-License-Identifier: MIT
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
|
2021-06-14 16:35:48 +00:00
|
|
|
#include "shared/source/command_container/command_encoder.h"
|
2020-02-23 22:44:01 +01:00
|
|
|
#include "shared/source/command_stream/command_stream_receiver.h"
|
|
|
|
|
#include "shared/source/helpers/aligned_memory.h"
|
|
|
|
|
#include "shared/source/helpers/engine_node_helper.h"
|
2023-02-01 16:23:01 +00:00
|
|
|
#include "shared/source/helpers/gfx_core_helper.h"
|
2023-01-20 13:01:19 +00:00
|
|
|
#include "shared/source/helpers/timestamp_packet.h"
|
2020-02-23 22:44:01 +01:00
|
|
|
#include "shared/source/os_interface/os_context.h"
|
|
|
|
|
#include "shared/source/utilities/tag_allocator.h"
|
2020-02-24 10:22:30 +01:00
|
|
|
|
2020-02-22 22:50:57 +01:00
|
|
|
#include "opencl/source/command_queue/command_queue.h"
|
2020-02-28 09:07:07 +01:00
|
|
|
#include "opencl/source/command_queue/command_queue_hw.h"
|
2020-02-22 22:50:57 +01:00
|
|
|
#include "opencl/source/command_queue/gpgpu_walker.h"
|
|
|
|
|
#include "opencl/source/event/user_event.h"
|
|
|
|
|
#include "opencl/source/helpers/queue_helpers.h"
|
2018-10-02 15:09:06 +02:00
|
|
|
|
2019-03-26 11:59:46 +01:00
|
|
|
namespace NEO {
|
2018-10-02 15:09:06 +02:00
|
|
|
|
2024-09-20 13:14:43 +00:00
|
|
|
template <typename GfxFamily>
|
|
|
|
|
template <typename WalkerType>
|
|
|
|
|
void GpgpuWalkerHelper<GfxFamily>::setSystolicModeEnable(WalkerType *walkerCmd) {
|
|
|
|
|
}
|
|
|
|
|
|
2019-05-13 14:15:03 +02:00
|
|
|
template <typename GfxFamily>
|
|
|
|
|
void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersCommandsStart(
|
|
|
|
|
CommandQueue &commandQueue,
|
2021-03-24 18:21:13 +00:00
|
|
|
TagNodeBase &hwPerfCounter,
|
2019-05-13 14:15:03 +02:00
|
|
|
LinearStream *commandStream) {
|
|
|
|
|
|
2019-11-13 14:48:44 +01:00
|
|
|
const auto pPerformanceCounters = commandQueue.getPerfCounters();
|
2020-02-21 15:25:04 +01:00
|
|
|
const auto commandBufferType = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType())
|
2019-11-13 14:48:44 +01:00
|
|
|
? MetricsLibraryApi::GpuCommandBufferType::Compute
|
|
|
|
|
: MetricsLibraryApi::GpuCommandBufferType::Render;
|
|
|
|
|
const uint32_t size = pPerformanceCounters->getGpuCommandsSize(commandBufferType, true);
|
2019-05-20 11:19:27 +02:00
|
|
|
void *pBuffer = commandStream->getSpace(size);
|
2019-05-13 14:15:03 +02:00
|
|
|
|
2019-11-13 14:48:44 +01:00
|
|
|
pPerformanceCounters->getGpuCommands(commandBufferType, hwPerfCounter, true, size, pBuffer);
|
2019-05-13 14:15:03 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
|
|
|
|
void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersCommandsEnd(
|
|
|
|
|
CommandQueue &commandQueue,
|
2021-03-24 18:21:13 +00:00
|
|
|
TagNodeBase &hwPerfCounter,
|
2019-05-13 14:15:03 +02:00
|
|
|
LinearStream *commandStream) {
|
|
|
|
|
|
2019-11-13 14:48:44 +01:00
|
|
|
const auto pPerformanceCounters = commandQueue.getPerfCounters();
|
2020-02-21 15:25:04 +01:00
|
|
|
const auto commandBufferType = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType())
|
2019-11-13 14:48:44 +01:00
|
|
|
? MetricsLibraryApi::GpuCommandBufferType::Compute
|
|
|
|
|
: MetricsLibraryApi::GpuCommandBufferType::Render;
|
|
|
|
|
const uint32_t size = pPerformanceCounters->getGpuCommandsSize(commandBufferType, false);
|
2019-05-20 11:19:27 +02:00
|
|
|
void *pBuffer = commandStream->getSpace(size);
|
2019-05-13 14:15:03 +02:00
|
|
|
|
2019-11-13 14:48:44 +01:00
|
|
|
pPerformanceCounters->getGpuCommands(commandBufferType, hwPerfCounter, false, size, pBuffer);
|
2019-05-13 14:15:03 +02:00
|
|
|
}
|
|
|
|
|
|
2020-10-09 12:27:32 +02:00
|
|
|
template <typename GfxFamily>
|
2021-03-22 15:26:03 +00:00
|
|
|
size_t GpgpuWalkerHelper<GfxFamily>::getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel) {
|
2020-10-09 12:27:32 +02:00
|
|
|
return 0u;
|
|
|
|
|
}
|
|
|
|
|
|
2019-05-13 14:15:03 +02:00
|
|
|
template <typename GfxFamily>
|
2023-03-24 09:59:12 +01:00
|
|
|
size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, bool isMarkerWithProfiling, bool eventsInWaitlist, bool resolveDependenciesByPipecontrol, cl_event *outEvent) {
|
2020-02-28 09:07:07 +01:00
|
|
|
size_t expectedSizeCS = 0;
|
2022-12-12 13:21:09 +00:00
|
|
|
auto &gfxCoreHelper = commandQueue.getDevice().getGfxCoreHelper();
|
2022-12-28 22:59:37 +00:00
|
|
|
|
2020-03-09 13:48:30 +01:00
|
|
|
auto &commandQueueHw = static_cast<CommandQueueHw<GfxFamily> &>(commandQueue);
|
2023-01-26 03:58:18 +00:00
|
|
|
auto &rootDeviceEnvironment = commandQueue.getDevice().getRootDeviceEnvironment();
|
2020-02-28 09:07:07 +01:00
|
|
|
|
2019-07-03 09:30:30 +02:00
|
|
|
if (blitEnqueue) {
|
2020-02-28 09:07:07 +01:00
|
|
|
size_t expectedSizeCS = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue<GfxFamily>();
|
|
|
|
|
if (commandQueueHw.isCacheFlushForBcsRequired()) {
|
2025-05-26 12:06:15 +00:00
|
|
|
expectedSizeCS += MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData);
|
2020-02-28 09:07:07 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return expectedSizeCS;
|
2019-07-03 09:30:30 +02:00
|
|
|
}
|
2020-02-28 09:07:07 +01:00
|
|
|
|
2019-05-13 14:15:03 +02:00
|
|
|
for (auto &dispatchInfo : multiDispatchInfo) {
|
2021-03-03 17:29:32 +00:00
|
|
|
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, dispatchInfo.getKernel(), dispatchInfo);
|
2020-12-22 00:03:25 +00:00
|
|
|
size_t kernelObjAuxCount = multiDispatchInfo.getKernelObjsForAuxTranslation() != nullptr ? multiDispatchInfo.getKernelObjsForAuxTranslation()->size() : 0;
|
2023-01-26 03:58:18 +00:00
|
|
|
expectedSizeCS += dispatchInfo.dispatchInitCommands.estimateCommandsSize(kernelObjAuxCount, rootDeviceEnvironment, commandQueueHw.isCacheFlushForBcsRequired());
|
|
|
|
|
expectedSizeCS += dispatchInfo.dispatchEpilogueCommands.estimateCommandsSize(kernelObjAuxCount, rootDeviceEnvironment, commandQueueHw.isCacheFlushForBcsRequired());
|
2019-05-13 14:15:03 +02:00
|
|
|
}
|
2023-03-09 18:29:45 +00:00
|
|
|
|
|
|
|
|
auto relaxedOrderingEnabled = commandQueue.getGpgpuCommandStreamReceiver().directSubmissionRelaxedOrderingEnabled();
|
|
|
|
|
|
|
|
|
|
if (relaxedOrderingEnabled) {
|
|
|
|
|
expectedSizeCS += 2 * EncodeSetMMIO<GfxFamily>::sizeREG;
|
|
|
|
|
}
|
|
|
|
|
|
2019-07-15 14:28:09 +02:00
|
|
|
if (commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
2023-03-09 18:29:45 +00:00
|
|
|
// add relaxed ordering cond_bb_start
|
|
|
|
|
expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(csrDeps, relaxedOrderingEnabled);
|
2019-07-03 09:30:30 +02:00
|
|
|
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredForTimestampPacketWrite();
|
2023-03-24 09:59:12 +01:00
|
|
|
if (resolveDependenciesByPipecontrol) {
|
2025-05-30 13:39:42 +02:00
|
|
|
expectedSizeCS += MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier();
|
2022-11-29 13:28:05 +00:00
|
|
|
}
|
2021-06-14 16:35:48 +00:00
|
|
|
if (isMarkerWithProfiling) {
|
2021-07-19 15:07:12 +00:00
|
|
|
if (!eventsInWaitlist) {
|
2021-12-10 21:31:34 +00:00
|
|
|
expectedSizeCS += commandQueue.getGpgpuCommandStreamReceiver().getCmdsSizeForComputeBarrierCommand();
|
2021-07-19 15:07:12 +00:00
|
|
|
}
|
2021-06-14 16:35:48 +00:00
|
|
|
expectedSizeCS += 4 * EncodeStoreMMIO<GfxFamily>::size;
|
|
|
|
|
}
|
|
|
|
|
} else if (isMarkerWithProfiling) {
|
2025-05-14 18:00:30 +00:00
|
|
|
expectedSizeCS += 2 * MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier();
|
2022-12-12 13:21:09 +00:00
|
|
|
if (!gfxCoreHelper.useOnlyGlobalTimestamps()) {
|
2021-06-14 16:35:48 +00:00
|
|
|
expectedSizeCS += 2 * EncodeStoreMMIO<GfxFamily>::size;
|
|
|
|
|
}
|
2019-05-13 14:15:03 +02:00
|
|
|
}
|
2020-08-19 11:06:01 +02:00
|
|
|
if (multiDispatchInfo.peekMainKernel()) {
|
|
|
|
|
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeForCacheFlushAfterWalkerCommands(*multiDispatchInfo.peekMainKernel(), commandQueue);
|
|
|
|
|
}
|
|
|
|
|
|
2023-11-30 08:32:25 +00:00
|
|
|
if (debugManager.flags.PauseOnEnqueue.get() != -1) {
|
2025-05-14 18:00:30 +00:00
|
|
|
expectedSizeCS += MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier() * 2;
|
2023-03-10 13:49:06 +00:00
|
|
|
expectedSizeCS += NEO::EncodeSemaphore<GfxFamily>::getSizeMiSemaphoreWait() * 2;
|
2020-04-22 16:34:39 +02:00
|
|
|
}
|
2020-04-30 17:12:01 +02:00
|
|
|
|
2023-11-30 08:32:25 +00:00
|
|
|
if (debugManager.flags.GpuScratchRegWriteAfterWalker.get() != -1) {
|
2021-02-24 15:31:58 +00:00
|
|
|
expectedSizeCS += sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM);
|
|
|
|
|
}
|
2023-01-19 16:11:39 +00:00
|
|
|
expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSizeForMultiRootDeviceSyncNodesContainer<GfxFamily>(csrDeps);
|
|
|
|
|
if (outEvent) {
|
|
|
|
|
auto pEvent = castToObjectOrAbort<Event>(*outEvent);
|
|
|
|
|
if ((pEvent->getContext()->getRootDeviceIndices().size() > 1) && (!pEvent->isUserEvent())) {
|
2025-05-26 12:06:15 +00:00
|
|
|
expectedSizeCS += MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData);
|
2023-01-19 16:11:39 +00:00
|
|
|
}
|
|
|
|
|
}
|
2025-05-14 18:00:30 +00:00
|
|
|
expectedSizeCS += MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier();
|
2021-03-11 13:48:04 +00:00
|
|
|
|
2023-03-23 12:05:20 +00:00
|
|
|
if ((CL_COMMAND_BARRIER == eventType) && !commandQueue.isOOQEnabled() && eventsInWaitlist) {
|
|
|
|
|
expectedSizeCS += EncodeStoreMemory<GfxFamily>::getStoreDataImmSize();
|
|
|
|
|
}
|
|
|
|
|
|
2019-05-13 14:15:03 +02:00
|
|
|
return expectedSizeCS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
2021-03-03 17:29:32 +00:00
|
|
|
size_t EnqueueOperation<GfxFamily>::getSizeRequiredCS(uint32_t cmdType, bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel, const DispatchInfo &dispatchInfo) {
|
2019-05-13 14:15:03 +02:00
|
|
|
if (isCommandWithoutKernel(cmdType)) {
|
|
|
|
|
return EnqueueOperation<GfxFamily>::getSizeRequiredCSNonKernel(reserveProfilingCmdsSpace, reservePerfCounters, commandQueue);
|
|
|
|
|
} else {
|
2023-12-04 12:20:54 +00:00
|
|
|
return EnqueueOperation<GfxFamily>::getSizeRequiredCSKernel<typename GfxFamily::DefaultWalkerType>(reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, pKernel, dispatchInfo);
|
2018-10-02 15:09:06 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
2019-05-13 14:15:03 +02:00
|
|
|
size_t EnqueueOperation<GfxFamily>::getSizeRequiredCSNonKernel(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue) {
|
|
|
|
|
size_t size = 0;
|
|
|
|
|
if (reserveProfilingCmdsSpace) {
|
2025-05-14 18:00:30 +00:00
|
|
|
size += 2 * MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier() + 4 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
|
2018-10-02 15:09:06 +02:00
|
|
|
}
|
2023-03-23 12:05:20 +00:00
|
|
|
|
2019-05-13 14:15:03 +02:00
|
|
|
return size;
|
2018-10-02 15:09:06 +02:00
|
|
|
}
|
|
|
|
|
|
2025-03-19 12:15:31 +00:00
|
|
|
template <typename GfxFamily>
|
|
|
|
|
template <typename WalkerType>
|
|
|
|
|
void GpgpuWalkerHelper<GfxFamily>::setupTimestampPacketFlushL3(WalkerType *walkerCmd,
|
|
|
|
|
const ProductHelper &productHelper,
|
|
|
|
|
bool flushL3AfterPostSyncForHostUsm,
|
|
|
|
|
bool flushL3AfterPostSyncForExternalAllocation) {
|
|
|
|
|
}
|
2019-03-26 11:59:46 +01:00
|
|
|
} // namespace NEO
|