2017-12-21 07:45:38 +08:00
|
|
|
/*
|
2024-03-08 00:50:57 +08:00
|
|
|
* Copyright (C) 2018-2024 Intel Corporation
|
2017-12-21 07:45:38 +08:00
|
|
|
*
|
2018-09-18 15:11:08 +08:00
|
|
|
* SPDX-License-Identifier: MIT
|
2017-12-21 07:45:38 +08:00
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2020-02-23 05:50:57 +08:00
|
|
|
#include "opencl/source/helpers/task_information.h"
|
2019-02-27 18:39:32 +08:00
|
|
|
|
2020-02-24 05:44:01 +08:00
|
|
|
#include "shared/source/command_stream/command_stream_receiver.h"
|
|
|
|
#include "shared/source/command_stream/csr_deps.h"
|
|
|
|
#include "shared/source/command_stream/linear_stream.h"
|
|
|
|
#include "shared/source/command_stream/preemption.h"
|
2022-03-05 00:23:43 +08:00
|
|
|
#include "shared/source/command_stream/wait_status.h"
|
2023-01-18 23:52:24 +08:00
|
|
|
#include "shared/source/helpers/blit_properties.h"
|
2023-01-11 01:16:08 +08:00
|
|
|
#include "shared/source/helpers/flush_stamp.h"
|
2020-02-24 05:44:01 +08:00
|
|
|
#include "shared/source/memory_manager/internal_allocation_storage.h"
|
|
|
|
#include "shared/source/memory_manager/surface.h"
|
2020-02-24 17:22:30 +08:00
|
|
|
|
2020-02-23 05:50:57 +08:00
|
|
|
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
|
2020-03-20 18:15:25 +08:00
|
|
|
#include "opencl/source/cl_device/cl_device.h"
|
2020-02-23 05:50:57 +08:00
|
|
|
#include "opencl/source/command_queue/command_queue.h"
|
|
|
|
#include "opencl/source/command_queue/enqueue_common.h"
|
|
|
|
#include "opencl/source/gtpin/gtpin_notify.h"
|
2021-09-23 06:24:59 +08:00
|
|
|
#include "opencl/source/helpers/cl_preemption_helper.h"
|
2020-02-23 05:50:57 +08:00
|
|
|
#include "opencl/source/helpers/enqueue_properties.h"
|
|
|
|
#include "opencl/source/helpers/task_information.inl"
|
|
|
|
#include "opencl/source/mem_obj/mem_obj.h"
|
2017-12-21 07:45:38 +08:00
|
|
|
|
2019-03-26 18:59:46 +08:00
|
|
|
namespace NEO {
|
2019-07-19 03:15:50 +08:00
|
|
|
// Explicit instantiations of the ResourceCleaner call operator for the two
// resource types released through it in this translation unit.
template void KernelOperation::ResourceCleaner::operator()<LinearStream>(LinearStream *);
template void KernelOperation::ResourceCleaner::operator()<IndirectHeap>(IndirectHeap *);
|
2017-12-21 07:45:38 +08:00
|
|
|
|
2019-07-23 03:28:59 +08:00
|
|
|
// Stores everything needed to perform a deferred map/unmap on memObj.
//
// operationType - whether the deferred operation is a map or an unmap.
// memObj        - memory object the operation targets.
// copySize      - size of the region to transfer.
// copyOffset    - offset of the region to transfer.
// readOnly      - when true, submit() skips the host-to-device transfer on unmap.
// commandQueue  - queue the command will be flushed on.
CommandMapUnmap::CommandMapUnmap(MapOperationType operationType, MemObj &memObj, MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset, bool readOnly,
                                 CommandQueue &commandQueue)
    : Command(commandQueue), memObj(memObj), copySize(copySize), copyOffset(copyOffset), readOnly(readOnly), operationType(operationType) {
    // Take an internal reference while the command is pending. submit() creates a
    // DecRefInternalAtScopeEnd guard on the same object, which presumably releases it.
    memObj.incRefInternal();
}
|
|
|
|
|
2022-11-22 21:53:59 +08:00
|
|
|
// Flushes the queue's command stream for a deferred map/unmap. For memory
// objects that are not zero-copy it then waits for completion and performs the
// host-pointer transfer.
//
// taskLevel  - task level forwarded to the flush call.
// terminated - when true nothing is flushed; the command is only marked terminated.
//
// Returns a reference to this command's completion stamp. Its taskCount is set to
// CompletionStamp::gpuHang when the wait reports a GPU hang.
CompletionStamp &CommandMapUnmap::submit(TaskCountType taskLevel, bool terminated) {
    DecRefInternalAtScopeEnd memObjRefGuard{memObj};

    if (terminated) {
        this->terminated = true;
        return completionStamp;
    }

    auto &csr = commandQueue.getGpgpuCommandStreamReceiver();
    auto csrLock = csr.obtainUniqueOwnership();
    auto &stream = commandQueue.getCS(0);
    size_t streamStart = stream.getUsed();

    // An empty dispatch info is enough here: no kernel is dispatched by this command.
    MultiDispatchInfo emptyDispatchInfo;
    Device &device = commandQueue.getDevice();

    DispatchFlags dispatchFlags(
        nullptr,                                                                     // barrierTimestampPacketNodes
        {},                                                                          // pipelineSelectArgs
        commandQueue.flushStamp->getStampReference(),                                // flushStampReference
        commandQueue.getThrottle(),                                                  // throttle
        ClPreemptionHelper::taskPreemptionMode(device, emptyDispatchInfo),           // preemptionMode
        GrfConfig::notApplicable,                                                    // numGrfRequired
        L3CachingSettings::notApplicable,                                            // l3CacheSettings
        ThreadArbitrationPolicy::NotPresent,                                         // threadArbitrationPolicy
        AdditionalKernelExecInfo::notApplicable,                                     // additionalKernelExecInfo
        KernelExecutionType::notApplicable,                                          // kernelExecutionType
        MemoryCompressionState::notApplicable,                                       // memoryCompressionState
        commandQueue.getSliceCount(),                                                // sliceCount
        true,                                                                        // blocking
        true,                                                                        // dcFlush
        false,                                                                       // useSLM
        !commandQueue.getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled(),  // guardCommandBufferWithPipeControl
        false,                                                                       // GSBA32BitRequired
        commandQueue.getPriority() == QueuePriority::low,                            // lowPriority
        false,                                                                       // implicitFlush
        commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), // outOfOrderExecutionAllowed
        false,                                                                       // epilogueRequired
        false,                                                                       // usePerDssBackedBuffer
        false,                                                                       // areMultipleSubDevicesInContext
        false,                                                                       // memoryMigrationRequired
        false,                                                                       // textureCacheFlush
        false,                                                                       // hasStallingCmds
        false,                                                                       // hasRelaxedOrderingDependencies
        false,                                                                       // stateCacheInvalidation
        commandQueue.isStallingCommandsOnNextFlushRequired(),                        // isStallingCommandsOnNextFlushRequired
        commandQueue.isDcFlushRequiredOnStallingCommandsOnNextFlush()                // isDcFlushRequiredOnStallingCommandsOnNextFlush
    );

    DEBUG_BREAK_IF(taskLevel >= CompletionStamp::notReady);

    gtpinNotifyPreFlushTask(&commandQueue);

    // Both flush flavours take the same arguments; only the entry point differs.
    if (commandQueue.getHeaplessStateInitEnabled()) {
        completionStamp = csr.flushTaskStateless(stream,
                                                 streamStart,
                                                 &commandQueue.getIndirectHeap(IndirectHeap::Type::dynamicState, 0u),
                                                 &commandQueue.getIndirectHeap(IndirectHeap::Type::indirectObject, 0u),
                                                 &commandQueue.getIndirectHeap(IndirectHeap::Type::surfaceState, 0u),
                                                 taskLevel,
                                                 dispatchFlags,
                                                 commandQueue.getDevice());
    } else {
        completionStamp = csr.flushTask(stream,
                                        streamStart,
                                        &commandQueue.getIndirectHeap(IndirectHeap::Type::dynamicState, 0u),
                                        &commandQueue.getIndirectHeap(IndirectHeap::Type::indirectObject, 0u),
                                        &commandQueue.getIndirectHeap(IndirectHeap::Type::surfaceState, 0u),
                                        taskLevel,
                                        dispatchFlags,
                                        commandQueue.getDevice());
    }

    commandQueue.updateLatestSentEnqueueType(EnqueueProperties::Operation::dependencyResolveOnGpu);

    // Zero-copy objects need neither a wait nor a host-pointer transfer.
    if (memObj.isMemObjZeroCopy()) {
        return completionStamp;
    }

    const auto waitStatus = commandQueue.waitUntilComplete(completionStamp.taskCount, {}, completionStamp.flushStamp, false);
    if (waitStatus == WaitStatus::gpuHang) {
        completionStamp.taskCount = CompletionStamp::gpuHang;
        return completionStamp;
    }

    if (operationType == MapOperationType::map) {
        memObj.transferDataToHostPtr(copySize, copyOffset);
    } else if (!readOnly) {
        DEBUG_BREAK_IF(operationType != MapOperationType::unmap);
        memObj.transferDataFromHostPtr(copySize, copyOffset);
    }

    return completionStamp;
}
|
|
|
|
|
2022-03-01 23:14:04 +08:00
|
|
|
// Builds a deferred kernel-execution command.
//
// Ownership of kernelOperation is moved into the Command base, and the surfaces
// vector and printfHandler are moved into this object. The kernel pointer must be
// non-null; an internal reference is taken on it, which the destructor releases.
CommandComputeKernel::CommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation, std::vector<Surface *> surfaces,
                                           bool flushDC, bool usesSLM, uint32_t commandType, std::unique_ptr<PrintfHandler> &&printfHandler,
                                           PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount,
                                           TagNodeBase *multiRootDeviceSyncNode)
    : Command(commandQueue, kernelOperation, nullptr),
      surfaces(std::move(surfaces)),
      flushDC(flushDC),
      slmUsed(usesSLM),
      commandType(commandType),
      printfHandler(std::move(printfHandler)),
      kernel(kernel),
      kernelCount(kernelCount),
      preemptionMode(preemptionMode),
      multiRootDeviceSyncNode(multiRootDeviceSyncNode) {
    UNRECOVERABLE_IF(this->kernel == nullptr);
    this->kernel->incRefInternal();
}
|
|
|
|
|
|
|
|
// Releases the internal kernel reference taken by the constructor.
CommandComputeKernel::~CommandComputeKernel() {
    this->kernel->decRefInternal();
}
|
|
|
|
|
2022-11-22 21:53:59 +08:00
|
|
|
CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool terminated) {
|
2017-12-21 07:45:38 +08:00
|
|
|
if (terminated) {
|
2021-12-20 22:32:06 +08:00
|
|
|
this->terminated = true;
|
2019-07-19 14:06:40 +08:00
|
|
|
for (auto surface : surfaces) {
|
|
|
|
delete surface;
|
|
|
|
}
|
|
|
|
surfaces.clear();
|
2017-12-21 07:45:38 +08:00
|
|
|
return completionStamp;
|
|
|
|
}
|
2019-07-15 20:28:09 +08:00
|
|
|
auto &commandStreamReceiver = commandQueue.getGpgpuCommandStreamReceiver();
|
2021-09-18 01:09:06 +08:00
|
|
|
auto bcsCsrForAuxTranslation = commandQueue.getBcsForAuxTranslation();
|
2017-12-21 07:45:38 +08:00
|
|
|
|
2018-08-06 20:55:04 +08:00
|
|
|
auto commandStreamReceiverOwnership = commandStreamReceiver.obtainUniqueOwnership();
|
2017-12-21 07:45:38 +08:00
|
|
|
|
2018-04-05 21:12:28 +08:00
|
|
|
IndirectHeap *dsh = kernelOperation->dsh.get();
|
|
|
|
IndirectHeap *ioh = kernelOperation->ioh.get();
|
|
|
|
IndirectHeap *ssh = kernelOperation->ssh.get();
|
2017-12-21 07:45:38 +08:00
|
|
|
|
2019-08-22 23:02:37 +08:00
|
|
|
auto anyUncacheableArgs = false;
|
2017-12-21 07:45:38 +08:00
|
|
|
for (auto &surface : surfaces) {
|
|
|
|
DEBUG_BREAK_IF(!surface);
|
|
|
|
surface->makeResident(commandStreamReceiver);
|
2019-08-22 23:02:37 +08:00
|
|
|
if (!surface->allowsL3Caching()) {
|
|
|
|
anyUncacheableArgs = true;
|
|
|
|
}
|
2017-12-21 07:45:38 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (printfHandler) {
|
2022-05-10 00:21:21 +08:00
|
|
|
printfHandler->makeResident(commandStreamReceiver);
|
2017-12-21 07:45:38 +08:00
|
|
|
}
|
2019-09-04 15:33:21 +08:00
|
|
|
makeTimestampPacketsResident(commandStreamReceiver);
|
2023-01-20 00:11:39 +08:00
|
|
|
if (multiRootDeviceSyncNode != nullptr) {
|
|
|
|
commandStreamReceiver.makeResident(*multiRootDeviceSyncNode->getBaseGraphicsAllocation());
|
|
|
|
}
|
2017-12-21 07:45:38 +08:00
|
|
|
|
2019-11-18 20:35:44 +08:00
|
|
|
if (kernelOperation->blitPropertiesContainer.size() > 0) {
|
2020-01-27 20:06:03 +08:00
|
|
|
CsrDependencies csrDeps;
|
2023-12-12 19:37:31 +08:00
|
|
|
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, *bcsCsrForAuxTranslation, CsrDependencies::DependenciesType::all);
|
2020-01-27 20:06:03 +08:00
|
|
|
|
2019-11-18 20:35:44 +08:00
|
|
|
BlitProperties::setupDependenciesForAuxTranslation(kernelOperation->blitPropertiesContainer, *timestampPacketDependencies,
|
2020-01-27 20:06:03 +08:00
|
|
|
*currentTimestampPacketNodes, csrDeps,
|
2021-09-18 01:09:06 +08:00
|
|
|
commandQueue.getGpgpuCommandStreamReceiver(), *bcsCsrForAuxTranslation);
|
2023-02-28 20:20:30 +08:00
|
|
|
commandQueue.setStallingCommandsOnNextFlush(true);
|
2023-07-13 17:26:41 +08:00
|
|
|
commandQueue.setDcFlushRequiredOnStallingCommandsOnNextFlush(true);
|
2019-11-18 20:35:44 +08:00
|
|
|
}
|
|
|
|
|
2021-12-29 22:28:21 +08:00
|
|
|
if (timestampPacketDependencies && commandQueue.isOOQEnabled()) {
|
|
|
|
commandQueue.setupBarrierTimestampForBcsEngines(commandQueue.getGpgpuCommandStreamReceiver().getOsContext().getEngineType(), *timestampPacketDependencies);
|
|
|
|
}
|
|
|
|
|
2021-03-22 23:26:03 +08:00
|
|
|
const auto &kernelDescriptor = kernel->getKernelInfo().kernelDescriptor;
|
2020-12-07 22:41:52 +08:00
|
|
|
|
2023-01-04 17:45:07 +08:00
|
|
|
auto memoryCompressionState = commandStreamReceiver.getMemoryCompressionState(kernel->isAuxTranslationRequired());
|
2020-12-17 08:36:45 +08:00
|
|
|
|
2019-09-13 18:00:30 +08:00
|
|
|
DispatchFlags dispatchFlags(
|
2024-03-08 23:30:37 +08:00
|
|
|
nullptr, // barrierTimestampPacketNodes
|
|
|
|
{false, kernel->isVmeKernel()}, // pipelineSelectArgs
|
|
|
|
commandQueue.flushStamp->getStampReference(), // flushStampReference
|
|
|
|
commandQueue.getThrottle(), // throttle
|
|
|
|
preemptionMode, // preemptionMode
|
|
|
|
kernelDescriptor.kernelAttributes.numGrfRequired, // numGrfRequired
|
|
|
|
L3CachingSettings::l3CacheOn, // l3CacheSettings
|
|
|
|
kernelDescriptor.kernelAttributes.threadArbitrationPolicy, // threadArbitrationPolicy
|
|
|
|
kernel->getAdditionalKernelExecInfo(), // additionalKernelExecInfo
|
|
|
|
kernel->getExecutionType(), // kernelExecutionType
|
|
|
|
memoryCompressionState, // memoryCompressionState
|
|
|
|
commandQueue.getSliceCount(), // sliceCount
|
|
|
|
true, // blocking
|
|
|
|
flushDC, // dcFlush
|
|
|
|
slmUsed, // useSLM
|
|
|
|
!commandQueue.getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled(), // guardCommandBufferWithPipeControl
|
|
|
|
commandType == CL_COMMAND_NDRANGE_KERNEL, // GSBA32BitRequired
|
|
|
|
commandQueue.getPriority() == QueuePriority::low, // lowPriority
|
|
|
|
false, // implicitFlush
|
|
|
|
commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), // outOfOrderExecutionAllowed
|
|
|
|
false, // epilogueRequired
|
|
|
|
false, // usePerDssBackedBuffer
|
|
|
|
kernel->areMultipleSubDevicesInContext(), // areMultipleSubDevicesInContext
|
|
|
|
kernel->requiresMemoryMigration(), // memoryMigrationRequired
|
|
|
|
commandQueue.isTextureCacheFlushNeeded(this->commandType), // textureCacheFlush
|
|
|
|
false, // hasStallingCmds
|
|
|
|
false, // hasRelaxedOrderingDependencies
|
|
|
|
false, // stateCacheInvalidation
|
|
|
|
commandQueue.isStallingCommandsOnNextFlushRequired(), // isStallingCommandsOnNextFlushRequired
|
2024-09-19 22:35:03 +08:00
|
|
|
commandQueue.isDcFlushRequiredOnStallingCommandsOnNextFlush() // isDcFlushRequiredOnStallingCommandsOnNextFlush
|
2023-07-13 17:26:41 +08:00
|
|
|
);
|
2021-06-14 23:33:53 +08:00
|
|
|
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
|
2023-01-20 00:11:39 +08:00
|
|
|
eventsRequest.fillCsrDependenciesForRootDevices(dispatchFlags.csrDependencies, commandStreamReceiver);
|
2021-06-14 23:33:53 +08:00
|
|
|
}
|
|
|
|
|
2023-02-28 20:20:30 +08:00
|
|
|
const bool isHandlingBarrier = commandQueue.isStallingCommandsOnNextFlushRequired();
|
2021-12-29 22:28:21 +08:00
|
|
|
|
2019-11-18 20:35:44 +08:00
|
|
|
if (timestampPacketDependencies) {
|
2021-12-29 22:28:21 +08:00
|
|
|
if (isHandlingBarrier) {
|
|
|
|
commandQueue.fillCsrDependenciesWithLastBcsPackets(dispatchFlags.csrDependencies);
|
|
|
|
}
|
2019-11-18 20:35:44 +08:00
|
|
|
dispatchFlags.barrierTimestampPacketNodes = ×tampPacketDependencies->barrierNodes;
|
2018-09-20 01:34:33 +08:00
|
|
|
}
|
2022-08-31 21:26:29 +08:00
|
|
|
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = kernel->requiresSystolicPipelineSelectMode();
|
2019-08-22 23:02:37 +08:00
|
|
|
if (anyUncacheableArgs) {
|
|
|
|
dispatchFlags.l3CacheSettings = L3CachingSettings::l3CacheOff;
|
2019-08-26 23:03:13 +08:00
|
|
|
} else if (!kernel->areStatelessWritesUsed()) {
|
|
|
|
dispatchFlags.l3CacheSettings = L3CachingSettings::l3AndL1On;
|
2019-08-22 23:02:37 +08:00
|
|
|
}
|
|
|
|
|
2019-10-28 11:23:14 +08:00
|
|
|
if (commandQueue.dispatchHints != 0) {
|
|
|
|
dispatchFlags.engineHints = commandQueue.dispatchHints;
|
|
|
|
dispatchFlags.epilogueRequired = true;
|
|
|
|
}
|
|
|
|
|
2020-06-16 19:19:11 +08:00
|
|
|
DEBUG_BREAK_IF(taskLevel >= CompletionStamp::notReady);
|
2017-12-21 07:45:38 +08:00
|
|
|
|
2018-02-08 23:00:20 +08:00
|
|
|
gtpinNotifyPreFlushTask(&commandQueue);
|
|
|
|
|
2021-07-02 00:00:22 +08:00
|
|
|
if (kernel->requiresMemoryMigration()) {
|
|
|
|
for (auto &arg : kernel->getMemObjectsToMigrate()) {
|
|
|
|
MigrationController::handleMigration(commandQueue.getContext(), commandStreamReceiver, arg.second);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-03-08 00:50:57 +08:00
|
|
|
completionStamp = commandQueue.getHeaplessStateInitEnabled() ? commandStreamReceiver.flushTaskStateless(*kernelOperation->commandStream,
|
|
|
|
0,
|
|
|
|
dsh,
|
|
|
|
ioh,
|
|
|
|
ssh,
|
|
|
|
taskLevel,
|
|
|
|
dispatchFlags,
|
|
|
|
commandQueue.getDevice())
|
|
|
|
: commandStreamReceiver.flushTask(*kernelOperation->commandStream,
|
|
|
|
0,
|
|
|
|
dsh,
|
|
|
|
ioh,
|
|
|
|
ssh,
|
|
|
|
taskLevel,
|
|
|
|
dispatchFlags,
|
|
|
|
commandQueue.getDevice());
|
2018-12-04 21:18:17 +08:00
|
|
|
|
2021-12-29 22:28:21 +08:00
|
|
|
if (isHandlingBarrier) {
|
|
|
|
commandQueue.clearLastBcsPackets();
|
2023-02-28 20:20:30 +08:00
|
|
|
commandQueue.setStallingCommandsOnNextFlush(false);
|
2021-12-29 22:28:21 +08:00
|
|
|
}
|
|
|
|
|
2020-06-19 22:33:53 +08:00
|
|
|
if (kernelOperation->blitPropertiesContainer.size() > 0) {
|
2022-02-03 21:29:48 +08:00
|
|
|
const auto newTaskCount = bcsCsrForAuxTranslation->flushBcsTask(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled(), commandQueue.getDevice());
|
2022-11-04 21:57:42 +08:00
|
|
|
if (newTaskCount <= CompletionStamp::notReady) {
|
|
|
|
commandQueue.updateBcsTaskCount(bcsCsrForAuxTranslation->getOsContext().getEngineType(), newTaskCount);
|
2022-05-06 00:52:25 +08:00
|
|
|
} else {
|
2022-11-04 21:57:42 +08:00
|
|
|
completionStamp.taskCount = newTaskCount;
|
2022-05-06 00:52:25 +08:00
|
|
|
}
|
2020-06-19 22:33:53 +08:00
|
|
|
}
|
2023-12-14 00:09:52 +08:00
|
|
|
commandQueue.updateLatestSentEnqueueType(EnqueueProperties::Operation::gpuKernel);
|
2020-06-19 22:33:53 +08:00
|
|
|
|
2020-02-19 23:32:40 +08:00
|
|
|
if (gtpinIsGTPinInitialized()) {
|
|
|
|
gtpinNotifyFlushTask(completionStamp.taskCount);
|
|
|
|
}
|
|
|
|
|
2017-12-21 07:45:38 +08:00
|
|
|
if (printfHandler) {
|
2022-05-06 00:52:25 +08:00
|
|
|
const auto waitStatus = commandQueue.waitUntilComplete(completionStamp.taskCount, {}, completionStamp.flushStamp, false);
|
2023-12-01 17:12:59 +08:00
|
|
|
if (waitStatus == WaitStatus::gpuHang) {
|
2022-11-04 21:57:42 +08:00
|
|
|
completionStamp.taskCount = CompletionStamp::gpuHang;
|
2022-05-06 00:52:25 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (!printfHandler->printEnqueueOutput()) {
|
2022-11-04 21:57:42 +08:00
|
|
|
completionStamp.taskCount = CompletionStamp::gpuHang;
|
2022-05-06 00:52:25 +08:00
|
|
|
}
|
2017-12-21 07:45:38 +08:00
|
|
|
}
|
|
|
|
|
2019-07-19 14:06:40 +08:00
|
|
|
for (auto surface : surfaces) {
|
|
|
|
delete surface;
|
|
|
|
}
|
|
|
|
surfaces.clear();
|
|
|
|
|
2017-12-21 07:45:38 +08:00
|
|
|
return completionStamp;
|
|
|
|
}
|
|
|
|
|
2022-11-22 21:53:59 +08:00
|
|
|
// Flushes the single blit stored in kernelOperation on its BCS command stream receiver.
//
// Requires kernelOperation->bcsCsr to be set and exactly one entry in
// blitPropertiesContainer (both enforced with UNRECOVERABLE_IF).
// timestampPacketDependencies and currentTimestampPacketNodes are dereferenced
// without a check, so the caller must have set them.
//
// Returns the task count reported by flushBcsTask. When it is above
// CompletionStamp::notReady the queue's BCS bookkeeping is left untouched.
TaskCountType CommandWithoutKernel::dispatchBlitOperation() {
    auto bcsCsr = kernelOperation->bcsCsr;
    UNRECOVERABLE_IF(bcsCsr == nullptr);

    UNRECOVERABLE_IF(kernelOperation->blitPropertiesContainer.size() != 1);
    auto &blitProperties = *kernelOperation->blitPropertiesContainer.begin();

    // Dependencies: wait-list events first, then this command's own dependency containers.
    eventsRequest.fillCsrDependenciesForTimestampPacketContainer(blitProperties.csrDependencies, *bcsCsr, CsrDependencies::DependenciesType::all);
    blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies->cacheFlushNodes);
    blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies->previousEnqueueNodes);
    blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies->barrierNodes);
    blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies->multiCsrDependencies);
    blitProperties.outputTimestampPacket = currentTimestampPacketNodes->peekNodes()[0];

    if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
        eventsRequest.fillCsrDependenciesForRootDevices(blitProperties.csrDependencies, *bcsCsr);
    }

    const auto newTaskCount = bcsCsr->flushBcsTask(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled(), commandQueue.getDevice());
    if (newTaskCount > CompletionStamp::notReady) {
        return newTaskCount;
    }

    commandQueue.updateBcsTaskCount(bcsCsr->getOsContext().getEngineType(), newTaskCount);
    commandQueue.setLastBcsPacket(bcsCsr->getOsContext().getEngineType());

    return newTaskCount;
}
|
|
|
|
|
2022-11-22 21:53:59 +08:00
|
|
|
CompletionStamp &CommandWithoutKernel::submit(TaskCountType taskLevel, bool terminated) {
|
2019-07-23 03:28:59 +08:00
|
|
|
if (terminated) {
|
2021-12-20 22:32:06 +08:00
|
|
|
this->terminated = true;
|
2019-07-23 03:28:59 +08:00
|
|
|
return completionStamp;
|
|
|
|
}
|
2024-05-20 18:39:06 +08:00
|
|
|
for (auto &tagCsrPair : csrDependencies) {
|
|
|
|
bool submitStatus = tagCsrPair.first->submitDependencyUpdate(tagCsrPair.second);
|
|
|
|
if (!submitStatus) {
|
|
|
|
completionStamp.taskCount = CompletionStamp::gpuHang;
|
|
|
|
return completionStamp;
|
|
|
|
}
|
|
|
|
}
|
2019-07-23 03:28:59 +08:00
|
|
|
auto &commandStreamReceiver = commandQueue.getGpgpuCommandStreamReceiver();
|
2019-07-23 02:55:09 +08:00
|
|
|
|
|
|
|
if (!kernelOperation) {
|
|
|
|
completionStamp.taskCount = commandStreamReceiver.peekTaskCount();
|
|
|
|
completionStamp.taskLevel = commandStreamReceiver.peekTaskLevel();
|
|
|
|
completionStamp.flushStamp = commandStreamReceiver.obtainCurrentFlushStamp();
|
|
|
|
|
|
|
|
return completionStamp;
|
|
|
|
}
|
|
|
|
|
2021-06-14 23:33:53 +08:00
|
|
|
auto barrierNodes = timestampPacketDependencies ? ×tampPacketDependencies->barrierNodes : nullptr;
|
2019-07-23 02:55:09 +08:00
|
|
|
auto lockCSR = commandStreamReceiver.obtainUniqueOwnership();
|
|
|
|
|
2023-12-14 00:09:52 +08:00
|
|
|
auto enqueueOperationType = EnqueueProperties::Operation::dependencyResolveOnGpu;
|
2020-06-26 17:21:07 +08:00
|
|
|
|
2019-09-04 15:33:21 +08:00
|
|
|
if (kernelOperation->blitEnqueue) {
|
2023-12-14 00:09:52 +08:00
|
|
|
enqueueOperationType = EnqueueProperties::Operation::blit;
|
2020-06-26 17:21:07 +08:00
|
|
|
|
2021-06-14 23:33:53 +08:00
|
|
|
UNRECOVERABLE_IF(!barrierNodes);
|
2023-02-28 20:20:30 +08:00
|
|
|
if (commandQueue.isStallingCommandsOnNextFlushRequired()) {
|
2021-06-14 23:33:53 +08:00
|
|
|
barrierNodes->add(commandStreamReceiver.getTimestampPacketAllocator()->getTag());
|
2019-11-12 16:37:16 +08:00
|
|
|
}
|
2019-09-04 15:33:21 +08:00
|
|
|
}
|
|
|
|
|
2021-12-29 22:28:21 +08:00
|
|
|
if (timestampPacketDependencies && commandQueue.isOOQEnabled()) {
|
|
|
|
commandQueue.setupBarrierTimestampForBcsEngines(commandQueue.getGpgpuCommandStreamReceiver().getOsContext().getEngineType(), *timestampPacketDependencies);
|
|
|
|
}
|
|
|
|
|
2021-07-02 00:00:22 +08:00
|
|
|
auto rootDeviceIndex = commandStreamReceiver.getRootDeviceIndex();
|
2019-09-13 18:00:30 +08:00
|
|
|
DispatchFlags dispatchFlags(
|
2022-07-24 12:21:16 +08:00
|
|
|
barrierNodes, // barrierTimestampPacketNodes
|
|
|
|
{}, // pipelineSelectArgs
|
|
|
|
commandQueue.flushStamp->getStampReference(), // flushStampReference
|
|
|
|
commandQueue.getThrottle(), // throttle
|
|
|
|
commandQueue.getDevice().getPreemptionMode(), // preemptionMode
|
2023-11-30 18:36:43 +08:00
|
|
|
GrfConfig::notApplicable, // numGrfRequired
|
|
|
|
L3CachingSettings::notApplicable, // l3CacheSettings
|
2022-07-24 12:21:16 +08:00
|
|
|
ThreadArbitrationPolicy::NotPresent, // threadArbitrationPolicy
|
2023-11-30 18:36:43 +08:00
|
|
|
AdditionalKernelExecInfo::notApplicable, // additionalKernelExecInfo
|
|
|
|
KernelExecutionType::notApplicable, // kernelExecutionType
|
|
|
|
MemoryCompressionState::notApplicable, // memoryCompressionState
|
2022-07-24 12:21:16 +08:00
|
|
|
commandQueue.getSliceCount(), // sliceCount
|
|
|
|
true, // blocking
|
|
|
|
false, // dcFlush
|
|
|
|
false, // useSLM
|
|
|
|
!commandStreamReceiver.isUpdateTagFromWaitEnabled(), // guardCommandBufferWithPipeControl
|
|
|
|
false, // GSBA32BitRequired
|
2023-12-19 15:40:17 +08:00
|
|
|
commandQueue.getPriority() == QueuePriority::low, // lowPriority
|
2022-07-24 12:21:16 +08:00
|
|
|
false, // implicitFlush
|
|
|
|
commandStreamReceiver.isNTo1SubmissionModelEnabled(), // outOfOrderExecutionAllowed
|
|
|
|
false, // epilogueRequired
|
|
|
|
false, // usePerDssBackedBuffer
|
|
|
|
commandQueue.getContext().containsMultipleSubDevices(rootDeviceIndex), // areMultipleSubDevicesInContext
|
|
|
|
false, // memoryMigrationRequired
|
2022-11-20 02:25:04 +08:00
|
|
|
false, // textureCacheFlush
|
2022-11-27 04:10:32 +08:00
|
|
|
false, // hasStallingCmds
|
2023-01-13 00:58:18 +08:00
|
|
|
false, // hasRelaxedOrderingDependencies
|
2023-02-28 20:20:30 +08:00
|
|
|
false, // stateCacheInvalidation
|
2023-07-13 17:26:41 +08:00
|
|
|
commandQueue.isStallingCommandsOnNextFlushRequired(), // isStallingCommandsOnNextFlushRequired
|
2024-09-19 22:35:03 +08:00
|
|
|
commandQueue.isDcFlushRequiredOnStallingCommandsOnNextFlush() // isDcFlushRequiredOnStallingCommandsOnNextFlush
|
2023-07-13 17:26:41 +08:00
|
|
|
);
|
2019-07-23 02:55:09 +08:00
|
|
|
|
2021-06-14 23:33:53 +08:00
|
|
|
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
|
2023-01-20 00:11:39 +08:00
|
|
|
eventsRequest.fillCsrDependenciesForRootDevices(dispatchFlags.csrDependencies, commandStreamReceiver);
|
2021-06-14 23:33:53 +08:00
|
|
|
}
|
2019-07-23 02:55:09 +08:00
|
|
|
|
2023-02-28 20:20:30 +08:00
|
|
|
const bool isHandlingBarrier = commandQueue.isStallingCommandsOnNextFlushRequired();
|
2021-12-29 22:28:21 +08:00
|
|
|
|
2021-06-22 21:16:27 +08:00
|
|
|
if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
|
2021-12-29 22:28:21 +08:00
|
|
|
if (isHandlingBarrier) {
|
|
|
|
commandQueue.fillCsrDependenciesWithLastBcsPackets(dispatchFlags.csrDependencies);
|
|
|
|
}
|
2021-06-22 21:16:27 +08:00
|
|
|
makeTimestampPacketsResident(commandStreamReceiver);
|
|
|
|
}
|
2019-07-23 02:55:09 +08:00
|
|
|
|
|
|
|
gtpinNotifyPreFlushTask(&commandQueue);
|
|
|
|
|
2024-03-08 00:50:57 +08:00
|
|
|
completionStamp = commandQueue.getHeaplessStateInitEnabled() ? commandStreamReceiver.flushTaskStateless(*kernelOperation->commandStream,
|
|
|
|
0,
|
|
|
|
&commandQueue.getIndirectHeap(IndirectHeap::Type::dynamicState, 0u),
|
|
|
|
&commandQueue.getIndirectHeap(IndirectHeap::Type::indirectObject, 0u),
|
|
|
|
&commandQueue.getIndirectHeap(IndirectHeap::Type::surfaceState, 0u),
|
|
|
|
taskLevel,
|
|
|
|
dispatchFlags,
|
|
|
|
commandQueue.getDevice())
|
|
|
|
: commandStreamReceiver.flushTask(*kernelOperation->commandStream,
|
|
|
|
0,
|
|
|
|
&commandQueue.getIndirectHeap(IndirectHeap::Type::dynamicState, 0u),
|
|
|
|
&commandQueue.getIndirectHeap(IndirectHeap::Type::indirectObject, 0u),
|
|
|
|
&commandQueue.getIndirectHeap(IndirectHeap::Type::surfaceState, 0u),
|
|
|
|
taskLevel,
|
|
|
|
dispatchFlags,
|
|
|
|
commandQueue.getDevice());
|
2019-07-23 03:28:59 +08:00
|
|
|
|
2021-12-29 22:28:21 +08:00
|
|
|
if (isHandlingBarrier) {
|
|
|
|
commandQueue.clearLastBcsPackets();
|
2023-02-28 20:20:30 +08:00
|
|
|
commandQueue.setStallingCommandsOnNextFlush(false);
|
2021-12-29 22:28:21 +08:00
|
|
|
}
|
|
|
|
|
2020-06-19 22:33:53 +08:00
|
|
|
if (kernelOperation->blitEnqueue) {
|
2022-11-04 21:57:42 +08:00
|
|
|
auto taskCount = dispatchBlitOperation();
|
|
|
|
if (taskCount > CompletionStamp::notReady) {
|
|
|
|
completionStamp.taskCount = taskCount;
|
2022-05-06 00:52:25 +08:00
|
|
|
}
|
2020-06-19 22:33:53 +08:00
|
|
|
}
|
|
|
|
|
2020-06-26 17:21:07 +08:00
|
|
|
commandQueue.updateLatestSentEnqueueType(enqueueOperationType);
|
|
|
|
|
2019-07-23 03:28:59 +08:00
|
|
|
return completionStamp;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Copies eventsRequest into the command. When the wait list is non-empty, it is
// copied into eventsWaitlist, the stored request is re-pointed at that copy, and
// an internal reference is taken on every event in the list.
void Command::setEventsRequest(EventsRequest &eventsRequest) {
    this->eventsRequest = eventsRequest;

    const bool hasWaitList = eventsRequest.numEventsInWaitList > 0;
    if (!hasWaitList) {
        return;
    }

    // Own a private copy of the handles so the stored request does not point at caller memory.
    eventsWaitlist.resize(eventsRequest.numEventsInWaitList);
    const auto bytesToCopy = eventsRequest.numEventsInWaitList * sizeof(cl_event);
    memcpy_s(eventsWaitlist.data(), bytesToCopy, eventsRequest.eventWaitList, bytesToCopy);
    this->eventsRequest.eventWaitList = eventsWaitlist.data();

    for (cl_uint eventIdx = 0; eventIdx < eventsRequest.numEventsInWaitList; ++eventIdx) {
        castToObjectOrAbort<Event>(eventsRequest.eventWaitList[eventIdx])->incRefInternal();
    }
}
|
|
|
|
|
2019-11-16 18:59:18 +08:00
|
|
|
void Command::setTimestampPacketNode(TimestampPacketContainer ¤t, TimestampPacketDependencies &&dependencies) {
|
2018-11-27 20:07:41 +08:00
|
|
|
currentTimestampPacketNodes = std::make_unique<TimestampPacketContainer>();
|
2018-10-03 05:37:30 +08:00
|
|
|
currentTimestampPacketNodes->assignAndIncrementNodesRefCounts(current);
|
|
|
|
|
2019-11-16 18:59:18 +08:00
|
|
|
timestampPacketDependencies = std::make_unique<TimestampPacketDependencies>();
|
|
|
|
*timestampPacketDependencies = std::move(dependencies);
|
2018-08-30 14:18:50 +08:00
|
|
|
}
|
|
|
|
|
2019-07-23 03:28:59 +08:00
|
|
|
// Destructor.
// - Terminated command: every node it shares with the queue's timestamp packet
//   container has all used packets overwritten with 0xFFFFFFFF values.
// - Otherwise: dependency nodes are moved to the queue's deferred timestamp
//   packet container, when both exist.
// In both cases the internal references held on wait-list events are released.
Command::~Command() {
    if (terminated) {
        if (commandQueue.getTimestampPacketContainer() && currentTimestampPacketNodes != nullptr) {
            std::array<uint32_t, 8u> allOnes;
            allOnes.fill(std::numeric_limits<uint32_t>::max());

            for (auto &ownNode : currentTimestampPacketNodes->peekNodes()) {
                for (const auto &queueNode : commandQueue.getTimestampPacketContainer()->peekNodes()) {
                    if (ownNode != queueNode) {
                        continue;
                    }
                    for (uint32_t packetId = 0; packetId < ownNode->getPacketsUsed(); packetId++) {
                        ownNode->assignDataToAllTimestamps(packetId, allOnes.data());
                    }
                }
            }
        }
    } else if (commandQueue.getDeferredTimestampPackets() && timestampPacketDependencies != nullptr) {
        timestampPacketDependencies->moveNodesToNewContainer(*commandQueue.getDeferredTimestampPackets());
    }

    for (cl_event &waitEvent : eventsWaitlist) {
        castToObjectOrAbort<Event>(waitEvent)->decRefInternal();
    }
}
|
2017-12-21 07:45:38 +08:00
|
|
|
|
2019-09-04 15:33:21 +08:00
|
|
|
// Makes the timestamp packet allocations this command relies on resident on
// commandStreamReceiver:
//  - nodes of wait-list events from the same root device (only when timestamp
//    packet writes are enabled on the CSR),
//  - the command's own current nodes,
//  - the cacheFlush and previousEnqueue dependency nodes.
void Command::makeTimestampPacketsResident(CommandStreamReceiver &commandStreamReceiver) {
    if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
        for (cl_event &waitEvent : eventsWaitlist) {
            auto eventObj = castToObjectOrAbort<Event>(waitEvent);

            auto eventNodes = eventObj->getTimestampPacketNodes();
            if (!eventNodes) {
                continue;
            }
            // Events from another root device are skipped here.
            if (eventObj->getCommandQueue()->getClDevice().getRootDeviceIndex() != commandStreamReceiver.getRootDeviceIndex()) {
                continue;
            }
            eventNodes->makeResident(commandStreamReceiver);
        }
    }

    if (currentTimestampPacketNodes) {
        currentTimestampPacketNodes->makeResident(commandStreamReceiver);
    }

    if (timestampPacketDependencies) {
        auto &dependencies = *timestampPacketDependencies;
        dependencies.cacheFlushNodes.makeResident(commandStreamReceiver);
        dependencies.previousEnqueueNodes.makeResident(commandStreamReceiver);
    }
}
|
|
|
|
|
2019-07-23 03:28:59 +08:00
|
|
|
// Creates a command bound to commandQueue, with no kernel operation attached.
Command::Command(CommandQueue &commandQueue)
    : commandQueue(commandQueue) {
}
|
2017-12-21 07:45:38 +08:00
|
|
|
|
2024-05-20 18:39:06 +08:00
|
|
|
// Creates a command bound to commandQueue and takes over kernelOperation.
// The caller's unique_ptr is moved from, so it is left empty.
// csrDependencies is optional; when provided, its contents are copied into the command.
Command::Command(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation, CsrDependencyContainer *csrDependencies)
    : commandQueue(commandQueue),
      kernelOperation(std::move(kernelOperation)) {
    if (csrDependencies != nullptr) {
        this->csrDependencies = *csrDependencies;
    }
}
|
2019-03-26 18:59:46 +08:00
|
|
|
} // namespace NEO
|