2017-12-21 00:45:38 +01:00
|
|
|
/*
|
2022-12-28 19:37:21 +00:00
|
|
|
* Copyright (C) 2018-2023 Intel Corporation
|
2017-12-21 00:45:38 +01:00
|
|
|
*
|
2018-09-18 18:31:19 -07:00
|
|
|
* SPDX-License-Identifier: MIT
|
2017-12-21 00:45:38 +01:00
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#pragma once
|
2020-02-24 13:10:44 +01:00
|
|
|
#include "shared/source/built_ins/built_ins.h"
|
2020-02-23 22:44:01 +01:00
|
|
|
#include "shared/source/command_stream/command_stream_receiver.h"
|
2022-03-21 11:08:43 +00:00
|
|
|
#include "shared/source/command_stream/wait_status.h"
|
2020-02-23 22:44:01 +01:00
|
|
|
#include "shared/source/helpers/engine_node_helper.h"
|
2021-12-22 14:11:05 +00:00
|
|
|
#include "shared/source/helpers/pipe_control_args.h"
|
2020-02-23 22:44:01 +01:00
|
|
|
#include "shared/source/memory_manager/internal_allocation_storage.h"
|
|
|
|
|
#include "shared/source/memory_manager/memory_manager.h"
|
|
|
|
|
#include "shared/source/memory_manager/surface.h"
|
|
|
|
|
#include "shared/source/os_interface/os_context.h"
|
|
|
|
|
#include "shared/source/program/sync_buffer_handler.h"
|
2021-03-19 23:14:09 +00:00
|
|
|
#include "shared/source/program/sync_buffer_handler.inl"
|
2020-02-23 22:44:01 +01:00
|
|
|
#include "shared/source/utilities/range.h"
|
|
|
|
|
#include "shared/source/utilities/tag_allocator.h"
|
2020-02-24 10:22:30 +01:00
|
|
|
|
2020-02-22 22:50:57 +01:00
|
|
|
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
|
|
|
|
|
#include "opencl/source/command_queue/command_queue_hw.h"
|
|
|
|
|
#include "opencl/source/command_queue/gpgpu_walker.h"
|
|
|
|
|
#include "opencl/source/command_queue/hardware_interface.h"
|
|
|
|
|
#include "opencl/source/event/event_builder.h"
|
|
|
|
|
#include "opencl/source/event/user_event.h"
|
|
|
|
|
#include "opencl/source/gtpin/gtpin_notify.h"
|
|
|
|
|
#include "opencl/source/helpers/cl_blit_properties.h"
|
2020-10-22 16:13:05 +02:00
|
|
|
#include "opencl/source/helpers/cl_hw_helper.h"
|
2021-09-22 22:24:59 +00:00
|
|
|
#include "opencl/source/helpers/cl_preemption_helper.h"
|
2020-02-22 22:50:57 +01:00
|
|
|
#include "opencl/source/helpers/dispatch_info_builder.h"
|
|
|
|
|
#include "opencl/source/helpers/enqueue_properties.h"
|
|
|
|
|
#include "opencl/source/helpers/task_information.h"
|
|
|
|
|
#include "opencl/source/mem_obj/buffer.h"
|
|
|
|
|
#include "opencl/source/mem_obj/image.h"
|
2021-07-01 16:00:22 +00:00
|
|
|
#include "opencl/source/memory_manager/migration_controller.h"
|
2020-02-22 22:50:57 +01:00
|
|
|
#include "opencl/source/program/printf_handler.h"
|
2021-10-08 11:02:35 +00:00
|
|
|
#include "opencl/source/utilities/cl_logger.h"
|
2019-02-27 11:39:32 +01:00
|
|
|
|
2018-11-05 05:26:45 -08:00
|
|
|
#include <algorithm>
|
2018-04-18 14:59:28 +02:00
|
|
|
#include <new>
|
2017-12-21 00:45:38 +01:00
|
|
|
|
2019-03-26 11:59:46 +01:00
|
|
|
namespace NEO {
|
2017-12-21 00:45:38 +01:00
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
2018-04-09 16:39:32 +02:00
|
|
|
template <uint32_t commandType, size_t surfaceCount>
|
2022-03-21 11:08:43 +00:00
|
|
|
cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount],
|
|
|
|
|
bool blocking,
|
|
|
|
|
Kernel *kernel,
|
|
|
|
|
cl_uint workDim,
|
|
|
|
|
const size_t globalOffsets[3],
|
|
|
|
|
const size_t workItems[3],
|
|
|
|
|
const size_t *localWorkSizesIn,
|
|
|
|
|
const size_t *enqueuedWorkSizes,
|
|
|
|
|
cl_uint numEventsInWaitList,
|
|
|
|
|
const cl_event *eventWaitList,
|
|
|
|
|
cl_event *event) {
|
2019-02-22 13:22:06 +01:00
|
|
|
BuiltInOwnershipWrapper builtInLock;
|
2022-04-05 16:47:19 +00:00
|
|
|
std::unique_ptr<KernelObjsForAuxTranslation> kernelObjsForAuxTranslation;
|
2019-02-22 13:22:06 +01:00
|
|
|
MultiDispatchInfo multiDispatchInfo(kernel);
|
2017-12-21 00:45:38 +01:00
|
|
|
|
2021-03-01 15:05:04 +00:00
|
|
|
auto auxTranslationMode = AuxTranslationMode::None;
|
|
|
|
|
|
2022-01-03 17:29:57 +00:00
|
|
|
kernel->updateAuxTranslationRequired();
|
|
|
|
|
if (kernel->isAuxTranslationRequired()) {
|
2022-04-05 16:47:19 +00:00
|
|
|
kernelObjsForAuxTranslation = kernel->fillWithKernelObjsForAuxTranslation();
|
2021-03-01 15:05:04 +00:00
|
|
|
|
2022-04-05 16:47:19 +00:00
|
|
|
if (!kernelObjsForAuxTranslation->empty()) {
|
2022-12-08 12:22:35 +00:00
|
|
|
auxTranslationMode = GfxCoreHelperHw<GfxFamily>::get().getAuxTranslationMode(device->getHardwareInfo());
|
2019-02-22 13:22:06 +01:00
|
|
|
}
|
2022-04-05 16:47:19 +00:00
|
|
|
multiDispatchInfo.setKernelObjsForAuxTranslation(std::move(kernelObjsForAuxTranslation));
|
2022-01-03 17:29:57 +00:00
|
|
|
}
|
2021-03-01 15:05:04 +00:00
|
|
|
|
2022-01-03 17:29:57 +00:00
|
|
|
if (AuxTranslationMode::Builtin == auxTranslationMode) {
|
|
|
|
|
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getClDevice());
|
|
|
|
|
builtInLock.takeOwnership(builder, this->context);
|
2021-03-01 15:05:04 +00:00
|
|
|
|
2022-01-03 17:29:57 +00:00
|
|
|
dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::AuxToNonAux);
|
|
|
|
|
}
|
2021-03-01 15:05:04 +00:00
|
|
|
|
2022-01-03 17:29:57 +00:00
|
|
|
if (kernel->getKernelInfo().builtinDispatchBuilder == nullptr) {
|
|
|
|
|
DispatchInfoBuilder<SplitDispatch::Dim::d3D, SplitDispatch::SplitMode::WalkerSplit> builder(getClDevice());
|
|
|
|
|
builder.setDispatchGeometry(workDim, workItems, enqueuedWorkSizes, globalOffsets, Vec3<size_t>{0, 0, 0}, localWorkSizesIn);
|
|
|
|
|
builder.setKernel(kernel);
|
|
|
|
|
builder.bake(multiDispatchInfo);
|
|
|
|
|
} else {
|
|
|
|
|
auto builder = kernel->getKernelInfo().builtinDispatchBuilder;
|
|
|
|
|
builder->buildDispatchInfos(multiDispatchInfo, kernel, workDim, workItems, enqueuedWorkSizes, globalOffsets);
|
2017-12-21 00:45:38 +01:00
|
|
|
|
2022-01-03 17:29:57 +00:00
|
|
|
if (multiDispatchInfo.size() == 0) {
|
2022-03-21 11:08:43 +00:00
|
|
|
return CL_SUCCESS;
|
2019-02-22 13:22:06 +01:00
|
|
|
}
|
2022-01-03 17:29:57 +00:00
|
|
|
}
|
2021-03-01 15:05:04 +00:00
|
|
|
|
2022-12-30 12:37:23 +00:00
|
|
|
if constexpr (commandType == CL_COMMAND_NDRANGE_KERNEL) {
|
2022-11-08 13:33:22 +00:00
|
|
|
if (!multiDispatchInfo.empty()) {
|
|
|
|
|
for (auto &dispatchInfo : multiDispatchInfo) {
|
|
|
|
|
auto nwgs = dispatchInfo.getNumberOfWorkgroups();
|
|
|
|
|
|
|
|
|
|
for (auto i = 0u; i < workDim; i++) {
|
|
|
|
|
uint64_t dimension = static_cast<uint64_t>(nwgs[i]);
|
|
|
|
|
if (dimension > std::numeric_limits<uint32_t>::max()) {
|
|
|
|
|
return CL_INVALID_GLOBAL_WORK_SIZE;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-01-03 17:29:57 +00:00
|
|
|
if (AuxTranslationMode::Builtin == auxTranslationMode) {
|
|
|
|
|
dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::NonAuxToAux);
|
2017-12-21 00:45:38 +01:00
|
|
|
}
|
2019-02-22 13:22:06 +01:00
|
|
|
|
2021-03-01 15:05:04 +00:00
|
|
|
if (AuxTranslationMode::Blit == auxTranslationMode) {
|
2019-11-09 19:02:25 +01:00
|
|
|
setupBlitAuxTranslation(multiDispatchInfo);
|
|
|
|
|
}
|
|
|
|
|
|
2022-03-21 11:08:43 +00:00
|
|
|
return enqueueHandler<commandType>(surfaces, blocking, multiDispatchInfo, numEventsInWaitList, eventWaitList, event);
|
2017-12-21 00:45:38 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
2018-04-09 16:39:32 +02:00
|
|
|
template <uint32_t commandType>
|
2022-03-21 11:08:43 +00:00
|
|
|
cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
|
|
|
|
size_t numSurfaceForResidency,
|
|
|
|
|
bool blocking,
|
|
|
|
|
const MultiDispatchInfo &multiDispatchInfo,
|
|
|
|
|
cl_uint numEventsInWaitList,
|
|
|
|
|
const cl_event *eventWaitList,
|
|
|
|
|
cl_event *event) {
|
2019-07-03 09:30:30 +02:00
|
|
|
if (multiDispatchInfo.empty() && !isCommandWithoutKernel(commandType)) {
|
2022-03-21 11:08:43 +00:00
|
|
|
const auto enqueueResult = enqueueHandler<CL_COMMAND_MARKER>(nullptr, 0, blocking, multiDispatchInfo,
|
|
|
|
|
numEventsInWaitList, eventWaitList, event);
|
|
|
|
|
if (enqueueResult != CL_SUCCESS) {
|
|
|
|
|
return enqueueResult;
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-21 00:45:38 +01:00
|
|
|
if (event) {
|
|
|
|
|
castToObjectOrAbort<Event>(*event)->setCmdType(commandType);
|
|
|
|
|
}
|
2022-03-21 11:08:43 +00:00
|
|
|
return CL_SUCCESS;
|
2017-12-21 00:45:38 +01:00
|
|
|
}
|
|
|
|
|
|
2021-03-24 18:21:13 +00:00
|
|
|
TagNodeBase *hwTimeStamps = nullptr;
|
2021-12-10 21:31:34 +00:00
|
|
|
CommandStreamReceiver &computeCommandStreamReceiver = getGpgpuCommandStreamReceiver();
|
2017-12-21 00:45:38 +01:00
|
|
|
|
|
|
|
|
EventBuilder eventBuilder;
|
2020-08-26 11:26:44 +02:00
|
|
|
setupEvent(eventBuilder, event, commandType);
|
2017-12-21 00:45:38 +01:00
|
|
|
|
2022-12-07 15:04:37 +00:00
|
|
|
bool isMarkerWithPostSyncWrite = (CL_COMMAND_MARKER == commandType) && ((eventBuilder.getEvent() && eventBuilder.getEvent()->isProfilingEnabled()) || multiDispatchInfo.peekBuiltinOpParams().bcsSplit);
|
2021-05-20 15:07:00 +00:00
|
|
|
|
2019-07-18 21:15:50 +02:00
|
|
|
std::unique_ptr<KernelOperation> blockedCommandsData;
|
2017-12-21 00:45:38 +01:00
|
|
|
std::unique_ptr<PrintfHandler> printfHandler;
|
|
|
|
|
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
|
2022-03-04 09:46:29 +00:00
|
|
|
auto commandStreamReceiverOwnership = computeCommandStreamReceiver.obtainUniqueOwnership();
|
2017-12-21 00:45:38 +01:00
|
|
|
|
2017-12-22 16:05:10 +01:00
|
|
|
auto blockQueue = false;
|
2022-11-22 13:53:59 +00:00
|
|
|
TaskCountType taskLevel = 0u;
|
2021-03-11 13:48:04 +00:00
|
|
|
obtainTaskLevelAndBlockedStatus(taskLevel, numEventsInWaitList, eventWaitList, blockQueue, commandType);
|
2017-12-21 00:45:38 +01:00
|
|
|
|
|
|
|
|
enqueueHandlerHook(commandType, multiDispatchInfo);
|
|
|
|
|
|
2021-06-18 10:35:54 +00:00
|
|
|
bool clearDependenciesForSubCapture = false;
|
|
|
|
|
aubCaptureHook(blocking, clearDependenciesForSubCapture, multiDispatchInfo);
|
|
|
|
|
|
2022-11-29 13:28:05 +00:00
|
|
|
const bool clearAllDependencies = (queueDependenciesClearRequired() || clearDependenciesForSubCapture);
|
2019-04-16 14:39:40 +02:00
|
|
|
|
2018-06-12 20:33:03 +02:00
|
|
|
if (DebugManager.flags.MakeEachEnqueueBlocking.get()) {
|
|
|
|
|
blocking = true;
|
|
|
|
|
}
|
|
|
|
|
|
2019-11-13 12:23:29 +01:00
|
|
|
TimestampPacketDependencies timestampPacketDependencies;
|
2021-03-11 13:48:04 +00:00
|
|
|
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
|
2019-01-25 10:20:32 +01:00
|
|
|
CsrDependencies csrDeps;
|
2019-11-07 09:15:53 +01:00
|
|
|
BlitPropertiesContainer blitPropertiesContainer;
|
2019-01-25 10:20:32 +01:00
|
|
|
|
2021-04-21 12:35:47 +00:00
|
|
|
if (this->context->getRootDeviceIndices().size() > 1) {
|
2022-12-24 18:25:41 +01:00
|
|
|
eventsRequest.fillCsrDependenciesForTaskCountContainer(csrDeps, computeCommandStreamReceiver);
|
2021-04-21 12:35:47 +00:00
|
|
|
}
|
2021-03-11 13:48:04 +00:00
|
|
|
|
2022-11-29 13:28:05 +00:00
|
|
|
const bool enqueueWithBlitAuxTranslation = isBlitAuxTranslationRequired(multiDispatchInfo);
|
|
|
|
|
const auto &hwInfo = this->getDevice().getHardwareInfo();
|
2022-12-28 22:59:37 +00:00
|
|
|
auto &productHelper = getDevice().getProductHelper();
|
2022-11-29 13:28:05 +00:00
|
|
|
bool canUsePipeControlInsteadOfSemaphoresForOnCsrDependencies = false;
|
2020-03-09 13:48:30 +01:00
|
|
|
|
2021-12-10 21:31:34 +00:00
|
|
|
if (computeCommandStreamReceiver.peekTimestampPacketWriteEnabled()) {
|
2022-11-29 13:28:05 +00:00
|
|
|
canUsePipeControlInsteadOfSemaphoresForOnCsrDependencies = this->peekLatestSentEnqueueOperation() == EnqueueProperties::Operation::GpuKernel &&
|
2022-12-28 22:59:37 +00:00
|
|
|
productHelper.isResolveDependenciesByPipeControlsSupported(hwInfo, this->isOOQEnabled());
|
2022-11-29 13:28:05 +00:00
|
|
|
if (false == clearDependenciesForSubCapture &&
|
|
|
|
|
false == canUsePipeControlInsteadOfSemaphoresForOnCsrDependencies) {
|
2021-12-10 21:31:34 +00:00
|
|
|
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, computeCommandStreamReceiver, CsrDependencies::DependenciesType::OnCsr);
|
2021-06-18 10:35:54 +00:00
|
|
|
}
|
|
|
|
|
|
2021-12-10 21:31:34 +00:00
|
|
|
auto allocator = computeCommandStreamReceiver.getTimestampPacketAllocator();
|
2019-01-25 10:20:32 +01:00
|
|
|
|
2019-07-03 09:30:30 +02:00
|
|
|
size_t nodesCount = 0u;
|
2022-12-07 15:04:37 +00:00
|
|
|
if (isCacheFlushCommand(commandType) || isMarkerWithPostSyncWrite) {
|
2019-07-03 09:30:30 +02:00
|
|
|
nodesCount = 1;
|
|
|
|
|
} else if (!multiDispatchInfo.empty()) {
|
|
|
|
|
nodesCount = estimateTimestampPacketNodesCount(multiDispatchInfo);
|
|
|
|
|
}
|
|
|
|
|
|
2020-08-28 19:14:29 +02:00
|
|
|
if (isCacheFlushForBcsRequired() && enqueueWithBlitAuxTranslation) {
|
2020-06-24 13:32:09 +02:00
|
|
|
// Cache flush for aux translation is always required (if supported)
|
2020-08-28 19:14:29 +02:00
|
|
|
timestampPacketDependencies.cacheFlushNodes.add(allocator->getTag());
|
2019-10-03 14:38:49 +02:00
|
|
|
}
|
|
|
|
|
|
2019-05-23 13:51:32 +02:00
|
|
|
if (nodesCount > 0) {
|
2021-12-10 21:31:34 +00:00
|
|
|
obtainNewTimestampPacketNodes(nodesCount, timestampPacketDependencies.previousEnqueueNodes, clearAllDependencies, computeCommandStreamReceiver);
|
2022-11-29 13:28:05 +00:00
|
|
|
if (false == canUsePipeControlInsteadOfSemaphoresForOnCsrDependencies) {
|
|
|
|
|
csrDeps.timestampPacketContainer.push_back(×tampPacketDependencies.previousEnqueueNodes);
|
|
|
|
|
}
|
2019-01-25 10:20:32 +01:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-08-28 19:14:29 +02:00
|
|
|
auto &commandStream = *obtainCommandStream<commandType>(csrDeps, false, blockQueue, multiDispatchInfo, eventsRequest,
|
2022-12-07 15:04:37 +00:00
|
|
|
blockedCommandsData, surfacesForResidency, numSurfaceForResidency, isMarkerWithPostSyncWrite);
|
2019-01-25 10:20:32 +01:00
|
|
|
auto commandStreamStart = commandStream.getUsed();
|
2018-09-18 15:13:34 -07:00
|
|
|
|
2022-11-29 13:28:05 +00:00
|
|
|
if (canUsePipeControlInsteadOfSemaphoresForOnCsrDependencies) {
|
|
|
|
|
PipeControlArgs args;
|
|
|
|
|
args.csStallOnly = true;
|
|
|
|
|
MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(commandStream, args);
|
|
|
|
|
}
|
|
|
|
|
|
2021-04-21 12:35:47 +00:00
|
|
|
if (this->context->getRootDeviceIndices().size() > 1) {
|
2022-12-24 18:25:41 +01:00
|
|
|
TimestampPacketHelper::programCsrDependenciesForForTaskCountContainer<GfxFamily>(commandStream, csrDeps);
|
2021-04-21 12:35:47 +00:00
|
|
|
}
|
2021-03-11 13:48:04 +00:00
|
|
|
|
2021-03-01 15:05:04 +00:00
|
|
|
if (enqueueWithBlitAuxTranslation) {
|
2021-08-25 16:03:15 +00:00
|
|
|
processDispatchForBlitAuxTranslation(*getBcsForAuxTranslation(), multiDispatchInfo, blitPropertiesContainer,
|
|
|
|
|
timestampPacketDependencies, eventsRequest, blockQueue);
|
2019-11-09 19:02:25 +01:00
|
|
|
}
|
|
|
|
|
|
2021-12-10 21:31:34 +00:00
|
|
|
if (eventBuilder.getEvent() && computeCommandStreamReceiver.peekTimestampPacketWriteEnabled()) {
|
2019-04-10 12:44:02 +02:00
|
|
|
eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer);
|
2019-11-09 19:02:25 +01:00
|
|
|
eventBuilder.getEvent()->addTimestampPacketNodes(timestampPacketDependencies.nonAuxToAuxNodes);
|
2020-07-31 16:07:14 +02:00
|
|
|
eventBuilder.getEvent()->addTimestampPacketNodes(timestampPacketDependencies.auxToNonAuxNodes);
|
2019-04-10 12:44:02 +02:00
|
|
|
}
|
|
|
|
|
|
2019-07-10 11:39:19 +02:00
|
|
|
bool flushDependenciesForNonKernelCommand = false;
|
|
|
|
|
|
2020-08-28 19:14:29 +02:00
|
|
|
if (multiDispatchInfo.empty() == false) {
|
2019-02-21 17:44:17 +01:00
|
|
|
processDispatchForKernels<commandType>(multiDispatchInfo, printfHandler, eventBuilder.getEvent(),
|
2022-01-03 17:29:57 +00:00
|
|
|
hwTimeStamps, blockQueue, csrDeps, blockedCommandsData.get(),
|
2019-11-13 12:23:29 +01:00
|
|
|
timestampPacketDependencies);
|
2019-03-22 13:40:41 +01:00
|
|
|
} else if (isCacheFlushCommand(commandType)) {
|
2019-04-10 12:44:02 +02:00
|
|
|
processDispatchForCacheFlush(surfacesForResidency, numSurfaceForResidency, &commandStream, csrDeps);
|
2021-12-10 21:31:34 +00:00
|
|
|
} else if (computeCommandStreamReceiver.peekTimestampPacketWriteEnabled()) {
|
2018-10-15 10:35:45 +02:00
|
|
|
if (CL_COMMAND_BARRIER == commandType) {
|
2021-12-10 21:31:34 +00:00
|
|
|
computeCommandStreamReceiver.requestStallingCommandsOnNextFlush();
|
2018-10-05 12:51:57 -07:00
|
|
|
}
|
2019-07-10 11:39:19 +02:00
|
|
|
|
|
|
|
|
for (size_t i = 0; i < eventsRequest.numEventsInWaitList; i++) {
|
|
|
|
|
auto waitlistEvent = castToObjectOrAbort<Event>(eventsRequest.eventWaitList[i]);
|
|
|
|
|
if (waitlistEvent->getTimestampPacketNodes()) {
|
|
|
|
|
flushDependenciesForNonKernelCommand = true;
|
|
|
|
|
if (eventBuilder.getEvent()) {
|
2018-10-18 10:08:39 +02:00
|
|
|
eventBuilder.getEvent()->addTimestampPacketNodes(*waitlistEvent->getTimestampPacketNodes());
|
|
|
|
|
}
|
2018-10-05 12:51:57 -07:00
|
|
|
}
|
|
|
|
|
}
|
2021-05-20 15:07:00 +00:00
|
|
|
|
2022-12-07 15:04:37 +00:00
|
|
|
if (isMarkerWithPostSyncWrite) {
|
2022-01-18 10:43:38 +00:00
|
|
|
flushDependenciesForNonKernelCommand = true;
|
|
|
|
|
}
|
|
|
|
|
|
2019-07-10 11:39:19 +02:00
|
|
|
if (flushDependenciesForNonKernelCommand) {
|
2021-06-23 10:34:31 +00:00
|
|
|
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(commandStream, csrDeps);
|
2019-07-10 11:39:19 +02:00
|
|
|
}
|
2021-05-20 15:07:00 +00:00
|
|
|
|
2022-12-07 15:04:37 +00:00
|
|
|
if (isMarkerWithPostSyncWrite) {
|
2021-07-19 15:07:12 +00:00
|
|
|
if (numEventsInWaitList == 0) {
|
2021-12-10 21:31:34 +00:00
|
|
|
computeCommandStreamReceiver.programComputeBarrierCommand(commandStream);
|
2021-07-19 15:07:12 +00:00
|
|
|
}
|
2021-05-20 15:07:00 +00:00
|
|
|
processDispatchForMarkerWithTimestampPacket(*this, &commandStream, eventsRequest, csrDeps);
|
|
|
|
|
}
|
2022-12-07 15:04:37 +00:00
|
|
|
} else if (isMarkerWithPostSyncWrite) {
|
2021-05-20 15:07:00 +00:00
|
|
|
processDispatchForMarker(*this, &commandStream, eventsRequest, csrDeps);
|
2017-12-21 00:45:38 +01:00
|
|
|
}
|
|
|
|
|
|
2020-06-16 11:19:11 +00:00
|
|
|
CompletionStamp completionStamp = {CompletionStamp::notReady, taskLevel, 0};
|
2022-07-20 16:13:57 +00:00
|
|
|
const EnqueueProperties enqueueProperties(false, !multiDispatchInfo.empty(), isCacheFlushCommand(commandType),
|
2022-12-07 15:04:37 +00:00
|
|
|
flushDependenciesForNonKernelCommand, isMarkerWithPostSyncWrite, &blitPropertiesContainer);
|
2019-09-02 13:54:57 +02:00
|
|
|
|
2022-02-02 16:30:03 +00:00
|
|
|
if (!blockQueue && isOOQEnabled()) {
|
|
|
|
|
setupBarrierTimestampForBcsEngines(computeCommandStreamReceiver.getOsContext().getEngineType(), timestampPacketDependencies);
|
|
|
|
|
}
|
|
|
|
|
|
2021-07-01 16:00:22 +00:00
|
|
|
bool migratedMemory = false;
|
|
|
|
|
|
|
|
|
|
if (!blockQueue && multiDispatchInfo.peekMainKernel() && multiDispatchInfo.peekMainKernel()->requiresMemoryMigration()) {
|
|
|
|
|
for (auto &arg : multiDispatchInfo.peekMainKernel()->getMemObjectsToMigrate()) {
|
2021-12-10 21:31:34 +00:00
|
|
|
MigrationController::handleMigration(*this->context, computeCommandStreamReceiver, arg.second);
|
2021-07-01 16:00:22 +00:00
|
|
|
migratedMemory = true;
|
|
|
|
|
}
|
|
|
|
|
}
|
2017-12-21 00:45:38 +01:00
|
|
|
if (!blockQueue) {
|
|
|
|
|
|
2019-08-30 09:37:44 +02:00
|
|
|
if (enqueueProperties.operation == EnqueueProperties::Operation::GpuKernel) {
|
2021-12-10 21:31:34 +00:00
|
|
|
csrDeps.makeResident(computeCommandStreamReceiver);
|
2021-04-29 08:58:16 +00:00
|
|
|
|
2017-12-21 00:45:38 +01:00
|
|
|
completionStamp = enqueueNonBlocked<commandType>(
|
|
|
|
|
surfacesForResidency,
|
|
|
|
|
numSurfaceForResidency,
|
|
|
|
|
commandStream,
|
|
|
|
|
commandStreamStart,
|
|
|
|
|
blocking,
|
2021-06-18 10:35:54 +00:00
|
|
|
clearDependenciesForSubCapture,
|
2017-12-21 00:45:38 +01:00
|
|
|
multiDispatchInfo,
|
2019-11-09 19:02:25 +01:00
|
|
|
enqueueProperties,
|
2019-11-13 12:23:29 +01:00
|
|
|
timestampPacketDependencies,
|
2018-09-07 14:31:37 +02:00
|
|
|
eventsRequest,
|
2017-12-21 00:45:38 +01:00
|
|
|
eventBuilder,
|
|
|
|
|
taskLevel,
|
2022-05-04 13:04:47 +00:00
|
|
|
printfHandler.get());
|
2019-08-30 09:37:44 +02:00
|
|
|
} else if (enqueueProperties.isFlushWithoutKernelRequired()) {
|
2019-06-18 14:04:23 +02:00
|
|
|
completionStamp = enqueueCommandWithoutKernel(
|
2019-03-22 13:40:41 +01:00
|
|
|
surfacesForResidency,
|
|
|
|
|
numSurfaceForResidency,
|
2020-09-30 16:58:20 +02:00
|
|
|
&commandStream,
|
2019-03-22 13:40:41 +01:00
|
|
|
commandStreamStart,
|
|
|
|
|
blocking,
|
2019-08-30 09:37:44 +02:00
|
|
|
enqueueProperties,
|
2019-11-13 12:23:29 +01:00
|
|
|
timestampPacketDependencies,
|
2019-03-22 13:40:41 +01:00
|
|
|
eventsRequest,
|
|
|
|
|
eventBuilder,
|
2021-04-29 08:58:16 +00:00
|
|
|
taskLevel,
|
2021-08-25 16:03:15 +00:00
|
|
|
csrDeps,
|
|
|
|
|
nullptr);
|
2017-12-21 00:45:38 +01:00
|
|
|
} else {
|
2019-08-30 09:37:44 +02:00
|
|
|
UNRECOVERABLE_IF(enqueueProperties.operation != EnqueueProperties::Operation::EnqueueWithoutSubmission);
|
2021-02-02 14:50:09 +01:00
|
|
|
|
|
|
|
|
auto maxTaskCountCurrentRootDevice = this->taskCount;
|
|
|
|
|
|
2021-03-11 13:48:04 +00:00
|
|
|
for (auto eventId = 0u; eventId < numEventsInWaitList; eventId++) {
|
|
|
|
|
auto event = castToObject<Event>(eventWaitList[eventId]);
|
2021-02-02 14:50:09 +01:00
|
|
|
|
2021-03-11 13:48:04 +00:00
|
|
|
if (event->getCommandQueue() && event->getCommandQueue()->getDevice().getRootDeviceIndex() == this->getDevice().getRootDeviceIndex()) {
|
2021-02-02 14:50:09 +01:00
|
|
|
maxTaskCountCurrentRootDevice = std::max(maxTaskCountCurrentRootDevice, event->peekTaskCount());
|
2017-12-21 00:45:38 +01:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-05-04 13:04:47 +00:00
|
|
|
// inherit data from event_wait_list and previous packets
|
2017-12-21 00:45:38 +01:00
|
|
|
completionStamp.flushStamp = this->flushStamp->peekStamp();
|
2021-02-02 14:50:09 +01:00
|
|
|
completionStamp.taskCount = maxTaskCountCurrentRootDevice;
|
2017-12-21 00:45:38 +01:00
|
|
|
completionStamp.taskLevel = taskLevel;
|
|
|
|
|
|
|
|
|
|
if (eventBuilder.getEvent() && isProfilingEnabled()) {
|
|
|
|
|
eventBuilder.getEvent()->setSubmitTimeStamp();
|
|
|
|
|
eventBuilder.getEvent()->setStartTimeStamp();
|
|
|
|
|
}
|
2022-01-18 12:36:29 +00:00
|
|
|
|
2022-05-04 13:04:47 +00:00
|
|
|
// check if we have BCS associated, if so we need to make sure it is completed as well
|
2022-01-18 12:36:29 +00:00
|
|
|
if (eventBuilder.getEvent() && this->bcsEngineTypes.size() > 0u) {
|
|
|
|
|
eventBuilder.getEvent()->setupBcs(this->getBcsCommandStreamReceiver(this->bcsEngineTypes[0u])->getOsContext().getEngineType());
|
|
|
|
|
}
|
2017-12-21 00:45:38 +01:00
|
|
|
}
|
2019-06-26 07:56:56 +02:00
|
|
|
if (eventBuilder.getEvent()) {
|
|
|
|
|
eventBuilder.getEvent()->flushStamp->replaceStampObject(this->flushStamp->getStampReference());
|
|
|
|
|
}
|
2020-06-24 13:32:09 +02:00
|
|
|
|
|
|
|
|
this->latestSentEnqueueType = enqueueProperties.operation;
|
2017-12-21 00:45:38 +01:00
|
|
|
}
|
2022-05-05 16:52:25 +00:00
|
|
|
|
2022-11-04 13:57:42 +00:00
|
|
|
if (completionStamp.taskCount > CompletionStamp::notReady) {
|
|
|
|
|
return CommandQueue::getErrorCodeFromTaskCount(completionStamp.taskCount);
|
2022-05-05 16:52:25 +00:00
|
|
|
}
|
|
|
|
|
|
2020-08-26 11:26:44 +02:00
|
|
|
updateFromCompletionStamp(completionStamp, eventBuilder.getEvent());
|
2017-12-21 00:45:38 +01:00
|
|
|
|
|
|
|
|
if (blockQueue) {
|
2019-09-02 13:54:57 +02:00
|
|
|
enqueueBlocked(commandType,
|
|
|
|
|
surfacesForResidency,
|
|
|
|
|
numSurfaceForResidency,
|
|
|
|
|
multiDispatchInfo,
|
2019-11-16 11:59:18 +01:00
|
|
|
timestampPacketDependencies,
|
2019-09-02 13:54:57 +02:00
|
|
|
blockedCommandsData,
|
|
|
|
|
enqueueProperties,
|
|
|
|
|
eventsRequest,
|
|
|
|
|
eventBuilder,
|
2021-08-26 15:31:09 +00:00
|
|
|
std::move(printfHandler),
|
2022-12-24 18:25:41 +01:00
|
|
|
nullptr);
|
2017-12-21 00:45:38 +01:00
|
|
|
}
|
|
|
|
|
|
2021-06-21 17:39:40 +00:00
|
|
|
if (deferredTimestampPackets.get()) {
|
|
|
|
|
timestampPacketDependencies.moveNodesToNewContainer(*deferredTimestampPackets);
|
2022-05-11 14:34:38 +00:00
|
|
|
csrDeps.copyNodesToNewContainer(*deferredTimestampPackets);
|
2021-06-21 17:39:40 +00:00
|
|
|
}
|
|
|
|
|
|
2021-10-06 15:30:57 +00:00
|
|
|
commandStreamReceiverOwnership.unlock();
|
2022-03-04 09:46:29 +00:00
|
|
|
queueOwnership.unlock();
|
2017-12-21 00:45:38 +01:00
|
|
|
|
|
|
|
|
if (blocking) {
|
2022-03-21 11:08:43 +00:00
|
|
|
auto waitStatus = WaitStatus::Ready;
|
2021-10-21 01:29:53 +02:00
|
|
|
auto &builtinOpParams = multiDispatchInfo.peekBuiltinOpParams();
|
|
|
|
|
if (builtinOpParams.userPtrForPostOperationCpuCopy) {
|
2022-03-21 11:08:43 +00:00
|
|
|
waitStatus = waitForAllEngines(blockQueue, (blockQueue ? nullptr : printfHandler.get()), false);
|
|
|
|
|
if (waitStatus == WaitStatus::GpuHang) {
|
|
|
|
|
return CL_OUT_OF_RESOURCES;
|
|
|
|
|
}
|
|
|
|
|
|
2021-10-21 01:29:53 +02:00
|
|
|
auto hostPtrAlloc = builtinOpParams.transferAllocation;
|
|
|
|
|
UNRECOVERABLE_IF(nullptr == hostPtrAlloc);
|
|
|
|
|
auto size = hostPtrAlloc->getUnderlyingBufferSize();
|
|
|
|
|
[[maybe_unused]] int cpuCopyStatus = memcpy_s(builtinOpParams.userPtrForPostOperationCpuCopy, size, hostPtrAlloc->getUnderlyingBuffer(), size);
|
|
|
|
|
DEBUG_BREAK_IF(cpuCopyStatus != 0);
|
2022-03-21 11:08:43 +00:00
|
|
|
|
|
|
|
|
waitStatus = waitForAllEngines(blockQueue, (blockQueue ? nullptr : printfHandler.get()), true);
|
2021-10-21 01:29:53 +02:00
|
|
|
} else {
|
2022-03-21 11:08:43 +00:00
|
|
|
waitStatus = waitForAllEngines(blockQueue, (blockQueue ? nullptr : printfHandler.get()), true);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (waitStatus == WaitStatus::GpuHang) {
|
|
|
|
|
return CL_OUT_OF_RESOURCES;
|
2021-10-21 01:29:53 +02:00
|
|
|
}
|
2017-12-21 00:45:38 +01:00
|
|
|
}
|
2022-03-21 11:08:43 +00:00
|
|
|
|
2021-07-01 16:00:22 +00:00
|
|
|
if (migratedMemory) {
|
2021-12-10 21:31:34 +00:00
|
|
|
computeCommandStreamReceiver.flushBatchedSubmissions();
|
2021-07-01 16:00:22 +00:00
|
|
|
}
|
2022-03-21 11:08:43 +00:00
|
|
|
|
|
|
|
|
return CL_SUCCESS;
|
2017-12-21 00:45:38 +01:00
|
|
|
}
|
|
|
|
|
|
2019-02-21 17:44:17 +01:00
|
|
|
template <typename GfxFamily>
|
|
|
|
|
template <uint32_t commandType>
|
|
|
|
|
void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInfo &multiDispatchInfo,
|
|
|
|
|
std::unique_ptr<PrintfHandler> &printfHandler,
|
|
|
|
|
Event *event,
|
2021-03-24 18:21:13 +00:00
|
|
|
TagNodeBase *&hwTimeStamps,
|
2019-02-21 17:44:17 +01:00
|
|
|
bool blockQueue,
|
|
|
|
|
CsrDependencies &csrDeps,
|
2019-07-18 21:15:50 +02:00
|
|
|
KernelOperation *blockedCommandsData,
|
2019-11-13 12:23:29 +01:00
|
|
|
TimestampPacketDependencies ×tampPacketDependencies) {
|
2021-03-24 18:21:13 +00:00
|
|
|
TagNodeBase *hwPerfCounter = nullptr;
|
2021-10-08 11:02:35 +00:00
|
|
|
getClFileLogger().dumpKernelArgs(&multiDispatchInfo);
|
2019-02-21 17:44:17 +01:00
|
|
|
|
2022-09-29 16:36:48 +02:00
|
|
|
printfHandler.reset(PrintfHandler::create(multiDispatchInfo, device->getDevice()));
|
2019-02-21 17:44:17 +01:00
|
|
|
if (printfHandler) {
|
2019-08-02 15:56:28 +02:00
|
|
|
printfHandler->prepareDispatch(multiDispatchInfo);
|
2019-02-21 17:44:17 +01:00
|
|
|
}
|
|
|
|
|
|
2021-03-22 15:26:03 +00:00
|
|
|
if (multiDispatchInfo.peekMainKernel()->usesSyncBuffer()) {
|
2020-04-09 17:33:07 +02:00
|
|
|
auto &gws = multiDispatchInfo.begin()->getGWS();
|
|
|
|
|
auto &lws = multiDispatchInfo.begin()->getLocalWorkgroupSize();
|
|
|
|
|
size_t workGroupsCount = (gws.x * gws.y * gws.z) /
|
|
|
|
|
(lws.x * lws.y * lws.z);
|
2021-03-19 23:14:09 +00:00
|
|
|
device->getDevice().syncBufferHandler->prepareForEnqueue(workGroupsCount, *multiDispatchInfo.peekMainKernel());
|
2020-04-09 17:33:07 +02:00
|
|
|
}
|
|
|
|
|
|
2019-02-21 17:44:17 +01:00
|
|
|
if (commandType == CL_COMMAND_NDRANGE_KERNEL) {
|
2020-06-19 17:24:48 +02:00
|
|
|
if (multiDispatchInfo.peekMainKernel()->isKernelDebugEnabled()) {
|
2019-02-21 17:44:17 +01:00
|
|
|
setupDebugSurface(multiDispatchInfo.peekMainKernel());
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2019-04-10 12:44:02 +02:00
|
|
|
if (event && this->isProfilingEnabled()) {
|
|
|
|
|
// Get allocation for timestamps
|
|
|
|
|
hwTimeStamps = event->getHwTimeStampNode();
|
2019-02-21 17:44:17 +01:00
|
|
|
}
|
|
|
|
|
|
2019-05-20 11:19:27 +02:00
|
|
|
if (event && this->isPerfCountersEnabled()) {
|
|
|
|
|
hwPerfCounter = event->getHwPerfCounterNode();
|
|
|
|
|
}
|
|
|
|
|
|
2022-07-01 18:03:54 +00:00
|
|
|
HardwareInterfaceWalkerArgs dispatchWalkerArgs = {};
|
|
|
|
|
dispatchWalkerArgs.blockedCommandsData = blockedCommandsData;
|
|
|
|
|
dispatchWalkerArgs.hwTimeStamps = hwTimeStamps;
|
|
|
|
|
dispatchWalkerArgs.hwPerfCounter = hwPerfCounter;
|
|
|
|
|
dispatchWalkerArgs.timestampPacketDependencies = ×tampPacketDependencies;
|
|
|
|
|
dispatchWalkerArgs.currentTimestampPacketNodes = timestampPacketContainer.get();
|
|
|
|
|
dispatchWalkerArgs.commandType = commandType;
|
2022-07-01 18:03:54 +00:00
|
|
|
dispatchWalkerArgs.event = event;
|
2022-07-01 18:03:54 +00:00
|
|
|
|
2019-02-21 17:44:17 +01:00
|
|
|
HardwareInterface<GfxFamily>::dispatchWalker(
|
|
|
|
|
*this,
|
|
|
|
|
multiDispatchInfo,
|
|
|
|
|
csrDeps,
|
2022-07-01 18:03:54 +00:00
|
|
|
dispatchWalkerArgs);
|
2019-02-21 17:44:17 +01:00
|
|
|
|
|
|
|
|
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
|
|
|
|
|
for (auto &dispatchInfo : multiDispatchInfo) {
|
|
|
|
|
for (auto &patchInfoData : dispatchInfo.getKernel()->getPatchInfoDataList()) {
|
2019-07-15 14:28:09 +02:00
|
|
|
getGpgpuCommandStreamReceiver().getFlatBatchBufferHelper().setPatchInfoData(patchInfoData);
|
2019-02-21 17:44:17 +01:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2019-07-15 14:28:09 +02:00
|
|
|
getGpgpuCommandStreamReceiver().setRequiredScratchSizes(multiDispatchInfo.getRequiredScratchSize(), multiDispatchInfo.getRequiredPrivateScratchSize());
|
2019-02-21 17:44:17 +01:00
|
|
|
}
|
2019-04-10 12:44:02 +02:00
|
|
|
|
2019-07-03 09:30:30 +02:00
|
|
|
template <typename GfxFamily>
|
2021-08-25 16:03:15 +00:00
|
|
|
BlitProperties CommandQueueHw<GfxFamily>::processDispatchForBlitEnqueue(CommandStreamReceiver &blitCommandStreamReceiver,
|
|
|
|
|
const MultiDispatchInfo &multiDispatchInfo,
|
2019-11-13 12:23:29 +01:00
|
|
|
TimestampPacketDependencies ×tampPacketDependencies,
|
2020-09-30 16:58:20 +02:00
|
|
|
const EventsRequest &eventsRequest, LinearStream *commandStream,
|
2022-12-24 18:25:41 +01:00
|
|
|
uint32_t commandType, bool queueBlocked) {
|
2020-01-31 10:37:47 +01:00
|
|
|
auto blitDirection = ClBlitProperties::obtainBlitDirection(commandType);
|
2019-07-03 09:30:30 +02:00
|
|
|
|
2021-08-25 16:03:15 +00:00
|
|
|
auto blitProperties = ClBlitProperties::constructProperties(blitDirection, blitCommandStreamReceiver,
|
2020-01-31 10:37:47 +01:00
|
|
|
multiDispatchInfo.peekBuiltinOpParams());
|
2019-09-04 11:34:23 +02:00
|
|
|
if (!queueBlocked) {
|
2021-08-25 16:03:15 +00:00
|
|
|
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(blitProperties.csrDependencies, blitCommandStreamReceiver,
|
2021-03-11 13:48:04 +00:00
|
|
|
CsrDependencies::DependenciesType::All);
|
2019-07-03 09:30:30 +02:00
|
|
|
|
2021-03-11 13:48:04 +00:00
|
|
|
blitProperties.csrDependencies.timestampPacketContainer.push_back(×tampPacketDependencies.cacheFlushNodes);
|
|
|
|
|
blitProperties.csrDependencies.timestampPacketContainer.push_back(×tampPacketDependencies.previousEnqueueNodes);
|
|
|
|
|
blitProperties.csrDependencies.timestampPacketContainer.push_back(×tampPacketDependencies.barrierNodes);
|
2019-09-04 11:34:23 +02:00
|
|
|
}
|
2022-12-24 18:25:41 +01:00
|
|
|
|
2019-07-03 09:30:30 +02:00
|
|
|
auto currentTimestampPacketNode = timestampPacketContainer->peekNodes().at(0);
|
2019-11-12 17:56:10 +01:00
|
|
|
blitProperties.outputTimestampPacket = currentTimestampPacketNode;
|
2020-02-27 13:29:15 +01:00
|
|
|
|
2020-09-30 16:58:20 +02:00
|
|
|
if (commandStream) {
|
2020-07-17 11:03:04 +02:00
|
|
|
if (timestampPacketDependencies.cacheFlushNodes.peekNodes().size() > 0) {
|
2020-06-24 13:32:09 +02:00
|
|
|
auto cacheFlushTimestampPacketGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*timestampPacketDependencies.cacheFlushNodes.peekNodes()[0]);
|
2021-12-20 14:37:33 +00:00
|
|
|
const auto &hwInfo = device->getHardwareInfo();
|
2021-12-20 21:37:45 +00:00
|
|
|
PipeControlArgs args;
|
2021-12-22 14:25:58 +00:00
|
|
|
args.dcFlushEnable = MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, hwInfo);
|
2022-07-21 14:28:10 +00:00
|
|
|
MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
|
2020-09-30 16:58:20 +02:00
|
|
|
*commandStream,
|
2022-07-21 14:28:10 +00:00
|
|
|
PostSyncMode::ImmediateData,
|
2020-06-24 13:32:09 +02:00
|
|
|
cacheFlushTimestampPacketGpuAddress,
|
|
|
|
|
0,
|
2021-12-20 14:37:33 +00:00
|
|
|
hwInfo,
|
2020-06-24 13:32:09 +02:00
|
|
|
args);
|
|
|
|
|
}
|
|
|
|
|
}
|
2019-08-30 09:37:44 +02:00
|
|
|
return blitProperties;
|
2019-07-03 09:30:30 +02:00
|
|
|
}
|
|
|
|
|
|
2019-11-09 19:02:25 +01:00
|
|
|
template <typename GfxFamily>
|
2021-08-25 16:03:15 +00:00
|
|
|
void CommandQueueHw<GfxFamily>::processDispatchForBlitAuxTranslation(CommandStreamReceiver &bcsCsr,
|
|
|
|
|
const MultiDispatchInfo &multiDispatchInfo,
|
2019-11-09 19:02:25 +01:00
|
|
|
BlitPropertiesContainer &blitPropertiesContainer,
|
|
|
|
|
TimestampPacketDependencies ×tampPacketDependencies,
|
|
|
|
|
const EventsRequest &eventsRequest, bool queueBlocked) {
|
2022-04-05 16:47:19 +00:00
|
|
|
const auto rootDeviceIndex = getDevice().getRootDeviceIndex();
|
|
|
|
|
const auto nodesAllocator = getGpgpuCommandStreamReceiver().getTimestampPacketAllocator();
|
|
|
|
|
const auto numKernelObjs = multiDispatchInfo.getKernelObjsForAuxTranslation()->size();
|
2020-12-22 00:03:25 +00:00
|
|
|
blitPropertiesContainer.resize(numKernelObjs * 2);
|
2019-11-09 19:02:25 +01:00
|
|
|
|
|
|
|
|
auto bufferIndex = 0;
|
2022-04-05 16:47:19 +00:00
|
|
|
for (const auto &kernelObj : *multiDispatchInfo.getKernelObjsForAuxTranslation()) {
|
2020-12-22 00:03:25 +00:00
|
|
|
GraphicsAllocation *allocation = nullptr;
|
|
|
|
|
if (kernelObj.type == KernelObjForAuxTranslation::Type::MEM_OBJ) {
|
2022-04-05 16:47:19 +00:00
|
|
|
const auto buffer = static_cast<Buffer *>(kernelObj.object);
|
2020-12-22 00:03:25 +00:00
|
|
|
allocation = buffer->getGraphicsAllocation(rootDeviceIndex);
|
|
|
|
|
} else {
|
|
|
|
|
DEBUG_BREAK_IF(kernelObj.type != KernelObjForAuxTranslation::Type::GFX_ALLOC);
|
|
|
|
|
allocation = static_cast<GraphicsAllocation *>(kernelObj.object);
|
|
|
|
|
}
|
2019-11-09 19:02:25 +01:00
|
|
|
{
|
|
|
|
|
// Aux to NonAux
|
2020-12-22 00:03:25 +00:00
|
|
|
blitPropertiesContainer[bufferIndex] = BlitProperties::constructPropertiesForAuxTranslation(
|
|
|
|
|
AuxTranslationDirection::AuxToNonAux, allocation, getGpgpuCommandStreamReceiver().getClearColorAllocation());
|
2022-04-05 16:47:19 +00:00
|
|
|
const auto auxToNonAuxNode = nodesAllocator->getTag();
|
2019-11-09 19:02:25 +01:00
|
|
|
timestampPacketDependencies.auxToNonAuxNodes.add(auxToNonAuxNode);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
{
|
|
|
|
|
// NonAux to Aux
|
2020-12-22 00:03:25 +00:00
|
|
|
blitPropertiesContainer[bufferIndex + numKernelObjs] = BlitProperties::constructPropertiesForAuxTranslation(
|
|
|
|
|
AuxTranslationDirection::NonAuxToAux, allocation, getGpgpuCommandStreamReceiver().getClearColorAllocation());
|
2022-04-05 16:47:19 +00:00
|
|
|
const auto nonAuxToAuxNode = nodesAllocator->getTag();
|
2019-11-09 19:02:25 +01:00
|
|
|
timestampPacketDependencies.nonAuxToAuxNodes.add(nonAuxToAuxNode);
|
|
|
|
|
}
|
|
|
|
|
bufferIndex++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!queueBlocked) {
|
2020-01-27 13:06:03 +01:00
|
|
|
CsrDependencies csrDeps;
|
2021-08-25 16:03:15 +00:00
|
|
|
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, bcsCsr, CsrDependencies::DependenciesType::All);
|
2019-11-18 13:35:44 +01:00
|
|
|
BlitProperties::setupDependenciesForAuxTranslation(blitPropertiesContainer, timestampPacketDependencies,
|
2020-01-27 13:06:03 +01:00
|
|
|
*this->timestampPacketContainer, csrDeps,
|
2021-08-25 16:03:15 +00:00
|
|
|
getGpgpuCommandStreamReceiver(), bcsCsr);
|
2019-11-09 19:02:25 +01:00
|
|
|
}
|
2021-08-26 16:38:39 +00:00
|
|
|
|
|
|
|
|
eventsRequest.setupBcsCsrForOutputEvent(bcsCsr);
|
2019-11-09 19:02:25 +01:00
|
|
|
}
|
|
|
|
|
|
2019-04-10 12:44:02 +02:00
|
|
|
template <typename GfxFamily>
|
|
|
|
|
void CommandQueueHw<GfxFamily>::processDispatchForCacheFlush(Surface **surfaces,
|
|
|
|
|
size_t numSurfaces,
|
|
|
|
|
LinearStream *commandStream,
|
|
|
|
|
CsrDependencies &csrDeps) {
|
|
|
|
|
|
2021-06-23 10:34:31 +00:00
|
|
|
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(*commandStream, csrDeps);
|
2019-04-10 12:44:02 +02:00
|
|
|
|
|
|
|
|
uint64_t postSyncAddress = 0;
|
2019-07-15 14:28:09 +02:00
|
|
|
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
2019-04-10 12:44:02 +02:00
|
|
|
auto timestampPacketNodeForPostSync = timestampPacketContainer->peekNodes().at(0);
|
2020-09-29 14:35:23 +02:00
|
|
|
timestampPacketNodeForPostSync->setProfilingCapable(false);
|
2020-05-19 16:20:41 +02:00
|
|
|
postSyncAddress = TimestampPacketHelper::getContextEndGpuAddress(*timestampPacketNodeForPostSync);
|
2019-04-10 12:44:02 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
submitCacheFlush(surfaces, numSurfaces, commandStream, postSyncAddress);
|
|
|
|
|
}
|
|
|
|
|
|
2021-05-20 15:07:00 +00:00
|
|
|
template <typename GfxFamily>
|
|
|
|
|
void CommandQueueHw<GfxFamily>::processDispatchForMarker(CommandQueue &commandQueue,
|
|
|
|
|
LinearStream *commandStream,
|
|
|
|
|
EventsRequest &eventsRequest,
|
|
|
|
|
CsrDependencies &csrDeps) {
|
|
|
|
|
auto event = castToObjectOrAbort<Event>(*eventsRequest.outEvent);
|
|
|
|
|
|
|
|
|
|
TagNodeBase *hwTimeStamps = nullptr;
|
|
|
|
|
TagNodeBase *hwPerfCounter = nullptr;
|
|
|
|
|
|
|
|
|
|
hwTimeStamps = event->getHwTimeStampNode();
|
|
|
|
|
|
|
|
|
|
HardwareInterface<GfxFamily>::dispatchProfilingPerfStartCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue);
|
|
|
|
|
HardwareInterface<GfxFamily>::dispatchProfilingPerfEndCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue);
|
2021-06-11 13:29:43 +00:00
|
|
|
getGpgpuCommandStreamReceiver().makeResident(*hwTimeStamps->getBaseGraphicsAllocation());
|
2021-05-20 15:07:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
|
|
|
|
void CommandQueueHw<GfxFamily>::processDispatchForMarkerWithTimestampPacket(CommandQueue &commandQueue,
|
|
|
|
|
LinearStream *commandStream,
|
|
|
|
|
EventsRequest &eventsRequest,
|
|
|
|
|
CsrDependencies &csrDeps) {
|
|
|
|
|
auto currentTimestampPacketNode = commandQueue.getTimestampPacketContainer()->peekNodes().at(0);
|
|
|
|
|
|
|
|
|
|
auto timestampContextStartGpuAddress = TimestampPacketHelper::getContextStartGpuAddress(*currentTimestampPacketNode);
|
|
|
|
|
auto timestampGlobalStartAddress = TimestampPacketHelper::getGlobalStartGpuAddress(*currentTimestampPacketNode);
|
|
|
|
|
|
2022-04-06 12:35:32 +00:00
|
|
|
EncodeStoreMMIO<GfxFamily>::encode(*commandStream, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timestampContextStartGpuAddress, false);
|
|
|
|
|
EncodeStoreMMIO<GfxFamily>::encode(*commandStream, REG_GLOBAL_TIMESTAMP_LDW, timestampGlobalStartAddress, false);
|
2021-05-20 15:07:00 +00:00
|
|
|
|
|
|
|
|
auto timestampContextEndGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*currentTimestampPacketNode);
|
|
|
|
|
auto timestampGlobalEndAddress = TimestampPacketHelper::getGlobalEndGpuAddress(*currentTimestampPacketNode);
|
|
|
|
|
|
2022-04-06 12:35:32 +00:00
|
|
|
EncodeStoreMMIO<GfxFamily>::encode(*commandStream, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timestampContextEndGpuAddress, false);
|
|
|
|
|
EncodeStoreMMIO<GfxFamily>::encode(*commandStream, REG_GLOBAL_TIMESTAMP_LDW, timestampGlobalEndAddress, false);
|
2021-05-20 15:07:00 +00:00
|
|
|
}
|
|
|
|
|
|
2017-12-22 16:05:10 +01:00
|
|
|
template <typename GfxFamily>
|
2022-11-22 13:53:59 +00:00
|
|
|
void CommandQueueHw<GfxFamily>::obtainTaskLevelAndBlockedStatus(TaskCountType &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) {
|
2017-12-22 16:05:10 +01:00
|
|
|
auto isQueueBlockedStatus = isQueueBlocked();
|
|
|
|
|
taskLevel = getTaskLevelFromWaitList(this->taskLevel, numEventsInWaitList, eventWaitList);
|
2020-06-16 11:19:11 +00:00
|
|
|
blockQueueStatus = (taskLevel == CompletionStamp::notReady) || isQueueBlockedStatus;
|
2017-12-22 16:05:10 +01:00
|
|
|
|
2019-06-18 11:02:47 +02:00
|
|
|
auto taskLevelUpdateRequired = isTaskLevelUpdateRequired(taskLevel, eventWaitList, numEventsInWaitList, commandType);
|
2019-07-03 09:30:30 +02:00
|
|
|
if (taskLevelUpdateRequired) {
|
2017-12-22 16:05:10 +01:00
|
|
|
taskLevel++;
|
|
|
|
|
this->taskLevel = taskLevel;
|
|
|
|
|
}
|
2020-08-26 11:26:44 +02:00
|
|
|
|
|
|
|
|
DBG_LOG(EventsDebugEnable, "blockQueue", blockQueueStatus, "virtualEvent", virtualEvent, "taskLevel", taskLevel);
|
2017-12-22 16:05:10 +01:00
|
|
|
}
|
|
|
|
|
|
2017-12-21 00:45:38 +01:00
|
|
|
template <typename GfxFamily>
|
2022-11-22 13:53:59 +00:00
|
|
|
bool CommandQueueHw<GfxFamily>::isTaskLevelUpdateRequired(const TaskCountType &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType) {
|
2017-12-21 00:45:38 +01:00
|
|
|
bool updateTaskLevel = true;
|
2022-05-04 13:04:47 +00:00
|
|
|
// if we are blocked by user event then no update
|
2020-06-16 11:19:11 +00:00
|
|
|
if (taskLevel == CompletionStamp::notReady) {
|
2017-12-21 00:45:38 +01:00
|
|
|
updateTaskLevel = false;
|
|
|
|
|
}
|
2022-05-04 13:04:47 +00:00
|
|
|
// if we are executing command without kernel then it will inherit state from
|
|
|
|
|
// previous commands, barrier is exception
|
2017-12-21 00:45:38 +01:00
|
|
|
if (isCommandWithoutKernel(commandType) && commandType != CL_COMMAND_BARRIER) {
|
|
|
|
|
updateTaskLevel = false;
|
|
|
|
|
}
|
2022-05-04 13:04:47 +00:00
|
|
|
// ooq special cases starts here
|
2017-12-21 00:45:38 +01:00
|
|
|
if (this->isOOQEnabled()) {
|
2022-05-04 13:04:47 +00:00
|
|
|
// if no wait list and barrier , do not update task level
|
2017-12-21 00:45:38 +01:00
|
|
|
if (eventWaitList == nullptr && commandType != CL_COMMAND_BARRIER) {
|
|
|
|
|
updateTaskLevel = false;
|
|
|
|
|
}
|
2022-05-04 13:04:47 +00:00
|
|
|
// if we have waitlist then deduce task level from waitlist and check if it is higher then current task level of queue
|
2017-12-21 00:45:38 +01:00
|
|
|
if (eventWaitList != nullptr) {
|
|
|
|
|
auto taskLevelFromEvents = getTaskLevelFromWaitList(0, numEventsInWaitList, eventWaitList);
|
|
|
|
|
taskLevelFromEvents++;
|
|
|
|
|
if (taskLevelFromEvents <= this->taskLevel) {
|
|
|
|
|
updateTaskLevel = false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return updateTaskLevel;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
2018-04-09 16:39:32 +02:00
|
|
|
template <uint32_t commandType>
|
2017-12-21 00:45:38 +01:00
|
|
|
CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
|
|
|
|
Surface **surfaces,
|
|
|
|
|
size_t surfaceCount,
|
|
|
|
|
LinearStream &commandStream,
|
|
|
|
|
size_t commandStreamStart,
|
|
|
|
|
bool &blocking,
|
2021-06-18 10:35:54 +00:00
|
|
|
bool clearDependenciesForSubCapture,
|
2017-12-21 00:45:38 +01:00
|
|
|
const MultiDispatchInfo &multiDispatchInfo,
|
2019-11-09 19:02:25 +01:00
|
|
|
const EnqueueProperties &enqueueProperties,
|
2019-11-13 12:23:29 +01:00
|
|
|
TimestampPacketDependencies ×tampPacketDependencies,
|
2018-09-07 14:31:37 +02:00
|
|
|
EventsRequest &eventsRequest,
|
2017-12-21 00:45:38 +01:00
|
|
|
EventBuilder &eventBuilder,
|
2022-11-22 13:53:59 +00:00
|
|
|
TaskCountType taskLevel,
|
2022-05-04 13:04:47 +00:00
|
|
|
PrintfHandler *printfHandler) {
|
2017-12-21 00:45:38 +01:00
|
|
|
|
2018-10-15 10:35:45 +02:00
|
|
|
UNRECOVERABLE_IF(multiDispatchInfo.empty());
|
2017-12-21 00:45:38 +01:00
|
|
|
|
|
|
|
|
auto implicitFlush = false;
|
|
|
|
|
|
|
|
|
|
if (printfHandler) {
|
|
|
|
|
blocking = true;
|
2019-07-15 14:28:09 +02:00
|
|
|
printfHandler->makeResident(getGpgpuCommandStreamReceiver());
|
2017-12-21 00:45:38 +01:00
|
|
|
}
|
2019-11-19 16:54:47 +01:00
|
|
|
|
2021-03-22 15:26:03 +00:00
|
|
|
if (multiDispatchInfo.peekMainKernel()->usesSyncBuffer()) {
|
2021-03-19 23:14:09 +00:00
|
|
|
device->getDevice().syncBufferHandler->makeResident(getGpgpuCommandStreamReceiver());
|
2019-11-19 16:54:47 +01:00
|
|
|
}
|
|
|
|
|
|
2018-10-02 14:37:30 -07:00
|
|
|
if (timestampPacketContainer) {
|
2019-07-15 14:28:09 +02:00
|
|
|
timestampPacketContainer->makeResident(getGpgpuCommandStreamReceiver());
|
2019-11-13 12:23:29 +01:00
|
|
|
timestampPacketDependencies.previousEnqueueNodes.makeResident(getGpgpuCommandStreamReceiver());
|
2020-03-09 13:48:30 +01:00
|
|
|
timestampPacketDependencies.cacheFlushNodes.makeResident(getGpgpuCommandStreamReceiver());
|
2018-09-18 15:13:34 -07:00
|
|
|
}
|
2017-12-21 00:45:38 +01:00
|
|
|
|
2019-08-22 17:02:37 +02:00
|
|
|
bool anyUncacheableArgs = false;
|
2017-12-21 00:45:38 +01:00
|
|
|
auto requiresCoherency = false;
|
2022-05-16 14:06:56 +00:00
|
|
|
for (auto surface : createRange(surfaces, surfaceCount)) {
|
2019-07-15 14:28:09 +02:00
|
|
|
surface->makeResident(getGpgpuCommandStreamReceiver());
|
2017-12-21 00:45:38 +01:00
|
|
|
requiresCoherency |= surface->IsCoherent;
|
2019-08-22 17:02:37 +02:00
|
|
|
if (!surface->allowsL3Caching()) {
|
|
|
|
|
anyUncacheableArgs = true;
|
|
|
|
|
}
|
2017-12-21 00:45:38 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
auto mediaSamplerRequired = false;
|
2018-09-21 14:06:35 +02:00
|
|
|
uint32_t numGrfRequired = GrfConfig::DefaultGrfNumber;
|
2022-08-31 13:26:29 +00:00
|
|
|
auto systolicPipelineSelectMode = false;
|
2018-02-16 09:15:36 +01:00
|
|
|
Kernel *kernel = nullptr;
|
2020-12-17 00:36:45 +00:00
|
|
|
bool auxTranslationRequired = false;
|
2021-03-03 12:25:26 +00:00
|
|
|
bool useGlobalAtomics = false;
|
2019-08-22 17:02:37 +02:00
|
|
|
|
2017-12-21 00:45:38 +01:00
|
|
|
for (auto &dispatchInfo : multiDispatchInfo) {
|
2018-02-16 09:15:36 +01:00
|
|
|
if (kernel != dispatchInfo.getKernel()) {
|
|
|
|
|
kernel = dispatchInfo.getKernel();
|
|
|
|
|
} else {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2019-07-15 14:28:09 +02:00
|
|
|
kernel->makeResident(getGpgpuCommandStreamReceiver());
|
2018-02-16 09:15:36 +01:00
|
|
|
requiresCoherency |= kernel->requiresCoherency();
|
|
|
|
|
mediaSamplerRequired |= kernel->isVmeKernel();
|
2021-03-22 15:26:03 +00:00
|
|
|
auto numGrfRequiredByKernel = static_cast<uint32_t>(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.numGrfRequired);
|
2018-09-21 14:06:35 +02:00
|
|
|
numGrfRequired = std::max(numGrfRequired, numGrfRequiredByKernel);
|
2022-08-31 13:26:29 +00:00
|
|
|
systolicPipelineSelectMode |= kernel->requiresSystolicPipelineSelectMode();
|
2020-12-17 00:36:45 +00:00
|
|
|
auxTranslationRequired |= kernel->isAuxTranslationRequired();
|
2019-08-30 09:55:44 +02:00
|
|
|
if (kernel->hasUncacheableStatelessArgs()) {
|
2019-01-30 10:57:42 +01:00
|
|
|
anyUncacheableArgs = true;
|
|
|
|
|
}
|
2020-01-29 14:15:10 +01:00
|
|
|
|
2021-03-22 15:26:03 +00:00
|
|
|
if (kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics) {
|
2021-03-03 12:25:26 +00:00
|
|
|
useGlobalAtomics = true;
|
|
|
|
|
}
|
2017-12-21 00:45:38 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (mediaSamplerRequired) {
|
|
|
|
|
DEBUG_BREAK_IF(device->getDeviceInfo().preemptionSupported != false);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (isProfilingEnabled() && eventBuilder.getEvent()) {
|
2021-09-28 15:05:21 +00:00
|
|
|
eventBuilder.getEvent()->setSubmitTimeStamp();
|
2021-02-25 14:50:02 +00:00
|
|
|
|
|
|
|
|
auto hwTimestampNode = eventBuilder.getEvent()->getHwTimeStampNode();
|
|
|
|
|
if (hwTimestampNode) {
|
|
|
|
|
getGpgpuCommandStreamReceiver().makeResident(*hwTimestampNode->getBaseGraphicsAllocation());
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-21 00:45:38 +01:00
|
|
|
if (isPerfCountersEnabled()) {
|
2019-07-15 14:28:09 +02:00
|
|
|
getGpgpuCommandStreamReceiver().makeResident(*eventBuilder.getEvent()->getHwPerfCounterNode()->getBaseGraphicsAllocation());
|
2017-12-21 00:45:38 +01:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
IndirectHeap *dsh = nullptr;
|
|
|
|
|
IndirectHeap *ioh = nullptr;
|
|
|
|
|
|
2022-01-26 10:59:30 +00:00
|
|
|
dsh = &getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 0u);
|
|
|
|
|
ioh = &getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 0u);
|
2017-12-21 00:45:38 +01:00
|
|
|
|
2018-11-05 05:26:45 -08:00
|
|
|
auto allocNeedsFlushDC = false;
|
|
|
|
|
if (!device->isFullRangeSvm()) {
|
2019-07-15 14:28:09 +02:00
|
|
|
if (std::any_of(getGpgpuCommandStreamReceiver().getResidencyAllocations().begin(), getGpgpuCommandStreamReceiver().getResidencyAllocations().end(), [](const auto allocation) { return allocation->isFlushL3Required(); })) {
|
2018-11-05 05:26:45 -08:00
|
|
|
allocNeedsFlushDC = true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-01-04 09:45:07 +00:00
|
|
|
auto memoryCompressionState = getGpgpuCommandStreamReceiver().getMemoryCompressionState(auxTranslationRequired);
|
2020-12-17 00:36:45 +00:00
|
|
|
|
2019-09-13 12:00:30 +02:00
|
|
|
DispatchFlags dispatchFlags(
|
2022-10-14 10:42:22 +00:00
|
|
|
{}, // csrDependencies
|
|
|
|
|
×tampPacketDependencies.barrierNodes, // barrierTimestampPacketNodes
|
|
|
|
|
{}, // pipelineSelectArgs
|
|
|
|
|
this->flushStamp->getStampReference(), // flushStampReference
|
|
|
|
|
getThrottle(), // throttle
|
|
|
|
|
ClPreemptionHelper::taskPreemptionMode(getDevice(), multiDispatchInfo), // preemptionMode
|
|
|
|
|
numGrfRequired, // numGrfRequired
|
|
|
|
|
L3CachingSettings::l3CacheOn, // l3CacheSettings
|
|
|
|
|
kernel->getDescriptor().kernelAttributes.threadArbitrationPolicy, // threadArbitrationPolicy
|
|
|
|
|
kernel->getAdditionalKernelExecInfo(), // additionalKernelExecInfo
|
|
|
|
|
kernel->getExecutionType(), // kernelExecutionType
|
|
|
|
|
memoryCompressionState, // memoryCompressionState
|
|
|
|
|
getSliceCount(), // sliceCount
|
|
|
|
|
blocking, // blocking
|
|
|
|
|
shouldFlushDC(commandType, printfHandler) || allocNeedsFlushDC, // dcFlush
|
|
|
|
|
multiDispatchInfo.usesSlm(), // useSLM
|
|
|
|
|
!getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled() || commandType == CL_COMMAND_FILL_BUFFER, // guardCommandBufferWithPipeControl
|
|
|
|
|
commandType == CL_COMMAND_NDRANGE_KERNEL, // GSBA32BitRequired
|
|
|
|
|
requiresCoherency, // requiresCoherency
|
|
|
|
|
(QueuePriority::LOW == priority), // lowPriority
|
|
|
|
|
implicitFlush, // implicitFlush
|
|
|
|
|
!eventBuilder.getEvent() || getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), // outOfOrderExecutionAllowed
|
|
|
|
|
false, // epilogueRequired
|
|
|
|
|
false, // usePerDssBackedBuffer
|
|
|
|
|
useGlobalAtomics, // useGlobalAtomics
|
|
|
|
|
kernel->areMultipleSubDevicesInContext(), // areMultipleSubDevicesInContext
|
|
|
|
|
kernel->requiresMemoryMigration(), // memoryMigrationRequired
|
2022-11-19 18:25:04 +00:00
|
|
|
isTextureCacheFlushNeeded(commandType), // textureCacheFlush
|
2022-11-26 20:10:32 +00:00
|
|
|
false, // hasStallingCmds
|
|
|
|
|
false); // hasRelaxedOrderingDependencies
|
2019-09-13 12:00:30 +02:00
|
|
|
|
2019-09-10 16:13:11 +02:00
|
|
|
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired;
|
2022-08-31 13:26:29 +00:00
|
|
|
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = systolicPipelineSelectMode;
|
2019-09-13 12:00:30 +02:00
|
|
|
|
2022-02-10 23:33:40 +00:00
|
|
|
dispatchFlags.disableEUFusion = kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion;
|
|
|
|
|
|
2021-12-29 14:28:21 +00:00
|
|
|
const bool isHandlingBarrier = getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();
|
|
|
|
|
|
2021-06-18 10:35:54 +00:00
|
|
|
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled() && !clearDependenciesForSubCapture) {
|
2021-03-11 13:48:04 +00:00
|
|
|
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr);
|
2021-12-29 14:28:21 +00:00
|
|
|
if (isHandlingBarrier) {
|
|
|
|
|
fillCsrDependenciesWithLastBcsPackets(dispatchFlags.csrDependencies);
|
|
|
|
|
}
|
2019-09-04 11:34:23 +02:00
|
|
|
dispatchFlags.csrDependencies.makeResident(getGpgpuCommandStreamReceiver());
|
2018-09-07 14:31:37 +02:00
|
|
|
}
|
2019-09-13 12:00:30 +02:00
|
|
|
|
2020-06-16 11:19:11 +00:00
|
|
|
DEBUG_BREAK_IF(taskLevel >= CompletionStamp::notReady);
|
2017-12-21 00:45:38 +01:00
|
|
|
|
2019-08-22 17:02:37 +02:00
|
|
|
if (anyUncacheableArgs) {
|
|
|
|
|
dispatchFlags.l3CacheSettings = L3CachingSettings::l3CacheOff;
|
2019-08-26 17:03:13 +02:00
|
|
|
} else if (!kernel->areStatelessWritesUsed()) {
|
|
|
|
|
dispatchFlags.l3CacheSettings = L3CachingSettings::l3AndL1On;
|
2019-08-22 17:02:37 +02:00
|
|
|
}
|
|
|
|
|
|
2019-10-11 12:54:10 +08:00
|
|
|
if (this->dispatchHints != 0) {
|
2019-10-23 14:04:02 +08:00
|
|
|
dispatchFlags.engineHints = this->dispatchHints;
|
2019-10-11 12:54:10 +08:00
|
|
|
dispatchFlags.epilogueRequired = true;
|
|
|
|
|
}
|
|
|
|
|
|
2018-03-22 18:46:09 +01:00
|
|
|
if (gtpinIsGTPinInitialized()) {
|
|
|
|
|
gtpinNotifyPreFlushTask(this);
|
|
|
|
|
}
|
2018-02-08 16:00:20 +01:00
|
|
|
|
2019-11-09 19:02:25 +01:00
|
|
|
if (enqueueProperties.blitPropertiesContainer->size() > 0) {
|
2022-05-04 13:04:47 +00:00
|
|
|
auto bcsCsr = getBcsForAuxTranslation();
|
2022-02-03 13:29:48 +00:00
|
|
|
const auto newTaskCount = bcsCsr->flushBcsTask(*enqueueProperties.blitPropertiesContainer, false, this->isProfilingEnabled(), getDevice());
|
2022-11-04 13:57:42 +00:00
|
|
|
if (newTaskCount > CompletionStamp::notReady) {
|
2022-05-05 16:52:25 +00:00
|
|
|
CompletionStamp completionStamp{};
|
2022-11-04 13:57:42 +00:00
|
|
|
completionStamp.taskCount = newTaskCount;
|
2022-05-05 16:52:25 +00:00
|
|
|
|
|
|
|
|
return completionStamp;
|
|
|
|
|
}
|
|
|
|
|
|
2022-11-04 13:57:42 +00:00
|
|
|
this->updateBcsTaskCount(bcsCsr->getOsContext().getEngineType(), newTaskCount);
|
2020-05-04 10:25:08 +02:00
|
|
|
dispatchFlags.implicitFlush = true;
|
2019-11-09 19:02:25 +01:00
|
|
|
}
|
|
|
|
|
|
2020-09-25 11:24:15 +02:00
|
|
|
PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stdout, "preemption = %d.\n", static_cast<int>(dispatchFlags.preemptionMode));
|
2019-07-15 14:28:09 +02:00
|
|
|
CompletionStamp completionStamp = getGpgpuCommandStreamReceiver().flushTask(
|
2017-12-21 00:45:38 +01:00
|
|
|
commandStream,
|
|
|
|
|
commandStreamStart,
|
2022-03-28 12:55:12 +00:00
|
|
|
dsh,
|
|
|
|
|
ioh,
|
|
|
|
|
&getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0u),
|
2017-12-21 00:45:38 +01:00
|
|
|
taskLevel,
|
2018-08-01 10:01:41 +02:00
|
|
|
dispatchFlags,
|
2020-01-14 14:32:11 +01:00
|
|
|
getDevice());
|
2017-12-21 00:45:38 +01:00
|
|
|
|
2021-12-29 14:28:21 +00:00
|
|
|
if (isHandlingBarrier) {
|
|
|
|
|
clearLastBcsPackets();
|
|
|
|
|
}
|
|
|
|
|
|
2020-02-19 16:32:40 +01:00
|
|
|
if (gtpinIsGTPinInitialized()) {
|
|
|
|
|
gtpinNotifyFlushTask(completionStamp.taskCount);
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-21 00:45:38 +01:00
|
|
|
return completionStamp;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
|
|
|
|
void CommandQueueHw<GfxFamily>::enqueueBlocked(
|
2019-09-02 13:54:57 +02:00
|
|
|
uint32_t commandType,
|
2017-12-21 00:45:38 +01:00
|
|
|
Surface **surfaces,
|
|
|
|
|
size_t surfaceCount,
|
|
|
|
|
const MultiDispatchInfo &multiDispatchInfo,
|
2019-11-16 11:59:18 +01:00
|
|
|
TimestampPacketDependencies ×tampPacketDependencies,
|
2019-07-18 21:15:50 +02:00
|
|
|
std::unique_ptr<KernelOperation> &blockedCommandsData,
|
2019-09-02 10:16:44 +02:00
|
|
|
const EnqueueProperties &enqueueProperties,
|
2018-09-19 10:34:33 -07:00
|
|
|
EventsRequest &eventsRequest,
|
2017-12-21 00:45:38 +01:00
|
|
|
EventBuilder &externalEventBuilder,
|
2021-10-21 12:37:31 +00:00
|
|
|
std::unique_ptr<PrintfHandler> &&printfHandler,
|
2022-12-24 18:25:41 +01:00
|
|
|
CommandStreamReceiver *bcsCsr) {
|
2017-12-21 00:45:38 +01:00
|
|
|
|
|
|
|
|
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
|
|
|
|
|
|
2022-05-04 13:04:47 +00:00
|
|
|
// store previous virtual event as it will add dependecies to new virtual event
|
2017-12-21 00:45:38 +01:00
|
|
|
if (this->virtualEvent) {
|
|
|
|
|
DBG_LOG(EventsDebugEnable, "enqueueBlocked", "previousVirtualEvent", this->virtualEvent);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
EventBuilder internalEventBuilder;
|
|
|
|
|
EventBuilder *eventBuilder;
|
|
|
|
|
// check if event will be exposed externally
|
|
|
|
|
if (externalEventBuilder.getEvent()) {
|
|
|
|
|
externalEventBuilder.getEvent()->incRefInternal();
|
|
|
|
|
eventBuilder = &externalEventBuilder;
|
|
|
|
|
DBG_LOG(EventsDebugEnable, "enqueueBlocked", "output event as virtualEvent", virtualEvent);
|
|
|
|
|
} else {
|
|
|
|
|
// it will be an internal event
|
|
|
|
|
internalEventBuilder.create<VirtualEvent>(this, context);
|
|
|
|
|
eventBuilder = &internalEventBuilder;
|
|
|
|
|
DBG_LOG(EventsDebugEnable, "enqueueBlocked", "new virtualEvent", eventBuilder->getEvent());
|
|
|
|
|
}
|
2019-07-22 20:55:09 +02:00
|
|
|
auto outEvent = eventBuilder->getEvent();
|
2017-12-21 00:45:38 +01:00
|
|
|
|
2022-05-04 13:04:47 +00:00
|
|
|
// update queue taskCount
|
2019-07-22 20:55:09 +02:00
|
|
|
taskCount = outEvent->getCompletionStamp();
|
2017-12-21 00:45:38 +01:00
|
|
|
|
2019-07-22 20:55:09 +02:00
|
|
|
std::unique_ptr<Command> command;
|
2019-09-02 13:54:57 +02:00
|
|
|
bool storeTimestampPackets = false;
|
|
|
|
|
|
|
|
|
|
if (blockedCommandsData) {
|
2019-11-18 13:35:44 +01:00
|
|
|
if (enqueueProperties.blitPropertiesContainer) {
|
2019-11-07 09:15:53 +01:00
|
|
|
blockedCommandsData->blitPropertiesContainer = *enqueueProperties.blitPropertiesContainer;
|
2021-08-26 15:31:09 +00:00
|
|
|
blockedCommandsData->bcsCsr = bcsCsr;
|
2019-09-02 13:54:57 +02:00
|
|
|
blockedCommandsData->blitEnqueue = true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
storeTimestampPackets = (timestampPacketContainer != nullptr);
|
|
|
|
|
}
|
2018-10-05 12:51:57 -07:00
|
|
|
|
2019-09-02 10:16:44 +02:00
|
|
|
if (enqueueProperties.operation != EnqueueProperties::Operation::GpuKernel) {
|
2019-09-02 12:49:36 +02:00
|
|
|
command = std::make_unique<CommandWithoutKernel>(*this, blockedCommandsData);
|
2017-12-21 00:45:38 +01:00
|
|
|
} else {
|
2022-05-04 13:04:47 +00:00
|
|
|
// store task data in event
|
2017-12-21 00:45:38 +01:00
|
|
|
std::vector<Surface *> allSurfaces;
|
2018-02-16 09:15:36 +01:00
|
|
|
Kernel *kernel = nullptr;
|
2017-12-21 00:45:38 +01:00
|
|
|
for (auto &dispatchInfo : multiDispatchInfo) {
|
2018-02-16 09:15:36 +01:00
|
|
|
if (kernel != dispatchInfo.getKernel()) {
|
|
|
|
|
kernel = dispatchInfo.getKernel();
|
|
|
|
|
} else {
|
|
|
|
|
continue;
|
2017-12-21 00:45:38 +01:00
|
|
|
}
|
2021-03-22 11:06:23 +00:00
|
|
|
kernel->getResidency(allSurfaces);
|
2018-02-16 09:15:36 +01:00
|
|
|
}
|
2022-03-01 15:14:04 +00:00
|
|
|
|
|
|
|
|
allSurfaces.reserve(allSurfaces.size() + surfaceCount);
|
2022-05-16 14:06:56 +00:00
|
|
|
for (auto &surface : createRange(surfaces, surfaceCount)) {
|
2018-02-16 09:15:36 +01:00
|
|
|
allSurfaces.push_back(surface->duplicate());
|
2017-12-21 00:45:38 +01:00
|
|
|
}
|
2019-11-12 13:59:37 +01:00
|
|
|
|
2021-09-22 22:24:59 +00:00
|
|
|
PreemptionMode preemptionMode = ClPreemptionHelper::taskPreemptionMode(getDevice(), multiDispatchInfo);
|
2022-01-13 15:27:58 +00:00
|
|
|
bool slmUsed = multiDispatchInfo.usesSlm();
|
2019-07-22 20:55:09 +02:00
|
|
|
command = std::make_unique<CommandComputeKernel>(*this,
|
|
|
|
|
blockedCommandsData,
|
2022-03-01 15:14:04 +00:00
|
|
|
std::move(allSurfaces),
|
2019-07-22 20:55:09 +02:00
|
|
|
shouldFlushDC(commandType, printfHandler.get()),
|
|
|
|
|
slmUsed,
|
2021-12-07 08:40:35 +00:00
|
|
|
commandType,
|
2019-07-22 20:55:09 +02:00
|
|
|
std::move(printfHandler),
|
|
|
|
|
preemptionMode,
|
|
|
|
|
multiDispatchInfo.peekMainKernel(),
|
2022-12-24 18:25:41 +01:00
|
|
|
(uint32_t)multiDispatchInfo.size());
|
2019-07-22 20:55:09 +02:00
|
|
|
}
|
|
|
|
|
if (storeTimestampPackets) {
|
2019-11-16 11:59:18 +01:00
|
|
|
command->setTimestampPacketNode(*timestampPacketContainer, std::move(timestampPacketDependencies));
|
2019-07-22 20:55:09 +02:00
|
|
|
command->setEventsRequest(eventsRequest);
|
2021-06-14 15:33:53 +00:00
|
|
|
} else if (this->context->getRootDeviceIndices().size() > 1) {
|
|
|
|
|
command->setEventsRequest(eventsRequest);
|
2017-12-21 00:45:38 +01:00
|
|
|
}
|
2021-06-14 15:33:53 +00:00
|
|
|
|
2019-07-22 20:55:09 +02:00
|
|
|
outEvent->setCommand(std::move(command));
|
2017-12-21 00:45:38 +01:00
|
|
|
|
2018-09-19 10:34:33 -07:00
|
|
|
eventBuilder->addParentEvents(ArrayRef<const cl_event>(eventsRequest.eventWaitList, eventsRequest.numEventsInWaitList));
|
2017-12-21 00:45:38 +01:00
|
|
|
eventBuilder->addParentEvent(this->virtualEvent);
|
|
|
|
|
eventBuilder->finalize();
|
|
|
|
|
|
|
|
|
|
if (this->virtualEvent) {
|
|
|
|
|
this->virtualEvent->decRefInternal();
|
|
|
|
|
}
|
|
|
|
|
|
2019-07-22 20:55:09 +02:00
|
|
|
this->virtualEvent = outEvent;
|
2017-12-21 00:45:38 +01:00
|
|
|
}
|
|
|
|
|
|
2018-01-24 09:57:20 +01:00
|
|
|
template <typename GfxFamily>
|
2019-03-22 13:40:41 +01:00
|
|
|
CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
|
|
|
|
|
Surface **surfaces,
|
|
|
|
|
size_t surfaceCount,
|
2020-09-30 16:58:20 +02:00
|
|
|
LinearStream *commandStream,
|
2019-03-22 13:40:41 +01:00
|
|
|
size_t commandStreamStart,
|
|
|
|
|
bool &blocking,
|
2019-08-30 09:37:44 +02:00
|
|
|
const EnqueueProperties &enqueueProperties,
|
2019-11-13 12:23:29 +01:00
|
|
|
TimestampPacketDependencies ×tampPacketDependencies,
|
2019-03-22 13:40:41 +01:00
|
|
|
EventsRequest &eventsRequest,
|
|
|
|
|
EventBuilder &eventBuilder,
|
2022-11-22 13:53:59 +00:00
|
|
|
TaskCountType taskLevel,
|
2021-08-25 16:03:15 +00:00
|
|
|
CsrDependencies &csrDeps,
|
|
|
|
|
CommandStreamReceiver *bcsCsr) {
|
2019-03-22 13:40:41 +01:00
|
|
|
|
2020-06-24 13:32:09 +02:00
|
|
|
CompletionStamp completionStamp = {this->taskCount, this->taskLevel, this->flushStamp->peekStamp()};
|
|
|
|
|
bool flushGpgpuCsr = true;
|
2019-04-10 12:44:02 +02:00
|
|
|
|
2022-03-04 09:46:29 +00:00
|
|
|
if ((enqueueProperties.operation == EnqueueProperties::Operation::Blit) && commandStream == nullptr) {
|
2020-06-24 13:32:09 +02:00
|
|
|
flushGpgpuCsr = false;
|
2021-04-29 08:58:16 +00:00
|
|
|
} else {
|
|
|
|
|
csrDeps.makeResident(getGpgpuCommandStreamReceiver());
|
2019-12-04 02:13:42 -07:00
|
|
|
}
|
|
|
|
|
|
2020-08-28 17:39:45 +02:00
|
|
|
if (eventBuilder.getEvent() && isProfilingEnabled()) {
|
2021-09-28 15:05:21 +00:00
|
|
|
eventBuilder.getEvent()->setSubmitTimeStamp();
|
2021-05-20 15:07:00 +00:00
|
|
|
eventBuilder.getEvent()->setStartTimeStamp();
|
2020-08-28 17:39:45 +02:00
|
|
|
}
|
|
|
|
|
|
2020-06-24 13:32:09 +02:00
|
|
|
if (flushGpgpuCsr) {
|
|
|
|
|
if (timestampPacketContainer) {
|
|
|
|
|
timestampPacketContainer->makeResident(getGpgpuCommandStreamReceiver());
|
|
|
|
|
timestampPacketDependencies.previousEnqueueNodes.makeResident(getGpgpuCommandStreamReceiver());
|
|
|
|
|
timestampPacketDependencies.cacheFlushNodes.makeResident(getGpgpuCommandStreamReceiver());
|
|
|
|
|
}
|
2020-04-29 14:06:01 +02:00
|
|
|
|
2022-05-16 14:06:56 +00:00
|
|
|
for (auto surface : createRange(surfaces, surfaceCount)) {
|
2020-06-24 13:32:09 +02:00
|
|
|
surface->makeResident(getGpgpuCommandStreamReceiver());
|
|
|
|
|
}
|
2019-07-19 14:31:12 +02:00
|
|
|
|
2021-07-01 16:00:22 +00:00
|
|
|
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
|
2020-06-24 13:32:09 +02:00
|
|
|
DispatchFlags dispatchFlags(
|
2022-05-04 13:04:47 +00:00
|
|
|
{}, // csrDependencies
|
|
|
|
|
×tampPacketDependencies.barrierNodes, // barrierTimestampPacketNodes
|
|
|
|
|
{}, // pipelineSelectArgs
|
|
|
|
|
flushStamp->getStampReference(), // flushStampReference
|
|
|
|
|
getThrottle(), // throttle
|
|
|
|
|
device->getPreemptionMode(), // preemptionMode
|
|
|
|
|
GrfConfig::NotApplicable, // numGrfRequired
|
|
|
|
|
L3CachingSettings::NotApplicable, // l3CacheSettings
|
|
|
|
|
ThreadArbitrationPolicy::NotPresent, // threadArbitrationPolicy
|
|
|
|
|
AdditionalKernelExecInfo::NotApplicable, // additionalKernelExecInfo
|
|
|
|
|
KernelExecutionType::NotApplicable, // kernelExecutionType
|
|
|
|
|
MemoryCompressionState::NotApplicable, // memoryCompressionState
|
|
|
|
|
getSliceCount(), // sliceCount
|
|
|
|
|
blocking, // blocking
|
|
|
|
|
false, // dcFlush
|
|
|
|
|
false, // useSLM
|
|
|
|
|
!getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled(), // guardCommandBufferWithPipeControl
|
|
|
|
|
false, // GSBA32BitRequired
|
|
|
|
|
false, // requiresCoherency
|
|
|
|
|
false, // lowPriority
|
|
|
|
|
(enqueueProperties.operation == EnqueueProperties::Operation::Blit), // implicitFlush
|
|
|
|
|
getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), // outOfOrderExecutionAllowed
|
|
|
|
|
false, // epilogueRequired
|
|
|
|
|
false, // usePerDssBackedBuffer
|
|
|
|
|
false, // useGlobalAtomics
|
|
|
|
|
context->containsMultipleSubDevices(rootDeviceIndex), // areMultipleSubDevicesInContext
|
|
|
|
|
false, // memoryMigrationRequired
|
2022-11-19 18:25:04 +00:00
|
|
|
false, // textureCacheFlush
|
2022-11-26 20:10:32 +00:00
|
|
|
false, // hasStallingCmds
|
|
|
|
|
false); // hasRelaxedOrderingDependencies
|
2020-06-24 13:32:09 +02:00
|
|
|
|
2021-12-29 14:28:21 +00:00
|
|
|
const bool isHandlingBarrier = getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();
|
|
|
|
|
|
2020-06-24 13:32:09 +02:00
|
|
|
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
2021-03-11 13:48:04 +00:00
|
|
|
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr);
|
2021-12-29 14:28:21 +00:00
|
|
|
if (isHandlingBarrier) {
|
|
|
|
|
fillCsrDependenciesWithLastBcsPackets(dispatchFlags.csrDependencies);
|
|
|
|
|
}
|
2020-06-24 13:32:09 +02:00
|
|
|
dispatchFlags.csrDependencies.makeResident(getGpgpuCommandStreamReceiver());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
completionStamp = getGpgpuCommandStreamReceiver().flushTask(
|
2020-09-30 16:58:20 +02:00
|
|
|
*commandStream,
|
2020-06-24 13:32:09 +02:00
|
|
|
commandStreamStart,
|
2022-03-28 12:55:12 +00:00
|
|
|
&getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 0u),
|
|
|
|
|
&getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 0u),
|
|
|
|
|
&getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0u),
|
2020-06-24 13:32:09 +02:00
|
|
|
taskLevel,
|
|
|
|
|
dispatchFlags,
|
|
|
|
|
getDevice());
|
2021-12-29 14:28:21 +00:00
|
|
|
|
|
|
|
|
if (isHandlingBarrier) {
|
|
|
|
|
clearLastBcsPackets();
|
|
|
|
|
}
|
2019-04-10 12:44:02 +02:00
|
|
|
}
|
2019-03-22 13:40:41 +01:00
|
|
|
|
2020-06-19 16:33:53 +02:00
|
|
|
if (enqueueProperties.operation == EnqueueProperties::Operation::Blit) {
|
|
|
|
|
UNRECOVERABLE_IF(!enqueueProperties.blitPropertiesContainer);
|
2022-02-03 13:29:48 +00:00
|
|
|
const auto newTaskCount = bcsCsr->flushBcsTask(*enqueueProperties.blitPropertiesContainer, false, this->isProfilingEnabled(), getDevice());
|
2022-11-04 13:57:42 +00:00
|
|
|
if (newTaskCount > CompletionStamp::notReady) {
|
2022-05-05 16:52:25 +00:00
|
|
|
CompletionStamp completionStamp{};
|
2022-11-04 13:57:42 +00:00
|
|
|
completionStamp.taskCount = newTaskCount;
|
2022-05-05 16:52:25 +00:00
|
|
|
|
|
|
|
|
return completionStamp;
|
|
|
|
|
}
|
|
|
|
|
|
2022-11-04 13:57:42 +00:00
|
|
|
this->updateBcsTaskCount(bcsCsr->getOsContext().getEngineType(), newTaskCount);
|
2020-06-19 16:33:53 +02:00
|
|
|
}
|
|
|
|
|
|
2019-03-22 13:40:41 +01:00
|
|
|
return completionStamp;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
2018-01-24 09:57:20 +01:00
|
|
|
void CommandQueueHw<GfxFamily>::computeOffsetsValueForRectCommands(size_t *bufferOffset,
|
|
|
|
|
size_t *hostOffset,
|
|
|
|
|
const size_t *bufferOrigin,
|
|
|
|
|
const size_t *hostOrigin,
|
|
|
|
|
const size_t *region,
|
|
|
|
|
size_t bufferRowPitch,
|
|
|
|
|
size_t bufferSlicePitch,
|
|
|
|
|
size_t hostRowPitch,
|
|
|
|
|
size_t hostSlicePitch) {
|
|
|
|
|
size_t computedBufferRowPitch = bufferRowPitch ? bufferRowPitch : region[0];
|
|
|
|
|
size_t computedBufferSlicePitch = bufferSlicePitch ? bufferSlicePitch : region[1] * computedBufferRowPitch;
|
|
|
|
|
size_t computedHostRowPitch = hostRowPitch ? hostRowPitch : region[0];
|
|
|
|
|
size_t computedHostSlicePitch = hostSlicePitch ? hostSlicePitch : region[1] * computedHostRowPitch;
|
|
|
|
|
*bufferOffset = bufferOrigin[2] * computedBufferSlicePitch + bufferOrigin[1] * computedBufferRowPitch + bufferOrigin[0];
|
|
|
|
|
*hostOffset = hostOrigin[2] * computedHostSlicePitch + hostOrigin[1] * computedHostRowPitch + hostOrigin[0];
|
|
|
|
|
}
|
2018-02-08 22:52:58 +01:00
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
2018-12-11 17:06:47 +01:00
|
|
|
size_t CommandQueueHw<GfxFamily>::calculateHostPtrSizeForImage(const size_t *region, size_t rowPitch, size_t slicePitch, Image *image) {
|
2020-01-08 17:29:15 +01:00
|
|
|
auto bytesPerPixel = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes;
|
2018-02-08 22:52:58 +01:00
|
|
|
auto dstRowPitch = rowPitch ? rowPitch : region[0] * bytesPerPixel;
|
|
|
|
|
auto dstSlicePitch = slicePitch ? slicePitch : ((image->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ? 1 : region[1]) * dstRowPitch);
|
|
|
|
|
|
|
|
|
|
return Image::calculateHostPtrSize(region, dstRowPitch, dstSlicePitch, bytesPerPixel, image->getImageDesc().image_type);
|
|
|
|
|
}
|
2020-08-25 14:00:20 +02:00
|
|
|
|
2022-07-20 15:05:27 +00:00
|
|
|
template <typename GfxFamily>
|
2022-08-30 14:10:57 +00:00
|
|
|
bool CommandQueueHw<GfxFamily>::isSplitEnqueueBlitNeeded(TransferDirection transferDirection, size_t transferSize, CommandStreamReceiver &csr) {
|
|
|
|
|
constexpr size_t minimalSizeForBcsSplit = 16 * MemoryConstants::megaByte;
|
|
|
|
|
|
2022-09-05 10:15:40 +00:00
|
|
|
auto bcsSplit = getDevice().isBcsSplitSupported() &&
|
2022-07-20 15:05:27 +00:00
|
|
|
csr.getOsContext().getEngineType() == aub_stream::EngineType::ENGINE_BCS &&
|
2022-08-30 14:10:57 +00:00
|
|
|
transferSize >= minimalSizeForBcsSplit &&
|
2022-11-10 13:36:19 +00:00
|
|
|
transferDirection != TransferDirection::LocalToLocal;
|
2022-07-20 15:05:27 +00:00
|
|
|
|
|
|
|
|
if (bcsSplit) {
|
|
|
|
|
this->constructBcsEnginesForSplit();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return bcsSplit;
|
|
|
|
|
}
|
|
|
|
|
|
2022-08-30 14:10:57 +00:00
|
|
|
template <typename GfxFamily>
|
|
|
|
|
size_t CommandQueueHw<GfxFamily>::getTotalSizeFromRectRegion(const size_t *region) {
|
|
|
|
|
auto size = region[0];
|
|
|
|
|
size *= (region[1] == 0 ? 1 : region[1]);
|
|
|
|
|
size *= (region[2] == 0 ? 1 : region[2]);
|
|
|
|
|
return size;
|
|
|
|
|
}
|
|
|
|
|
|
2022-07-20 15:05:27 +00:00
|
|
|
template <typename GfxFamily>
|
|
|
|
|
template <uint32_t cmdType>
|
|
|
|
|
cl_int CommandQueueHw<GfxFamily>::enqueueBlitSplit(MultiDispatchInfo &dispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &csr) {
|
|
|
|
|
auto ret = CL_SUCCESS;
|
|
|
|
|
this->releaseMainCopyEngine();
|
|
|
|
|
|
2022-09-05 09:46:31 +00:00
|
|
|
StackVec<std::unique_lock<CommandStreamReceiver::MutexType>, 4u> locks;
|
|
|
|
|
StackVec<CommandStreamReceiver *, 4u> copyEngines;
|
2022-08-19 10:36:31 +00:00
|
|
|
|
|
|
|
|
for (uint32_t i = 0; i < bcsInfoMaskSize; i++) {
|
|
|
|
|
if (this->splitEngines.test(i)) {
|
|
|
|
|
auto engineType = EngineHelpers::mapBcsIndexToEngineType(i, true);
|
|
|
|
|
auto bcs = getBcsCommandStreamReceiver(engineType);
|
|
|
|
|
if (bcs) {
|
|
|
|
|
locks.push_back(std::move(bcs->obtainUniqueOwnership()));
|
|
|
|
|
copyEngines.push_back(bcs);
|
|
|
|
|
}
|
2022-07-20 15:05:27 +00:00
|
|
|
}
|
|
|
|
|
}
|
2022-08-19 10:36:31 +00:00
|
|
|
|
2022-07-20 15:05:27 +00:00
|
|
|
DEBUG_BREAK_IF(copyEngines.size() == 0);
|
|
|
|
|
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
|
|
|
|
|
|
2022-12-07 15:04:37 +00:00
|
|
|
if (isOOQEnabled() && getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired()) {
|
|
|
|
|
NullSurface s;
|
|
|
|
|
Surface *surfaces[] = {&s};
|
|
|
|
|
BuiltinOpParams params{};
|
|
|
|
|
params.bcsSplit = true;
|
|
|
|
|
MultiDispatchInfo di(params);
|
|
|
|
|
ret = enqueueHandler<CL_COMMAND_MARKER>(surfaces,
|
|
|
|
|
false,
|
|
|
|
|
di,
|
|
|
|
|
numEventsInWaitList,
|
|
|
|
|
eventWaitList,
|
|
|
|
|
event);
|
|
|
|
|
DEBUG_BREAK_IF(ret != CL_SUCCESS);
|
|
|
|
|
}
|
|
|
|
|
|
2022-07-20 15:05:27 +00:00
|
|
|
TimestampPacketContainer splitNodes;
|
|
|
|
|
TimestampPacketContainer previousEnqueueNode;
|
|
|
|
|
previousEnqueueNode.swapNodes(*this->timestampPacketContainer);
|
|
|
|
|
|
2022-09-07 16:49:50 +00:00
|
|
|
auto srcOffset = dispatchInfo.peekBuiltinOpParams().srcOffset.x;
|
|
|
|
|
auto dstOffset = dispatchInfo.peekBuiltinOpParams().dstOffset.x;
|
|
|
|
|
auto size = dispatchInfo.peekBuiltinOpParams().size.x;
|
2022-07-20 15:05:27 +00:00
|
|
|
auto remainingSize = size;
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < copyEngines.size(); i++) {
|
2022-09-07 16:49:50 +00:00
|
|
|
auto localSize = remainingSize / (copyEngines.size() - i);
|
2022-07-20 15:05:27 +00:00
|
|
|
auto localParams = dispatchInfo.peekBuiltinOpParams();
|
2022-09-07 16:49:50 +00:00
|
|
|
localParams.size.x = localSize;
|
|
|
|
|
localParams.srcOffset.x = (srcOffset + size - remainingSize);
|
|
|
|
|
localParams.dstOffset.x = (dstOffset + size - remainingSize);
|
2022-07-20 15:05:27 +00:00
|
|
|
|
|
|
|
|
dispatchInfo.setBuiltinOpParams(localParams);
|
2022-09-07 16:49:50 +00:00
|
|
|
remainingSize -= localSize;
|
2022-07-20 15:05:27 +00:00
|
|
|
|
|
|
|
|
this->timestampPacketContainer->assignAndIncrementNodesRefCounts(previousEnqueueNode);
|
|
|
|
|
|
|
|
|
|
ret = enqueueBlit<cmdType>(dispatchInfo, numEventsInWaitList, eventWaitList, remainingSize == 0 ? event : nullptr, false, *copyEngines[i]);
|
|
|
|
|
DEBUG_BREAK_IF(ret != CL_SUCCESS);
|
|
|
|
|
|
|
|
|
|
this->timestampPacketContainer->moveNodesToNewContainer(splitNodes);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (event) {
|
|
|
|
|
auto e = castToObjectOrAbort<Event>(*event);
|
|
|
|
|
e->addTimestampPacketNodes(splitNodes);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
this->timestampPacketContainer->swapNodes(splitNodes);
|
|
|
|
|
|
2022-09-05 09:46:31 +00:00
|
|
|
queueOwnership.unlock();
|
|
|
|
|
for (auto &lock : locks) {
|
|
|
|
|
lock.unlock();
|
|
|
|
|
}
|
|
|
|
|
|
2022-07-20 15:05:27 +00:00
|
|
|
if (blocking) {
|
|
|
|
|
ret = this->finish();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
2020-08-25 14:00:20 +02:00
|
|
|
template <typename GfxFamily>
|
|
|
|
|
template <uint32_t cmdType>
|
2022-03-21 11:08:43 +00:00
|
|
|
cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &bcsCsr) {
|
2022-03-01 11:24:30 +00:00
|
|
|
auto bcsCommandStreamReceiverOwnership = bcsCsr.obtainUniqueOwnership();
|
2022-04-20 09:13:26 +00:00
|
|
|
std::unique_lock<NEO::CommandStreamReceiver::MutexType> commandStreamReceiverOwnership;
|
2020-08-25 14:00:20 +02:00
|
|
|
|
|
|
|
|
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
|
|
|
|
|
EventBuilder eventBuilder;
|
|
|
|
|
|
2020-08-26 11:26:44 +02:00
|
|
|
setupEvent(eventBuilder, eventsRequest.outEvent, cmdType);
|
2021-08-26 16:38:39 +00:00
|
|
|
eventsRequest.setupBcsCsrForOutputEvent(bcsCsr);
|
2020-08-25 14:00:20 +02:00
|
|
|
|
|
|
|
|
std::unique_ptr<KernelOperation> blockedCommandsData;
|
|
|
|
|
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
|
2022-08-03 22:41:23 +00:00
|
|
|
if (DebugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.get() != 1) {
|
|
|
|
|
commandStreamReceiverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership();
|
|
|
|
|
}
|
2020-08-25 14:00:20 +02:00
|
|
|
|
|
|
|
|
auto blockQueue = false;
|
2022-11-22 13:53:59 +00:00
|
|
|
TaskCountType taskLevel = 0u;
|
2020-08-25 14:00:20 +02:00
|
|
|
obtainTaskLevelAndBlockedStatus(taskLevel, eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, blockQueue, cmdType);
|
|
|
|
|
auto clearAllDependencies = queueDependenciesClearRequired();
|
|
|
|
|
|
|
|
|
|
enqueueHandlerHook(cmdType, multiDispatchInfo);
|
|
|
|
|
aubCaptureHook(blocking, clearAllDependencies, multiDispatchInfo);
|
|
|
|
|
|
|
|
|
|
if (DebugManager.flags.MakeEachEnqueueBlocking.get()) {
|
|
|
|
|
blocking = true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TimestampPacketDependencies timestampPacketDependencies;
|
|
|
|
|
BlitPropertiesContainer blitPropertiesContainer;
|
|
|
|
|
CsrDependencies csrDeps;
|
|
|
|
|
|
2021-08-26 11:53:25 +00:00
|
|
|
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, bcsCsr, CsrDependencies::DependenciesType::All);
|
|
|
|
|
auto allocator = bcsCsr.getTimestampPacketAllocator();
|
2020-08-25 14:00:20 +02:00
|
|
|
|
2021-12-29 14:28:21 +00:00
|
|
|
if (!blockQueue) {
|
|
|
|
|
setupBarrierTimestampForBcsEngines(bcsCsr.getOsContext().getEngineType(), timestampPacketDependencies);
|
2022-02-02 16:30:03 +00:00
|
|
|
if (isOOQEnabled()) {
|
|
|
|
|
TimestampPacketContainer clearBarrierNodes;
|
|
|
|
|
timestampPacketDependencies.barrierNodes.swapNodes(clearBarrierNodes);
|
|
|
|
|
}
|
2020-08-25 14:00:20 +02:00
|
|
|
}
|
2021-12-29 14:28:21 +00:00
|
|
|
processBarrierTimestampForBcsEngine(bcsCsr.getOsContext().getEngineType(), timestampPacketDependencies);
|
2020-08-25 14:00:20 +02:00
|
|
|
|
2022-12-28 19:37:21 +00:00
|
|
|
if (!blockQueue && this->getContext().getRootDeviceIndices().size() > 1) {
|
|
|
|
|
migrateMultiGraphicsAllocationsIfRequired(multiDispatchInfo.peekBuiltinOpParams(), bcsCsr);
|
|
|
|
|
}
|
|
|
|
|
|
2022-03-01 13:14:09 +00:00
|
|
|
auto gpgpuSubmission = isGpgpuSubmissionForBcsRequired(blockQueue, timestampPacketDependencies);
|
|
|
|
|
if (isCacheFlushForBcsRequired() && gpgpuSubmission) {
|
2022-02-02 16:30:03 +00:00
|
|
|
timestampPacketDependencies.cacheFlushNodes.add(allocator->getTag());
|
|
|
|
|
}
|
|
|
|
|
|
2021-08-26 11:53:25 +00:00
|
|
|
obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, clearAllDependencies, bcsCsr);
|
2021-03-11 13:48:04 +00:00
|
|
|
csrDeps.timestampPacketContainer.push_back(×tampPacketDependencies.previousEnqueueNodes);
|
2020-08-25 14:00:20 +02:00
|
|
|
|
2022-03-01 13:14:09 +00:00
|
|
|
if (eventBuilder.getEvent()) {
|
|
|
|
|
eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
CompletionStamp completionStamp = {CompletionStamp::notReady, taskLevel, 0};
|
|
|
|
|
|
|
|
|
|
const EnqueueProperties enqueueProperties(true, false, false, false, false, &blitPropertiesContainer);
|
|
|
|
|
|
2020-09-30 16:58:20 +02:00
|
|
|
LinearStream *gpgpuCommandStream = {};
|
|
|
|
|
size_t gpgpuCommandStreamStart = {};
|
2022-03-01 13:14:09 +00:00
|
|
|
if (gpgpuSubmission) {
|
2022-08-03 22:41:23 +00:00
|
|
|
if (DebugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.get() == 1) {
|
|
|
|
|
commandStreamReceiverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership();
|
|
|
|
|
}
|
2021-06-14 16:35:48 +00:00
|
|
|
gpgpuCommandStream = obtainCommandStream<cmdType>(csrDeps, true, blockQueue, multiDispatchInfo, eventsRequest, blockedCommandsData, nullptr, 0, false);
|
2020-09-30 16:58:20 +02:00
|
|
|
gpgpuCommandStreamStart = gpgpuCommandStream->getUsed();
|
|
|
|
|
}
|
2020-08-25 14:00:20 +02:00
|
|
|
|
2021-08-26 11:53:25 +00:00
|
|
|
blitPropertiesContainer.push_back(processDispatchForBlitEnqueue(bcsCsr, multiDispatchInfo, timestampPacketDependencies,
|
2022-12-24 18:25:41 +01:00
|
|
|
eventsRequest, gpgpuCommandStream, cmdType, blockQueue));
|
2020-08-25 14:00:20 +02:00
|
|
|
|
|
|
|
|
if (!blockQueue) {
|
2021-08-25 16:03:15 +00:00
|
|
|
completionStamp = enqueueCommandWithoutKernel(nullptr, 0, gpgpuCommandStream, gpgpuCommandStreamStart, blocking,
|
|
|
|
|
enqueueProperties, timestampPacketDependencies, eventsRequest,
|
2021-08-26 11:53:25 +00:00
|
|
|
eventBuilder, taskLevel, csrDeps, &bcsCsr);
|
2022-11-04 13:57:42 +00:00
|
|
|
if (completionStamp.taskCount > CompletionStamp::notReady) {
|
|
|
|
|
return CommandQueue::getErrorCodeFromTaskCount(completionStamp.taskCount);
|
2022-05-05 16:52:25 +00:00
|
|
|
}
|
|
|
|
|
|
2022-04-20 09:13:26 +00:00
|
|
|
if (gpgpuSubmission) {
|
2022-08-03 22:41:23 +00:00
|
|
|
if (DebugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.get() == 1) {
|
|
|
|
|
commandStreamReceiverOwnership.unlock();
|
|
|
|
|
}
|
2022-04-20 09:13:26 +00:00
|
|
|
}
|
|
|
|
|
|
2020-08-25 14:00:20 +02:00
|
|
|
if (eventBuilder.getEvent()) {
|
|
|
|
|
eventBuilder.getEvent()->flushStamp->replaceStampObject(this->flushStamp->getStampReference());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
this->latestSentEnqueueType = enqueueProperties.operation;
|
2021-12-29 14:28:21 +00:00
|
|
|
|
|
|
|
|
setLastBcsPacket(bcsCsr.getOsContext().getEngineType());
|
2020-08-25 14:00:20 +02:00
|
|
|
}
|
2020-08-26 11:26:44 +02:00
|
|
|
updateFromCompletionStamp(completionStamp, eventBuilder.getEvent());
|
2020-08-25 14:00:20 +02:00
|
|
|
|
|
|
|
|
if (blockQueue) {
|
2022-12-24 18:25:41 +01:00
|
|
|
enqueueBlocked(cmdType, nullptr, 0, multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueueProperties, eventsRequest, eventBuilder, nullptr, &bcsCsr);
|
2022-04-20 09:13:26 +00:00
|
|
|
|
|
|
|
|
if (gpgpuSubmission) {
|
2022-08-03 22:41:23 +00:00
|
|
|
if (DebugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.get() == 1) {
|
|
|
|
|
commandStreamReceiverOwnership.unlock();
|
|
|
|
|
}
|
2022-04-20 09:13:26 +00:00
|
|
|
}
|
2020-08-25 14:00:20 +02:00
|
|
|
}
|
|
|
|
|
|
2021-06-21 17:39:40 +00:00
|
|
|
timestampPacketDependencies.moveNodesToNewContainer(*deferredTimestampPackets);
|
2022-05-11 14:34:38 +00:00
|
|
|
csrDeps.copyNodesToNewContainer(*deferredTimestampPackets);
|
2022-08-03 22:41:23 +00:00
|
|
|
if (DebugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.get() != 1) {
|
|
|
|
|
commandStreamReceiverOwnership.unlock();
|
|
|
|
|
}
|
2020-08-25 14:00:20 +02:00
|
|
|
queueOwnership.unlock();
|
2022-03-01 11:24:30 +00:00
|
|
|
bcsCommandStreamReceiverOwnership.unlock();
|
2020-08-25 14:00:20 +02:00
|
|
|
|
|
|
|
|
if (blocking) {
|
2022-03-21 11:08:43 +00:00
|
|
|
const auto waitStatus = waitForAllEngines(blockQueue, nullptr);
|
|
|
|
|
if (waitStatus == WaitStatus::GpuHang) {
|
|
|
|
|
return CL_OUT_OF_RESOURCES;
|
|
|
|
|
}
|
2020-08-25 14:00:20 +02:00
|
|
|
}
|
2022-03-21 11:08:43 +00:00
|
|
|
|
|
|
|
|
return CL_SUCCESS;
|
2020-08-25 14:00:20 +02:00
|
|
|
}
|
2020-08-28 12:46:32 +02:00
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
|
|
|
|
template <uint32_t cmdType, size_t surfaceCount>
|
2022-03-21 11:08:43 +00:00
|
|
|
cl_int CommandQueueHw<GfxFamily>::dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount], EBuiltInOps::Type builtInOperation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &csr) {
|
2021-09-06 17:04:14 +00:00
|
|
|
const bool blit = EngineHelpers::isBcs(csr.getOsContext().getEngineType());
|
2021-09-02 10:14:12 +00:00
|
|
|
|
2021-02-10 17:41:08 +00:00
|
|
|
if (blit) {
|
2022-07-20 15:05:27 +00:00
|
|
|
cl_int ret = CL_SUCCESS;
|
|
|
|
|
|
|
|
|
|
if (dispatchInfo.peekBuiltinOpParams().bcsSplit) {
|
|
|
|
|
ret = enqueueBlitSplit<cmdType>(dispatchInfo, numEventsInWaitList, eventWaitList, event, blocking, csr);
|
|
|
|
|
} else {
|
|
|
|
|
ret = enqueueBlit<cmdType>(dispatchInfo, numEventsInWaitList, eventWaitList, event, blocking, csr);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return ret;
|
2020-08-28 12:46:32 +02:00
|
|
|
} else {
|
2020-09-01 11:39:32 +02:00
|
|
|
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInOperation,
|
2020-10-22 12:14:54 +02:00
|
|
|
this->getClDevice());
|
2021-04-02 13:00:33 +00:00
|
|
|
BuiltInOwnershipWrapper builtInLock(builder, this->context);
|
2020-09-01 11:39:32 +02:00
|
|
|
|
|
|
|
|
builder.buildDispatchInfos(dispatchInfo);
|
|
|
|
|
|
2022-03-21 11:08:43 +00:00
|
|
|
return enqueueHandler<cmdType>(
|
2020-08-28 12:46:32 +02:00
|
|
|
surfaces,
|
|
|
|
|
blocking,
|
|
|
|
|
dispatchInfo,
|
|
|
|
|
numEventsInWaitList,
|
|
|
|
|
eventWaitList,
|
|
|
|
|
event);
|
|
|
|
|
}
|
|
|
|
|
}
|
2021-03-01 15:05:04 +00:00
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
|
|
|
|
bool CommandQueueHw<GfxFamily>::isBlitAuxTranslationRequired(const MultiDispatchInfo &multiDispatchInfo) {
|
|
|
|
|
return multiDispatchInfo.getKernelObjsForAuxTranslation() &&
|
|
|
|
|
(multiDispatchInfo.getKernelObjsForAuxTranslation()->size() > 0) &&
|
2022-12-08 12:22:35 +00:00
|
|
|
(GfxCoreHelperHw<GfxFamily>::get().getAuxTranslationMode(device->getHardwareInfo()) == AuxTranslationMode::Blit);
|
2021-03-01 15:05:04 +00:00
|
|
|
}
|
|
|
|
|
|
2019-03-26 11:59:46 +01:00
|
|
|
} // namespace NEO
|