/*
 * Copyright (C) 2019-2024 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/helpers/blit_commands_helper.h"
#include "shared/source/utilities/wait_util.h"

#include "opencl/source/built_ins/aux_translation_builtin.h"
#include "opencl/source/command_queue/enqueue_barrier.h"
#include "opencl/source/command_queue/enqueue_copy_buffer.h"
#include "opencl/source/command_queue/enqueue_copy_buffer_rect.h"
#include "opencl/source/command_queue/enqueue_copy_buffer_to_image.h"
#include "opencl/source/command_queue/enqueue_copy_image.h"
#include "opencl/source/command_queue/enqueue_copy_image_to_buffer.h"
#include "opencl/source/command_queue/enqueue_fill_buffer.h"
#include "opencl/source/command_queue/enqueue_fill_image.h"
#include "opencl/source/command_queue/enqueue_kernel.h"
#include "opencl/source/command_queue/enqueue_marker.h"
#include "opencl/source/command_queue/enqueue_migrate_mem_objects.h"
#include "opencl/source/command_queue/enqueue_read_buffer.h"
#include "opencl/source/command_queue/enqueue_read_buffer_rect.h"
#include "opencl/source/command_queue/enqueue_read_image.h"
#include "opencl/source/command_queue/enqueue_svm.h"
#include "opencl/source/command_queue/enqueue_write_buffer.h"
#include "opencl/source/command_queue/enqueue_write_buffer_rect.h"
#include "opencl/source/command_queue/enqueue_write_image.h"
#include "opencl/source/command_queue/finish.h"
#include "opencl/source/command_queue/flush.h"
#include "opencl/source/command_queue/gpgpu_walker.h"

namespace NEO {

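// The notifyEnqueue* helpers below mark the source allocation as dumpable when the
// corresponding AUBDumpAllocsOnEnqueue*Only debug flag is set; the buffer variant
// additionally sets forceDisallowCPUCopy for blocking reads.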
template <typename Family>
void CommandQueueHw<Family>::notifyEnqueueReadBuffer(Buffer *buffer, bool blockingRead, bool notifyBcsCsr) {
    if (debugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.get()) {
        buffer->getGraphicsAllocation(getDevice().getRootDeviceIndex())->setAllocDumpable(blockingRead, notifyBcsCsr);
        buffer->forceDisallowCPUCopy = blockingRead;
    }
}

template <typename Family>
void CommandQueueHw<Family>::notifyEnqueueReadImage(Image *image, bool blockingRead, bool notifyBcsCsr) {
    if (debugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.get()) {
        image->getGraphicsAllocation(getDevice().getRootDeviceIndex())->setAllocDumpable(blockingRead, notifyBcsCsr);
    }
}

template <typename Family>
void CommandQueueHw<Family>::notifyEnqueueSVMMemcpy(GraphicsAllocation *gfxAllocation, bool blockingCopy, bool notifyBcsCsr) {
    if (debugManager.flags.AUBDumpAllocsOnEnqueueSVMMemcpyOnly.get()) {
        gfxAllocation->setAllocDumpable(blockingCopy, notifyBcsCsr);
    }
}

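// CPU fast path for buffer reads/writes: the data transfer is performed on the host
// through cpuDataTransferHandler instead of submitting work to the GPU.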
template <typename Family>
cl_int CommandQueueHw<Family>::enqueueReadWriteBufferOnCpuWithMemoryTransfer(cl_command_type commandType, Buffer *buffer,
                                                                             size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList,
                                                                             const cl_event *eventWaitList, cl_event *event) {
    cl_int retVal = CL_SUCCESS;
    EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);

    TransferProperties transferProperties(buffer, commandType, 0, true, &offset, &size, ptr, true, getDevice().getRootDeviceIndex());
    cpuDataTransferHandler(transferProperties, eventsRequest, retVal);
    return retVal;
}

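// CPU path for reads/writes that need no actual data copy: cpuDataTransferHandler only
// handles event and synchronization bookkeeping (dispatched as a marker), and the
// returned event is retagged with the original command type.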
template <typename Family>
cl_int CommandQueueHw<Family>::enqueueReadWriteBufferOnCpuWithoutMemoryTransfer(cl_command_type commandType, Buffer *buffer,
                                                                                size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList,
                                                                                const cl_event *eventWaitList, cl_event *event) {
    cl_int retVal = CL_SUCCESS;
    EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);

    TransferProperties transferProperties(buffer, CL_COMMAND_MARKER, 0, true, &offset, &size, ptr, false, getDevice().getRootDeviceIndex());
    cpuDataTransferHandler(transferProperties, eventsRequest, retVal);

    if (event) {
        auto pEvent = castToObjectOrAbort<Event>(*event);
        pEvent->setCmdType(commandType);
    }

    if (context->isProvidingPerformanceHints()) {
        context->providePerformanceHintForMemoryTransfer(commandType, false, static_cast<cl_mem>(buffer), ptr);
    }
    return retVal;
}

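// Enqueues a marker in place of a real transfer when no data movement is required;
// the resulting event still reports the caller's command type, and a memory-transfer
// performance hint is emitted when the context requests hints.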
template <typename Family>
cl_int CommandQueueHw<Family>::enqueueMarkerForReadWriteOperation(MemObj *memObj, void *ptr, cl_command_type commandType, cl_bool blocking, cl_uint numEventsInWaitList,
                                                                  const cl_event *eventWaitList, cl_event *event) {
    MultiDispatchInfo multiDispatchInfo;
    NullSurface s;
    Surface *surfaces[] = {&s};
    const auto enqueueResult = enqueueHandler<CL_COMMAND_MARKER>(
        surfaces,
        blocking == CL_TRUE,
        multiDispatchInfo,
        numEventsInWaitList,
        eventWaitList,
        event);

    if (enqueueResult != CL_SUCCESS) {
        return enqueueResult;
    }

    if (event) {
        auto pEvent = castToObjectOrAbort<Event>(*event);
        pEvent->setCmdType(commandType);
    }

    if (context->isProvidingPerformanceHints()) {
        context->providePerformanceHintForMemoryTransfer(commandType, false, static_cast<cl_mem>(memObj), ptr);
    }

    return CL_SUCCESS;
}

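// Fetches the aux-translation builtin for this device and appends its dispatch infos
// for the requested translation direction to multiDispatchInfo.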
template <typename Family>
void CommandQueueHw<Family>::dispatchAuxTranslationBuiltin(MultiDispatchInfo &multiDispatchInfo,
                                                           AuxTranslationDirection auxTranslationDirection) {
    auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::auxTranslation, getClDevice());
    auto &auxTranslationBuilder = static_cast<BuiltInOp<EBuiltInOps::auxTranslation> &>(builder);
    BuiltinOpParams dispatchParams;

    dispatchParams.auxTranslationDirection = auxTranslationDirection;

    auxTranslationBuilder.buildDispatchInfosForAuxTranslation<Family>(multiDispatchInfo, dispatchParams);
}

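// Force the stateless path for transfer sizes of 4 GB and above.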
template <typename Family>
bool CommandQueueHw<Family>::forceStateless(size_t size) {
    return size >= 4ull * MemoryConstants::gigaByte;
}

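// Whether caches must be flushed before a BCS (blitter) transfer; defaults to true and
// can be overridden via the ForceCacheFlushForBcs debug flag.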
template <typename Family>
bool CommandQueueHw<Family>::isCacheFlushForBcsRequired() const {
    if (debugManager.flags.ForceCacheFlushForBcs.get() != -1) {
        return !!debugManager.flags.ForceCacheFlushForBcs.get();
    }
    return true;
}

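// Spins on each timestamp packet in the container until its context-end value leaves the
// initial value (1), downloading the backing allocation between polls; reports
// WaitStatus::gpuHang and bails out if the CSR detects a GPU hang.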
template <typename TSPacketType>
inline bool waitForTimestampsWithinContainer(TimestampPacketContainer *container, CommandStreamReceiver &csr, WaitStatus &status) {
    bool printWaitForCompletion = debugManager.flags.LogWaitingForCompletion.get();
    bool waited = false;
    status = WaitStatus::notReady;

    if (container) {
        auto lastHangCheckTime = std::chrono::high_resolution_clock::now();
        for (const auto &timestamp : container->peekNodes()) {
            for (uint32_t i = 0; i < timestamp->getPacketsUsed(); i++) {
                if (printWaitForCompletion) {
                    printf("\nWaiting for TS 0x%" PRIx64, timestamp->getGpuAddress() + (i * timestamp->getSinglePacketSize()));
                }
                while (timestamp->getContextEndValue(i) == 1) {
                    csr.downloadAllocation(*timestamp->getBaseGraphicsAllocation()->getGraphicsAllocation(csr.getRootDeviceIndex()));
                    WaitUtils::waitFunctionWithPredicate<const TSPacketType>(static_cast<TSPacketType const *>(timestamp->getContextEndAddress(i)), 1u, std::not_equal_to<TSPacketType>());
                    if (csr.checkGpuHangDetected(std::chrono::high_resolution_clock::now(), lastHangCheckTime)) {
                        status = WaitStatus::gpuHang;
                        if (printWaitForCompletion) {
                            printf("\nWaiting for TS failed");
                        }
                        return false;
                    }
                }
                if (printWaitForCompletion) {
                    printf("\nWaiting for TS completed");
                }
                status = WaitStatus::ready;
                waited = true;
            }
        }
    }

    return waited;
}

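// Waits for the queue's timestamp packets (if waiting on timestamps is enabled) while
// holding queue ownership, then downloads allocations from the GPGPU CSR and each
// copy-engine CSR that was involved.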
template <typename Family>
bool CommandQueueHw<Family>::waitForTimestamps(Range<CopyEngineState> copyEnginesToWait, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) {
    using TSPacketType = typename Family::TimestampPacketType;
    bool waited = false;

    if (isWaitForTimestampsEnabled()) {
        {
            TakeOwnershipWrapper<CommandQueue> queueOwnership(*this);
            waited = waitForTimestampsWithinContainer<TSPacketType>(mainContainer, getGpgpuCommandStreamReceiver(), status);
        }

        if (waited) {
            getGpgpuCommandStreamReceiver().downloadAllocations(true);
            for (const auto &copyEngine : copyEnginesToWait) {
                auto bcsCsr = getBcsCommandStreamReceiver(copyEngine.engineType);
                bcsCsr->downloadAllocations(true);
            }
        }
    }

    return waited;
}

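// Wraps a blit with aux translation: the first dispatch gets init commands (and a size
// estimator) that program the aux-to-non-aux semaphore dependency, and the last dispatch
// gets epilogue commands for the non-aux-to-aux direction.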
template <typename Family>
void CommandQueueHw<Family>::setupBlitAuxTranslation(MultiDispatchInfo &multiDispatchInfo) {
    multiDispatchInfo.begin()->dispatchInitCommands.registerMethod(
        TimestampPacketHelper::programSemaphoreForAuxTranslation<Family, AuxTranslationDirection::auxToNonAux>);

    multiDispatchInfo.begin()->dispatchInitCommands.registerCommandsSizeEstimationMethod(
        TimestampPacketHelper::getRequiredCmdStreamSizeForAuxTranslationNodeDependency<Family, AuxTranslationDirection::auxToNonAux>);

    multiDispatchInfo.rbegin()->dispatchEpilogueCommands.registerMethod(
        TimestampPacketHelper::programSemaphoreForAuxTranslation<Family, AuxTranslationDirection::nonAuxToAux>);

    multiDispatchInfo.rbegin()->dispatchEpilogueCommands.registerCommandsSizeEstimationMethod(
        TimestampPacketHelper::getRequiredCmdStreamSizeForAuxTranslationNodeDependency<Family, AuxTranslationDirection::nonAuxToAux>);
}

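// Decides whether a blitter enqueue also needs a GPGPU submission: always for blocked
// queues or pending barrier nodes, based on cross-engine dependencies for out-of-order
// queues, otherwise based on the last enqueue type, cache-flush needs and unflushed task
// counts; the ForceGpgpuSubmissionForBcsEnqueue debug flag can force it.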
template <typename Family>
bool CommandQueueHw<Family>::isGpgpuSubmissionForBcsRequired(bool queueBlocked, TimestampPacketDependencies &timestampPacketDependencies, bool containsCrossEngineDependency) const {
    if (queueBlocked || timestampPacketDependencies.barrierNodes.peekNodes().size() > 0u) {
        return true;
    }
    if (isOOQEnabled()) {
        return containsCrossEngineDependency;
    }

    bool required = false;

    switch (latestSentEnqueueType) {
    case NEO::EnqueueProperties::Operation::explicitCacheFlush:
    case NEO::EnqueueProperties::Operation::enqueueWithoutSubmission:
    case NEO::EnqueueProperties::Operation::gpuKernel:
    case NEO::EnqueueProperties::Operation::profilingOnly:
        required = isCacheFlushForBcsRequired() || !(getGpgpuCommandStreamReceiver().getDispatchMode() == DispatchMode::immediateDispatch || getGpgpuCommandStreamReceiver().isLatestTaskCountFlushed());
        break;
    case NEO::EnqueueProperties::Operation::dependencyResolveOnGpu:
        return true;
        break;
    case NEO::EnqueueProperties::Operation::none:
    case NEO::EnqueueProperties::Operation::blit:
    default:
        break;
    }

    if (debugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.get() == 1) {
        required = true;
    }

    return required;
}

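// Creates the user-visible output event (if requested), stamps the queue time for
// profiling, and routes kernel-less commands that need no flush onto the CPU profiling path.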
template <typename Family>
void CommandQueueHw<Family>::setupEvent(EventBuilder &eventBuilder, cl_event *outEvent, uint32_t cmdType) {
    if (outEvent) {
        eventBuilder.create<Event>(this, cmdType, CompletionStamp::notReady, 0);
        auto eventObj = eventBuilder.getEvent();
        *outEvent = eventObj;

        if (eventObj->isProfilingEnabled()) {
            eventObj->setQueueTimeStamp();

            if (isCommandWithoutKernel(cmdType) && !isFlushForProfilingRequired(cmdType)) {
                eventObj->setCPUProfilingPath(true);
            }
        }
        DBG_LOG(EventsDebugEnable, "enqueueHandler commandType", cmdType, "output Event", eventObj);
    }
}

template <typename Family>
CommandQueueHw<Family>::~CommandQueueHw() {
    unregisterGpgpuAndBcsCsrClients();
}

} // namespace NEO