Detect GPU hangs in blocking enqueue handler calls

This change introduces detection of GPU hangs in blocking
calls to enqueueHandler() function. Moreover, usages of
this function template have been revised and adjusted to
check the exit code. Furthermore, the enqueueBlit() and
dispatchBcsOrGpgpuEnqueue() functions now return a value.
ULTs have been added to cover new cases.

Signed-off-by: Patryk Wrobel <patryk.wrobel@intel.com>
Related-To: NEO-6681
This commit is contained in:
Patryk Wrobel
2022-03-21 11:08:43 +00:00
committed by Compute-Runtime-Automation
parent 05e17e90d8
commit e4d56cde21
46 changed files with 1163 additions and 286 deletions

View File

@@ -344,42 +344,42 @@ class CommandQueueHw : public CommandQueue {
cl_int flush() override;
template <uint32_t enqueueType>
void enqueueHandler(Surface **surfacesForResidency,
size_t numSurfaceForResidency,
bool blocking,
const MultiDispatchInfo &dispatchInfo,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event);
cl_int enqueueHandler(Surface **surfacesForResidency,
size_t numSurfaceForResidency,
bool blocking,
const MultiDispatchInfo &dispatchInfo,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event);
template <uint32_t enqueueType, size_t size>
void enqueueHandler(Surface *(&surfacesForResidency)[size],
bool blocking,
const MultiDispatchInfo &dispatchInfo,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event) {
enqueueHandler<enqueueType>(surfacesForResidency, size, blocking, dispatchInfo, numEventsInWaitList, eventWaitList, event);
cl_int enqueueHandler(Surface *(&surfacesForResidency)[size],
bool blocking,
const MultiDispatchInfo &dispatchInfo,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event) {
return enqueueHandler<enqueueType>(surfacesForResidency, size, blocking, dispatchInfo, numEventsInWaitList, eventWaitList, event);
}
template <uint32_t enqueueType, size_t size>
void enqueueHandler(Surface *(&surfacesForResidency)[size],
bool blocking,
Kernel *kernel,
cl_uint workDim,
const size_t globalOffsets[3],
const size_t workItems[3],
const size_t *localWorkSizesIn,
const size_t *enqueuedWorkSizes,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event);
cl_int enqueueHandler(Surface *(&surfacesForResidency)[size],
bool blocking,
Kernel *kernel,
cl_uint workDim,
const size_t globalOffsets[3],
const size_t workItems[3],
const size_t *localWorkSizesIn,
const size_t *enqueuedWorkSizes,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event);
template <uint32_t cmdType, size_t surfaceCount>
void dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount], EBuiltInOps::Type builtInOperation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &csr);
cl_int dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount], EBuiltInOps::Type builtInOperation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &csr);
template <uint32_t cmdType>
void enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &bcsCsr);
cl_int enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &bcsCsr);
template <uint32_t commandType>
CompletionStamp enqueueNonBlocked(Surface **surfacesForResidency,

View File

@@ -91,13 +91,18 @@ cl_int CommandQueueHw<Family>::enqueueMarkerForReadWriteOperation(MemObj *memObj
MultiDispatchInfo multiDispatchInfo;
NullSurface s;
Surface *surfaces[] = {&s};
enqueueHandler<CL_COMMAND_MARKER>(
const auto enqueueResult = enqueueHandler<CL_COMMAND_MARKER>(
surfaces,
blocking == CL_TRUE,
multiDispatchInfo,
numEventsInWaitList,
eventWaitList,
event);
if (enqueueResult != CL_SUCCESS) {
return enqueueResult;
}
if (event) {
auto pEvent = castToObjectOrAbort<Event>(*event);
pEvent->setCmdType(commandType);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -24,12 +24,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBarrierWithWaitList(
cl_event *event) {
NullSurface s;
Surface *surfaces[] = {&s};
enqueueHandler<CL_COMMAND_BARRIER>(surfaces,
false,
MultiDispatchInfo(),
numEventsInWaitList,
eventWaitList,
event);
return CL_SUCCESS;
return enqueueHandler<CL_COMMAND_BARRIER>(surfaces,
false,
MultiDispatchInfo(),
numEventsInWaitList,
eventWaitList,
event);
}
} // namespace NEO

View File

@@ -8,6 +8,7 @@
#pragma once
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/command_stream/wait_status.h"
#include "shared/source/helpers/array_count.h"
#include "shared/source/helpers/engine_node_helper.h"
#include "shared/source/helpers/local_work_size.h"
@@ -48,17 +49,17 @@ namespace NEO {
template <typename GfxFamily>
template <uint32_t commandType, size_t surfaceCount>
void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount],
bool blocking,
Kernel *kernel,
cl_uint workDim,
const size_t globalOffsets[3],
const size_t workItems[3],
const size_t *localWorkSizesIn,
const size_t *enqueuedWorkSizes,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event) {
cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount],
bool blocking,
Kernel *kernel,
cl_uint workDim,
const size_t globalOffsets[3],
const size_t workItems[3],
const size_t *localWorkSizesIn,
const size_t *enqueuedWorkSizes,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event) {
BuiltInOwnershipWrapper builtInLock;
KernelObjsForAuxTranslation kernelObjsForAuxTranslation;
MultiDispatchInfo multiDispatchInfo(kernel);
@@ -92,7 +93,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
builder->buildDispatchInfos(multiDispatchInfo, kernel, workDim, workItems, enqueuedWorkSizes, globalOffsets);
if (multiDispatchInfo.size() == 0) {
return;
return CL_SUCCESS;
}
}
@@ -104,25 +105,29 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
setupBlitAuxTranslation(multiDispatchInfo);
}
enqueueHandler<commandType>(surfaces, blocking, multiDispatchInfo, numEventsInWaitList, eventWaitList, event);
return enqueueHandler<commandType>(surfaces, blocking, multiDispatchInfo, numEventsInWaitList, eventWaitList, event);
}
template <typename GfxFamily>
template <uint32_t commandType>
void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
size_t numSurfaceForResidency,
bool blocking,
const MultiDispatchInfo &multiDispatchInfo,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event) {
cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
size_t numSurfaceForResidency,
bool blocking,
const MultiDispatchInfo &multiDispatchInfo,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event) {
if (multiDispatchInfo.empty() && !isCommandWithoutKernel(commandType)) {
enqueueHandler<CL_COMMAND_MARKER>(nullptr, 0, blocking, multiDispatchInfo,
numEventsInWaitList, eventWaitList, event);
const auto enqueueResult = enqueueHandler<CL_COMMAND_MARKER>(nullptr, 0, blocking, multiDispatchInfo,
numEventsInWaitList, eventWaitList, event);
if (enqueueResult != CL_SUCCESS) {
return enqueueResult;
}
if (event) {
castToObjectOrAbort<Event>(*event)->setCmdType(commandType);
}
return;
return CL_SUCCESS;
}
TagNodeBase *hwTimeStamps = nullptr;
@@ -357,22 +362,35 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
queueOwnership.unlock();
if (blocking) {
auto waitStatus = WaitStatus::Ready;
auto &builtinOpParams = multiDispatchInfo.peekBuiltinOpParams();
if (builtinOpParams.userPtrForPostOperationCpuCopy) {
waitForAllEngines(blockQueue, (blockQueue ? nullptr : printfHandler.get()), false);
waitStatus = waitForAllEngines(blockQueue, (blockQueue ? nullptr : printfHandler.get()), false);
if (waitStatus == WaitStatus::GpuHang) {
return CL_OUT_OF_RESOURCES;
}
auto hostPtrAlloc = builtinOpParams.transferAllocation;
UNRECOVERABLE_IF(nullptr == hostPtrAlloc);
auto size = hostPtrAlloc->getUnderlyingBufferSize();
[[maybe_unused]] int cpuCopyStatus = memcpy_s(builtinOpParams.userPtrForPostOperationCpuCopy, size, hostPtrAlloc->getUnderlyingBuffer(), size);
DEBUG_BREAK_IF(cpuCopyStatus != 0);
waitForAllEngines(blockQueue, (blockQueue ? nullptr : printfHandler.get()), true);
waitStatus = waitForAllEngines(blockQueue, (blockQueue ? nullptr : printfHandler.get()), true);
} else {
waitForAllEngines(blockQueue, (blockQueue ? nullptr : printfHandler.get()), true);
waitStatus = waitForAllEngines(blockQueue, (blockQueue ? nullptr : printfHandler.get()), true);
}
if (waitStatus == WaitStatus::GpuHang) {
return CL_OUT_OF_RESOURCES;
}
}
if (migratedMemory) {
computeCommandStreamReceiver.flushBatchedSubmissions();
}
return CL_SUCCESS;
}
template <typename GfxFamily>
@@ -1067,7 +1085,7 @@ size_t CommandQueueHw<GfxFamily>::calculateHostPtrSizeForImage(const size_t *reg
template <typename GfxFamily>
template <uint32_t cmdType>
void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &bcsCsr) {
cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &bcsCsr) {
auto bcsCommandStreamReceiverOwnership = bcsCsr.obtainUniqueOwnership();
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
@@ -1159,17 +1177,22 @@ void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispat
bcsCommandStreamReceiverOwnership.unlock();
if (blocking) {
waitForAllEngines(blockQueue, nullptr);
const auto waitStatus = waitForAllEngines(blockQueue, nullptr);
if (waitStatus == WaitStatus::GpuHang) {
return CL_OUT_OF_RESOURCES;
}
}
return CL_SUCCESS;
}
template <typename GfxFamily>
template <uint32_t cmdType, size_t surfaceCount>
void CommandQueueHw<GfxFamily>::dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount], EBuiltInOps::Type builtInOperation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &csr) {
cl_int CommandQueueHw<GfxFamily>::dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount], EBuiltInOps::Type builtInOperation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &csr) {
const bool blit = EngineHelpers::isBcs(csr.getOsContext().getEngineType());
if (blit) {
enqueueBlit<cmdType>(dispatchInfo, numEventsInWaitList, eventWaitList, event, blocking, csr);
return enqueueBlit<cmdType>(dispatchInfo, numEventsInWaitList, eventWaitList, event, blocking, csr);
} else {
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInOperation,
this->getClDevice());
@@ -1177,7 +1200,7 @@ void CommandQueueHw<GfxFamily>::dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dis
builder.buildDispatchInfos(dispatchInfo);
enqueueHandler<cmdType>(
return enqueueHandler<cmdType>(
surfaces,
blocking,
dispatchInfo,

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -51,8 +51,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBuffer(
MemObjSurface s1(srcBuffer);
MemObjSurface s2(dstBuffer);
Surface *surfaces[] = {&s1, &s2};
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER>(dispatchInfo, surfaces, eBuiltInOpsType, numEventsInWaitList, eventWaitList, event, false, csr);
return CL_SUCCESS;
return dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER>(dispatchInfo, surfaces, eBuiltInOpsType, numEventsInWaitList, eventWaitList, event, false, csr);
}
} // namespace NEO

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -58,8 +58,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferRect(
dc.dstSlicePitch = dstSlicePitch;
MultiDispatchInfo dispatchInfo(dc);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, false, csr);
return CL_SUCCESS;
return dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, false, csr);
}
} // namespace NEO

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -57,14 +57,13 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferToImage(
MultiDispatchInfo dispatchInfo(dc);
builder.buildDispatchInfos(dispatchInfo);
enqueueHandler<CL_COMMAND_COPY_BUFFER_TO_IMAGE>(
return enqueueHandler<CL_COMMAND_COPY_BUFFER_TO_IMAGE>(
surfaces,
false,
dispatchInfo,
numEventsInWaitList,
eventWaitList,
event);
return CL_SUCCESS;
}
} // namespace NEO

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -55,8 +55,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyImage(
MultiDispatchInfo dispatchInfo(dc);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_IMAGE>(dispatchInfo, surfaces, EBuiltInOps::CopyImageToImage3d, numEventsInWaitList, eventWaitList, event, false, csr);
return CL_SUCCESS;
return dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_IMAGE>(dispatchInfo, surfaces, EBuiltInOps::CopyImageToImage3d, numEventsInWaitList, eventWaitList, event, false, csr);
}
} // namespace NEO

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -56,14 +56,13 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyImageToBuffer(
MultiDispatchInfo dispatchInfo(dc);
builder.buildDispatchInfos(dispatchInfo);
enqueueHandler<CL_COMMAND_COPY_IMAGE_TO_BUFFER>(
return enqueueHandler<CL_COMMAND_COPY_IMAGE_TO_BUFFER>(
surfaces,
false,
dispatchInfo,
numEventsInWaitList,
eventWaitList,
event);
return CL_SUCCESS;
}
} // namespace NEO

View File

@@ -81,7 +81,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueFillBuffer(
GeneralSurface s2(patternAllocation);
Surface *surfaces[] = {&s1, &s2};
enqueueHandler<CL_COMMAND_FILL_BUFFER>(
const auto enqueueResult = enqueueHandler<CL_COMMAND_FILL_BUFFER>(
surfaces,
false,
dispatchInfo,
@@ -92,6 +92,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueFillBuffer(
auto storageForAllocation = getGpgpuCommandStreamReceiver().getInternalAllocationStorage();
storageForAllocation->storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation>(patternAllocation), REUSABLE_ALLOCATION, taskCount);
return CL_SUCCESS;
return enqueueResult;
}
} // namespace NEO

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -47,14 +47,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueFillImage(
builder.buildDispatchInfos(di);
enqueueHandler<CL_COMMAND_FILL_IMAGE>(
return enqueueHandler<CL_COMMAND_FILL_IMAGE>(
surfaces,
false,
di,
numEventsInWaitList,
eventWaitList,
event);
return CL_SUCCESS;
}
} // namespace NEO

View File

@@ -135,7 +135,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueKernel(
return CL_INVALID_WORK_GROUP_SIZE;
}
enqueueHandler<CL_COMMAND_NDRANGE_KERNEL>(
return enqueueHandler<CL_COMMAND_NDRANGE_KERNEL>(
surfaces,
false,
&kernel,
@@ -147,7 +147,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueKernel(
numEventsInWaitList,
eventWaitList,
event);
return CL_SUCCESS;
}
} // namespace NEO

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -26,12 +26,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueMarkerWithWaitList(
NullSurface s;
Surface *surfaces[] = {&s};
enqueueHandler<CL_COMMAND_MARKER>(surfaces,
false,
MultiDispatchInfo(),
numEventsInWaitList,
eventWaitList,
event);
return CL_SUCCESS;
return enqueueHandler<CL_COMMAND_MARKER>(surfaces,
false,
MultiDispatchInfo(),
numEventsInWaitList,
eventWaitList,
event);
}
} // namespace NEO

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -26,13 +26,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueMigrateMemObjects(cl_uint numMemObjects
NullSurface s;
Surface *surfaces[] = {&s};
enqueueHandler<CL_COMMAND_MIGRATE_MEM_OBJECTS>(surfaces,
false,
MultiDispatchInfo(),
numEventsInWaitList,
eventWaitList,
event);
return CL_SUCCESS;
return enqueueHandler<CL_COMMAND_MIGRATE_MEM_OBJECTS>(surfaces,
false,
MultiDispatchInfo(),
numEventsInWaitList,
eventWaitList,
event);
}
} // namespace NEO

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -115,8 +115,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, ptr, size, MemoryConstants::pageSize, MemoryConstants::pageSize);
}
}
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, csr);
return CL_SUCCESS;
return dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, csr);
}
} // namespace NEO

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -102,7 +102,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
dc.dstSlicePitch = hostSlicePitch;
MultiDispatchInfo dispatchInfo(dc);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, csr);
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, csr);
if (dispatchResult != CL_SUCCESS) {
return dispatchResult;
}
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHintForMemoryTransfer(CL_COMMAND_READ_BUFFER_RECT, true, static_cast<cl_mem>(buffer), ptr);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -119,7 +119,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
auto eBuiltInOps = EBuiltInOps::CopyImage3dToBuffer;
MultiDispatchInfo dispatchInfo(dc);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead == CL_TRUE, csr);
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead == CL_TRUE, csr);
if (dispatchResult != CL_SUCCESS) {
return dispatchResult;
}
if (context->isProvidingPerformanceHints()) {
if (!isL3Capable(ptr, hostPtrSize)) {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2021 Intel Corporation
* Copyright (C) 2019-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -25,13 +25,13 @@ cl_int CommandQueueHw<GfxFamily>::enqueueResourceBarrier(BarrierCommand *resourc
const cl_event *eventWaitList,
cl_event *event) {
MultiDispatchInfo multiDispatch;
enqueueHandler<CL_COMMAND_RESOURCE_BARRIER>(resourceBarrier->surfacePtrs.begin(),
resourceBarrier->numSurfaces,
false,
multiDispatch,
numEventsInWaitList,
eventWaitList,
event);
return CL_SUCCESS;
return enqueueHandler<CL_COMMAND_RESOURCE_BARRIER>(resourceBarrier->surfacePtrs.begin(),
resourceBarrier->numSurfaces,
false,
multiDispatch,
numEventsInWaitList,
eventWaitList,
event);
}
} // namespace NEO

View File

@@ -84,26 +84,23 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMap(cl_bool blockingMap,
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_SVM_MAP_DOESNT_REQUIRE_COPY_DATA, svmPtr);
}
enqueueHandler<CL_COMMAND_SVM_MAP>(surfaces,
blocking,
MultiDispatchInfo(),
numEventsInWaitList,
eventWaitList,
event);
return CL_SUCCESS;
return enqueueHandler<CL_COMMAND_SVM_MAP>(surfaces,
blocking,
MultiDispatchInfo(),
numEventsInWaitList,
eventWaitList,
event);
} else {
auto svmOperation = context->getSVMAllocsManager()->getSvmMapOperation(svmPtr);
if (svmOperation) {
NullSurface s;
Surface *surfaces[] = {&s};
enqueueHandler<CL_COMMAND_SVM_MAP>(surfaces,
blocking,
MultiDispatchInfo(),
numEventsInWaitList,
eventWaitList,
event);
return CL_SUCCESS;
return enqueueHandler<CL_COMMAND_SVM_MAP>(surfaces,
blocking,
MultiDispatchInfo(),
numEventsInWaitList,
eventWaitList,
event);
}
CsrSelectionArgs csrSelectionArgs{CL_COMMAND_READ_BUFFER, &svmData->gpuAllocations, {}, device->getRootDeviceIndex(), &size};
@@ -128,7 +125,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMap(cl_bool blockingMap,
dc.unifiedMemoryArgsRequireMemSync = externalAppCall;
MultiDispatchInfo dispatchInfo(dc);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, blocking, csr);
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, blocking, csr);
if (dispatchResult != CL_SUCCESS) {
return dispatchResult;
}
if (event) {
castToObjectOrAbort<Event>(*event)->setCmdType(CL_COMMAND_SVM_MAP);
@@ -156,38 +156,36 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMUnmap(void *svmPtr,
if (svmData->gpuAllocations.getAllocationType() == AllocationType::SVM_ZERO_COPY) {
NullSurface s;
Surface *surfaces[] = {&s};
enqueueHandler<CL_COMMAND_SVM_UNMAP>(surfaces,
false,
MultiDispatchInfo(),
numEventsInWaitList,
eventWaitList,
event);
return CL_SUCCESS;
return enqueueHandler<CL_COMMAND_SVM_UNMAP>(surfaces,
false,
MultiDispatchInfo(),
numEventsInWaitList,
eventWaitList,
event);
} else {
auto svmOperation = context->getSVMAllocsManager()->getSvmMapOperation(svmPtr);
if (!svmOperation) {
NullSurface s;
Surface *surfaces[] = {&s};
enqueueHandler<CL_COMMAND_SVM_UNMAP>(surfaces,
false,
MultiDispatchInfo(),
numEventsInWaitList,
eventWaitList,
event);
return CL_SUCCESS;
return enqueueHandler<CL_COMMAND_SVM_UNMAP>(surfaces,
false,
MultiDispatchInfo(),
numEventsInWaitList,
eventWaitList,
event);
}
if (svmOperation->readOnlyMap) {
NullSurface s;
Surface *surfaces[] = {&s};
enqueueHandler<CL_COMMAND_SVM_UNMAP>(surfaces,
false,
MultiDispatchInfo(),
numEventsInWaitList,
eventWaitList,
event);
const auto enqueueResult = enqueueHandler<CL_COMMAND_SVM_UNMAP>(surfaces,
false,
MultiDispatchInfo(),
numEventsInWaitList,
eventWaitList,
event);
context->getSVMAllocsManager()->removeSvmMapOperation(svmPtr);
return CL_SUCCESS;
return enqueueResult;
}
CsrSelectionArgs csrSelectionArgs{CL_COMMAND_READ_BUFFER, {}, &svmData->gpuAllocations, device->getRootDeviceIndex(), &svmOperation->regionSize};
@@ -213,7 +211,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMUnmap(void *svmPtr,
dc.unifiedMemoryArgsRequireMemSync = externalAppCall;
MultiDispatchInfo dispatchInfo(dc);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, false, csr);
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, false, csr);
if (dispatchResult != CL_SUCCESS) {
return dispatchResult;
}
if (event) {
castToObjectOrAbort<Event>(*event)->setCmdType(CL_COMMAND_SVM_UNMAP);
@@ -249,12 +250,22 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMFree(cl_uint numSvmPointers,
NullSurface s;
Surface *surfaces[] = {&s};
enqueueHandler<CL_COMMAND_SVM_FREE>(surfaces,
false,
MultiDispatchInfo(),
numEventsInWaitList,
eventWaitList,
retEvent);
const auto enqueueResult = enqueueHandler<CL_COMMAND_SVM_FREE>(surfaces,
false,
MultiDispatchInfo(),
numEventsInWaitList,
eventWaitList,
retEvent);
if (enqueueResult != CL_SUCCESS) {
delete pFreeData;
if (ownsEventDeletion) {
castToObjectOrAbort<Event>(*retEvent)->release();
retEvent = nullptr;
}
return enqueueResult;
}
auto eventObject = castToObjectOrAbort<Event>(*retEvent);
eventObject->addCallback(freeSvmEventClb, CL_COMPLETE, pFreeData);
@@ -346,6 +357,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
BuiltinOpParams operationParams;
Surface *surfaces[2];
cl_command_type cmdType;
cl_int dispatchResult = CL_SUCCESS;
if (copyType == SvmToHost) {
CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, srcAllocation, {}, device->getRootDeviceIndex(), &size};
@@ -366,8 +378,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
surfaces[1] = &dstHostPtrSurf;
dispatchInfo.setBuiltinOpParams(operationParams);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
} else if (copyType == HostToSvm) {
CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, {}, dstAllocation, device->getRootDeviceIndex(), &size};
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
@@ -387,8 +398,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
surfaces[1] = &srcHostPtrSurf;
dispatchInfo.setBuiltinOpParams(operationParams);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
} else if (copyType == SvmToSvm) {
CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, srcAllocation, dstAllocation, device->getRootDeviceIndex(), &size};
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
@@ -400,8 +410,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
surfaces[1] = &dstSvmSurf;
dispatchInfo.setBuiltinOpParams(operationParams);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_SVM_MEMCPY>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_SVM_MEMCPY>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
} else {
CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, &size};
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
@@ -423,13 +432,14 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
surfaces[1] = &dstHostPtrSurf;
dispatchInfo.setBuiltinOpParams(operationParams);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
}
if (event) {
auto pEvent = castToObjectOrAbort<Event>(*event);
pEvent->setCmdType(CL_COMMAND_SVM_MEMCPY);
}
return CL_SUCCESS;
return dispatchResult;
}
template <typename GfxFamily>
@@ -508,7 +518,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemFill(void *svmPtr,
GeneralSurface s2(patternAllocation);
Surface *surfaces[] = {&s1, &s2};
enqueueHandler<CL_COMMAND_SVM_MEMFILL>(
const auto enqueueResult = enqueueHandler<CL_COMMAND_SVM_MEMFILL>(
surfaces,
false,
dispatchInfo,
@@ -518,7 +528,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemFill(void *svmPtr,
storageWithAllocations->storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation>(patternAllocation), REUSABLE_ALLOCATION, taskCount);
return CL_SUCCESS;
return enqueueResult;
}
template <typename GfxFamily>
@@ -532,13 +542,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMigrateMem(cl_uint numSvmPointers,
NullSurface s;
Surface *surfaces[] = {&s};
enqueueHandler<CL_COMMAND_SVM_MIGRATE_MEM>(surfaces,
false,
MultiDispatchInfo(),
numEventsInWaitList,
eventWaitList,
event);
return CL_SUCCESS;
return enqueueHandler<CL_COMMAND_SVM_MIGRATE_MEM>(surfaces,
false,
MultiDispatchInfo(),
numEventsInWaitList,
eventWaitList,
event);
}
} // namespace NEO

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -100,7 +100,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
dc.transferAllocation = mapAllocation ? mapAllocation : hostPtrSurf.getAllocation();
MultiDispatchInfo dispatchInfo(dc);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
if (dispatchResult != CL_SUCCESS) {
return dispatchResult;
}
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(buffer));

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -102,7 +102,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
dc.dstSlicePitch = bufferSlicePitch;
MultiDispatchInfo dispatchInfo(dc);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
if (dispatchResult != CL_SUCCESS) {
return dispatchResult;
}
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA, static_cast<cl_mem>(buffer));

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -96,7 +96,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
auto eBuiltInOps = EBuiltInOps::CopyBufferToImage3d;
MultiDispatchInfo dispatchInfo(dc);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite == CL_TRUE, csr);
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite == CL_TRUE, csr);
if (dispatchResult != CL_SUCCESS) {
return dispatchResult;
}
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA, static_cast<cl_mem>(dstImage));