Add selectCsrForBuiltinOperation method to OpenCL CommandQueue

Signed-off-by: Maciej Dziuban <maciej.dziuban@intel.com>
Related-To: NEO-6057
This commit is contained in:
Maciej Dziuban
2021-09-06 17:04:14 +00:00
committed by Compute-Runtime-Automation
parent 03ee6bc2dd
commit 858727010f
22 changed files with 552 additions and 197 deletions

View File

@@ -12,6 +12,7 @@ set(RUNTIME_SRCS_COMMAND_QUEUE
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_base.inl
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_bdw_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_bdw_and_later.inl
${CMAKE_CURRENT_SOURCE_DIR}/cpu_data_transfer_handler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cpu_data_transfer_handler.cpp
${CMAKE_CURRENT_SOURCE_DIR}/csr_selection_args.h
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_barrier.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_barrier.h
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_common.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_common.h
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_buffer.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_buffer.h

View File

@@ -153,6 +153,19 @@ CommandStreamReceiver &CommandQueue::getCommandStreamReceiver(bool blitAllowed)
return getGpgpuCommandStreamReceiver(); return getGpgpuCommandStreamReceiver();
} }
// Picks the command stream receiver that should execute a builtin transfer
// described by `args`.
//
// The blitter (BCS) receiver is returned only when the blit path is allowed for
// this operation AND it is either preferred for this transfer or the queue is
// copy-only. In every other case the GPGPU receiver is returned.
CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(const CsrSelectionArgs &args) const {
    // Both predicates are evaluated unconditionally, exactly once each.
    const bool allowed = blitEnqueueAllowed(args);
    const bool preferred = blitEnqueuePreferred(args);

    // Negation of: allowed && (preferred || isCopyOnly)
    const bool useGpgpu = !allowed || (!preferred && !isCopyOnly);
    if (useGpgpu) {
        return getGpgpuCommandStreamReceiver();
    }

    // bcsEngine is dereferenced only after blitEnqueueAllowed(args) returned true.
    return *bcsEngine->commandStreamReceiver;
}
Device &CommandQueue::getDevice() const noexcept { Device &CommandQueue::getDevice() const noexcept {
return device->getDevice(); return device->getDevice();
} }
@@ -725,15 +738,20 @@ bool CommandQueue::queueDependenciesClearRequired() const {
return isOOQEnabled() || DebugManager.flags.OmitTimestampPacketDependencies.get(); return isOOQEnabled() || DebugManager.flags.OmitTimestampPacketDependencies.get();
} }
bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType) const { bool CommandQueue::blitEnqueueAllowed(const CsrSelectionArgs &args) const {
auto blitterSupported = bcsEngine != nullptr; if (bcsEngine == nullptr) {
return false;
}
bool blitEnqueueAllowed = getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled() || this->isCopyOnly; bool blitEnqueueAllowed = getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled() || this->isCopyOnly;
if (DebugManager.flags.EnableBlitterForEnqueueOperations.get() != -1) { if (DebugManager.flags.EnableBlitterForEnqueueOperations.get() != -1) {
blitEnqueueAllowed = DebugManager.flags.EnableBlitterForEnqueueOperations.get(); blitEnqueueAllowed = DebugManager.flags.EnableBlitterForEnqueueOperations.get();
} }
if (!blitEnqueueAllowed) {
return false;
}
switch (cmdType) { switch (args.cmdType) {
case CL_COMMAND_READ_BUFFER: case CL_COMMAND_READ_BUFFER:
case CL_COMMAND_WRITE_BUFFER: case CL_COMMAND_WRITE_BUFFER:
case CL_COMMAND_COPY_BUFFER: case CL_COMMAND_COPY_BUFFER:
@@ -741,30 +759,25 @@ bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType) const {
case CL_COMMAND_WRITE_BUFFER_RECT: case CL_COMMAND_WRITE_BUFFER_RECT:
case CL_COMMAND_COPY_BUFFER_RECT: case CL_COMMAND_COPY_BUFFER_RECT:
case CL_COMMAND_SVM_MEMCPY: case CL_COMMAND_SVM_MEMCPY:
case CL_COMMAND_SVM_MAP:
case CL_COMMAND_SVM_UNMAP:
return true;
case CL_COMMAND_READ_IMAGE: case CL_COMMAND_READ_IMAGE:
return blitEnqueueImageAllowed(args.srcResource.imageOrigin, args.size, *args.srcResource.image);
case CL_COMMAND_WRITE_IMAGE: case CL_COMMAND_WRITE_IMAGE:
return blitEnqueueImageAllowed(args.dstResource.imageOrigin, args.size, *args.dstResource.image);
case CL_COMMAND_COPY_IMAGE: case CL_COMMAND_COPY_IMAGE:
return blitterSupported && blitEnqueueAllowed; return blitEnqueueImageAllowed(args.srcResource.imageOrigin, args.size, *args.srcResource.image) &&
blitEnqueueImageAllowed(args.dstResource.imageOrigin, args.size, *args.dstResource.image);
default: default:
return false; return false;
} }
} }
bool CommandQueue::blitEnqueuePreferred(cl_command_type cmdType, const BuiltinOpParams &builtinOpParams) const { bool CommandQueue::blitEnqueuePreferred(const CsrSelectionArgs &args) const {
bool isLocalToLocal = false; if (args.direction == TransferDirection::LocalToLocal) {
if (cmdType == CL_COMMAND_COPY_BUFFER &&
builtinOpParams.srcMemObj->getGraphicsAllocation(device->getRootDeviceIndex())->isAllocatedInLocalMemoryPool() &&
builtinOpParams.dstMemObj->getGraphicsAllocation(device->getRootDeviceIndex())->isAllocatedInLocalMemoryPool()) {
isLocalToLocal = true;
}
if (cmdType == CL_COMMAND_SVM_MEMCPY &&
builtinOpParams.srcSvmAlloc->isAllocatedInLocalMemoryPool() &&
builtinOpParams.dstSvmAlloc->isAllocatedInLocalMemoryPool()) {
isLocalToLocal = true;
}
if (isLocalToLocal) {
if (DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.get() != -1) { if (DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.get() != -1) {
return static_cast<bool>(DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.get()); return static_cast<bool>(DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.get());
} }
@@ -775,7 +788,7 @@ bool CommandQueue::blitEnqueuePreferred(cl_command_type cmdType, const BuiltinOp
return true; return true;
} }
bool CommandQueue::blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) { bool CommandQueue::blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) const {
const auto &hwInfo = device->getHardwareInfo(); const auto &hwInfo = device->getHardwareInfo();
const auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); const auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
auto blitEnqueueImageAllowed = hwHelper.isBlitterForImagesSupported(hwInfo); auto blitEnqueueImageAllowed = hwHelper.isBlitterForImagesSupported(hwInfo);

View File

@@ -8,6 +8,7 @@
#pragma once #pragma once
#include "shared/source/helpers/engine_control.h" #include "shared/source/helpers/engine_control.h"
#include "opencl/source/command_queue/csr_selection_args.h"
#include "opencl/source/event/event.h" #include "opencl/source/event/event.h"
#include "opencl/source/helpers/base_object.h" #include "opencl/source/helpers/base_object.h"
#include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/helpers/dispatch_info.h"
@@ -225,6 +226,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
CommandStreamReceiver *getBcsCommandStreamReceiver() const; CommandStreamReceiver *getBcsCommandStreamReceiver() const;
CommandStreamReceiver *getBcsForAuxTranslation() const; CommandStreamReceiver *getBcsForAuxTranslation() const;
MOCKABLE_VIRTUAL CommandStreamReceiver &getCommandStreamReceiver(bool blitAllowed) const; MOCKABLE_VIRTUAL CommandStreamReceiver &getCommandStreamReceiver(bool blitAllowed) const;
MOCKABLE_VIRTUAL CommandStreamReceiver &selectCsrForBuiltinOperation(const CsrSelectionArgs &args) const;
Device &getDevice() const noexcept; Device &getDevice() const noexcept;
ClDevice &getClDevice() const { return *device; } ClDevice &getClDevice() const { return *device; }
Context &getContext() const { return *context; } Context &getContext() const { return *context; }
@@ -353,9 +355,9 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
cl_uint numEventsInWaitList, const cl_event *eventWaitList); cl_uint numEventsInWaitList, const cl_event *eventWaitList);
void providePerformanceHint(TransferProperties &transferProperties); void providePerformanceHint(TransferProperties &transferProperties);
bool queueDependenciesClearRequired() const; bool queueDependenciesClearRequired() const;
bool blitEnqueueAllowed(cl_command_type cmdType) const; bool blitEnqueueAllowed(const CsrSelectionArgs &args) const;
bool blitEnqueuePreferred(cl_command_type cmdType, const BuiltinOpParams &builtinOpParams) const; bool blitEnqueuePreferred(const CsrSelectionArgs &args) const;
MOCKABLE_VIRTUAL bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image); MOCKABLE_VIRTUAL bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) const;
void aubCaptureHook(bool &blocking, bool &clearAllDependencies, const MultiDispatchInfo &multiDispatchInfo); void aubCaptureHook(bool &blocking, bool &clearAllDependencies, const MultiDispatchInfo &multiDispatchInfo);
virtual bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const = 0; virtual bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const = 0;
void waitForLatestTaskCount(); void waitForLatestTaskCount();

View File

@@ -365,10 +365,10 @@ class CommandQueueHw : public CommandQueue {
cl_event *event); cl_event *event);
template <uint32_t cmdType, size_t surfaceCount> template <uint32_t cmdType, size_t surfaceCount>
void dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount], EBuiltInOps::Type builtInOperation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, bool blitAllowed); void dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount], EBuiltInOps::Type builtInOperation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &csr);
template <uint32_t cmdType> template <uint32_t cmdType>
void enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking); void enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &bcsCsr);
template <uint32_t commandType> template <uint32_t commandType>
CompletionStamp enqueueNonBlocked(Surface **surfacesForResidency, CompletionStamp enqueueNonBlocked(Surface **surfacesForResidency,

View File

@@ -0,0 +1,97 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/memory_manager/multi_graphics_allocation.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/source/mem_obj/image.h"
#include "opencl/source/mem_obj/mem_obj.h"
namespace NEO {
// Direction of a memory transfer, expressed as <source>To<destination>.
// CsrSelectionArgs::createTransferDirection maps a (srcLocal, dstLocal) pair of
// flags onto these values: a "Local" side is one whose flag is true, a "Host"
// side is one whose flag is false.
enum class TransferDirection {
// source flag false, destination flag false
HostToHost,
// source flag false, destination flag true
HostToLocal,
// source flag true, destination flag false
LocalToHost,
// source flag true, destination flag true
LocalToLocal,
};
// Bundle of everything the command queue inspects when choosing a command
// stream receiver for a builtin transfer: the command type, the transfer size
// (or region), a description of the source and destination resources and the
// transfer direction derived from them.
struct CsrSelectionArgs {
    // Description of one side (source or destination) of the transfer.
    // A default-constructed Resource stands for a side with no resource given.
    struct Resource {
        bool isLocal = false;                           // allocation->isAllocatedInLocalMemoryPool()
        const GraphicsAllocation *allocation = nullptr; // allocation for the selected root device
        const Image *image = nullptr;                   // set only for image resources
        const size_t *imageOrigin = nullptr;            // set only by the image constructor
    };

    cl_command_type cmdType;
    const size_t *size = nullptr;
    Resource srcResource;
    Resource dstResource;
    TransferDirection direction;

    // Transfer with no resource on either side: both Resource members stay
    // default-constructed and the direction is HostToHost.
    CsrSelectionArgs(cl_command_type cmdType, const size_t *size)
        : cmdType(cmdType),
          size(size),
          direction(TransferDirection::HostToHost) {}

    // Transfer between two resources of the same type. Either pointer may be
    // null, in which case that side keeps its default (non-local) description.
    template <typename ResourceType>
    CsrSelectionArgs(cl_command_type cmdType, ResourceType *src, ResourceType *dst, uint32_t rootDeviceIndex, const size_t *size)
        : cmdType(cmdType),
          size(size) {
        if (src != nullptr) {
            processResource(*src, rootDeviceIndex, this->srcResource);
        }
        if (dst != nullptr) {
            processResource(*dst, rootDeviceIndex, this->dstResource);
        }
        this->direction = createTransferDirection(this->srcResource.isLocal, this->dstResource.isLocal);
    }

    // Image transfer: same as the generic constructor, plus the per-side image
    // origins. An origin is recorded only for a side whose image pointer is
    // non-null; otherwise it remains nullptr.
    CsrSelectionArgs(cl_command_type cmdType, Image *src, Image *dst, uint32_t rootDeviceIndex, const size_t *size, const size_t *srcOrigin, const size_t *dstOrigin)
        : CsrSelectionArgs(cmdType, src, dst, rootDeviceIndex, size) {
        srcResource.imageOrigin = (src != nullptr) ? srcOrigin : nullptr;
        dstResource.imageOrigin = (dst != nullptr) ? dstOrigin : nullptr;
    }

    // Fills the allocation and locality fields from the allocation that
    // belongs to rootDeviceIndex. The returned allocation is dereferenced
    // without a null check.
    static void processResource(const MultiGraphicsAllocation &multiGfxAlloc, uint32_t rootDeviceIndex, Resource &outResource) {
        const auto *gfxAllocation = multiGfxAlloc.getGraphicsAllocation(rootDeviceIndex);
        outResource.allocation = gfxAllocation;
        outResource.isLocal = gfxAllocation->isAllocatedInLocalMemoryPool();
    }

    // Buffers are described purely by their graphics allocation.
    static void processResource(const Buffer &buffer, uint32_t rootDeviceIndex, Resource &outResource) {
        processResource(buffer.getMultiGraphicsAllocation(), rootDeviceIndex, outResource);
    }

    // Images are described by their graphics allocation and additionally keep
    // a pointer to the image itself.
    static void processResource(const Image &image, uint32_t rootDeviceIndex, Resource &outResource) {
        processResource(image.getMultiGraphicsAllocation(), rootDeviceIndex, outResource);
        outResource.image = &image;
    }

    // Maps the locality flags of both sides onto a TransferDirection value.
    static inline TransferDirection createTransferDirection(bool srcLocal, bool dstLocal) {
        if (srcLocal && dstLocal) {
            return TransferDirection::LocalToLocal;
        }
        if (srcLocal) {
            return TransferDirection::LocalToHost;
        }
        if (dstLocal) {
            return TransferDirection::HostToLocal;
        }
        return TransferDirection::HostToHost;
    }
};
} // namespace NEO

View File

@@ -1162,9 +1162,8 @@ size_t CommandQueueHw<GfxFamily>::calculateHostPtrSizeForImage(const size_t *reg
template <typename GfxFamily> template <typename GfxFamily>
template <uint32_t cmdType> template <uint32_t cmdType>
void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking) { void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &bcsCsr) {
auto commandStreamRecieverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership(); auto commandStreamRecieverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership();
auto &bcsCsr = *getBcsCommandStreamReceiver();
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event); EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
EventBuilder eventBuilder; EventBuilder eventBuilder;
@@ -1251,13 +1250,11 @@ void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispat
template <typename GfxFamily> template <typename GfxFamily>
template <uint32_t cmdType, size_t surfaceCount> template <uint32_t cmdType, size_t surfaceCount>
void CommandQueueHw<GfxFamily>::dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount], EBuiltInOps::Type builtInOperation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, bool blitAllowed) { void CommandQueueHw<GfxFamily>::dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount], EBuiltInOps::Type builtInOperation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &csr) {
const bool blitPreferred = blitEnqueuePreferred(cmdType, dispatchInfo.peekBuiltinOpParams()); const bool blit = EngineHelpers::isBcs(csr.getOsContext().getEngineType());
const bool blitRequired = isCopyOnly;
const bool blit = blitAllowed && (blitPreferred || blitRequired);
if (blit) { if (blit) {
enqueueBlit<cmdType>(dispatchInfo, numEventsInWaitList, eventWaitList, event, blocking); enqueueBlit<cmdType>(dispatchInfo, numEventsInWaitList, eventWaitList, event, blocking, csr);
} else { } else {
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInOperation, auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInOperation,
this->getClDevice()); this->getClDevice());

View File

@@ -30,6 +30,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBuffer(
const cl_event *eventWaitList, const cl_event *eventWaitList,
cl_event *event) { cl_event *event) {
auto eBuiltInOpsType = EBuiltInOps::CopyBufferToBuffer; auto eBuiltInOpsType = EBuiltInOps::CopyBufferToBuffer;
constexpr cl_command_type cmdType = CL_COMMAND_COPY_BUFFER;
CsrSelectionArgs csrSelectionArgs{cmdType, srcBuffer, dstBuffer, device->getRootDeviceIndex(), &size};
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
if (forceStateless(std::max(srcBuffer->getSize(), dstBuffer->getSize()))) { if (forceStateless(std::max(srcBuffer->getSize(), dstBuffer->getSize()))) {
eBuiltInOpsType = EBuiltInOps::CopyBufferToBufferStateless; eBuiltInOpsType = EBuiltInOps::CopyBufferToBufferStateless;
@@ -47,8 +51,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBuffer(
MemObjSurface s1(srcBuffer); MemObjSurface s1(srcBuffer);
MemObjSurface s2(dstBuffer); MemObjSurface s2(dstBuffer);
Surface *surfaces[] = {&s1, &s2}; Surface *surfaces[] = {&s1, &s2};
auto blitAllowed = blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER); dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER>(dispatchInfo, surfaces, eBuiltInOpsType, numEventsInWaitList, eventWaitList, event, false, csr);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER>(dispatchInfo, surfaces, eBuiltInOpsType, numEventsInWaitList, eventWaitList, event, false, blitAllowed);
return CL_SUCCESS; return CL_SUCCESS;
} }

View File

@@ -33,6 +33,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferRect(
const cl_event *eventWaitList, const cl_event *eventWaitList,
cl_event *event) { cl_event *event) {
auto eBuiltInOps = EBuiltInOps::CopyBufferRect; auto eBuiltInOps = EBuiltInOps::CopyBufferRect;
constexpr cl_command_type cmdType = CL_COMMAND_COPY_BUFFER_RECT;
CsrSelectionArgs csrSelectionArgs{cmdType, srcBuffer, dstBuffer, device->getRootDeviceIndex(), region};
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
if (forceStateless(std::max(srcBuffer->getSize(), dstBuffer->getSize()))) { if (forceStateless(std::max(srcBuffer->getSize(), dstBuffer->getSize()))) {
eBuiltInOps = EBuiltInOps::CopyBufferRectStateless; eBuiltInOps = EBuiltInOps::CopyBufferRectStateless;
@@ -54,8 +58,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferRect(
dc.dstSlicePitch = dstSlicePitch; dc.dstSlicePitch = dstSlicePitch;
MultiDispatchInfo dispatchInfo(dc); MultiDispatchInfo dispatchInfo(dc);
auto blitAllowed = blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER_RECT); dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, false, csr);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, false, blitAllowed);
return CL_SUCCESS; return CL_SUCCESS;
} }

View File

@@ -31,6 +31,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyImage(
cl_uint numEventsInWaitList, cl_uint numEventsInWaitList,
const cl_event *eventWaitList, const cl_event *eventWaitList,
cl_event *event) { cl_event *event) {
constexpr cl_command_type cmdType = CL_COMMAND_COPY_IMAGE;
CsrSelectionArgs csrSelectionArgs{cmdType, srcImage, dstImage, device->getRootDeviceIndex(), region, srcOrigin, dstOrigin};
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
MemObjSurface srcImgSurf(srcImage); MemObjSurface srcImgSurf(srcImage);
MemObjSurface dstImgSurf(dstImage); MemObjSurface dstImgSurf(dstImage);
@@ -50,10 +54,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyImage(
} }
MultiDispatchInfo dispatchInfo(dc); MultiDispatchInfo dispatchInfo(dc);
cl_command_type cmdType = CL_COMMAND_COPY_IMAGE;
auto blitAllowed = blitEnqueueAllowed(cmdType) && blitEnqueueImageAllowed(srcOrigin, region, *srcImage) && blitEnqueueImageAllowed(dstOrigin, region, *dstImage);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_IMAGE>(dispatchInfo, surfaces, EBuiltInOps::CopyImageToImage3d, numEventsInWaitList, eventWaitList, event, false, blitAllowed); dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_IMAGE>(dispatchInfo, surfaces, EBuiltInOps::CopyImageToImage3d, numEventsInWaitList, eventWaitList, event, false, csr);
return CL_SUCCESS; return CL_SUCCESS;
} }

View File

@@ -36,8 +36,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
cl_event *event) { cl_event *event) {
const cl_command_type cmdType = CL_COMMAND_READ_BUFFER; const cl_command_type cmdType = CL_COMMAND_READ_BUFFER;
auto blitAllowed = blitEnqueueAllowed(cmdType);
auto &csr = getCommandStreamReceiver(blitAllowed); CsrSelectionArgs csrSelectionArgs{cmdType, buffer, {}, device->getRootDeviceIndex(), &size};
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
if (nullptr == mapAllocation) { if (nullptr == mapAllocation) {
notifyEnqueueReadBuffer(buffer, !!blockingRead, EngineHelpers::isBcs(csr.getOsContext().getEngineType())); notifyEnqueueReadBuffer(buffer, !!blockingRead, EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
@@ -128,7 +129,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, ptr, size, MemoryConstants::pageSize, MemoryConstants::pageSize); context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, ptr, size, MemoryConstants::pageSize, MemoryConstants::pageSize);
} }
} }
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, blitAllowed); dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, csr);
return CL_SUCCESS; return CL_SUCCESS;
} }

View File

@@ -35,8 +35,11 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
const cl_event *eventWaitList, const cl_event *eventWaitList,
cl_event *event) { cl_event *event) {
const cl_command_type cmdType = CL_COMMAND_READ_BUFFER_RECT; const cl_command_type cmdType = CL_COMMAND_READ_BUFFER_RECT;
auto isMemTransferNeeded = true;
CsrSelectionArgs csrSelectionArgs{cmdType, buffer, {}, device->getRootDeviceIndex(), region};
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
auto isMemTransferNeeded = true;
if (buffer->isMemObjZeroCopy()) { if (buffer->isMemObjZeroCopy()) {
size_t bufferOffset; size_t bufferOffset;
size_t hostOffset; size_t hostOffset;
@@ -59,12 +62,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
MemObjSurface bufferSurf(buffer); MemObjSurface bufferSurf(buffer);
HostPtrSurface hostPtrSurf(dstPtr, hostPtrSize); HostPtrSurface hostPtrSurf(dstPtr, hostPtrSize);
Surface *surfaces[] = {&bufferSurf, &hostPtrSurf}; Surface *surfaces[] = {&bufferSurf, &hostPtrSurf};
auto blitAllowed = blitEnqueueAllowed(cmdType);
if (region[0] != 0 && if (region[0] != 0 &&
region[1] != 0 && region[1] != 0 &&
region[2] != 0) { region[2] != 0) {
auto &csr = getCommandStreamReceiver(blitAllowed);
bool status = csr.createAllocationForHostSurface(hostPtrSurf, true); bool status = csr.createAllocationForHostSurface(hostPtrSurf, true);
if (!status) { if (!status) {
return CL_OUT_OF_RESOURCES; return CL_OUT_OF_RESOURCES;
@@ -89,7 +90,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
dc.dstSlicePitch = hostSlicePitch; dc.dstSlicePitch = hostSlicePitch;
MultiDispatchInfo dispatchInfo(dc); MultiDispatchInfo dispatchInfo(dc);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, blitAllowed); dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, csr);
if (context->isProvidingPerformanceHints()) { if (context->isProvidingPerformanceHints()) {
context->providePerformanceHintForMemoryTransfer(CL_COMMAND_READ_BUFFER_RECT, true, static_cast<cl_mem>(buffer), ptr); context->providePerformanceHintForMemoryTransfer(CL_COMMAND_READ_BUFFER_RECT, true, static_cast<cl_mem>(buffer), ptr);

View File

@@ -40,9 +40,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
cl_uint numEventsInWaitList, cl_uint numEventsInWaitList,
const cl_event *eventWaitList, const cl_event *eventWaitList,
cl_event *event) { cl_event *event) {
cl_command_type cmdType = CL_COMMAND_READ_IMAGE; constexpr cl_command_type cmdType = CL_COMMAND_READ_IMAGE;
auto blitAllowed = blitEnqueueAllowed(cmdType) && blitEnqueueImageAllowed(origin, region, *srcImage);
auto &csr = getCommandStreamReceiver(blitAllowed); CsrSelectionArgs csrSelectionArgs{cmdType, srcImage, {}, device->getRootDeviceIndex(), region, origin, nullptr};
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
if (nullptr == mapAllocation) { if (nullptr == mapAllocation) {
notifyEnqueueReadImage(srcImage, static_cast<bool>(blockingRead), EngineHelpers::isBcs(csr.getOsContext().getEngineType())); notifyEnqueueReadImage(srcImage, static_cast<bool>(blockingRead), EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
@@ -105,7 +106,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
auto eBuiltInOps = EBuiltInOps::CopyImage3dToBuffer; auto eBuiltInOps = EBuiltInOps::CopyImage3dToBuffer;
MultiDispatchInfo dispatchInfo(dc); MultiDispatchInfo dispatchInfo(dc);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead == CL_TRUE, blitAllowed); dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead == CL_TRUE, csr);
if (context->isProvidingPerformanceHints()) { if (context->isProvidingPerformanceHints()) {
if (!isL3Capable(ptr, hostPtrSize)) { if (!isL3Capable(ptr, hostPtrSize)) {

View File

@@ -106,8 +106,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMap(cl_bool blockingMap,
return CL_SUCCESS; return CL_SUCCESS;
} }
auto gpuAllocation = svmData->gpuAllocations.getGraphicsAllocation(getDevice().getRootDeviceIndex()); CsrSelectionArgs csrSelectionArgs{CL_COMMAND_READ_BUFFER, &svmData->gpuAllocations, {}, device->getRootDeviceIndex(), &size};
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
auto gpuAllocation = svmData->gpuAllocations.getGraphicsAllocation(getDevice().getRootDeviceIndex());
GeneralSurface dstSurface(svmData->cpuAllocation); GeneralSurface dstSurface(svmData->cpuAllocation);
GeneralSurface srcSurface(gpuAllocation); GeneralSurface srcSurface(gpuAllocation);
@@ -126,8 +128,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMap(cl_bool blockingMap,
dc.unifiedMemoryArgsRequireMemSync = externalAppCall; dc.unifiedMemoryArgsRequireMemSync = externalAppCall;
MultiDispatchInfo dispatchInfo(dc); MultiDispatchInfo dispatchInfo(dc);
auto blitAllowed = blitEnqueueAllowed(CL_COMMAND_READ_BUFFER); dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, blocking, csr);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, blocking, blitAllowed);
if (event) { if (event) {
castToObjectOrAbort<Event>(*event)->setCmdType(CL_COMMAND_SVM_MAP); castToObjectOrAbort<Event>(*event)->setCmdType(CL_COMMAND_SVM_MAP);
@@ -189,8 +190,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMUnmap(void *svmPtr,
return CL_SUCCESS; return CL_SUCCESS;
} }
auto gpuAllocation = svmData->gpuAllocations.getGraphicsAllocation(getDevice().getRootDeviceIndex()); CsrSelectionArgs csrSelectionArgs{CL_COMMAND_READ_BUFFER, {}, &svmData->gpuAllocations, device->getRootDeviceIndex(), &svmOperation->regionSize};
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
auto gpuAllocation = svmData->gpuAllocations.getGraphicsAllocation(getDevice().getRootDeviceIndex());
gpuAllocation->setAubWritable(true, GraphicsAllocation::defaultBank); gpuAllocation->setAubWritable(true, GraphicsAllocation::defaultBank);
gpuAllocation->setTbxWritable(true, GraphicsAllocation::defaultBank); gpuAllocation->setTbxWritable(true, GraphicsAllocation::defaultBank);
@@ -210,8 +213,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMUnmap(void *svmPtr,
dc.unifiedMemoryArgsRequireMemSync = externalAppCall; dc.unifiedMemoryArgsRequireMemSync = externalAppCall;
MultiDispatchInfo dispatchInfo(dc); MultiDispatchInfo dispatchInfo(dc);
auto blitAllowed = blitEnqueueAllowed(CL_COMMAND_READ_BUFFER); dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, false, csr);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, false, blitAllowed);
if (event) { if (event) {
castToObjectOrAbort<Event>(*event)->setCmdType(CL_COMMAND_SVM_UNMAP); castToObjectOrAbort<Event>(*event)->setCmdType(CL_COMMAND_SVM_UNMAP);
@@ -328,12 +330,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
cl_command_type cmdType; cl_command_type cmdType;
if (copyType == SvmToHost) { if (copyType == SvmToHost) {
CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, &srcSvmData->gpuAllocations, {}, device->getRootDeviceIndex(), &size};
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
GeneralSurface srcSvmSurf(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex)); GeneralSurface srcSvmSurf(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
HostPtrSurface dstHostPtrSurf(dstPtr, size); HostPtrSurface dstHostPtrSurf(dstPtr, size);
cmdType = CL_COMMAND_READ_BUFFER;
auto blitAllowed = blitEnqueueAllowed(cmdType);
if (size != 0) { if (size != 0) {
auto &csr = getCommandStreamReceiver(blitAllowed);
bool status = csr.createAllocationForHostSurface(dstHostPtrSurf, true); bool status = csr.createAllocationForHostSurface(dstHostPtrSurf, true);
if (!status) { if (!status) {
return CL_OUT_OF_RESOURCES; return CL_OUT_OF_RESOURCES;
@@ -346,15 +348,16 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
surfaces[1] = &dstHostPtrSurf; surfaces[1] = &dstHostPtrSurf;
dispatchInfo.setBuiltinOpParams(operationParams); dispatchInfo.setBuiltinOpParams(operationParams);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, blitAllowed); dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
} else if (copyType == HostToSvm) { } else if (copyType == HostToSvm) {
CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, {}, &dstSvmData->gpuAllocations, device->getRootDeviceIndex(), &size};
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcPtr), size); HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcPtr), size);
GeneralSurface dstSvmSurf(dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex)); GeneralSurface dstSvmSurf(dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
cmdType = CL_COMMAND_WRITE_BUFFER; cmdType = CL_COMMAND_WRITE_BUFFER;
auto blitAllowed = blitEnqueueAllowed(cmdType);
if (size != 0) { if (size != 0) {
auto &csr = getCommandStreamReceiver(blitAllowed);
bool status = csr.createAllocationForHostSurface(srcHostPtrSurf, false); bool status = csr.createAllocationForHostSurface(srcHostPtrSurf, false);
if (!status) { if (!status) {
return CL_OUT_OF_RESOURCES; return CL_OUT_OF_RESOURCES;
@@ -367,9 +370,13 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
surfaces[1] = &srcHostPtrSurf; surfaces[1] = &srcHostPtrSurf;
dispatchInfo.setBuiltinOpParams(operationParams); dispatchInfo.setBuiltinOpParams(operationParams);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, blitAllowed); dispatchInfo.setBuiltinOpParams(operationParams);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
} else if (copyType == SvmToSvm) { } else if (copyType == SvmToSvm) {
CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, &srcSvmData->gpuAllocations, &dstSvmData->gpuAllocations, device->getRootDeviceIndex(), &size};
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
GeneralSurface srcSvmSurf(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex)); GeneralSurface srcSvmSurf(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
GeneralSurface dstSvmSurf(dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex)); GeneralSurface dstSvmSurf(dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
setOperationParams(operationParams, size, srcPtr, srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex), setOperationParams(operationParams, size, srcPtr, srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex),
@@ -378,16 +385,16 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
surfaces[1] = &dstSvmSurf; surfaces[1] = &dstSvmSurf;
dispatchInfo.setBuiltinOpParams(operationParams); dispatchInfo.setBuiltinOpParams(operationParams);
auto blitAllowed = blitEnqueueAllowed(CL_COMMAND_SVM_MEMCPY); dispatchBcsOrGpgpuEnqueue<CL_COMMAND_SVM_MEMCPY>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_SVM_MEMCPY>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, blitAllowed);
} else { } else {
CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, &size};
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcPtr), size); HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcPtr), size);
HostPtrSurface dstHostPtrSurf(dstPtr, size); HostPtrSurface dstHostPtrSurf(dstPtr, size);
cmdType = CL_COMMAND_WRITE_BUFFER; cmdType = CL_COMMAND_WRITE_BUFFER;
auto blitAllowed = blitEnqueueAllowed(cmdType);
if (size != 0) { if (size != 0) {
auto &csr = getCommandStreamReceiver(blitAllowed);
bool status = csr.createAllocationForHostSurface(srcHostPtrSurf, false); bool status = csr.createAllocationForHostSurface(srcHostPtrSurf, false);
status &= csr.createAllocationForHostSurface(dstHostPtrSurf, true); status &= csr.createAllocationForHostSurface(dstHostPtrSurf, true);
if (!status) { if (!status) {
@@ -401,7 +408,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
surfaces[1] = &dstHostPtrSurf; surfaces[1] = &dstHostPtrSurf;
dispatchInfo.setBuiltinOpParams(operationParams); dispatchInfo.setBuiltinOpParams(operationParams);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, blitAllowed); dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
} }
if (event) { if (event) {
auto pEvent = castToObjectOrAbort<Event>(*event); auto pEvent = castToObjectOrAbort<Event>(*event);

View File

@@ -31,10 +31,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
cl_uint numEventsInWaitList, cl_uint numEventsInWaitList,
const cl_event *eventWaitList, const cl_event *eventWaitList,
cl_event *event) { cl_event *event) {
const cl_command_type cmdType = CL_COMMAND_WRITE_BUFFER;
CsrSelectionArgs csrSelectionArgs{cmdType, {}, buffer, device->getRootDeviceIndex(), &size};
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
auto rootDeviceIndex = getDevice().getRootDeviceIndex(); auto rootDeviceIndex = getDevice().getRootDeviceIndex();
const cl_command_type cmdType = CL_COMMAND_WRITE_BUFFER;
auto isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, cmdType) : true; auto isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, cmdType) : true;
bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, cmdType, blockingWrite, size, const_cast<void *>(ptr), bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, cmdType, blockingWrite, size, const_cast<void *>(ptr),
numEventsInWaitList, eventWaitList); numEventsInWaitList, eventWaitList);
@@ -82,7 +84,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
MemObjSurface bufferSurf(buffer); MemObjSurface bufferSurf(buffer);
GeneralSurface mapSurface; GeneralSurface mapSurface;
Surface *surfaces[] = {&bufferSurf, nullptr}; Surface *surfaces[] = {&bufferSurf, nullptr};
auto blitAllowed = blitEnqueueAllowed(cmdType);
if (mapAllocation) { if (mapAllocation) {
surfaces[1] = &mapSurface; surfaces[1] = &mapSurface;
@@ -95,8 +96,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
} else { } else {
surfaces[1] = &hostPtrSurf; surfaces[1] = &hostPtrSurf;
if (size != 0) { if (size != 0) {
auto &csr = getCommandStreamReceiver(blitAllowed);
bool status = csr.createAllocationForHostSurface(hostPtrSurf, false); bool status = csr.createAllocationForHostSurface(hostPtrSurf, false);
if (!status) { if (!status) {
return CL_OUT_OF_RESOURCES; return CL_OUT_OF_RESOURCES;
@@ -116,7 +115,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
dc.transferAllocation = mapAllocation ? mapAllocation : hostPtrSurf.getAllocation(); dc.transferAllocation = mapAllocation ? mapAllocation : hostPtrSurf.getAllocation();
MultiDispatchInfo dispatchInfo(dc); MultiDispatchInfo dispatchInfo(dc);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, blitAllowed); dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
if (context->isProvidingPerformanceHints()) { if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(buffer)); context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(buffer));

View File

@@ -34,6 +34,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
const cl_event *eventWaitList, const cl_event *eventWaitList,
cl_event *event) { cl_event *event) {
const cl_command_type cmdType = CL_COMMAND_WRITE_BUFFER_RECT; const cl_command_type cmdType = CL_COMMAND_WRITE_BUFFER_RECT;
CsrSelectionArgs csrSelectionArgs{cmdType, buffer, {}, device->getRootDeviceIndex(), region};
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
auto isMemTransferNeeded = true; auto isMemTransferNeeded = true;
if (buffer->isMemObjZeroCopy()) { if (buffer->isMemObjZeroCopy()) {
@@ -58,12 +62,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
MemObjSurface dstBufferSurf(buffer); MemObjSurface dstBufferSurf(buffer);
HostPtrSurface hostPtrSurf(srcPtr, hostPtrSize, true); HostPtrSurface hostPtrSurf(srcPtr, hostPtrSize, true);
Surface *surfaces[] = {&dstBufferSurf, &hostPtrSurf}; Surface *surfaces[] = {&dstBufferSurf, &hostPtrSurf};
auto blitAllowed = blitEnqueueAllowed(cmdType);
if (region[0] != 0 && if (region[0] != 0 &&
region[1] != 0 && region[1] != 0 &&
region[2] != 0) { region[2] != 0) {
auto &csr = getCommandStreamReceiver(blitAllowed);
bool status = csr.createAllocationForHostSurface(hostPtrSurf, false); bool status = csr.createAllocationForHostSurface(hostPtrSurf, false);
if (!status) { if (!status) {
return CL_OUT_OF_RESOURCES; return CL_OUT_OF_RESOURCES;
@@ -88,7 +90,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
dc.dstSlicePitch = bufferSlicePitch; dc.dstSlicePitch = bufferSlicePitch;
MultiDispatchInfo dispatchInfo(dc); MultiDispatchInfo dispatchInfo(dc);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, blitAllowed); dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
if (context->isProvidingPerformanceHints()) { if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA, static_cast<cl_mem>(buffer)); context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA, static_cast<cl_mem>(buffer));

View File

@@ -34,7 +34,11 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
cl_uint numEventsInWaitList, cl_uint numEventsInWaitList,
const cl_event *eventWaitList, const cl_event *eventWaitList,
cl_event *event) { cl_event *event) {
auto cmdType = CL_COMMAND_WRITE_IMAGE; constexpr cl_command_type cmdType = CL_COMMAND_WRITE_IMAGE;
CsrSelectionArgs csrSelectionArgs{cmdType, nullptr, dstImage, device->getRootDeviceIndex(), region, nullptr, origin};
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
auto isMemTransferNeeded = true; auto isMemTransferNeeded = true;
if (dstImage->isMemObjZeroCopy()) { if (dstImage->isMemObjZeroCopy()) {
@@ -54,7 +58,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
HostPtrSurface hostPtrSurf(srcPtr, hostPtrSize, true); HostPtrSurface hostPtrSurf(srcPtr, hostPtrSize, true);
GeneralSurface mapSurface; GeneralSurface mapSurface;
Surface *surfaces[] = {&dstImgSurf, nullptr}; Surface *surfaces[] = {&dstImgSurf, nullptr};
auto blitAllowed = blitEnqueueAllowed(cmdType) && blitEnqueueImageAllowed(origin, region, *dstImage);
if (mapAllocation) { if (mapAllocation) {
surfaces[1] = &mapSurface; surfaces[1] = &mapSurface;
mapSurface.setGraphicsAllocation(mapAllocation); mapSurface.setGraphicsAllocation(mapAllocation);
@@ -66,7 +69,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
if (region[0] != 0 && if (region[0] != 0 &&
region[1] != 0 && region[1] != 0 &&
region[2] != 0) { region[2] != 0) {
auto &csr = getCommandStreamReceiver(blitAllowed);
bool status = csr.createAllocationForHostSurface(hostPtrSurf, false); bool status = csr.createAllocationForHostSurface(hostPtrSurf, false);
if (!status) { if (!status) {
return CL_OUT_OF_RESOURCES; return CL_OUT_OF_RESOURCES;
@@ -94,7 +96,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
auto eBuiltInOps = EBuiltInOps::CopyBufferToImage3d; auto eBuiltInOps = EBuiltInOps::CopyBufferToImage3d;
MultiDispatchInfo dispatchInfo(dc); MultiDispatchInfo dispatchInfo(dc);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite == CL_TRUE, blitAllowed); dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite == CL_TRUE, csr);
if (context->isProvidingPerformanceHints()) { if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA, static_cast<cl_mem>(dstImage)); context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA, static_cast<cl_mem>(dstImage));

View File

@@ -10,6 +10,7 @@ set(IGDRCL_SRCS_tests_command_queue
${CMAKE_CURRENT_SOURCE_DIR}/buffer_operations_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/buffer_operations_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/csr_selection_args_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/dispatch_walker_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/dispatch_walker_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_barrier_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_barrier_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_command_without_kernel_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_command_without_kernel_tests.cpp

View File

@@ -230,23 +230,19 @@ TEST(CommandQueue, givenDeviceWhenCreatingCommandQueueThenPickCsrFromDefaultEngi
struct CommandQueueWithBlitOperationsTests : public ::testing::TestWithParam<uint32_t> {}; struct CommandQueueWithBlitOperationsTests : public ::testing::TestWithParam<uint32_t> {};
TEST_P(CommandQueueWithBlitOperationsTests, givenDeviceNotSupportingBlitOperationsWhenQueueIsCreatedThenDontRegisterBcsCsr) { TEST(CommandQueue, givenDeviceNotSupportingBlitOperationsWhenQueueIsCreatedThenDontRegisterBcsCsr) {
HardwareInfo hwInfo = *defaultHwInfo; HardwareInfo hwInfo = *defaultHwInfo;
hwInfo.capabilityTable.blitterOperationsSupported = false; hwInfo.capabilityTable.blitterOperationsSupported = false;
auto mockDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo)); auto mockDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
MockCommandQueue cmdQ(nullptr, mockDevice.get(), 0, false); MockCommandQueue cmdQ(nullptr, mockDevice.get(), 0, false);
auto cmdType = GetParam();
EXPECT_EQ(nullptr, cmdQ.getBcsCommandStreamReceiver()); EXPECT_EQ(nullptr, cmdQ.getBcsCommandStreamReceiver());
auto defaultCsr = mockDevice->getDefaultEngine().commandStreamReceiver; auto defaultCsr = mockDevice->getDefaultEngine().commandStreamReceiver;
EXPECT_EQ(defaultCsr, &cmdQ.getGpgpuCommandStreamReceiver()); EXPECT_EQ(defaultCsr, &cmdQ.getGpgpuCommandStreamReceiver());
auto blitAllowed = cmdQ.blitEnqueueAllowed(cmdType);
EXPECT_EQ(defaultCsr, &cmdQ.getCommandStreamReceiver(blitAllowed));
} }
HWTEST_P(CommandQueueWithBlitOperationsTests, givenDeviceWithSubDevicesSupportingBlitOperationsWhenQueueIsCreatedThenBcsIsTakenFromFirstSubDevice) { TEST(CommandQueue, givenDeviceWithSubDevicesSupportingBlitOperationsWhenQueueIsCreatedThenBcsIsTakenFromFirstSubDevice) {
DebugManagerStateRestore restorer; DebugManagerStateRestore restorer;
VariableBackup<bool> mockDeviceFlagBackup{&MockDevice::createSingleDevice, false}; VariableBackup<bool> mockDeviceFlagBackup{&MockDevice::createSingleDevice, false};
DebugManager.flags.CreateMultipleSubDevices.set(2); DebugManager.flags.CreateMultipleSubDevices.set(2);
@@ -263,13 +259,9 @@ HWTEST_P(CommandQueueWithBlitOperationsTests, givenDeviceWithSubDevicesSupportin
auto &bcsEngine = subDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular); auto &bcsEngine = subDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular);
MockCommandQueue cmdQ(nullptr, device.get(), 0, false); MockCommandQueue cmdQ(nullptr, device.get(), 0, false);
auto cmdType = GetParam();
auto blitAllowed = cmdQ.blitEnqueueAllowed(cmdType);
EXPECT_NE(nullptr, cmdQ.getBcsCommandStreamReceiver()); EXPECT_NE(nullptr, cmdQ.getBcsCommandStreamReceiver());
EXPECT_EQ(bcsEngine.commandStreamReceiver, cmdQ.getBcsCommandStreamReceiver()); EXPECT_EQ(bcsEngine.commandStreamReceiver, cmdQ.getBcsCommandStreamReceiver());
EXPECT_EQ(bcsEngine.commandStreamReceiver, &cmdQ.getCommandStreamReceiver(blitAllowed));
EXPECT_EQ(bcsEngine.osContext, &cmdQ.getCommandStreamReceiver(blitAllowed).getOsContext());
} }
INSTANTIATE_TEST_CASE_P(uint32_t, INSTANTIATE_TEST_CASE_P(uint32_t,
@@ -1163,15 +1155,21 @@ TEST(CommandQueue, givenCopyOnlyQueueWhenCallingBlitEnqueueAllowedThenReturnTrue
} }
hwInfo->capabilityTable.blitterOperationsSupported = false; hwInfo->capabilityTable.blitterOperationsSupported = false;
MultiGraphicsAllocation multiAlloc{1};
MockGraphicsAllocation alloc{};
multiAlloc.addAllocation(&alloc);
alloc.memoryPool = MemoryPool::System4KBPages;
CsrSelectionArgs selectionArgs{CL_COMMAND_READ_BUFFER, &multiAlloc, &multiAlloc, 0u, nullptr};
queue.isCopyOnly = false; queue.isCopyOnly = false;
EXPECT_EQ(queue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled(), EXPECT_EQ(queue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled(),
queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER)); queue.blitEnqueueAllowed(selectionArgs));
queue.isCopyOnly = true; queue.isCopyOnly = true;
EXPECT_TRUE(queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER)); EXPECT_TRUE(queue.blitEnqueueAllowed(selectionArgs));
} }
TEST(CommandQueue, givenClCommandWhenCallingBlitEnqueueAllowedThenReturnCorrectValue) { TEST(CommandQueue, givenSimpleClCommandWhenCallingBlitEnqueueAllowedThenReturnCorrectValue) {
MockContext context{}; MockContext context{};
MockCommandQueue queue(&context, context.getDevice(0), 0, false); MockCommandQueue queue(&context, context.getDevice(0), 0, false);
@@ -1179,34 +1177,72 @@ TEST(CommandQueue, givenClCommandWhenCallingBlitEnqueueAllowedThenReturnCorrectV
queue.bcsEngine = &context.getDevice(0)->getDefaultEngine(); queue.bcsEngine = &context.getDevice(0)->getDefaultEngine();
} }
bool supported = queue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled(); MultiGraphicsAllocation multiAlloc{1};
MockGraphicsAllocation alloc{};
multiAlloc.addAllocation(&alloc);
alloc.memoryPool = MemoryPool::System4KBPages;
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER)); for (cl_command_type cmdType : {CL_COMMAND_READ_BUFFER, CL_COMMAND_READ_BUFFER_RECT,
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_WRITE_BUFFER)); CL_COMMAND_WRITE_BUFFER, CL_COMMAND_WRITE_BUFFER_RECT,
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER)); CL_COMMAND_COPY_BUFFER, CL_COMMAND_COPY_BUFFER_RECT,
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER_RECT)); CL_COMMAND_SVM_MAP, CL_COMMAND_SVM_UNMAP,
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_WRITE_BUFFER_RECT)); CL_COMMAND_SVM_MEMCPY}) {
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER_RECT)); CsrSelectionArgs args{cmdType, &multiAlloc, &multiAlloc, 0u, nullptr};
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_SVM_MEMCPY));
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_READ_IMAGE)); bool expectedValue = queue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled();
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_WRITE_IMAGE)); if (cmdType == CL_COMMAND_COPY_IMAGE_TO_BUFFER) {
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE)); expectedValue = false;
EXPECT_FALSE(queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE_TO_BUFFER)); }
EXPECT_EQ(expectedValue, queue.blitEnqueueAllowed(args));
}
} }
TEST(CommandQueue, givenRegularClCommandWhenCallingBlitEnqueuePreferredThenReturnCorrectValue) { TEST(CommandQueue, givenImageTransferClCommandWhenCallingBlitEnqueueAllowedThenReturnCorrectValue) {
MockContext context{}; DebugManagerStateRestore restore{};
MockCommandQueue queue{context}; DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);
BuiltinOpParams builtinOpParams{}; DebugManager.flags.EnableBlitterForEnqueueImageOperations.set(1);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_READ_BUFFER, builtinOpParams)); MockContext context{};
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_WRITE_BUFFER, builtinOpParams)); MockCommandQueue queue(&context, context.getDevice(0), 0, false);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_READ_BUFFER_RECT, builtinOpParams)); if (!queue.bcsEngine) {
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_WRITE_BUFFER_RECT, builtinOpParams)); queue.bcsEngine = &context.getDevice(0)->getDefaultEngine();
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER_RECT, builtinOpParams)); }
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_READ_IMAGE, builtinOpParams));
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_WRITE_IMAGE, builtinOpParams)); MockImageBase image{};
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_IMAGE, builtinOpParams)); auto alloc = static_cast<MockGraphicsAllocation *>(image.getGraphicsAllocation(0));
alloc->memoryPool = MemoryPool::System4KBPages;
size_t origin[3] = {0, 0, 0};
size_t region[3] = {1, 1, 1};
{
CsrSelectionArgs args{CL_COMMAND_READ_IMAGE, &image, {}, 0u, region, origin, nullptr};
EXPECT_TRUE(queue.blitEnqueueAllowed(args));
}
{
CsrSelectionArgs args{CL_COMMAND_WRITE_IMAGE, {}, &image, 0u, region, nullptr, origin};
EXPECT_TRUE(queue.blitEnqueueAllowed(args));
}
{
CsrSelectionArgs args{CL_COMMAND_COPY_IMAGE, &image, &image, 0u, region, origin, origin};
EXPECT_TRUE(queue.blitEnqueueAllowed(args));
}
}
TEST(CommandQueue, givenImageToBufferClCommandWhenCallingBlitEnqueueAllowedThenReturnCorrectValue) {
MockContext context{};
MockCommandQueue queue(&context, context.getDevice(0), 0, false);
if (!queue.bcsEngine) {
queue.bcsEngine = &context.getDevice(0)->getDefaultEngine();
}
MultiGraphicsAllocation multiAlloc{1};
MockGraphicsAllocation alloc{};
multiAlloc.addAllocation(&alloc);
alloc.memoryPool = MemoryPool::System4KBPages;
CsrSelectionArgs args{CL_COMMAND_COPY_IMAGE_TO_BUFFER, &multiAlloc, &multiAlloc, 0u, nullptr};
EXPECT_FALSE(queue.blitEnqueueAllowed(args));
} }
TEST(CommandQueue, givenLocalToLocalCopyBufferCommandWhenCallingBlitEnqueuePreferredThenReturnValueBasedOnDebugFlagAndHwPreference) { TEST(CommandQueue, givenLocalToLocalCopyBufferCommandWhenCallingBlitEnqueuePreferredThenReturnValueBasedOnDebugFlagAndHwPreference) {
@@ -1221,15 +1257,18 @@ TEST(CommandQueue, givenLocalToLocalCopyBufferCommandWhenCallingBlitEnqueuePrefe
MockBuffer dstMemObj{dstGraphicsAllocation}; MockBuffer dstMemObj{dstGraphicsAllocation};
builtinOpParams.srcMemObj = &srcMemObj; builtinOpParams.srcMemObj = &srcMemObj;
builtinOpParams.dstMemObj = &dstMemObj; builtinOpParams.dstMemObj = &dstMemObj;
srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory;
dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory;
CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr};
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1);
EXPECT_EQ(preferBlitterHw, queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); EXPECT_EQ(preferBlitterHw, queue.blitEnqueuePreferred(args));
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0);
EXPECT_FALSE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); EXPECT_FALSE(queue.blitEnqueuePreferred(args));
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); EXPECT_TRUE(queue.blitEnqueuePreferred(args));
} }
TEST(CommandQueue, givenNotLocalToLocalCopyBufferCommandWhenCallingBlitEnqueuePreferredThenReturnTrueRegardlessOfDebugFlag) { TEST(CommandQueue, givenNotLocalToLocalCopyBufferCommandWhenCallingBlitEnqueuePreferredThenReturnTrueRegardlessOfDebugFlag) {
@@ -1244,73 +1283,41 @@ TEST(CommandQueue, givenNotLocalToLocalCopyBufferCommandWhenCallingBlitEnqueuePr
builtinOpParams.srcMemObj = &srcMemObj; builtinOpParams.srcMemObj = &srcMemObj;
builtinOpParams.dstMemObj = &dstMemObj; builtinOpParams.dstMemObj = &dstMemObj;
srcGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; {
dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; srcGraphicsAllocation.memoryPool = MemoryPool::System4KBPages;
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1); dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory;
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr};
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); EXPECT_TRUE(queue.blitEnqueuePreferred(args));
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); EXPECT_TRUE(queue.blitEnqueuePreferred(args));
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
EXPECT_TRUE(queue.blitEnqueuePreferred(args));
}
srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; {
dstGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory;
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1); dstGraphicsAllocation.memoryPool = MemoryPool::System4KBPages;
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr};
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); EXPECT_TRUE(queue.blitEnqueuePreferred(args));
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); EXPECT_TRUE(queue.blitEnqueuePreferred(args));
} DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
EXPECT_TRUE(queue.blitEnqueuePreferred(args));
}
TEST(CommandQueue, givenLocalToLocalSvmCopyCommandWhenCallingBlitEnqueuePreferredThenReturnValueBasedOnDebugFlagAndHwPreference) { {
const bool preferBlitterHw = ClHwHelper::get(::defaultHwInfo->platform.eRenderCoreFamily).preferBlitterForLocalToLocalTransfers(); srcGraphicsAllocation.memoryPool = MemoryPool::System4KBPages;
DebugManagerStateRestore restore{}; dstGraphicsAllocation.memoryPool = MemoryPool::System4KBPages;
MockContext context{}; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr};
MockCommandQueue queue{context}; DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1);
BuiltinOpParams builtinOpParams{}; EXPECT_TRUE(queue.blitEnqueuePreferred(args));
MockGraphicsAllocation srcSvmAlloc{}; DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0);
MockGraphicsAllocation dstSvmAlloc{}; EXPECT_TRUE(queue.blitEnqueuePreferred(args));
builtinOpParams.srcSvmAlloc = &srcSvmAlloc; DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
builtinOpParams.dstSvmAlloc = &dstSvmAlloc; EXPECT_TRUE(queue.blitEnqueuePreferred(args));
}
srcSvmAlloc.memoryPool = MemoryPool::LocalMemory;
dstSvmAlloc.memoryPool = MemoryPool::LocalMemory;
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1);
EXPECT_EQ(preferBlitterHw, queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams));
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0);
EXPECT_FALSE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams));
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams));
}
TEST(CommandQueue, givenNotLocalToLocalSvmCopyCommandWhenCallingBlitEnqueuePreferredThenReturnTrueRegardlessOfDebugFlag) {
DebugManagerStateRestore restore{};
MockContext context{};
MockCommandQueue queue{context};
BuiltinOpParams builtinOpParams{};
MockGraphicsAllocation srcSvmAlloc{};
MockGraphicsAllocation dstSvmAlloc{};
builtinOpParams.srcSvmAlloc = &srcSvmAlloc;
builtinOpParams.dstSvmAlloc = &dstSvmAlloc;
srcSvmAlloc.memoryPool = MemoryPool::System4KBPages;
dstSvmAlloc.memoryPool = MemoryPool::LocalMemory;
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams));
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams));
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams));
srcSvmAlloc.memoryPool = MemoryPool::LocalMemory;
dstSvmAlloc.memoryPool = MemoryPool::System4KBPages;
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams));
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams));
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams));
} }
TEST(CommandQueue, givenCopySizeAndOffsetWhenCallingBlitEnqueueImageAllowedThenReturnCorrectValue) { TEST(CommandQueue, givenCopySizeAndOffsetWhenCallingBlitEnqueueImageAllowedThenReturnCorrectValue) {

View File

@@ -0,0 +1,213 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "opencl/source/command_queue/csr_selection_args.h"
#include "opencl/test/unit_test/mocks/mock_buffer.h"
#include "opencl/test/unit_test/mocks/mock_image.h"
#include "test.h"
namespace NEO {
// Builds CsrSelectionArgs from buffer objects for write, read and copy commands
// and checks that the command type, the size pointer, the allocations taken from
// the buffers and the transfer direction expected for each memory pool setup are
// all stored in the resulting object.
TEST(CsrSelectionArgsTests, givenBuffersWhenCreatingCsrSelectionArgsThenSetupArgsCorrectly) {
    const uint32_t rootDeviceIndex = 2u;
    // Sentinel address: the test only compares the stored pointer value against it.
    const size_t *sizeSentinel = reinterpret_cast<size_t *>(0x1234);

    MockGraphicsAllocation firstAlloc{rootDeviceIndex, nullptr, 1024u};
    MockGraphicsAllocation secondAlloc{rootDeviceIndex, nullptr, 1024u};
    MockBuffer firstBuffer{firstAlloc};
    MockBuffer secondBuffer{secondAlloc};

    // Write into a buffer backed by system memory.
    {
        firstAlloc.memoryPool = MemoryPool::System4KBPages;
        CsrSelectionArgs selection{CL_COMMAND_WRITE_BUFFER, {}, &firstBuffer, rootDeviceIndex, sizeSentinel};
        EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_WRITE_BUFFER), selection.cmdType);
        EXPECT_EQ(TransferDirection::HostToHost, selection.direction);
        EXPECT_EQ(sizeSentinel, selection.size);
        EXPECT_EQ(&firstAlloc, selection.dstResource.allocation);
    }

    // Write into a buffer backed by local memory.
    {
        firstAlloc.memoryPool = MemoryPool::LocalMemory;
        CsrSelectionArgs selection{CL_COMMAND_WRITE_BUFFER, {}, &firstBuffer, rootDeviceIndex, sizeSentinel};
        EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_WRITE_BUFFER), selection.cmdType);
        EXPECT_EQ(TransferDirection::HostToLocal, selection.direction);
        EXPECT_EQ(sizeSentinel, selection.size);
        EXPECT_EQ(&firstAlloc, selection.dstResource.allocation);
    }

    // Read from a buffer backed by local memory.
    {
        firstAlloc.memoryPool = MemoryPool::LocalMemory;
        CsrSelectionArgs selection{CL_COMMAND_READ_BUFFER, &firstBuffer, {}, rootDeviceIndex, sizeSentinel};
        EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_READ_BUFFER), selection.cmdType);
        EXPECT_EQ(TransferDirection::LocalToHost, selection.direction);
        EXPECT_EQ(sizeSentinel, selection.size);
        EXPECT_EQ(&firstAlloc, selection.srcResource.allocation);
    }

    // Read from a buffer backed by system memory.
    {
        firstAlloc.memoryPool = MemoryPool::System4KBPages;
        CsrSelectionArgs selection{CL_COMMAND_READ_BUFFER, &firstBuffer, {}, rootDeviceIndex, sizeSentinel};
        EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_READ_BUFFER), selection.cmdType);
        EXPECT_EQ(TransferDirection::HostToHost, selection.direction);
        EXPECT_EQ(sizeSentinel, selection.size);
        EXPECT_EQ(&firstAlloc, selection.srcResource.allocation);
    }

    // Buffer-to-buffer copy: local memory source, system memory destination.
    {
        firstAlloc.memoryPool = MemoryPool::LocalMemory;
        secondAlloc.memoryPool = MemoryPool::System4KBPages;
        CsrSelectionArgs selection{CL_COMMAND_COPY_BUFFER, &firstBuffer, &secondBuffer, rootDeviceIndex, sizeSentinel};
        EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_COPY_BUFFER), selection.cmdType);
        EXPECT_EQ(TransferDirection::LocalToHost, selection.direction);
        EXPECT_EQ(sizeSentinel, selection.size);
        EXPECT_EQ(&firstAlloc, selection.srcResource.allocation);
        EXPECT_EQ(&secondAlloc, selection.dstResource.allocation);
    }

    // Buffer-to-buffer copy: system memory source, local memory destination.
    {
        firstAlloc.memoryPool = MemoryPool::System4KBPages;
        secondAlloc.memoryPool = MemoryPool::LocalMemory;
        CsrSelectionArgs selection{CL_COMMAND_COPY_BUFFER, &firstBuffer, &secondBuffer, rootDeviceIndex, sizeSentinel};
        EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_COPY_BUFFER), selection.cmdType);
        EXPECT_EQ(TransferDirection::HostToLocal, selection.direction);
        EXPECT_EQ(sizeSentinel, selection.size);
        EXPECT_EQ(&firstAlloc, selection.srcResource.allocation);
        EXPECT_EQ(&secondAlloc, selection.dstResource.allocation);
    }
}
// Checks that CsrSelectionArgs built from images records the command type, size pointer,
// image/allocation/origin of every supplied resource, and the transfer direction expected
// for the memory pools of the backing allocations.
TEST(CsrSelectionArgsTests, givenImagesWhenCreatingCsrSelectionArgsThenSetupArgsCorrectly) {
    const uint32_t rootDeviceIndex = 2u;

    // Sentinel addresses: this test only compares them against the pointers stored in the args.
    const size_t *const regionSize = reinterpret_cast<size_t *>(0x1234);
    const size_t *const firstOrigin = reinterpret_cast<size_t *>(0x12345);
    const size_t *const secondOrigin = reinterpret_cast<size_t *>(0x123456);

    // Typed copies of the OpenCL command ids so the comparisons need no per-assert cast.
    const cl_command_type writeImageCmd = CL_COMMAND_WRITE_IMAGE;
    const cl_command_type readImageCmd = CL_COMMAND_READ_IMAGE;
    const cl_command_type copyImageCmd = CL_COMMAND_COPY_IMAGE;

    MockImageBase firstImage{rootDeviceIndex};
    MockImageBase secondImage{rootDeviceIndex};
    MockGraphicsAllocation &firstAllocation = *firstImage.graphicsAllocation;
    MockGraphicsAllocation &secondAllocation = *secondImage.graphicsAllocation;

    // Write image, destination in system memory.
    {
        firstAllocation.memoryPool = MemoryPool::System4KBPages;
        const CsrSelectionArgs args{writeImageCmd, {}, &firstImage, rootDeviceIndex, regionSize, nullptr, firstOrigin};

        EXPECT_EQ(writeImageCmd, args.cmdType);
        EXPECT_EQ(TransferDirection::HostToHost, args.direction);
        EXPECT_EQ(regionSize, args.size);
        EXPECT_EQ(&firstImage, args.dstResource.image);
        EXPECT_EQ(&firstAllocation, args.dstResource.allocation);
        EXPECT_EQ(firstOrigin, args.dstResource.imageOrigin);
    }

    // Write image, destination in local memory.
    {
        firstAllocation.memoryPool = MemoryPool::LocalMemory;
        const CsrSelectionArgs args{writeImageCmd, {}, &firstImage, rootDeviceIndex, regionSize, nullptr, firstOrigin};

        EXPECT_EQ(writeImageCmd, args.cmdType);
        EXPECT_EQ(TransferDirection::HostToLocal, args.direction);
        EXPECT_EQ(regionSize, args.size);
        EXPECT_EQ(&firstImage, args.dstResource.image);
        EXPECT_EQ(&firstAllocation, args.dstResource.allocation);
        EXPECT_EQ(firstOrigin, args.dstResource.imageOrigin);
    }

    // Read image, source in system memory.
    {
        firstAllocation.memoryPool = MemoryPool::System4KBPages;
        const CsrSelectionArgs args{readImageCmd, &firstImage, nullptr, rootDeviceIndex, regionSize, firstOrigin, nullptr};

        EXPECT_EQ(readImageCmd, args.cmdType);
        EXPECT_EQ(TransferDirection::HostToHost, args.direction);
        EXPECT_EQ(regionSize, args.size);
        EXPECT_EQ(&firstImage, args.srcResource.image);
        EXPECT_EQ(&firstAllocation, args.srcResource.allocation);
        EXPECT_EQ(firstOrigin, args.srcResource.imageOrigin);
    }

    // Read image, source in local memory.
    {
        firstAllocation.memoryPool = MemoryPool::LocalMemory;
        const CsrSelectionArgs args{readImageCmd, &firstImage, nullptr, rootDeviceIndex, regionSize, firstOrigin, nullptr};

        EXPECT_EQ(readImageCmd, args.cmdType);
        EXPECT_EQ(TransferDirection::LocalToHost, args.direction);
        EXPECT_EQ(regionSize, args.size);
        EXPECT_EQ(&firstImage, args.srcResource.image);
        EXPECT_EQ(&firstAllocation, args.srcResource.allocation);
        EXPECT_EQ(firstOrigin, args.srcResource.imageOrigin);
    }

    // Copy image, system-memory source into local-memory destination.
    {
        firstAllocation.memoryPool = MemoryPool::System4KBPages;
        secondAllocation.memoryPool = MemoryPool::LocalMemory;
        const CsrSelectionArgs args{copyImageCmd, &firstImage, &secondImage, rootDeviceIndex, regionSize, firstOrigin, secondOrigin};

        EXPECT_EQ(copyImageCmd, args.cmdType);
        EXPECT_EQ(TransferDirection::HostToLocal, args.direction);
        EXPECT_EQ(regionSize, args.size);
        EXPECT_EQ(&firstImage, args.srcResource.image);
        EXPECT_EQ(&firstAllocation, args.srcResource.allocation);
        EXPECT_EQ(firstOrigin, args.srcResource.imageOrigin);
        EXPECT_EQ(&secondImage, args.dstResource.image);
        EXPECT_EQ(&secondAllocation, args.dstResource.allocation);
        EXPECT_EQ(secondOrigin, args.dstResource.imageOrigin);
    }

    // Copy image, local-memory source into system-memory destination.
    {
        firstAllocation.memoryPool = MemoryPool::LocalMemory;
        secondAllocation.memoryPool = MemoryPool::System4KBPages;
        const CsrSelectionArgs args{copyImageCmd, &firstImage, &secondImage, rootDeviceIndex, regionSize, firstOrigin, secondOrigin};

        EXPECT_EQ(copyImageCmd, args.cmdType);
        EXPECT_EQ(TransferDirection::LocalToHost, args.direction);
        EXPECT_EQ(regionSize, args.size);
        EXPECT_EQ(&firstImage, args.srcResource.image);
        EXPECT_EQ(&firstAllocation, args.srcResource.allocation);
        EXPECT_EQ(firstOrigin, args.srcResource.imageOrigin);
        EXPECT_EQ(&secondImage, args.dstResource.image);
        EXPECT_EQ(&secondAllocation, args.dstResource.allocation);
        EXPECT_EQ(secondOrigin, args.dstResource.imageOrigin);
    }
}
// Checks that CsrSelectionArgs built from a pair of MultiGraphicsAllocations records the
// command type, size pointer and both underlying allocations, and that every combination of
// system/local memory pools for source and destination yields the expected transfer direction.
TEST(CsrSelectionArgsTests, givenGraphicsAllocationsWhenCreatingCsrSelectionArgsThenSetupArgsCorrectly) {
    const uint32_t rootDeviceIndex = 2u;

    // Sentinel address: this test only compares it against the pointer stored in the args.
    const size_t *const copySize = reinterpret_cast<size_t *>(0x1234);

    // Typed copy of the OpenCL command id so the comparisons need no per-assert cast.
    const cl_command_type svmMemcpyCmd = CL_COMMAND_SVM_MEMCPY;

    MockGraphicsAllocation srcAllocation{rootDeviceIndex, nullptr, 1024u};
    MockGraphicsAllocation dstAllocation{rootDeviceIndex, nullptr, 1024u};
    MultiGraphicsAllocation srcMultiAlloc = GraphicsAllocationHelper::toMultiGraphicsAllocation(&srcAllocation);
    MultiGraphicsAllocation dstMultiAlloc = GraphicsAllocationHelper::toMultiGraphicsAllocation(&dstAllocation);

    // System memory -> system memory.
    {
        srcAllocation.memoryPool = MemoryPool::System4KBPages;
        dstAllocation.memoryPool = MemoryPool::System4KBPages;
        const CsrSelectionArgs args{svmMemcpyCmd, &srcMultiAlloc, &dstMultiAlloc, rootDeviceIndex, copySize};

        EXPECT_EQ(svmMemcpyCmd, args.cmdType);
        EXPECT_EQ(TransferDirection::HostToHost, args.direction);
        EXPECT_EQ(copySize, args.size);
        EXPECT_EQ(&srcAllocation, args.srcResource.allocation);
        EXPECT_EQ(&dstAllocation, args.dstResource.allocation);
    }

    // System memory -> local memory.
    {
        srcAllocation.memoryPool = MemoryPool::System4KBPages;
        dstAllocation.memoryPool = MemoryPool::LocalMemory;
        const CsrSelectionArgs args{svmMemcpyCmd, &srcMultiAlloc, &dstMultiAlloc, rootDeviceIndex, copySize};

        EXPECT_EQ(svmMemcpyCmd, args.cmdType);
        EXPECT_EQ(TransferDirection::HostToLocal, args.direction);
        EXPECT_EQ(copySize, args.size);
        EXPECT_EQ(&srcAllocation, args.srcResource.allocation);
        EXPECT_EQ(&dstAllocation, args.dstResource.allocation);
    }

    // Local memory -> system memory.
    {
        srcAllocation.memoryPool = MemoryPool::LocalMemory;
        dstAllocation.memoryPool = MemoryPool::System4KBPages;
        const CsrSelectionArgs args{svmMemcpyCmd, &srcMultiAlloc, &dstMultiAlloc, rootDeviceIndex, copySize};

        EXPECT_EQ(svmMemcpyCmd, args.cmdType);
        EXPECT_EQ(TransferDirection::LocalToHost, args.direction);
        EXPECT_EQ(copySize, args.size);
        EXPECT_EQ(&srcAllocation, args.srcResource.allocation);
        EXPECT_EQ(&dstAllocation, args.dstResource.allocation);
    }

    // Local memory -> local memory.
    {
        srcAllocation.memoryPool = MemoryPool::LocalMemory;
        dstAllocation.memoryPool = MemoryPool::LocalMemory;
        const CsrSelectionArgs args{svmMemcpyCmd, &srcMultiAlloc, &dstMultiAlloc, rootDeviceIndex, copySize};

        EXPECT_EQ(svmMemcpyCmd, args.cmdType);
        EXPECT_EQ(TransferDirection::LocalToLocal, args.direction);
        EXPECT_EQ(copySize, args.size);
        EXPECT_EQ(&srcAllocation, args.srcResource.allocation);
        EXPECT_EQ(&dstAllocation, args.dstResource.allocation);
    }
}
} // namespace NEO

View File

@@ -353,9 +353,9 @@ HWTEST_F(EnqueueSvmMemCopyTest, givenCommandQueueWhenEnqueueSVMMemcpyIsCalledThe
EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_TRUE(mockCmdQ->notifyEnqueueSVMMemcpyCalled); EXPECT_TRUE(mockCmdQ->notifyEnqueueSVMMemcpyCalled);
auto blitAllowed = mockCmdQ->blitEnqueueAllowed(CL_COMMAND_SVM_MEMCPY); MultiGraphicsAllocation &srcSvmAlloc = context->getSVMAllocsManager()->getSVMAlloc(srcSvmPtr)->gpuAllocations;
CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, &srcSvmAlloc, {}, 0, nullptr};
auto &csr = mockCmdQ->getCommandStreamReceiver(blitAllowed); CommandStreamReceiver &csr = mockCmdQ->selectCsrForBuiltinOperation(csrSelectionArgs);
EXPECT_EQ(EngineHelpers::isBcs(csr.getOsContext().getEngineType()), mockCmdQ->useBcsCsrOnNotifyEnabled); EXPECT_EQ(EngineHelpers::isBcs(csr.getOsContext().getEngineType()), mockCmdQ->useBcsCsrOnNotifyEnabled);
alignedFree(dstHostPtr); alignedFree(dstHostPtr);

View File

@@ -312,7 +312,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
return BaseClass::isCacheFlushForBcsRequired(); return BaseClass::isCacheFlushForBcsRequired();
} }
bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) override { bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) const override {
isBlitEnqueueImageAllowed = BaseClass::blitEnqueueImageAllowed(origin, region, image); isBlitEnqueueImageAllowed = BaseClass::blitEnqueueImageAllowed(origin, region, image);
return isBlitEnqueueImageAllowed; return isBlitEnqueueImageAllowed;
} }
@@ -330,7 +330,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
bool notifyEnqueueSVMMemcpyCalled = false; bool notifyEnqueueSVMMemcpyCalled = false;
bool cpuDataTransferHandlerCalled = false; bool cpuDataTransferHandlerCalled = false;
bool useBcsCsrOnNotifyEnabled = false; bool useBcsCsrOnNotifyEnabled = false;
bool isBlitEnqueueImageAllowed = false; mutable bool isBlitEnqueueImageAllowed = false;
struct OverrideReturnValue { struct OverrideReturnValue {
bool enabled = false; bool enabled = false;
bool returnValue = false; bool returnValue = false;

View File

@@ -20,12 +20,15 @@ struct MockImageBase : public Image {
using Image::imageFormat; using Image::imageFormat;
MockGraphicsAllocation *graphicsAllocation = nullptr; MockGraphicsAllocation *graphicsAllocation = nullptr;
MockImageBase() : Image( MockImageBase(uint32_t rootDeviceIndex)
nullptr, MemoryProperties(), cl_mem_flags{}, 0, 0, nullptr, nullptr, cl_image_format{}, : Image(nullptr, MemoryProperties(), cl_mem_flags{}, 0, 0, nullptr, nullptr, cl_image_format{},
cl_image_desc{}, false, GraphicsAllocationHelper::toMultiGraphicsAllocation(new MockGraphicsAllocation(nullptr, 0)), false, cl_image_desc{}, false, GraphicsAllocationHelper::toMultiGraphicsAllocation(new MockGraphicsAllocation(rootDeviceIndex, nullptr, 0)), false,
0, 0, ClSurfaceFormatInfo{}, nullptr), 0, 0, ClSurfaceFormatInfo{}, nullptr),
graphicsAllocation(static_cast<MockGraphicsAllocation *>(multiGraphicsAllocation.getGraphicsAllocation(0))) { graphicsAllocation(static_cast<MockGraphicsAllocation *>(multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex))) {
} }
MockImageBase() : MockImageBase(0u) {}
~MockImageBase() override { ~MockImageBase() override {
delete this->graphicsAllocation; delete this->graphicsAllocation;
} }