mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 09:14:47 +08:00
Add selectCsrForBuiltinOperation method to OpenCL CommandQueue
Signed-off-by: Maciej Dziuban <maciej.dziuban@intel.com> Related-To: NEO-6057
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
03ee6bc2dd
commit
858727010f
@@ -12,6 +12,7 @@ set(RUNTIME_SRCS_COMMAND_QUEUE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_base.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_bdw_and_later.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cpu_data_transfer_handler.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/csr_selection_args.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_barrier.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_common.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_buffer.h
|
||||
|
||||
@@ -153,6 +153,19 @@ CommandStreamReceiver &CommandQueue::getCommandStreamReceiver(bool blitAllowed)
|
||||
return getGpgpuCommandStreamReceiver();
|
||||
}
|
||||
|
||||
CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(const CsrSelectionArgs &args) const {
|
||||
const bool blitAllowed = blitEnqueueAllowed(args);
|
||||
const bool blitPreferred = blitEnqueuePreferred(args);
|
||||
const bool blitRequired = isCopyOnly;
|
||||
const bool blit = blitAllowed && (blitPreferred || blitRequired);
|
||||
|
||||
if (blit) {
|
||||
return *bcsEngine->commandStreamReceiver;
|
||||
} else {
|
||||
return getGpgpuCommandStreamReceiver();
|
||||
}
|
||||
}
|
||||
|
||||
Device &CommandQueue::getDevice() const noexcept {
|
||||
return device->getDevice();
|
||||
}
|
||||
@@ -725,15 +738,20 @@ bool CommandQueue::queueDependenciesClearRequired() const {
|
||||
return isOOQEnabled() || DebugManager.flags.OmitTimestampPacketDependencies.get();
|
||||
}
|
||||
|
||||
bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType) const {
|
||||
auto blitterSupported = bcsEngine != nullptr;
|
||||
bool CommandQueue::blitEnqueueAllowed(const CsrSelectionArgs &args) const {
|
||||
if (bcsEngine == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool blitEnqueueAllowed = getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled() || this->isCopyOnly;
|
||||
if (DebugManager.flags.EnableBlitterForEnqueueOperations.get() != -1) {
|
||||
blitEnqueueAllowed = DebugManager.flags.EnableBlitterForEnqueueOperations.get();
|
||||
}
|
||||
if (!blitEnqueueAllowed) {
|
||||
return false;
|
||||
}
|
||||
|
||||
switch (cmdType) {
|
||||
switch (args.cmdType) {
|
||||
case CL_COMMAND_READ_BUFFER:
|
||||
case CL_COMMAND_WRITE_BUFFER:
|
||||
case CL_COMMAND_COPY_BUFFER:
|
||||
@@ -741,30 +759,25 @@ bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType) const {
|
||||
case CL_COMMAND_WRITE_BUFFER_RECT:
|
||||
case CL_COMMAND_COPY_BUFFER_RECT:
|
||||
case CL_COMMAND_SVM_MEMCPY:
|
||||
case CL_COMMAND_SVM_MAP:
|
||||
case CL_COMMAND_SVM_UNMAP:
|
||||
return true;
|
||||
case CL_COMMAND_READ_IMAGE:
|
||||
return blitEnqueueImageAllowed(args.srcResource.imageOrigin, args.size, *args.srcResource.image);
|
||||
case CL_COMMAND_WRITE_IMAGE:
|
||||
return blitEnqueueImageAllowed(args.dstResource.imageOrigin, args.size, *args.dstResource.image);
|
||||
|
||||
case CL_COMMAND_COPY_IMAGE:
|
||||
return blitterSupported && blitEnqueueAllowed;
|
||||
return blitEnqueueImageAllowed(args.srcResource.imageOrigin, args.size, *args.srcResource.image) &&
|
||||
blitEnqueueImageAllowed(args.dstResource.imageOrigin, args.size, *args.dstResource.image);
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool CommandQueue::blitEnqueuePreferred(cl_command_type cmdType, const BuiltinOpParams &builtinOpParams) const {
|
||||
bool isLocalToLocal = false;
|
||||
|
||||
if (cmdType == CL_COMMAND_COPY_BUFFER &&
|
||||
builtinOpParams.srcMemObj->getGraphicsAllocation(device->getRootDeviceIndex())->isAllocatedInLocalMemoryPool() &&
|
||||
builtinOpParams.dstMemObj->getGraphicsAllocation(device->getRootDeviceIndex())->isAllocatedInLocalMemoryPool()) {
|
||||
isLocalToLocal = true;
|
||||
}
|
||||
if (cmdType == CL_COMMAND_SVM_MEMCPY &&
|
||||
builtinOpParams.srcSvmAlloc->isAllocatedInLocalMemoryPool() &&
|
||||
builtinOpParams.dstSvmAlloc->isAllocatedInLocalMemoryPool()) {
|
||||
isLocalToLocal = true;
|
||||
}
|
||||
|
||||
if (isLocalToLocal) {
|
||||
bool CommandQueue::blitEnqueuePreferred(const CsrSelectionArgs &args) const {
|
||||
if (args.direction == TransferDirection::LocalToLocal) {
|
||||
if (DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.get() != -1) {
|
||||
return static_cast<bool>(DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.get());
|
||||
}
|
||||
@@ -775,7 +788,7 @@ bool CommandQueue::blitEnqueuePreferred(cl_command_type cmdType, const BuiltinOp
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CommandQueue::blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) {
|
||||
bool CommandQueue::blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) const {
|
||||
const auto &hwInfo = device->getHardwareInfo();
|
||||
const auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
auto blitEnqueueImageAllowed = hwHelper.isBlitterForImagesSupported(hwInfo);
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#pragma once
|
||||
#include "shared/source/helpers/engine_control.h"
|
||||
|
||||
#include "opencl/source/command_queue/csr_selection_args.h"
|
||||
#include "opencl/source/event/event.h"
|
||||
#include "opencl/source/helpers/base_object.h"
|
||||
#include "opencl/source/helpers/dispatch_info.h"
|
||||
@@ -225,6 +226,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
CommandStreamReceiver *getBcsCommandStreamReceiver() const;
|
||||
CommandStreamReceiver *getBcsForAuxTranslation() const;
|
||||
MOCKABLE_VIRTUAL CommandStreamReceiver &getCommandStreamReceiver(bool blitAllowed) const;
|
||||
MOCKABLE_VIRTUAL CommandStreamReceiver &selectCsrForBuiltinOperation(const CsrSelectionArgs &args) const;
|
||||
Device &getDevice() const noexcept;
|
||||
ClDevice &getClDevice() const { return *device; }
|
||||
Context &getContext() const { return *context; }
|
||||
@@ -353,9 +355,9 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
cl_uint numEventsInWaitList, const cl_event *eventWaitList);
|
||||
void providePerformanceHint(TransferProperties &transferProperties);
|
||||
bool queueDependenciesClearRequired() const;
|
||||
bool blitEnqueueAllowed(cl_command_type cmdType) const;
|
||||
bool blitEnqueuePreferred(cl_command_type cmdType, const BuiltinOpParams &builtinOpParams) const;
|
||||
MOCKABLE_VIRTUAL bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image);
|
||||
bool blitEnqueueAllowed(const CsrSelectionArgs &args) const;
|
||||
bool blitEnqueuePreferred(const CsrSelectionArgs &args) const;
|
||||
MOCKABLE_VIRTUAL bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) const;
|
||||
void aubCaptureHook(bool &blocking, bool &clearAllDependencies, const MultiDispatchInfo &multiDispatchInfo);
|
||||
virtual bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const = 0;
|
||||
void waitForLatestTaskCount();
|
||||
|
||||
@@ -365,10 +365,10 @@ class CommandQueueHw : public CommandQueue {
|
||||
cl_event *event);
|
||||
|
||||
template <uint32_t cmdType, size_t surfaceCount>
|
||||
void dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount], EBuiltInOps::Type builtInOperation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, bool blitAllowed);
|
||||
void dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount], EBuiltInOps::Type builtInOperation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &csr);
|
||||
|
||||
template <uint32_t cmdType>
|
||||
void enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking);
|
||||
void enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &bcsCsr);
|
||||
|
||||
template <uint32_t commandType>
|
||||
CompletionStamp enqueueNonBlocked(Surface **surfacesForResidency,
|
||||
|
||||
97
opencl/source/command_queue/csr_selection_args.h
Normal file
97
opencl/source/command_queue/csr_selection_args.h
Normal file
@@ -0,0 +1,97 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/memory_manager/multi_graphics_allocation.h"
|
||||
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
#include "opencl/source/mem_obj/image.h"
|
||||
#include "opencl/source/mem_obj/mem_obj.h"
|
||||
|
||||
namespace NEO {
|
||||
enum class TransferDirection {
|
||||
HostToHost,
|
||||
HostToLocal,
|
||||
LocalToHost,
|
||||
LocalToLocal,
|
||||
};
|
||||
|
||||
struct CsrSelectionArgs {
|
||||
struct Resource {
|
||||
bool isLocal = false;
|
||||
const GraphicsAllocation *allocation = nullptr;
|
||||
const Image *image = nullptr;
|
||||
const size_t *imageOrigin = nullptr;
|
||||
};
|
||||
|
||||
cl_command_type cmdType;
|
||||
const size_t *size = nullptr;
|
||||
Resource srcResource;
|
||||
Resource dstResource;
|
||||
TransferDirection direction;
|
||||
|
||||
CsrSelectionArgs(cl_command_type cmdType, const size_t *size)
|
||||
: cmdType(cmdType),
|
||||
size(size),
|
||||
direction(TransferDirection::HostToHost) {}
|
||||
|
||||
template <typename ResourceType>
|
||||
CsrSelectionArgs(cl_command_type cmdType, ResourceType *src, ResourceType *dst, uint32_t rootDeviceIndex, const size_t *size)
|
||||
: cmdType(cmdType),
|
||||
size(size) {
|
||||
if (src) {
|
||||
processResource(*src, rootDeviceIndex, this->srcResource);
|
||||
}
|
||||
if (dst) {
|
||||
processResource(*dst, rootDeviceIndex, this->dstResource);
|
||||
}
|
||||
this->direction = createTransferDirection(srcResource.isLocal, dstResource.isLocal);
|
||||
}
|
||||
|
||||
CsrSelectionArgs(cl_command_type cmdType, Image *src, Image *dst, uint32_t rootDeviceIndex, const size_t *size, const size_t *srcOrigin, const size_t *dstOrigin)
|
||||
: CsrSelectionArgs(cmdType, src, dst, rootDeviceIndex, size) {
|
||||
if (src) {
|
||||
srcResource.imageOrigin = srcOrigin;
|
||||
}
|
||||
if (dst) {
|
||||
dstResource.imageOrigin = dstOrigin;
|
||||
}
|
||||
}
|
||||
|
||||
static void processResource(const Image &image, uint32_t rootDeviceIndex, Resource &outResource) {
|
||||
processResource(image.getMultiGraphicsAllocation(), rootDeviceIndex, outResource);
|
||||
outResource.image = &image;
|
||||
}
|
||||
|
||||
static void processResource(const Buffer &buffer, uint32_t rootDeviceIndex, Resource &outResource) {
|
||||
processResource(buffer.getMultiGraphicsAllocation(), rootDeviceIndex, outResource);
|
||||
}
|
||||
|
||||
static void processResource(const MultiGraphicsAllocation &multiGfxAlloc, uint32_t rootDeviceIndex, Resource &outResource) {
|
||||
outResource.allocation = multiGfxAlloc.getGraphicsAllocation(rootDeviceIndex);
|
||||
outResource.isLocal = outResource.allocation->isAllocatedInLocalMemoryPool();
|
||||
}
|
||||
|
||||
static inline TransferDirection createTransferDirection(bool srcLocal, bool dstLocal) {
|
||||
if (srcLocal) {
|
||||
if (dstLocal) {
|
||||
return TransferDirection::LocalToLocal;
|
||||
} else {
|
||||
return TransferDirection::LocalToHost;
|
||||
}
|
||||
} else {
|
||||
if (dstLocal) {
|
||||
return TransferDirection::HostToLocal;
|
||||
} else {
|
||||
return TransferDirection::HostToHost;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
@@ -1162,9 +1162,8 @@ size_t CommandQueueHw<GfxFamily>::calculateHostPtrSizeForImage(const size_t *reg
|
||||
|
||||
template <typename GfxFamily>
|
||||
template <uint32_t cmdType>
|
||||
void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking) {
|
||||
void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &bcsCsr) {
|
||||
auto commandStreamRecieverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership();
|
||||
auto &bcsCsr = *getBcsCommandStreamReceiver();
|
||||
|
||||
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
|
||||
EventBuilder eventBuilder;
|
||||
@@ -1251,13 +1250,11 @@ void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispat
|
||||
|
||||
template <typename GfxFamily>
|
||||
template <uint32_t cmdType, size_t surfaceCount>
|
||||
void CommandQueueHw<GfxFamily>::dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount], EBuiltInOps::Type builtInOperation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, bool blitAllowed) {
|
||||
const bool blitPreferred = blitEnqueuePreferred(cmdType, dispatchInfo.peekBuiltinOpParams());
|
||||
const bool blitRequired = isCopyOnly;
|
||||
const bool blit = blitAllowed && (blitPreferred || blitRequired);
|
||||
void CommandQueueHw<GfxFamily>::dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount], EBuiltInOps::Type builtInOperation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &csr) {
|
||||
const bool blit = EngineHelpers::isBcs(csr.getOsContext().getEngineType());
|
||||
|
||||
if (blit) {
|
||||
enqueueBlit<cmdType>(dispatchInfo, numEventsInWaitList, eventWaitList, event, blocking);
|
||||
enqueueBlit<cmdType>(dispatchInfo, numEventsInWaitList, eventWaitList, event, blocking, csr);
|
||||
} else {
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInOperation,
|
||||
this->getClDevice());
|
||||
|
||||
@@ -30,6 +30,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBuffer(
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
auto eBuiltInOpsType = EBuiltInOps::CopyBufferToBuffer;
|
||||
constexpr cl_command_type cmdType = CL_COMMAND_COPY_BUFFER;
|
||||
|
||||
CsrSelectionArgs csrSelectionArgs{cmdType, srcBuffer, dstBuffer, device->getRootDeviceIndex(), &size};
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
||||
|
||||
if (forceStateless(std::max(srcBuffer->getSize(), dstBuffer->getSize()))) {
|
||||
eBuiltInOpsType = EBuiltInOps::CopyBufferToBufferStateless;
|
||||
@@ -47,8 +51,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBuffer(
|
||||
MemObjSurface s1(srcBuffer);
|
||||
MemObjSurface s2(dstBuffer);
|
||||
Surface *surfaces[] = {&s1, &s2};
|
||||
auto blitAllowed = blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER>(dispatchInfo, surfaces, eBuiltInOpsType, numEventsInWaitList, eventWaitList, event, false, blitAllowed);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER>(dispatchInfo, surfaces, eBuiltInOpsType, numEventsInWaitList, eventWaitList, event, false, csr);
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -33,6 +33,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferRect(
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
auto eBuiltInOps = EBuiltInOps::CopyBufferRect;
|
||||
constexpr cl_command_type cmdType = CL_COMMAND_COPY_BUFFER_RECT;
|
||||
|
||||
CsrSelectionArgs csrSelectionArgs{cmdType, srcBuffer, dstBuffer, device->getRootDeviceIndex(), region};
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
||||
|
||||
if (forceStateless(std::max(srcBuffer->getSize(), dstBuffer->getSize()))) {
|
||||
eBuiltInOps = EBuiltInOps::CopyBufferRectStateless;
|
||||
@@ -54,8 +58,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferRect(
|
||||
dc.dstSlicePitch = dstSlicePitch;
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
auto blitAllowed = blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER_RECT);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, false, blitAllowed);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, false, csr);
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -31,6 +31,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyImage(
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
constexpr cl_command_type cmdType = CL_COMMAND_COPY_IMAGE;
|
||||
|
||||
CsrSelectionArgs csrSelectionArgs{cmdType, srcImage, dstImage, device->getRootDeviceIndex(), region, srcOrigin, dstOrigin};
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
||||
|
||||
MemObjSurface srcImgSurf(srcImage);
|
||||
MemObjSurface dstImgSurf(dstImage);
|
||||
@@ -50,10 +54,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyImage(
|
||||
}
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
cl_command_type cmdType = CL_COMMAND_COPY_IMAGE;
|
||||
auto blitAllowed = blitEnqueueAllowed(cmdType) && blitEnqueueImageAllowed(srcOrigin, region, *srcImage) && blitEnqueueImageAllowed(dstOrigin, region, *dstImage);
|
||||
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_IMAGE>(dispatchInfo, surfaces, EBuiltInOps::CopyImageToImage3d, numEventsInWaitList, eventWaitList, event, false, blitAllowed);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_IMAGE>(dispatchInfo, surfaces, EBuiltInOps::CopyImageToImage3d, numEventsInWaitList, eventWaitList, event, false, csr);
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -36,8 +36,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
||||
cl_event *event) {
|
||||
|
||||
const cl_command_type cmdType = CL_COMMAND_READ_BUFFER;
|
||||
auto blitAllowed = blitEnqueueAllowed(cmdType);
|
||||
auto &csr = getCommandStreamReceiver(blitAllowed);
|
||||
|
||||
CsrSelectionArgs csrSelectionArgs{cmdType, buffer, {}, device->getRootDeviceIndex(), &size};
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
||||
|
||||
if (nullptr == mapAllocation) {
|
||||
notifyEnqueueReadBuffer(buffer, !!blockingRead, EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
|
||||
@@ -128,7 +129,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, ptr, size, MemoryConstants::pageSize, MemoryConstants::pageSize);
|
||||
}
|
||||
}
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, blitAllowed);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, csr);
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -35,8 +35,11 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
const cl_command_type cmdType = CL_COMMAND_READ_BUFFER_RECT;
|
||||
auto isMemTransferNeeded = true;
|
||||
|
||||
CsrSelectionArgs csrSelectionArgs{cmdType, buffer, {}, device->getRootDeviceIndex(), region};
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
||||
|
||||
auto isMemTransferNeeded = true;
|
||||
if (buffer->isMemObjZeroCopy()) {
|
||||
size_t bufferOffset;
|
||||
size_t hostOffset;
|
||||
@@ -59,12 +62,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
|
||||
MemObjSurface bufferSurf(buffer);
|
||||
HostPtrSurface hostPtrSurf(dstPtr, hostPtrSize);
|
||||
Surface *surfaces[] = {&bufferSurf, &hostPtrSurf};
|
||||
auto blitAllowed = blitEnqueueAllowed(cmdType);
|
||||
|
||||
if (region[0] != 0 &&
|
||||
region[1] != 0 &&
|
||||
region[2] != 0) {
|
||||
auto &csr = getCommandStreamReceiver(blitAllowed);
|
||||
bool status = csr.createAllocationForHostSurface(hostPtrSurf, true);
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
@@ -89,7 +90,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
|
||||
dc.dstSlicePitch = hostSlicePitch;
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, blitAllowed);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, csr);
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHintForMemoryTransfer(CL_COMMAND_READ_BUFFER_RECT, true, static_cast<cl_mem>(buffer), ptr);
|
||||
|
||||
@@ -40,9 +40,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
cl_command_type cmdType = CL_COMMAND_READ_IMAGE;
|
||||
auto blitAllowed = blitEnqueueAllowed(cmdType) && blitEnqueueImageAllowed(origin, region, *srcImage);
|
||||
auto &csr = getCommandStreamReceiver(blitAllowed);
|
||||
constexpr cl_command_type cmdType = CL_COMMAND_READ_IMAGE;
|
||||
|
||||
CsrSelectionArgs csrSelectionArgs{cmdType, srcImage, {}, device->getRootDeviceIndex(), region, origin, nullptr};
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
||||
|
||||
if (nullptr == mapAllocation) {
|
||||
notifyEnqueueReadImage(srcImage, static_cast<bool>(blockingRead), EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
|
||||
@@ -105,7 +106,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
|
||||
auto eBuiltInOps = EBuiltInOps::CopyImage3dToBuffer;
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead == CL_TRUE, blitAllowed);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead == CL_TRUE, csr);
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
if (!isL3Capable(ptr, hostPtrSize)) {
|
||||
|
||||
@@ -106,8 +106,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMap(cl_bool blockingMap,
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
auto gpuAllocation = svmData->gpuAllocations.getGraphicsAllocation(getDevice().getRootDeviceIndex());
|
||||
CsrSelectionArgs csrSelectionArgs{CL_COMMAND_READ_BUFFER, &svmData->gpuAllocations, {}, device->getRootDeviceIndex(), &size};
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
||||
|
||||
auto gpuAllocation = svmData->gpuAllocations.getGraphicsAllocation(getDevice().getRootDeviceIndex());
|
||||
GeneralSurface dstSurface(svmData->cpuAllocation);
|
||||
GeneralSurface srcSurface(gpuAllocation);
|
||||
|
||||
@@ -126,8 +128,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMap(cl_bool blockingMap,
|
||||
dc.unifiedMemoryArgsRequireMemSync = externalAppCall;
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
auto blitAllowed = blitEnqueueAllowed(CL_COMMAND_READ_BUFFER);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, blocking, blitAllowed);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, blocking, csr);
|
||||
|
||||
if (event) {
|
||||
castToObjectOrAbort<Event>(*event)->setCmdType(CL_COMMAND_SVM_MAP);
|
||||
@@ -189,8 +190,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMUnmap(void *svmPtr,
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
auto gpuAllocation = svmData->gpuAllocations.getGraphicsAllocation(getDevice().getRootDeviceIndex());
|
||||
CsrSelectionArgs csrSelectionArgs{CL_COMMAND_READ_BUFFER, {}, &svmData->gpuAllocations, device->getRootDeviceIndex(), &svmOperation->regionSize};
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
||||
|
||||
auto gpuAllocation = svmData->gpuAllocations.getGraphicsAllocation(getDevice().getRootDeviceIndex());
|
||||
gpuAllocation->setAubWritable(true, GraphicsAllocation::defaultBank);
|
||||
gpuAllocation->setTbxWritable(true, GraphicsAllocation::defaultBank);
|
||||
|
||||
@@ -210,8 +213,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMUnmap(void *svmPtr,
|
||||
dc.unifiedMemoryArgsRequireMemSync = externalAppCall;
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
auto blitAllowed = blitEnqueueAllowed(CL_COMMAND_READ_BUFFER);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, false, blitAllowed);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, false, csr);
|
||||
|
||||
if (event) {
|
||||
castToObjectOrAbort<Event>(*event)->setCmdType(CL_COMMAND_SVM_UNMAP);
|
||||
@@ -328,12 +330,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
||||
cl_command_type cmdType;
|
||||
|
||||
if (copyType == SvmToHost) {
|
||||
CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, &srcSvmData->gpuAllocations, {}, device->getRootDeviceIndex(), &size};
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
||||
|
||||
GeneralSurface srcSvmSurf(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
|
||||
HostPtrSurface dstHostPtrSurf(dstPtr, size);
|
||||
cmdType = CL_COMMAND_READ_BUFFER;
|
||||
auto blitAllowed = blitEnqueueAllowed(cmdType);
|
||||
if (size != 0) {
|
||||
auto &csr = getCommandStreamReceiver(blitAllowed);
|
||||
bool status = csr.createAllocationForHostSurface(dstHostPtrSurf, true);
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
@@ -346,15 +348,16 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
||||
surfaces[1] = &dstHostPtrSurf;
|
||||
|
||||
dispatchInfo.setBuiltinOpParams(operationParams);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, blitAllowed);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
|
||||
|
||||
} else if (copyType == HostToSvm) {
|
||||
CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, {}, &dstSvmData->gpuAllocations, device->getRootDeviceIndex(), &size};
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
||||
|
||||
HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcPtr), size);
|
||||
GeneralSurface dstSvmSurf(dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
|
||||
cmdType = CL_COMMAND_WRITE_BUFFER;
|
||||
auto blitAllowed = blitEnqueueAllowed(cmdType);
|
||||
if (size != 0) {
|
||||
auto &csr = getCommandStreamReceiver(blitAllowed);
|
||||
bool status = csr.createAllocationForHostSurface(srcHostPtrSurf, false);
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
@@ -367,9 +370,13 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
||||
surfaces[1] = &srcHostPtrSurf;
|
||||
|
||||
dispatchInfo.setBuiltinOpParams(operationParams);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, blitAllowed);
|
||||
dispatchInfo.setBuiltinOpParams(operationParams);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
|
||||
|
||||
} else if (copyType == SvmToSvm) {
|
||||
CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, &srcSvmData->gpuAllocations, &dstSvmData->gpuAllocations, device->getRootDeviceIndex(), &size};
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
||||
|
||||
GeneralSurface srcSvmSurf(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
|
||||
GeneralSurface dstSvmSurf(dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
|
||||
setOperationParams(operationParams, size, srcPtr, srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex),
|
||||
@@ -378,16 +385,16 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
||||
surfaces[1] = &dstSvmSurf;
|
||||
|
||||
dispatchInfo.setBuiltinOpParams(operationParams);
|
||||
auto blitAllowed = blitEnqueueAllowed(CL_COMMAND_SVM_MEMCPY);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_SVM_MEMCPY>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, blitAllowed);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_SVM_MEMCPY>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
|
||||
|
||||
} else {
|
||||
CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, &size};
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
||||
|
||||
HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcPtr), size);
|
||||
HostPtrSurface dstHostPtrSurf(dstPtr, size);
|
||||
cmdType = CL_COMMAND_WRITE_BUFFER;
|
||||
auto blitAllowed = blitEnqueueAllowed(cmdType);
|
||||
if (size != 0) {
|
||||
auto &csr = getCommandStreamReceiver(blitAllowed);
|
||||
bool status = csr.createAllocationForHostSurface(srcHostPtrSurf, false);
|
||||
status &= csr.createAllocationForHostSurface(dstHostPtrSurf, true);
|
||||
if (!status) {
|
||||
@@ -401,7 +408,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
||||
surfaces[1] = &dstHostPtrSurf;
|
||||
|
||||
dispatchInfo.setBuiltinOpParams(operationParams);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, blitAllowed);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
|
||||
}
|
||||
if (event) {
|
||||
auto pEvent = castToObjectOrAbort<Event>(*event);
|
||||
|
||||
@@ -31,10 +31,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
const cl_command_type cmdType = CL_COMMAND_WRITE_BUFFER;
|
||||
|
||||
CsrSelectionArgs csrSelectionArgs{cmdType, {}, buffer, device->getRootDeviceIndex(), &size};
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
||||
|
||||
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
|
||||
|
||||
const cl_command_type cmdType = CL_COMMAND_WRITE_BUFFER;
|
||||
auto isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, cmdType) : true;
|
||||
bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, cmdType, blockingWrite, size, const_cast<void *>(ptr),
|
||||
numEventsInWaitList, eventWaitList);
|
||||
@@ -82,7 +84,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
|
||||
MemObjSurface bufferSurf(buffer);
|
||||
GeneralSurface mapSurface;
|
||||
Surface *surfaces[] = {&bufferSurf, nullptr};
|
||||
auto blitAllowed = blitEnqueueAllowed(cmdType);
|
||||
|
||||
if (mapAllocation) {
|
||||
surfaces[1] = &mapSurface;
|
||||
@@ -95,8 +96,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
|
||||
} else {
|
||||
surfaces[1] = &hostPtrSurf;
|
||||
if (size != 0) {
|
||||
|
||||
auto &csr = getCommandStreamReceiver(blitAllowed);
|
||||
bool status = csr.createAllocationForHostSurface(hostPtrSurf, false);
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
@@ -116,7 +115,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
|
||||
dc.transferAllocation = mapAllocation ? mapAllocation : hostPtrSurf.getAllocation();
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, blitAllowed);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(buffer));
|
||||
|
||||
@@ -34,6 +34,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
const cl_command_type cmdType = CL_COMMAND_WRITE_BUFFER_RECT;
|
||||
|
||||
CsrSelectionArgs csrSelectionArgs{cmdType, buffer, {}, device->getRootDeviceIndex(), region};
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
||||
|
||||
auto isMemTransferNeeded = true;
|
||||
|
||||
if (buffer->isMemObjZeroCopy()) {
|
||||
@@ -58,12 +62,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
|
||||
MemObjSurface dstBufferSurf(buffer);
|
||||
HostPtrSurface hostPtrSurf(srcPtr, hostPtrSize, true);
|
||||
Surface *surfaces[] = {&dstBufferSurf, &hostPtrSurf};
|
||||
auto blitAllowed = blitEnqueueAllowed(cmdType);
|
||||
|
||||
if (region[0] != 0 &&
|
||||
region[1] != 0 &&
|
||||
region[2] != 0) {
|
||||
auto &csr = getCommandStreamReceiver(blitAllowed);
|
||||
bool status = csr.createAllocationForHostSurface(hostPtrSurf, false);
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
@@ -88,7 +90,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
|
||||
dc.dstSlicePitch = bufferSlicePitch;
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, blitAllowed);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA, static_cast<cl_mem>(buffer));
|
||||
|
||||
@@ -34,7 +34,11 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
auto cmdType = CL_COMMAND_WRITE_IMAGE;
|
||||
constexpr cl_command_type cmdType = CL_COMMAND_WRITE_IMAGE;
|
||||
|
||||
CsrSelectionArgs csrSelectionArgs{cmdType, nullptr, dstImage, device->getRootDeviceIndex(), region, nullptr, origin};
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
||||
|
||||
auto isMemTransferNeeded = true;
|
||||
|
||||
if (dstImage->isMemObjZeroCopy()) {
|
||||
@@ -54,7 +58,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
|
||||
HostPtrSurface hostPtrSurf(srcPtr, hostPtrSize, true);
|
||||
GeneralSurface mapSurface;
|
||||
Surface *surfaces[] = {&dstImgSurf, nullptr};
|
||||
auto blitAllowed = blitEnqueueAllowed(cmdType) && blitEnqueueImageAllowed(origin, region, *dstImage);
|
||||
if (mapAllocation) {
|
||||
surfaces[1] = &mapSurface;
|
||||
mapSurface.setGraphicsAllocation(mapAllocation);
|
||||
@@ -66,7 +69,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
|
||||
if (region[0] != 0 &&
|
||||
region[1] != 0 &&
|
||||
region[2] != 0) {
|
||||
auto &csr = getCommandStreamReceiver(blitAllowed);
|
||||
bool status = csr.createAllocationForHostSurface(hostPtrSurf, false);
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
@@ -94,7 +96,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
|
||||
auto eBuiltInOps = EBuiltInOps::CopyBufferToImage3d;
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite == CL_TRUE, blitAllowed);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite == CL_TRUE, csr);
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA, static_cast<cl_mem>(dstImage));
|
||||
|
||||
@@ -10,6 +10,7 @@ set(IGDRCL_SRCS_tests_command_queue
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/buffer_operations_fixture.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/csr_selection_args_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/dispatch_walker_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_barrier_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_command_without_kernel_tests.cpp
|
||||
|
||||
@@ -230,23 +230,19 @@ TEST(CommandQueue, givenDeviceWhenCreatingCommandQueueThenPickCsrFromDefaultEngi
|
||||
|
||||
struct CommandQueueWithBlitOperationsTests : public ::testing::TestWithParam<uint32_t> {};
|
||||
|
||||
TEST_P(CommandQueueWithBlitOperationsTests, givenDeviceNotSupportingBlitOperationsWhenQueueIsCreatedThenDontRegisterBcsCsr) {
|
||||
TEST(CommandQueue, givenDeviceNotSupportingBlitOperationsWhenQueueIsCreatedThenDontRegisterBcsCsr) {
|
||||
HardwareInfo hwInfo = *defaultHwInfo;
|
||||
hwInfo.capabilityTable.blitterOperationsSupported = false;
|
||||
auto mockDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
|
||||
MockCommandQueue cmdQ(nullptr, mockDevice.get(), 0, false);
|
||||
auto cmdType = GetParam();
|
||||
|
||||
EXPECT_EQ(nullptr, cmdQ.getBcsCommandStreamReceiver());
|
||||
|
||||
auto defaultCsr = mockDevice->getDefaultEngine().commandStreamReceiver;
|
||||
EXPECT_EQ(defaultCsr, &cmdQ.getGpgpuCommandStreamReceiver());
|
||||
|
||||
auto blitAllowed = cmdQ.blitEnqueueAllowed(cmdType);
|
||||
EXPECT_EQ(defaultCsr, &cmdQ.getCommandStreamReceiver(blitAllowed));
|
||||
}
|
||||
|
||||
HWTEST_P(CommandQueueWithBlitOperationsTests, givenDeviceWithSubDevicesSupportingBlitOperationsWhenQueueIsCreatedThenBcsIsTakenFromFirstSubDevice) {
|
||||
TEST(CommandQueue, givenDeviceWithSubDevicesSupportingBlitOperationsWhenQueueIsCreatedThenBcsIsTakenFromFirstSubDevice) {
|
||||
DebugManagerStateRestore restorer;
|
||||
VariableBackup<bool> mockDeviceFlagBackup{&MockDevice::createSingleDevice, false};
|
||||
DebugManager.flags.CreateMultipleSubDevices.set(2);
|
||||
@@ -263,13 +259,9 @@ HWTEST_P(CommandQueueWithBlitOperationsTests, givenDeviceWithSubDevicesSupportin
|
||||
auto &bcsEngine = subDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular);
|
||||
|
||||
MockCommandQueue cmdQ(nullptr, device.get(), 0, false);
|
||||
auto cmdType = GetParam();
|
||||
auto blitAllowed = cmdQ.blitEnqueueAllowed(cmdType);
|
||||
|
||||
EXPECT_NE(nullptr, cmdQ.getBcsCommandStreamReceiver());
|
||||
EXPECT_EQ(bcsEngine.commandStreamReceiver, cmdQ.getBcsCommandStreamReceiver());
|
||||
EXPECT_EQ(bcsEngine.commandStreamReceiver, &cmdQ.getCommandStreamReceiver(blitAllowed));
|
||||
EXPECT_EQ(bcsEngine.osContext, &cmdQ.getCommandStreamReceiver(blitAllowed).getOsContext());
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(uint32_t,
|
||||
@@ -1163,15 +1155,21 @@ TEST(CommandQueue, givenCopyOnlyQueueWhenCallingBlitEnqueueAllowedThenReturnTrue
|
||||
}
|
||||
hwInfo->capabilityTable.blitterOperationsSupported = false;
|
||||
|
||||
MultiGraphicsAllocation multiAlloc{1};
|
||||
MockGraphicsAllocation alloc{};
|
||||
multiAlloc.addAllocation(&alloc);
|
||||
alloc.memoryPool = MemoryPool::System4KBPages;
|
||||
CsrSelectionArgs selectionArgs{CL_COMMAND_READ_BUFFER, &multiAlloc, &multiAlloc, 0u, nullptr};
|
||||
|
||||
queue.isCopyOnly = false;
|
||||
EXPECT_EQ(queue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled(),
|
||||
queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER));
|
||||
queue.blitEnqueueAllowed(selectionArgs));
|
||||
|
||||
queue.isCopyOnly = true;
|
||||
EXPECT_TRUE(queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER));
|
||||
EXPECT_TRUE(queue.blitEnqueueAllowed(selectionArgs));
|
||||
}
|
||||
|
||||
TEST(CommandQueue, givenClCommandWhenCallingBlitEnqueueAllowedThenReturnCorrectValue) {
|
||||
TEST(CommandQueue, givenSimpleClCommandWhenCallingBlitEnqueueAllowedThenReturnCorrectValue) {
|
||||
MockContext context{};
|
||||
|
||||
MockCommandQueue queue(&context, context.getDevice(0), 0, false);
|
||||
@@ -1179,34 +1177,72 @@ TEST(CommandQueue, givenClCommandWhenCallingBlitEnqueueAllowedThenReturnCorrectV
|
||||
queue.bcsEngine = &context.getDevice(0)->getDefaultEngine();
|
||||
}
|
||||
|
||||
bool supported = queue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled();
|
||||
MultiGraphicsAllocation multiAlloc{1};
|
||||
MockGraphicsAllocation alloc{};
|
||||
multiAlloc.addAllocation(&alloc);
|
||||
alloc.memoryPool = MemoryPool::System4KBPages;
|
||||
|
||||
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER));
|
||||
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_WRITE_BUFFER));
|
||||
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER));
|
||||
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER_RECT));
|
||||
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_WRITE_BUFFER_RECT));
|
||||
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER_RECT));
|
||||
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_SVM_MEMCPY));
|
||||
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_READ_IMAGE));
|
||||
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_WRITE_IMAGE));
|
||||
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE));
|
||||
EXPECT_FALSE(queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE_TO_BUFFER));
|
||||
for (cl_command_type cmdType : {CL_COMMAND_READ_BUFFER, CL_COMMAND_READ_BUFFER_RECT,
|
||||
CL_COMMAND_WRITE_BUFFER, CL_COMMAND_WRITE_BUFFER_RECT,
|
||||
CL_COMMAND_COPY_BUFFER, CL_COMMAND_COPY_BUFFER_RECT,
|
||||
CL_COMMAND_SVM_MAP, CL_COMMAND_SVM_UNMAP,
|
||||
CL_COMMAND_SVM_MEMCPY}) {
|
||||
CsrSelectionArgs args{cmdType, &multiAlloc, &multiAlloc, 0u, nullptr};
|
||||
|
||||
bool expectedValue = queue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled();
|
||||
if (cmdType == CL_COMMAND_COPY_IMAGE_TO_BUFFER) {
|
||||
expectedValue = false;
|
||||
}
|
||||
|
||||
EXPECT_EQ(expectedValue, queue.blitEnqueueAllowed(args));
|
||||
}
|
||||
}
|
||||
|
||||
TEST(CommandQueue, givenRegularClCommandWhenCallingBlitEnqueuePreferredThenReturnCorrectValue) {
|
||||
MockContext context{};
|
||||
MockCommandQueue queue{context};
|
||||
BuiltinOpParams builtinOpParams{};
|
||||
TEST(CommandQueue, givenImageTransferClCommandWhenCallingBlitEnqueueAllowedThenReturnCorrectValue) {
|
||||
DebugManagerStateRestore restore{};
|
||||
DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);
|
||||
DebugManager.flags.EnableBlitterForEnqueueImageOperations.set(1);
|
||||
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_READ_BUFFER, builtinOpParams));
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_WRITE_BUFFER, builtinOpParams));
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_READ_BUFFER_RECT, builtinOpParams));
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_WRITE_BUFFER_RECT, builtinOpParams));
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER_RECT, builtinOpParams));
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_READ_IMAGE, builtinOpParams));
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_WRITE_IMAGE, builtinOpParams));
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_IMAGE, builtinOpParams));
|
||||
MockContext context{};
|
||||
MockCommandQueue queue(&context, context.getDevice(0), 0, false);
|
||||
if (!queue.bcsEngine) {
|
||||
queue.bcsEngine = &context.getDevice(0)->getDefaultEngine();
|
||||
}
|
||||
|
||||
MockImageBase image{};
|
||||
auto alloc = static_cast<MockGraphicsAllocation *>(image.getGraphicsAllocation(0));
|
||||
alloc->memoryPool = MemoryPool::System4KBPages;
|
||||
|
||||
size_t origin[3] = {0, 0, 0};
|
||||
size_t region[3] = {1, 1, 1};
|
||||
{
|
||||
CsrSelectionArgs args{CL_COMMAND_READ_IMAGE, &image, {}, 0u, region, origin, nullptr};
|
||||
EXPECT_TRUE(queue.blitEnqueueAllowed(args));
|
||||
}
|
||||
{
|
||||
CsrSelectionArgs args{CL_COMMAND_WRITE_IMAGE, {}, &image, 0u, region, nullptr, origin};
|
||||
EXPECT_TRUE(queue.blitEnqueueAllowed(args));
|
||||
}
|
||||
{
|
||||
CsrSelectionArgs args{CL_COMMAND_COPY_IMAGE, &image, &image, 0u, region, origin, origin};
|
||||
EXPECT_TRUE(queue.blitEnqueueAllowed(args));
|
||||
}
|
||||
}
|
||||
|
||||
TEST(CommandQueue, givenImageToBufferClCommandWhenCallingBlitEnqueueAllowedThenReturnCorrectValue) {
|
||||
MockContext context{};
|
||||
MockCommandQueue queue(&context, context.getDevice(0), 0, false);
|
||||
if (!queue.bcsEngine) {
|
||||
queue.bcsEngine = &context.getDevice(0)->getDefaultEngine();
|
||||
}
|
||||
|
||||
MultiGraphicsAllocation multiAlloc{1};
|
||||
MockGraphicsAllocation alloc{};
|
||||
multiAlloc.addAllocation(&alloc);
|
||||
alloc.memoryPool = MemoryPool::System4KBPages;
|
||||
|
||||
CsrSelectionArgs args{CL_COMMAND_COPY_IMAGE_TO_BUFFER, &multiAlloc, &multiAlloc, 0u, nullptr};
|
||||
EXPECT_FALSE(queue.blitEnqueueAllowed(args));
|
||||
}
|
||||
|
||||
TEST(CommandQueue, givenLocalToLocalCopyBufferCommandWhenCallingBlitEnqueuePreferredThenReturnValueBasedOnDebugFlagAndHwPreference) {
|
||||
@@ -1221,15 +1257,18 @@ TEST(CommandQueue, givenLocalToLocalCopyBufferCommandWhenCallingBlitEnqueuePrefe
|
||||
MockBuffer dstMemObj{dstGraphicsAllocation};
|
||||
builtinOpParams.srcMemObj = &srcMemObj;
|
||||
builtinOpParams.dstMemObj = &dstMemObj;
|
||||
|
||||
srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory;
|
||||
dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory;
|
||||
CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr};
|
||||
|
||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1);
|
||||
EXPECT_EQ(preferBlitterHw, queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams));
|
||||
EXPECT_EQ(preferBlitterHw, queue.blitEnqueuePreferred(args));
|
||||
|
||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0);
|
||||
EXPECT_FALSE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams));
|
||||
EXPECT_FALSE(queue.blitEnqueuePreferred(args));
|
||||
|
||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams));
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(args));
|
||||
}
|
||||
|
||||
TEST(CommandQueue, givenNotLocalToLocalCopyBufferCommandWhenCallingBlitEnqueuePreferredThenReturnTrueRegardlessOfDebugFlag) {
|
||||
@@ -1244,73 +1283,41 @@ TEST(CommandQueue, givenNotLocalToLocalCopyBufferCommandWhenCallingBlitEnqueuePr
|
||||
builtinOpParams.srcMemObj = &srcMemObj;
|
||||
builtinOpParams.dstMemObj = &dstMemObj;
|
||||
|
||||
srcGraphicsAllocation.memoryPool = MemoryPool::System4KBPages;
|
||||
dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory;
|
||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1);
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams));
|
||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0);
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams));
|
||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams));
|
||||
{
|
||||
srcGraphicsAllocation.memoryPool = MemoryPool::System4KBPages;
|
||||
dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory;
|
||||
CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr};
|
||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1);
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(args));
|
||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0);
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(args));
|
||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(args));
|
||||
}
|
||||
|
||||
srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory;
|
||||
dstGraphicsAllocation.memoryPool = MemoryPool::System4KBPages;
|
||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1);
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams));
|
||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0);
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams));
|
||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams));
|
||||
}
|
||||
{
|
||||
srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory;
|
||||
dstGraphicsAllocation.memoryPool = MemoryPool::System4KBPages;
|
||||
CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr};
|
||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1);
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(args));
|
||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0);
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(args));
|
||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(args));
|
||||
}
|
||||
|
||||
TEST(CommandQueue, givenLocalToLocalSvmCopyCommandWhenCallingBlitEnqueuePreferredThenReturnValueBasedOnDebugFlagAndHwPreference) {
|
||||
const bool preferBlitterHw = ClHwHelper::get(::defaultHwInfo->platform.eRenderCoreFamily).preferBlitterForLocalToLocalTransfers();
|
||||
DebugManagerStateRestore restore{};
|
||||
MockContext context{};
|
||||
MockCommandQueue queue{context};
|
||||
BuiltinOpParams builtinOpParams{};
|
||||
MockGraphicsAllocation srcSvmAlloc{};
|
||||
MockGraphicsAllocation dstSvmAlloc{};
|
||||
builtinOpParams.srcSvmAlloc = &srcSvmAlloc;
|
||||
builtinOpParams.dstSvmAlloc = &dstSvmAlloc;
|
||||
|
||||
srcSvmAlloc.memoryPool = MemoryPool::LocalMemory;
|
||||
dstSvmAlloc.memoryPool = MemoryPool::LocalMemory;
|
||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1);
|
||||
EXPECT_EQ(preferBlitterHw, queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams));
|
||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0);
|
||||
EXPECT_FALSE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams));
|
||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams));
|
||||
}
|
||||
|
||||
TEST(CommandQueue, givenNotLocalToLocalSvmCopyCommandWhenCallingBlitEnqueuePreferredThenReturnTrueRegardlessOfDebugFlag) {
|
||||
DebugManagerStateRestore restore{};
|
||||
MockContext context{};
|
||||
MockCommandQueue queue{context};
|
||||
BuiltinOpParams builtinOpParams{};
|
||||
MockGraphicsAllocation srcSvmAlloc{};
|
||||
MockGraphicsAllocation dstSvmAlloc{};
|
||||
builtinOpParams.srcSvmAlloc = &srcSvmAlloc;
|
||||
builtinOpParams.dstSvmAlloc = &dstSvmAlloc;
|
||||
|
||||
srcSvmAlloc.memoryPool = MemoryPool::System4KBPages;
|
||||
dstSvmAlloc.memoryPool = MemoryPool::LocalMemory;
|
||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1);
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams));
|
||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0);
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams));
|
||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams));
|
||||
|
||||
srcSvmAlloc.memoryPool = MemoryPool::LocalMemory;
|
||||
dstSvmAlloc.memoryPool = MemoryPool::System4KBPages;
|
||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1);
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams));
|
||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0);
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams));
|
||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams));
|
||||
{
|
||||
srcGraphicsAllocation.memoryPool = MemoryPool::System4KBPages;
|
||||
dstGraphicsAllocation.memoryPool = MemoryPool::System4KBPages;
|
||||
CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr};
|
||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1);
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(args));
|
||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0);
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(args));
|
||||
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
|
||||
EXPECT_TRUE(queue.blitEnqueuePreferred(args));
|
||||
}
|
||||
}
|
||||
|
||||
TEST(CommandQueue, givenCopySizeAndOffsetWhenCallingBlitEnqueueImageAllowedThenReturnCorrectValue) {
|
||||
|
||||
213
opencl/test/unit_test/command_queue/csr_selection_args_tests.cpp
Normal file
213
opencl/test/unit_test/command_queue/csr_selection_args_tests.cpp
Normal file
@@ -0,0 +1,213 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/command_queue/csr_selection_args.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_buffer.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_image.h"
|
||||
#include "test.h"
|
||||
|
||||
namespace NEO {
|
||||
// Builds CsrSelectionArgs from buffer objects for write, read and copy commands and checks that
// the command type, the size pointer and the source/destination allocations are stored, and that
// the reported transfer direction matches the memory pools assigned to the allocations.
TEST(CsrSelectionArgsTests, givenBuffersWhenCreatingCsrSelectionArgsThenSetupArgsCorrectly) {
    const uint32_t deviceIndex = 2u;
    // Marker address only: the test compares this pointer by value.
    const size_t *sizePtr = reinterpret_cast<size_t *>(0x1234);

    MockGraphicsAllocation firstAllocation{deviceIndex, nullptr, 1024u};
    MockGraphicsAllocation secondAllocation{deviceIndex, nullptr, 1024u};
    MockBuffer firstBuffer{firstAllocation};
    MockBuffer secondBuffer{secondAllocation};

    // Assertions shared by every scenario below.
    const auto expectCommonFields = [sizePtr](const CsrSelectionArgs &args,
                                              cl_command_type expectedCmdType,
                                              TransferDirection expectedDirection) {
        EXPECT_EQ(expectedCmdType, args.cmdType);
        EXPECT_EQ(expectedDirection, args.direction);
        EXPECT_EQ(sizePtr, args.size);
    };

    { // write into a buffer backed by system memory
        firstAllocation.memoryPool = MemoryPool::System4KBPages;
        CsrSelectionArgs args{CL_COMMAND_WRITE_BUFFER, {}, &firstBuffer, deviceIndex, sizePtr};
        expectCommonFields(args, CL_COMMAND_WRITE_BUFFER, TransferDirection::HostToHost);
        EXPECT_EQ(&firstAllocation, args.dstResource.allocation);
    }
    { // write into a buffer backed by local memory
        firstAllocation.memoryPool = MemoryPool::LocalMemory;
        CsrSelectionArgs args{CL_COMMAND_WRITE_BUFFER, {}, &firstBuffer, deviceIndex, sizePtr};
        expectCommonFields(args, CL_COMMAND_WRITE_BUFFER, TransferDirection::HostToLocal);
        EXPECT_EQ(&firstAllocation, args.dstResource.allocation);
    }

    { // read from a buffer backed by local memory
        firstAllocation.memoryPool = MemoryPool::LocalMemory;
        CsrSelectionArgs args{CL_COMMAND_READ_BUFFER, &firstBuffer, {}, deviceIndex, sizePtr};
        expectCommonFields(args, CL_COMMAND_READ_BUFFER, TransferDirection::LocalToHost);
        EXPECT_EQ(&firstAllocation, args.srcResource.allocation);
    }
    { // read from a buffer backed by system memory
        firstAllocation.memoryPool = MemoryPool::System4KBPages;
        CsrSelectionArgs args{CL_COMMAND_READ_BUFFER, &firstBuffer, {}, deviceIndex, sizePtr};
        expectCommonFields(args, CL_COMMAND_READ_BUFFER, TransferDirection::HostToHost);
        EXPECT_EQ(&firstAllocation, args.srcResource.allocation);
    }

    { // copy: local-memory source, system-memory destination
        firstAllocation.memoryPool = MemoryPool::LocalMemory;
        secondAllocation.memoryPool = MemoryPool::System4KBPages;
        CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &firstBuffer, &secondBuffer, deviceIndex, sizePtr};
        expectCommonFields(args, CL_COMMAND_COPY_BUFFER, TransferDirection::LocalToHost);
        EXPECT_EQ(&firstAllocation, args.srcResource.allocation);
        EXPECT_EQ(&secondAllocation, args.dstResource.allocation);
    }
    { // copy: system-memory source, local-memory destination
        firstAllocation.memoryPool = MemoryPool::System4KBPages;
        secondAllocation.memoryPool = MemoryPool::LocalMemory;
        CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &firstBuffer, &secondBuffer, deviceIndex, sizePtr};
        expectCommonFields(args, CL_COMMAND_COPY_BUFFER, TransferDirection::HostToLocal);
        EXPECT_EQ(&firstAllocation, args.srcResource.allocation);
        EXPECT_EQ(&secondAllocation, args.dstResource.allocation);
    }
}
|
||||
|
||||
// Builds CsrSelectionArgs from image objects for write, read and copy commands and checks that
// the command type, the size pointer, and each used resource's image, allocation and origin are
// stored, and that the reported transfer direction matches the memory pools of the allocations.
TEST(CsrSelectionArgsTests, givenImagesWhenCreatingCsrSelectionArgsThenSetupArgsCorrectly) {
    const uint32_t deviceIndex = 2u;
    // Marker addresses only: the test compares these pointers by value.
    const size_t *sizePtr = reinterpret_cast<size_t *>(0x1234);
    const size_t *firstOrigin = reinterpret_cast<size_t *>(0x12345);
    const size_t *secondOrigin = reinterpret_cast<size_t *>(0x123456);

    MockImageBase firstImage{deviceIndex};
    MockImageBase secondImage{deviceIndex};
    MockGraphicsAllocation &firstAllocation = *firstImage.graphicsAllocation;
    MockGraphicsAllocation &secondAllocation = *secondImage.graphicsAllocation;

    // Assertions shared by every scenario below.
    const auto expectCommonFields = [sizePtr](const CsrSelectionArgs &args,
                                              cl_command_type expectedCmdType,
                                              TransferDirection expectedDirection) {
        EXPECT_EQ(expectedCmdType, args.cmdType);
        EXPECT_EQ(expectedDirection, args.direction);
        EXPECT_EQ(sizePtr, args.size);
    };
    // Checks one side (source or destination) of the selection arguments.
    const auto expectImageResource = [](const auto &resource,
                                        auto *expectedImage,
                                        auto *expectedAllocation,
                                        const size_t *expectedOrigin) {
        EXPECT_EQ(expectedImage, resource.image);
        EXPECT_EQ(expectedAllocation, resource.allocation);
        EXPECT_EQ(expectedOrigin, resource.imageOrigin);
    };

    { // write into an image backed by system memory
        firstAllocation.memoryPool = MemoryPool::System4KBPages;
        CsrSelectionArgs args{CL_COMMAND_WRITE_IMAGE, {}, &firstImage, deviceIndex, sizePtr, nullptr, firstOrigin};
        expectCommonFields(args, CL_COMMAND_WRITE_IMAGE, TransferDirection::HostToHost);
        expectImageResource(args.dstResource, &firstImage, &firstAllocation, firstOrigin);
    }
    { // write into an image backed by local memory
        firstAllocation.memoryPool = MemoryPool::LocalMemory;
        CsrSelectionArgs args{CL_COMMAND_WRITE_IMAGE, {}, &firstImage, deviceIndex, sizePtr, nullptr, firstOrigin};
        expectCommonFields(args, CL_COMMAND_WRITE_IMAGE, TransferDirection::HostToLocal);
        expectImageResource(args.dstResource, &firstImage, &firstAllocation, firstOrigin);
    }

    { // read from an image backed by system memory
        firstAllocation.memoryPool = MemoryPool::System4KBPages;
        CsrSelectionArgs args{CL_COMMAND_READ_IMAGE, &firstImage, nullptr, deviceIndex, sizePtr, firstOrigin, nullptr};
        expectCommonFields(args, CL_COMMAND_READ_IMAGE, TransferDirection::HostToHost);
        expectImageResource(args.srcResource, &firstImage, &firstAllocation, firstOrigin);
    }
    { // read from an image backed by local memory
        firstAllocation.memoryPool = MemoryPool::LocalMemory;
        CsrSelectionArgs args{CL_COMMAND_READ_IMAGE, &firstImage, nullptr, deviceIndex, sizePtr, firstOrigin, nullptr};
        expectCommonFields(args, CL_COMMAND_READ_IMAGE, TransferDirection::LocalToHost);
        expectImageResource(args.srcResource, &firstImage, &firstAllocation, firstOrigin);
    }

    { // copy: system-memory source, local-memory destination
        firstAllocation.memoryPool = MemoryPool::System4KBPages;
        secondAllocation.memoryPool = MemoryPool::LocalMemory;
        CsrSelectionArgs args{CL_COMMAND_COPY_IMAGE, &firstImage, &secondImage, deviceIndex, sizePtr, firstOrigin, secondOrigin};
        expectCommonFields(args, CL_COMMAND_COPY_IMAGE, TransferDirection::HostToLocal);
        expectImageResource(args.srcResource, &firstImage, &firstAllocation, firstOrigin);
        expectImageResource(args.dstResource, &secondImage, &secondAllocation, secondOrigin);
    }
    { // copy: local-memory source, system-memory destination
        firstAllocation.memoryPool = MemoryPool::LocalMemory;
        secondAllocation.memoryPool = MemoryPool::System4KBPages;
        CsrSelectionArgs args{CL_COMMAND_COPY_IMAGE, &firstImage, &secondImage, deviceIndex, sizePtr, firstOrigin, secondOrigin};
        expectCommonFields(args, CL_COMMAND_COPY_IMAGE, TransferDirection::LocalToHost);
        expectImageResource(args.srcResource, &firstImage, &firstAllocation, firstOrigin);
        expectImageResource(args.dstResource, &secondImage, &secondAllocation, secondOrigin);
    }
}
|
||||
|
||||
// Checks that CsrSelectionArgs built from two MultiGraphicsAllocations stores the command type,
// the size pointer and both underlying allocations, and that the reported transfer direction
// follows the memory pools of the source and destination allocations.
TEST(CsrSelectionArgsTests, givenGraphicsAllocationsWhenCreatingCsrSelectionArgsThenSetupArgsCorrectly) {
    const uint32_t rootDeviceIndex = 2u;
    // Sentinel address: the test only compares this pointer by value against args.size.
    const size_t *size = reinterpret_cast<size_t *>(0x1234);

    MockGraphicsAllocation allocation1{rootDeviceIndex, nullptr, 1024u};
    MockGraphicsAllocation allocation2{rootDeviceIndex, nullptr, 1024u};
    MultiGraphicsAllocation multiAlloc1 = GraphicsAllocationHelper::toMultiGraphicsAllocation(&allocation1);
    MultiGraphicsAllocation multiAlloc2 = GraphicsAllocationHelper::toMultiGraphicsAllocation(&allocation2);

    // Places the source/destination allocations in the requested pools, builds fresh args
    // and verifies every field, including the direction expected for that pool pairing.
    const auto verifyArgsForPools = [&](auto srcPool, auto dstPool, auto expectedDirection) {
        allocation1.memoryPool = srcPool;
        allocation2.memoryPool = dstPool;

        CsrSelectionArgs args{CL_COMMAND_SVM_MEMCPY, &multiAlloc1, &multiAlloc2, rootDeviceIndex, size};

        EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_SVM_MEMCPY), args.cmdType);
        EXPECT_EQ(expectedDirection, args.direction);
        EXPECT_EQ(size, args.size);
        EXPECT_EQ(&allocation1, args.srcResource.allocation);
        EXPECT_EQ(&allocation2, args.dstResource.allocation);
    };

    verifyArgsForPools(MemoryPool::System4KBPages, MemoryPool::System4KBPages, TransferDirection::HostToHost);
    verifyArgsForPools(MemoryPool::System4KBPages, MemoryPool::LocalMemory, TransferDirection::HostToLocal);
    verifyArgsForPools(MemoryPool::LocalMemory, MemoryPool::System4KBPages, TransferDirection::LocalToHost);
    verifyArgsForPools(MemoryPool::LocalMemory, MemoryPool::LocalMemory, TransferDirection::LocalToLocal);
}
|
||||
|
||||
} // namespace NEO
|
||||
@@ -353,9 +353,9 @@ HWTEST_F(EnqueueSvmMemCopyTest, givenCommandQueueWhenEnqueueSVMMemcpyIsCalledThe
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_TRUE(mockCmdQ->notifyEnqueueSVMMemcpyCalled);
|
||||
|
||||
auto blitAllowed = mockCmdQ->blitEnqueueAllowed(CL_COMMAND_SVM_MEMCPY);
|
||||
|
||||
auto &csr = mockCmdQ->getCommandStreamReceiver(blitAllowed);
|
||||
MultiGraphicsAllocation &srcSvmAlloc = context->getSVMAllocsManager()->getSVMAlloc(srcSvmPtr)->gpuAllocations;
|
||||
CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, &srcSvmAlloc, {}, 0, nullptr};
|
||||
CommandStreamReceiver &csr = mockCmdQ->selectCsrForBuiltinOperation(csrSelectionArgs);
|
||||
EXPECT_EQ(EngineHelpers::isBcs(csr.getOsContext().getEngineType()), mockCmdQ->useBcsCsrOnNotifyEnabled);
|
||||
|
||||
alignedFree(dstHostPtr);
|
||||
|
||||
@@ -312,7 +312,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
||||
return BaseClass::isCacheFlushForBcsRequired();
|
||||
}
|
||||
|
||||
bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) override {
|
||||
bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) const override {
|
||||
isBlitEnqueueImageAllowed = BaseClass::blitEnqueueImageAllowed(origin, region, image);
|
||||
return isBlitEnqueueImageAllowed;
|
||||
}
|
||||
@@ -330,7 +330,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
||||
bool notifyEnqueueSVMMemcpyCalled = false;
|
||||
bool cpuDataTransferHandlerCalled = false;
|
||||
bool useBcsCsrOnNotifyEnabled = false;
|
||||
bool isBlitEnqueueImageAllowed = false;
|
||||
mutable bool isBlitEnqueueImageAllowed = false;
|
||||
struct OverrideReturnValue {
|
||||
bool enabled = false;
|
||||
bool returnValue = false;
|
||||
|
||||
@@ -20,12 +20,15 @@ struct MockImageBase : public Image {
|
||||
using Image::imageFormat;
|
||||
MockGraphicsAllocation *graphicsAllocation = nullptr;
|
||||
|
||||
MockImageBase() : Image(
|
||||
nullptr, MemoryProperties(), cl_mem_flags{}, 0, 0, nullptr, nullptr, cl_image_format{},
|
||||
cl_image_desc{}, false, GraphicsAllocationHelper::toMultiGraphicsAllocation(new MockGraphicsAllocation(nullptr, 0)), false,
|
||||
0, 0, ClSurfaceFormatInfo{}, nullptr),
|
||||
graphicsAllocation(static_cast<MockGraphicsAllocation *>(multiGraphicsAllocation.getGraphicsAllocation(0))) {
|
||||
// Builds the mock on top of Image using a heap-allocated MockGraphicsAllocation created for
// `rootDeviceIndex` and wrapped through GraphicsAllocationHelper::toMultiGraphicsAllocation.
// The `graphicsAllocation` member is then set to the allocation that multiGraphicsAllocation
// returns for the same root device index, cast back to MockGraphicsAllocation*.
MockImageBase(uint32_t rootDeviceIndex)
|
||||
: Image(nullptr, MemoryProperties(), cl_mem_flags{}, 0, 0, nullptr, nullptr, cl_image_format{},
|
||||
cl_image_desc{}, false, GraphicsAllocationHelper::toMultiGraphicsAllocation(new MockGraphicsAllocation(rootDeviceIndex, nullptr, 0)), false,
|
||||
0, 0, ClSurfaceFormatInfo{}, nullptr),
|
||||
graphicsAllocation(static_cast<MockGraphicsAllocation *>(multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex))) {
|
||||
}
|
||||
|
||||
MockImageBase() : MockImageBase(0u) {}
|
||||
|
||||
// Frees the allocation referenced by `graphicsAllocation`; the constructor creates that
// object with `new`, so it has to be deleted here.
~MockImageBase() override {
|
||||
delete this->graphicsAllocation;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user