Correct CSR used for hostptr allocations in builtins

Signed-off-by: Maciej Dziuban <maciej.dziuban@intel.com>
This commit is contained in:
Maciej Dziuban
2021-08-31 19:22:58 +00:00
committed by Compute-Runtime-Automation
parent 582bb3786d
commit 69ae9dc9c2
17 changed files with 229 additions and 229 deletions

View File

@ -43,6 +43,7 @@ set(RUNTIME_SRCS_COMMAND_QUEUE
${CMAKE_CURRENT_SOURCE_DIR}/local_work_size.cpp
${CMAKE_CURRENT_SOURCE_DIR}/resource_barrier.cpp
${CMAKE_CURRENT_SOURCE_DIR}/resource_barrier.h
${CMAKE_CURRENT_SOURCE_DIR}/transfer_direction.h
)
if(SUPPORT_XEHP_AND_LATER)

View File

@ -144,9 +144,9 @@ CommandStreamReceiver *CommandQueue::getBcsForAuxTranslation() const {
return nullptr;
}
CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(cl_command_type cmdType, const MultiDispatchInfo &dispatchInfo) const {
const bool blitAllowed = blitEnqueueAllowed(cmdType, dispatchInfo.peekBuiltinOpParams());
const bool blitPreferred = blitEnqueuePreferred(cmdType, dispatchInfo.peekBuiltinOpParams());
CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(cl_command_type cmdType, TransferDirection transferDirection, bool imagesValidForBlit) const {
const bool blitAllowed = blitEnqueueAllowed(cmdType, imagesValidForBlit);
const bool blitPreferred = blitEnqueuePreferred(transferDirection);
const bool blitRequired = isCopyOnly;
const bool blit = blitAllowed && (blitPreferred || blitRequired);
@ -725,7 +725,7 @@ bool CommandQueue::queueDependenciesClearRequired() const {
return isOOQEnabled() || DebugManager.flags.OmitTimestampPacketDependencies.get();
}
bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType, const BuiltinOpParams &params) const {
bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType, bool imagesValidForBlit) const {
if (bcsEngine == nullptr) {
return false;
}
@ -748,32 +748,16 @@ bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType, const BuiltinOpPa
case CL_COMMAND_SVM_MEMCPY:
return true;
case CL_COMMAND_READ_IMAGE:
return blitEnqueueImageAllowed(&params.srcOffset[0], &params.size[0], *static_cast<Image *>(params.srcMemObj));
case CL_COMMAND_WRITE_IMAGE:
return blitEnqueueImageAllowed(&params.dstOffset[0], &params.size[0], *static_cast<Image *>(params.dstMemObj));
case CL_COMMAND_COPY_IMAGE:
return blitEnqueueImageAllowed(&params.srcOffset[0], &params.size[0], *static_cast<Image *>(params.srcMemObj)) &&
blitEnqueueImageAllowed(&params.dstOffset[0], &params.size[0], *static_cast<Image *>(params.dstMemObj));
return imagesValidForBlit;
default:
return false;
}
}
bool CommandQueue::blitEnqueuePreferred(cl_command_type cmdType, const BuiltinOpParams &builtinOpParams) const {
bool isLocalToLocal = false;
if (cmdType == CL_COMMAND_COPY_BUFFER &&
builtinOpParams.srcMemObj->getGraphicsAllocation(device->getRootDeviceIndex())->isAllocatedInLocalMemoryPool() &&
builtinOpParams.dstMemObj->getGraphicsAllocation(device->getRootDeviceIndex())->isAllocatedInLocalMemoryPool()) {
isLocalToLocal = true;
}
if (cmdType == CL_COMMAND_SVM_MEMCPY &&
builtinOpParams.srcSvmAlloc->isAllocatedInLocalMemoryPool() &&
builtinOpParams.dstSvmAlloc->isAllocatedInLocalMemoryPool()) {
isLocalToLocal = true;
}
if (isLocalToLocal) {
bool CommandQueue::blitEnqueuePreferred(TransferDirection transferDirection) const {
if (transferDirection == TransferDirection::LocalToLocal) {
if (DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.get() != -1) {
return static_cast<bool>(DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.get());
}

View File

@ -8,6 +8,7 @@
#pragma once
#include "shared/source/helpers/engine_control.h"
#include "opencl/source/command_queue/transfer_direction.h"
#include "opencl/source/event/event.h"
#include "opencl/source/helpers/base_object.h"
#include "opencl/source/helpers/dispatch_info.h"
@ -224,7 +225,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
MOCKABLE_VIRTUAL CommandStreamReceiver &getGpgpuCommandStreamReceiver() const;
CommandStreamReceiver *getBcsCommandStreamReceiver() const;
CommandStreamReceiver *getBcsForAuxTranslation() const;
MOCKABLE_VIRTUAL CommandStreamReceiver &selectCsrForBuiltinOperation(cl_command_type cmdType, const MultiDispatchInfo &dispatchInfo) const;
MOCKABLE_VIRTUAL CommandStreamReceiver &selectCsrForBuiltinOperation(cl_command_type cmdType, TransferDirection transferDirection, bool imagesValidForBlit) const;
Device &getDevice() const noexcept;
ClDevice &getClDevice() const { return *device; }
Context &getContext() const { return *context; }
@ -353,8 +354,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
cl_uint numEventsInWaitList, const cl_event *eventWaitList);
void providePerformanceHint(TransferProperties &transferProperties);
bool queueDependenciesClearRequired() const;
bool blitEnqueueAllowed(cl_command_type cmdType, const BuiltinOpParams &params) const;
bool blitEnqueuePreferred(cl_command_type cmdType, const BuiltinOpParams &builtinOpParams) const;
bool blitEnqueueAllowed(cl_command_type cmdType, bool imagesValidForBlit) const;
bool blitEnqueuePreferred(TransferDirection transferDirection) const;
MOCKABLE_VIRTUAL bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) const;
void aubCaptureHook(bool &blocking, bool &clearAllDependencies, const MultiDispatchInfo &multiDispatchInfo);
virtual bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const = 0;

View File

@ -31,6 +31,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBuffer(
cl_event *event) {
auto eBuiltInOpsType = EBuiltInOps::CopyBufferToBuffer;
GraphicsAllocation *srcBufferAlloc = srcBuffer->getGraphicsAllocation(device->getRootDeviceIndex());
GraphicsAllocation *dstBufferAlloc = dstBuffer->getGraphicsAllocation(device->getRootDeviceIndex());
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_COPY_BUFFER, TransferDirectionHelper::fromGfxAllocToGfxAlloc(*srcBufferAlloc, *dstBufferAlloc), false);
if (forceStateless(std::max(srcBuffer->getSize(), dstBuffer->getSize()))) {
eBuiltInOpsType = EBuiltInOps::CopyBufferToBufferStateless;
}
@ -48,7 +52,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBuffer(
MemObjSurface s2(dstBuffer);
Surface *surfaces[] = {&s1, &s2};
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_COPY_BUFFER, dispatchInfo);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER>(dispatchInfo, surfaces, eBuiltInOpsType, numEventsInWaitList, eventWaitList, event, false, csr);
return CL_SUCCESS;

View File

@ -34,6 +34,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferRect(
cl_event *event) {
auto eBuiltInOps = EBuiltInOps::CopyBufferRect;
GraphicsAllocation *srcBufferAlloc = srcBuffer->getGraphicsAllocation(device->getRootDeviceIndex());
GraphicsAllocation *dstBufferAlloc = dstBuffer->getGraphicsAllocation(device->getRootDeviceIndex());
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_COPY_BUFFER_RECT, TransferDirectionHelper::fromGfxAllocToGfxAlloc(*srcBufferAlloc, *dstBufferAlloc), false);
if (forceStateless(std::max(srcBuffer->getSize(), dstBuffer->getSize()))) {
eBuiltInOps = EBuiltInOps::CopyBufferRectStateless;
}
@ -54,7 +58,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferRect(
dc.dstSlicePitch = dstSlicePitch;
MultiDispatchInfo dispatchInfo(dc);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_COPY_BUFFER_RECT, dispatchInfo);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, false, csr);
return CL_SUCCESS;

View File

@ -32,6 +32,11 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyImage(
const cl_event *eventWaitList,
cl_event *event) {
const bool validImages = blitEnqueueImageAllowed(srcOrigin, region, *srcImage) && blitEnqueueImageAllowed(dstOrigin, region, *dstImage);
GraphicsAllocation *srcImageAlloc = srcImage->getGraphicsAllocation(getDevice().getRootDeviceIndex());
GraphicsAllocation *dstImageAlloc = dstImage->getGraphicsAllocation(getDevice().getRootDeviceIndex());
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_COPY_IMAGE, TransferDirectionHelper::fromGfxAllocToGfxAlloc(*srcImageAlloc, *dstImageAlloc), validImages);
MemObjSurface srcImgSurf(srcImage);
MemObjSurface dstImgSurf(dstImage);
Surface *surfaces[] = {&srcImgSurf, &dstImgSurf};
@ -50,7 +55,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyImage(
}
MultiDispatchInfo dispatchInfo(dc);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_COPY_IMAGE, dispatchInfo);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_IMAGE>(dispatchInfo, surfaces, EBuiltInOps::CopyImageToImage3d, numEventsInWaitList, eventWaitList, event, false, csr);
return CL_SUCCESS;

View File

@ -41,6 +41,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
bool isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, cmdType) : true;
bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, cmdType, blockingRead, size, ptr,
numEventsInWaitList, eventWaitList);
GraphicsAllocation *srcBufferAlloc = buffer->getGraphicsAllocation(rootDeviceIndex);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(cmdType, TransferDirectionHelper::fromGfxAllocToHost(*srcBufferAlloc), false);
InternalMemoryType memoryType = InternalMemoryType::NOT_SPECIFIED;
//check if we are dealing with SVM pointer here for which we already have an allocation
@ -99,7 +101,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
} else {
surfaces[1] = &hostPtrSurf;
if (size != 0) {
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, true);
bool status = csr.createAllocationForHostSurface(hostPtrSurf, true);
if (!status) {
return CL_OUT_OF_RESOURCES;
}
@ -126,7 +128,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
}
}
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, dispatchInfo);
if (nullptr == mapAllocation) {
notifyEnqueueReadBuffer(buffer, !!blockingRead, EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
}

View File

@ -37,6 +37,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
const cl_command_type cmdType = CL_COMMAND_READ_BUFFER_RECT;
auto isMemTransferNeeded = true;
GraphicsAllocation *srcBufferAlloc = buffer->getGraphicsAllocation(getDevice().getRootDeviceIndex());
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(cmdType, TransferDirectionHelper::fromGfxAllocToHost(*srcBufferAlloc), false);
if (buffer->isMemObjZeroCopy()) {
size_t bufferOffset;
size_t hostOffset;
@ -63,7 +66,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
if (region[0] != 0 &&
region[1] != 0 &&
region[2] != 0) {
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, true);
bool status = csr.createAllocationForHostSurface(hostPtrSurf, true);
if (!status) {
return CL_OUT_OF_RESOURCES;
}
@ -87,7 +90,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
dc.dstSlicePitch = hostSlicePitch;
MultiDispatchInfo dispatchInfo(dc);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER_RECT, dispatchInfo);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, csr);
if (context->isProvidingPerformanceHints()) {

View File

@ -42,6 +42,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
cl_event *event) {
cl_command_type cmdType = CL_COMMAND_READ_IMAGE;
const bool validImages = blitEnqueueImageAllowed(origin, region, *srcImage);
GraphicsAllocation *srcImageAlloc = srcImage->getGraphicsAllocation(getDevice().getRootDeviceIndex());
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(cmdType, TransferDirectionHelper::fromGfxAllocToHost(*srcImageAlloc), validImages);
auto isMemTransferNeeded = true;
if (srcImage->isMemObjZeroCopy()) {
size_t hostOffset;
@ -72,7 +76,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
if (region[0] != 0 &&
region[1] != 0 &&
region[2] != 0) {
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, true);
bool status = csr.createAllocationForHostSurface(hostPtrSurf, true);
if (!status) {
return CL_OUT_OF_RESOURCES;
}
@ -99,7 +103,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
auto eBuiltInOps = EBuiltInOps::CopyImage3dToBuffer;
MultiDispatchInfo dispatchInfo(dc);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_IMAGE, dispatchInfo);
if (nullptr == mapAllocation) {
notifyEnqueueReadImage(srcImage, static_cast<bool>(blockingRead), EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
}

View File

@ -126,7 +126,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMap(cl_bool blockingMap,
dc.unifiedMemoryArgsRequireMemSync = externalAppCall;
MultiDispatchInfo dispatchInfo(dc);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, dispatchInfo);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_SVM_MAP, TransferDirectionHelper::fromGfxAllocToHost(*gpuAllocation), false);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, blocking, csr);
if (event) {
@ -210,7 +210,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMUnmap(void *svmPtr,
dc.unifiedMemoryArgsRequireMemSync = externalAppCall;
MultiDispatchInfo dispatchInfo(dc);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, dispatchInfo);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_SVM_UNMAP, TransferDirectionHelper::fromHostToGfxAlloc(*gpuAllocation), false);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, false, csr);
if (event) {
@ -328,10 +328,13 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
cl_command_type cmdType;
if (copyType == SvmToHost) {
GraphicsAllocation *srcBufferAlloc = srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, TransferDirectionHelper::fromGfxAllocToHost(*srcBufferAlloc), false);
GeneralSurface srcSvmSurf(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
HostPtrSurface dstHostPtrSurf(dstPtr, size);
if (size != 0) {
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(dstHostPtrSurf, true);
bool status = csr.createAllocationForHostSurface(dstHostPtrSurf, true);
if (!status) {
return CL_OUT_OF_RESOURCES;
}
@ -343,16 +346,18 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
dispatchInfo.setBuiltinOpParams(operationParams);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, dispatchInfo);
notifyEnqueueSVMMemcpy(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex), !!blockingCopy, EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
} else if (copyType == HostToSvm) {
GraphicsAllocation *dstBufferAlloc = dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER, TransferDirectionHelper::fromHostToGfxAlloc(*dstBufferAlloc), false);
HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcPtr), size);
GeneralSurface dstSvmSurf(dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
cmdType = CL_COMMAND_WRITE_BUFFER;
if (size != 0) {
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(srcHostPtrSurf, false);
bool status = csr.createAllocationForHostSurface(srcHostPtrSurf, false);
if (!status) {
return CL_OUT_OF_RESOURCES;
}
@ -364,10 +369,13 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
surfaces[1] = &srcHostPtrSurf;
dispatchInfo.setBuiltinOpParams(operationParams);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER, dispatchInfo);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
} else if (copyType == SvmToSvm) {
GraphicsAllocation *srcBufferAlloc = srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
GraphicsAllocation *dstBufferAlloc = dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_SVM_MEMCPY, TransferDirectionHelper::fromGfxAllocToGfxAlloc(*srcBufferAlloc, *dstBufferAlloc), false);
GeneralSurface srcSvmSurf(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
GeneralSurface dstSvmSurf(dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
setOperationParams(operationParams, size, srcPtr, srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex),
@ -376,15 +384,15 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
surfaces[1] = &dstSvmSurf;
dispatchInfo.setBuiltinOpParams(operationParams);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_SVM_MEMCPY, dispatchInfo);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_SVM_MEMCPY>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
} else {
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_SVM_MEMCPY, TransferDirection::HostToHost, false);
HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcPtr), size);
HostPtrSurface dstHostPtrSurf(dstPtr, size);
cmdType = CL_COMMAND_WRITE_BUFFER;
if (size != 0) {
auto &csr = getGpgpuCommandStreamReceiver();
bool status = csr.createAllocationForHostSurface(srcHostPtrSurf, false);
status &= csr.createAllocationForHostSurface(dstHostPtrSurf, true);
if (!status) {
@ -398,7 +406,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
surfaces[1] = &dstHostPtrSurf;
dispatchInfo.setBuiltinOpParams(operationParams);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER, dispatchInfo);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
}
if (event) {

View File

@ -38,6 +38,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
auto isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, cmdType) : true;
bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, cmdType, blockingWrite, size, const_cast<void *>(ptr),
numEventsInWaitList, eventWaitList);
GraphicsAllocation *dstBufferAlloc = buffer->getGraphicsAllocation(rootDeviceIndex);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(cmdType, TransferDirectionHelper::fromHostToGfxAlloc(*dstBufferAlloc), false);
InternalMemoryType memoryType = InternalMemoryType::NOT_SPECIFIED;
//check if we are dealing with SVM pointer here for which we already have an allocation
@ -94,7 +96,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
} else {
surfaces[1] = &hostPtrSurf;
if (size != 0) {
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, false);
bool status = csr.createAllocationForHostSurface(hostPtrSurf, false);
if (!status) {
return CL_OUT_OF_RESOURCES;
}
@ -113,7 +115,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
dc.transferAllocation = mapAllocation ? mapAllocation : hostPtrSurf.getAllocation();
MultiDispatchInfo dispatchInfo(dc);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER, dispatchInfo);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
if (context->isProvidingPerformanceHints()) {

View File

@ -36,6 +36,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
const cl_command_type cmdType = CL_COMMAND_WRITE_BUFFER_RECT;
auto isMemTransferNeeded = true;
GraphicsAllocation *dstBufferAlloc = buffer->getGraphicsAllocation(device->getRootDeviceIndex());
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(cmdType, TransferDirectionHelper::fromHostToGfxAlloc(*dstBufferAlloc), false);
if (buffer->isMemObjZeroCopy()) {
size_t bufferOffset;
size_t hostOffset;
@ -62,7 +65,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
if (region[0] != 0 &&
region[1] != 0 &&
region[2] != 0) {
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, false);
bool status = csr.createAllocationForHostSurface(hostPtrSurf, false);
if (!status) {
return CL_OUT_OF_RESOURCES;
}
@ -86,7 +89,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
dc.dstSlicePitch = bufferSlicePitch;
MultiDispatchInfo dispatchInfo(dc);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER_RECT, dispatchInfo);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
if (context->isProvidingPerformanceHints()) {

View File

@ -37,6 +37,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
auto cmdType = CL_COMMAND_WRITE_IMAGE;
auto isMemTransferNeeded = true;
const bool validImages = blitEnqueueImageAllowed(origin, region, *dstImage);
GraphicsAllocation *dstImageAlloc = dstImage->getGraphicsAllocation(getDevice().getRootDeviceIndex());
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(cmdType, TransferDirectionHelper::fromHostToGfxAlloc(*dstImageAlloc), validImages);
if (dstImage->isMemObjZeroCopy()) {
size_t hostOffset;
Image::calculateHostPtrOffset(&hostOffset, origin, region, inputRowPitch, inputSlicePitch, dstImage->getImageDesc().image_type, dstImage->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes);
@ -65,7 +69,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
if (region[0] != 0 &&
region[1] != 0 &&
region[2] != 0) {
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, false);
bool status = csr.createAllocationForHostSurface(hostPtrSurf, false);
if (!status) {
return CL_OUT_OF_RESOURCES;
}
@ -92,7 +96,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
auto eBuiltInOps = EBuiltInOps::CopyBufferToImage3d;
MultiDispatchInfo dispatchInfo(dc);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_IMAGE, dispatchInfo);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite == CL_TRUE, csr);
if (context->isProvidingPerformanceHints()) {

View File

@ -0,0 +1,53 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/memory_manager/graphics_allocation.h"
namespace NEO {
// Classifies a memory transfer by where its source and destination reside.
// Enumerator names read <Source>To<Destination>.
//
// As produced by TransferDirectionHelper, "Local" means the side is a
// GraphicsAllocation for which isAllocatedInLocalMemoryPool() returns true;
// "Host" covers every other case, including a side that is a plain host pointer.
enum class TransferDirection {
    HostToHost,   // neither side is in the local memory pool
    HostToLocal,  // only the destination is in the local memory pool
    LocalToHost,  // only the source is in the local memory pool
    LocalToLocal, // both source and destination are in the local memory pool
};
// Stateless factory helpers that derive a TransferDirection from the
// allocations taking part in a transfer.
struct TransferDirectionHelper {
    // Transfer that reads from a graphics allocation and writes to host memory.
    // The destination side is always treated as non-local.
    static inline TransferDirection fromGfxAllocToHost(const GraphicsAllocation &src) {
        return create(src.isAllocatedInLocalMemoryPool(), false);
    }

    // Transfer that reads from host memory and writes to a graphics allocation.
    // The source side is always treated as non-local.
    static inline TransferDirection fromHostToGfxAlloc(const GraphicsAllocation &dst) {
        return create(false, dst.isAllocatedInLocalMemoryPool());
    }

    // Transfer between two graphics allocations; each side is classified by
    // its own isAllocatedInLocalMemoryPool() result.
    static inline TransferDirection fromGfxAllocToGfxAlloc(const GraphicsAllocation &src, const GraphicsAllocation &dst) {
        const bool sourceInLocalPool = src.isAllocatedInLocalMemoryPool();
        const bool destinationInLocalPool = dst.isAllocatedInLocalMemoryPool();
        return create(sourceInLocalPool, destinationInLocalPool);
    }

    // Maps the (source is local, destination is local) pair onto the enum:
    //   (true,  true)  -> LocalToLocal
    //   (true,  false) -> LocalToHost
    //   (false, true)  -> HostToLocal
    //   (false, false) -> HostToHost
    static inline TransferDirection create(bool srcLocal, bool dstLocal) {
        if (srcLocal && dstLocal) {
            return TransferDirection::LocalToLocal;
        }
        if (srcLocal) {
            return TransferDirection::LocalToHost;
        }
        if (dstLocal) {
            return TransferDirection::HostToLocal;
        }
        return TransferDirection::HostToHost;
    }
};
} // namespace NEO

View File

@ -283,7 +283,7 @@ TEST_P(CommandQueueWithBlitOperationsTests, givenDeviceNotSupportingBlitOperatio
BuiltinOpParams params = createParams(cmdType);
MultiDispatchInfo dispatchInfo{params};
EXPECT_EQ(defaultCsr, &cmdQ.selectCsrForBuiltinOperation(cmdType, dispatchInfo));
EXPECT_EQ(defaultCsr, &cmdQ.selectCsrForBuiltinOperation(cmdType, TransferDirection::LocalToHost, true));
}
HWTEST_P(CommandQueueWithBlitOperationsTests, givenDeviceWithSubDevicesSupportingBlitOperationsWhenQueueIsCreatedThenBcsIsTakenFromFirstSubDevice) {
@ -304,13 +304,10 @@ HWTEST_P(CommandQueueWithBlitOperationsTests, givenDeviceWithSubDevicesSupportin
MockCommandQueue cmdQ(nullptr, device.get(), 0, false);
auto cmdType = GetParam();
BuiltinOpParams params = createParams(cmdType);
MultiDispatchInfo dispatchInfo{params};
EXPECT_NE(nullptr, cmdQ.getBcsCommandStreamReceiver());
EXPECT_EQ(bcsEngine.commandStreamReceiver, cmdQ.getBcsCommandStreamReceiver());
EXPECT_EQ(bcsEngine.commandStreamReceiver, &cmdQ.selectCsrForBuiltinOperation(cmdType, dispatchInfo));
EXPECT_EQ(bcsEngine.osContext, &cmdQ.selectCsrForBuiltinOperation(cmdType, dispatchInfo).getOsContext());
auto &csr = cmdQ.selectCsrForBuiltinOperation(cmdType, TransferDirection::LocalToHost, true);
EXPECT_EQ(bcsEngine.commandStreamReceiver, &csr);
EXPECT_EQ(bcsEngine.osContext, &csr.getOsContext());
}
INSTANTIATE_TEST_CASE_P(uint32_t,
@ -1206,10 +1203,10 @@ TEST(CommandQueue, givenCopyOnlyQueueWhenCallingBlitEnqueueAllowedThenReturnTrue
queue.isCopyOnly = false;
EXPECT_EQ(queue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled(),
queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER, {}));
queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER, false));
queue.isCopyOnly = true;
EXPECT_TRUE(queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER, {}));
EXPECT_TRUE(queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER, false));
}
struct CommandQueueBuiltinTest : BuiltinOpParamsFixture, ::testing::Test {};
@ -1227,59 +1224,20 @@ TEST_F(CommandQueueBuiltinTest, givenClCommandWhenCallingBlitEnqueueAllowedThenR
bool supported = queue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled();
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER, {}));
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_WRITE_BUFFER, {}));
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER, {}));
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER_RECT, {}));
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_WRITE_BUFFER_RECT, {}));
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER_RECT, {}));
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_SVM_MEMCPY, {}));
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_READ_IMAGE, createParams(CL_COMMAND_READ_IMAGE)));
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_WRITE_IMAGE, createParams(CL_COMMAND_WRITE_IMAGE)));
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE, createParams(CL_COMMAND_COPY_IMAGE)));
EXPECT_FALSE(queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE_TO_BUFFER, {}));
}
TEST_F(CommandQueueBuiltinTest, givenCopyImageCommandWhenCallingBlitEnqueueAllowedThenReturnCorrectValue) {
DebugManagerStateRestore restore{};
DebugManager.flags.EnableBlitterForEnqueueImageOperations.set(1);
MockContext context{};
MockCommandQueue queue(&context, context.getDevice(0), 0, false);
if (!queue.bcsEngine) {
queue.bcsEngine = &context.getDevice(0)->getDefaultEngine();
}
if (!queue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
GTEST_SKIP();
}
auto builtinOpParams = createParams(CL_COMMAND_COPY_IMAGE);
EXPECT_TRUE(queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE, builtinOpParams));
builtinOpParams.srcOffset[0] = 0x9999;
EXPECT_FALSE(queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE, builtinOpParams));
builtinOpParams.dstOffset[0] = 0x9999;
EXPECT_FALSE(queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE, builtinOpParams));
builtinOpParams.srcOffset = correctOrigin;
EXPECT_FALSE(queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE, builtinOpParams));
}
TEST(CommandQueue, givenRegularClCommandWhenCallingBlitEnqueuePreferredThenReturnCorrectValue) {
MockContext context{};
MockCommandQueue queue{context};
BuiltinOpParams builtinOpParams{};
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_READ_BUFFER, builtinOpParams));
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_WRITE_BUFFER, builtinOpParams));
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_READ_BUFFER_RECT, builtinOpParams));
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_WRITE_BUFFER_RECT, builtinOpParams));
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER_RECT, builtinOpParams));
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_READ_IMAGE, builtinOpParams));
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_WRITE_IMAGE, builtinOpParams));
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_IMAGE, builtinOpParams));
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER, false));
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_WRITE_BUFFER, false));
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER, false));
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER_RECT, false));
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_WRITE_BUFFER_RECT, false));
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER_RECT, false));
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_SVM_MEMCPY, false));
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_READ_IMAGE, true));
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_WRITE_IMAGE, true));
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE, true));
EXPECT_FALSE(queue.blitEnqueueAllowed(CL_COMMAND_READ_IMAGE, false));
EXPECT_FALSE(queue.blitEnqueueAllowed(CL_COMMAND_WRITE_IMAGE, false));
EXPECT_FALSE(queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE, false));
EXPECT_FALSE(queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE_TO_BUFFER, false));
}
TEST(CommandQueue, givenLocalToLocalCopyBufferCommandWhenCallingBlitEnqueuePreferredThenReturnValueBasedOnDebugFlagAndHwPreference) {
@ -1287,103 +1245,34 @@ TEST(CommandQueue, givenLocalToLocalCopyBufferCommandWhenCallingBlitEnqueuePrefe
DebugManagerStateRestore restore{};
MockContext context{};
MockCommandQueue queue{context};
BuiltinOpParams builtinOpParams{};
MockGraphicsAllocation srcGraphicsAllocation{};
MockGraphicsAllocation dstGraphicsAllocation{};
MockBuffer srcMemObj{srcGraphicsAllocation};
MockBuffer dstMemObj{dstGraphicsAllocation};
builtinOpParams.srcMemObj = &srcMemObj;
builtinOpParams.dstMemObj = &dstMemObj;
srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory;
dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory;
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1);
EXPECT_EQ(preferBlitterHw, queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams));
EXPECT_EQ(preferBlitterHw, queue.blitEnqueuePreferred(TransferDirection::LocalToLocal));
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0);
EXPECT_FALSE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams));
EXPECT_FALSE(queue.blitEnqueuePreferred(TransferDirection::LocalToLocal));
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams));
EXPECT_TRUE(queue.blitEnqueuePreferred(TransferDirection::LocalToLocal));
}
TEST(CommandQueue, givenNotLocalToLocalCopyBufferCommandWhenCallingBlitEnqueuePreferredThenReturnTrueRegardlessOfDebugFlag) {
DebugManagerStateRestore restore{};
MockContext context{};
MockCommandQueue queue{context};
BuiltinOpParams builtinOpParams{};
MockGraphicsAllocation srcGraphicsAllocation{};
MockGraphicsAllocation dstGraphicsAllocation{};
MockBuffer srcMemObj{srcGraphicsAllocation};
MockBuffer dstMemObj{dstGraphicsAllocation};
builtinOpParams.srcMemObj = &srcMemObj;
builtinOpParams.dstMemObj = &dstMemObj;
srcGraphicsAllocation.memoryPool = MemoryPool::System4KBPages;
dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory;
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams));
EXPECT_TRUE(queue.blitEnqueuePreferred(TransferDirection::HostToLocal));
EXPECT_TRUE(queue.blitEnqueuePreferred(TransferDirection::LocalToHost));
EXPECT_TRUE(queue.blitEnqueuePreferred(TransferDirection::HostToHost));
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams));
EXPECT_TRUE(queue.blitEnqueuePreferred(TransferDirection::HostToLocal));
EXPECT_TRUE(queue.blitEnqueuePreferred(TransferDirection::LocalToHost));
EXPECT_TRUE(queue.blitEnqueuePreferred(TransferDirection::HostToHost));
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams));
srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory;
dstGraphicsAllocation.memoryPool = MemoryPool::System4KBPages;
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams));
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams));
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams));
}
// Local-to-local SVM copy: blitEnqueuePreferred() is expected to follow the
// PreferCopyEngineForCopyBufferToBuffer debug flag, and to match the value
// reported by the ClHwHelper when the flag is set to -1.
TEST(CommandQueue, givenLocalToLocalSvmCopyCommandWhenCallingBlitEnqueuePreferredThenReturnValueBasedOnDebugFlagAndHwPreference) {
    // Queried first, before any mock or flag is touched.
    const bool hwPrefersBlitter = ClHwHelper::get(::defaultHwInfo->platform.eRenderCoreFamily).preferBlitterForLocalToLocalTransfers();

    DebugManagerStateRestore restore{};
    MockContext context{};
    MockCommandQueue queue{context};

    // Source and destination SVM allocations are both placed in the LocalMemory pool.
    MockGraphicsAllocation srcSvmAlloc{};
    MockGraphicsAllocation dstSvmAlloc{};
    srcSvmAlloc.memoryPool = MemoryPool::LocalMemory;
    dstSvmAlloc.memoryPool = MemoryPool::LocalMemory;

    BuiltinOpParams builtinOpParams{};
    builtinOpParams.srcSvmAlloc = &srcSvmAlloc;
    builtinOpParams.dstSvmAlloc = &dstSvmAlloc;

    // Flag == -1: result is expected to equal the HW helper preference.
    DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1);
    const bool preferredWithDefaultFlag = queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams);
    EXPECT_EQ(hwPrefersBlitter, preferredWithDefaultFlag);

    // Flag == 0: blit is expected not to be preferred.
    DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0);
    const bool preferredWithFlagDisabled = queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams);
    EXPECT_FALSE(preferredWithFlagDisabled);

    // Flag == 1: blit is expected to be preferred.
    DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
    const bool preferredWithFlagEnabled = queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams);
    EXPECT_TRUE(preferredWithFlagEnabled);
}
// SVM copy where the two allocations are in different pools (System4KBPages vs
// LocalMemory): every blitEnqueuePreferred() call below is expected to return
// true for each value (-1, 0, 1) of PreferCopyEngineForCopyBufferToBuffer.
TEST(CommandQueue, givenNotLocalToLocalSvmCopyCommandWhenCallingBlitEnqueuePreferredThenReturnTrueRegardlessOfDebugFlag) {
DebugManagerStateRestore restore{};
MockContext context{};
MockCommandQueue queue{context};
BuiltinOpParams builtinOpParams{};
MockGraphicsAllocation srcSvmAlloc{};
MockGraphicsAllocation dstSvmAlloc{};
builtinOpParams.srcSvmAlloc = &srcSvmAlloc;
builtinOpParams.dstSvmAlloc = &dstSvmAlloc;
// Case 1: source in System4KBPages, destination in LocalMemory.
srcSvmAlloc.memoryPool = MemoryPool::System4KBPages;
dstSvmAlloc.memoryPool = MemoryPool::LocalMemory;
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams));
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams));
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams));
// Case 2: pools swapped - source in LocalMemory, destination in System4KBPages.
srcSvmAlloc.memoryPool = MemoryPool::LocalMemory;
dstSvmAlloc.memoryPool = MemoryPool::System4KBPages;
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams));
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams));
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams));
// NOTE(review): the three calls below pass a TransferDirection instead of
// (cmdType, BuiltinOpParams) and run with the flag still set to 1 - confirm
// that both call forms are meant to coexist in this test.
EXPECT_TRUE(queue.blitEnqueuePreferred(TransferDirection::HostToLocal));
EXPECT_TRUE(queue.blitEnqueuePreferred(TransferDirection::LocalToHost));
EXPECT_TRUE(queue.blitEnqueuePreferred(TransferDirection::HostToHost));
}
TEST(CommandQueue, givenCopySizeAndOffsetWhenCallingBlitEnqueueImageAllowedThenReturnCorrectValue) {
@ -1987,3 +1876,44 @@ TEST_F(MultiTileFixture, givenNotDefaultContextWithRootDeviceAndTileIdMaskWhenQu
EXPECT_EQ(rootCsr->isMultiOsContextCapable(), queue.getGpgpuCommandStreamReceiver().isMultiOsContextCapable());
EXPECT_EQ(rootCsr, queue.gpgpuEngine->commandStreamReceiver);
}
// fromGfxAllocToHost(): the returned direction is expected to be HostToHost for
// a source in System4KBPages and LocalToHost for a source in LocalMemory.
TEST(TransferDirectionHelperTest, givenAllocationWhenGfxAllocToHostCalledThenReturnCorrectResult) {
    MockGraphicsAllocation srcAlloc{};

    // Moves the source allocation to the given pool and queries the helper.
    const auto directionForSourcePool = [&srcAlloc](auto sourcePool) {
        srcAlloc.memoryPool = sourcePool;
        return TransferDirectionHelper::fromGfxAllocToHost(srcAlloc);
    };

    EXPECT_EQ(TransferDirection::HostToHost, directionForSourcePool(MemoryPool::System4KBPages));
    EXPECT_EQ(TransferDirection::LocalToHost, directionForSourcePool(MemoryPool::LocalMemory));
}
// fromHostToGfxAlloc(): the returned direction is expected to be HostToHost for
// a destination in System4KBPages and HostToLocal for a destination in LocalMemory.
TEST(TransferDirectionHelperTest, givenAllocationWhenHostToGfxAllocCalledThenReturnCorrectResult) {
    MockGraphicsAllocation dstAlloc{};

    // Moves the destination allocation to the given pool and queries the helper.
    const auto directionForDestinationPool = [&dstAlloc](auto destinationPool) {
        dstAlloc.memoryPool = destinationPool;
        return TransferDirectionHelper::fromHostToGfxAlloc(dstAlloc);
    };

    EXPECT_EQ(TransferDirection::HostToHost, directionForDestinationPool(MemoryPool::System4KBPages));
    EXPECT_EQ(TransferDirection::HostToLocal, directionForDestinationPool(MemoryPool::LocalMemory));
}
// fromGfxAllocToGfxAlloc(): checks all four source/destination combinations of
// the System4KBPages and LocalMemory pools against the expected direction.
TEST(TransferDirectionHelperTest, givenAllocationWhenGfxAllocToGfxAllocCalledThenReturnCorrectResult) {
    MockGraphicsAllocation srcAlloc{};
    MockGraphicsAllocation dstAlloc{};

    // Places both allocations in the requested pools and queries the helper.
    const auto directionForPools = [&srcAlloc, &dstAlloc](auto sourcePool, auto destinationPool) {
        srcAlloc.memoryPool = sourcePool;
        dstAlloc.memoryPool = destinationPool;
        return TransferDirectionHelper::fromGfxAllocToGfxAlloc(srcAlloc, dstAlloc);
    };

    EXPECT_EQ(TransferDirection::HostToHost, directionForPools(MemoryPool::System4KBPages, MemoryPool::System4KBPages));
    EXPECT_EQ(TransferDirection::LocalToHost, directionForPools(MemoryPool::LocalMemory, MemoryPool::System4KBPages));
    EXPECT_EQ(TransferDirection::HostToLocal, directionForPools(MemoryPool::System4KBPages, MemoryPool::LocalMemory));
    EXPECT_EQ(TransferDirection::LocalToLocal, directionForPools(MemoryPool::LocalMemory, MemoryPool::LocalMemory));
}

View File

@ -212,6 +212,7 @@ HWTEST_F(EnqueueCopyImageTest, WhenCopyingImageThenNumberOfPipelineSelectsIsOne)
HWTEST_F(EnqueueCopyImageTest, givenDeviceWithBlitterSupportWhenEnqueueCopyImageThenBlitEnqueueImageAllowedReturnsCorrectResult) {
auto hwInfo = pClDevice->getRootDeviceEnvironment().getMutableHardwareInfo();
auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily);
hwInfo->capabilityTable.blitterOperationsSupported = true;
REQUIRE_BLITTER_OR_SKIP(hwInfo);
DebugManagerStateRestore restorer;
@ -219,7 +220,6 @@ HWTEST_F(EnqueueCopyImageTest, givenDeviceWithBlitterSupportWhenEnqueueCopyImage
DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);
DebugManager.flags.EnableBlitterForEnqueueImageOperations.set(1);
hwInfo->capabilityTable.blitterOperationsSupported = true;
size_t srcOrigin[] = {0, 0, 0};
size_t dstOrigin[] = {0, 0, 0};

View File

@ -875,12 +875,12 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferWhenUsingBcsThenCallW
void *hostPtr = reinterpret_cast<void *>(0x12340000);
cmdQ->enqueueWriteBuffer(buffer.get(), false, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(0u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled);
EXPECT_FALSE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
EXPECT_TRUE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
bool tempAllocationFound = false;
auto tempAllocation = gpgpuCsr.getTemporaryAllocations().peekHead();
auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead();
while (tempAllocation) {
if (tempAllocation->getUnderlyingBuffer() == hostPtr) {
tempAllocationFound = true;
@ -891,7 +891,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferWhenUsingBcsThenCallW
EXPECT_TRUE(tempAllocationFound);
cmdQ->enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(1u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled);
EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
}
HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferRectWhenUsingBcsThenCallWait) {
@ -920,12 +920,12 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferRectWhenUsingBcsThenCa
cmdQ->enqueueReadBufferRect(buffer.get(), false, bufferOrigin, hostOrigin, region,
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
MemoryConstants::cacheLineSize, hostPtr, 0, nullptr, nullptr);
EXPECT_EQ(0u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled);
EXPECT_FALSE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
EXPECT_TRUE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
bool tempAllocationFound = false;
auto tempAllocation = gpgpuCsr.getTemporaryAllocations().peekHead();
auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead();
while (tempAllocation) {
if (tempAllocation->getUnderlyingBuffer() == hostPtr) {
tempAllocationFound = true;
@ -938,7 +938,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferRectWhenUsingBcsThenCa
cmdQ->enqueueReadBufferRect(buffer.get(), true, bufferOrigin, hostOrigin, region,
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
MemoryConstants::cacheLineSize, hostPtr, 0, nullptr, nullptr);
EXPECT_EQ(1u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled);
EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
}
HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferRectWhenUsingBcsThenCallWait) {
@ -967,12 +967,12 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferRectWhenUsingBcsThenC
cmdQ->enqueueWriteBufferRect(buffer.get(), false, bufferOrigin, hostOrigin, region,
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
MemoryConstants::cacheLineSize, hostPtr, 0, nullptr, nullptr);
EXPECT_EQ(0u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled);
EXPECT_FALSE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
EXPECT_TRUE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
bool tempAllocationFound = false;
auto tempAllocation = gpgpuCsr.getTemporaryAllocations().peekHead();
auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead();
while (tempAllocation) {
if (tempAllocation->getUnderlyingBuffer() == hostPtr) {
tempAllocationFound = true;
@ -985,7 +985,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferRectWhenUsingBcsThenC
cmdQ->enqueueWriteBufferRect(buffer.get(), true, bufferOrigin, hostOrigin, region,
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
MemoryConstants::cacheLineSize, hostPtr, 0, nullptr, nullptr);
EXPECT_EQ(1u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled);
EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
}
HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferWhenUsingBcsThenCallWait) {
@ -1008,12 +1008,12 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferWhenUsingBcsThenCallWa
void *hostPtr = reinterpret_cast<void *>(0x12340000);
cmdQ->enqueueReadBuffer(buffer.get(), false, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(0u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled);
EXPECT_FALSE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
EXPECT_TRUE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
bool tempAllocationFound = false;
auto tempAllocation = gpgpuCsr.getTemporaryAllocations().peekHead();
auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead();
while (tempAllocation) {
if (tempAllocation->getUnderlyingBuffer() == hostPtr) {
tempAllocationFound = true;
@ -1024,7 +1024,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferWhenUsingBcsThenCallWa
EXPECT_TRUE(tempAllocationFound);
cmdQ->enqueueReadBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(1u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled);
EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
}
HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingSVMMemcpyAndEnqueuReadBufferIsCalledWhenUsingBcsThenCallWait) {
@ -1046,16 +1046,16 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingSVMMemcpyAndEnqueuReadBufferIsCa
cmdQ->enqueueSVMMemcpy(false, pDstSVM.get(), pSrcSVM, 256, 0, nullptr, nullptr);
EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
EXPECT_FALSE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
EXPECT_TRUE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
auto tempAlloc = gpgpuCsr.getTemporaryAllocations().peekHead();
auto tempAlloc = myMockCsr->getTemporaryAllocations().peekHead();
EXPECT_EQ(0u, tempAlloc->countSuccessors());
EXPECT_EQ(pDstSVM.get(), reinterpret_cast<void *>(tempAlloc->getGpuAddress()));
cmdQ->enqueueSVMMemcpy(true, pDstSVM.get(), pSrcSVM, 256, 0, nullptr, nullptr);
EXPECT_EQ(1u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled);
EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
bcsMockContext->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM);
}
@ -1078,17 +1078,17 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenSrcHostPtrBlockingEnqueueSVMMemcpyAndEnq
auto pDstSVM = bcsMockContext->getSVMAllocsManager()->createSVMAlloc(256, {}, bcsMockContext->getRootDeviceIndices(), bcsMockContext->getDeviceBitfields());
cmdQ->enqueueSVMMemcpy(false, pDstSVM, pSrcSVM.get(), 256, 0, nullptr, nullptr);
EXPECT_EQ(0u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled);
EXPECT_FALSE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
EXPECT_TRUE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
auto tempAlloc = gpgpuCsr.getTemporaryAllocations().peekHead();
auto tempAlloc = myMockCsr->getTemporaryAllocations().peekHead();
EXPECT_EQ(0u, tempAlloc->countSuccessors());
EXPECT_EQ(pSrcSVM.get(), reinterpret_cast<void *>(tempAlloc->getGpuAddress()));
cmdQ->enqueueSVMMemcpy(true, pDstSVM, pSrcSVM.get(), 256, 0, nullptr, nullptr);
EXPECT_EQ(1u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled);
EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
bcsMockContext->getSVMAllocsManager()->freeSVMAlloc(pDstSVM);
}
@ -1112,17 +1112,17 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenDstHostPtrAndSrcHostPtrBlockingEnqueueSV
cmdQ->enqueueSVMMemcpy(false, pDstSVM.get(), pSrcSVM.get(), 256, 0, nullptr, nullptr);
EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
EXPECT_FALSE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
EXPECT_TRUE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
auto tempAlloc = gpgpuCsr.getTemporaryAllocations().peekHead();
auto tempAlloc = myMockCsr->getTemporaryAllocations().peekHead();
EXPECT_EQ(1u, tempAlloc->countSuccessors());
EXPECT_EQ(pSrcSVM.get(), reinterpret_cast<void *>(tempAlloc->getGpuAddress()));
EXPECT_EQ(pDstSVM.get(), reinterpret_cast<void *>(tempAlloc->next->getGpuAddress()));
cmdQ->enqueueSVMMemcpy(true, pDstSVM.get(), pSrcSVM.get(), 256, 0, nullptr, nullptr);
EXPECT_EQ(1u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled);
EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
}
HWTEST_TEMPLATED_F(BcsBufferTests, givenSvmToSvmCopyWhenEnqueueSVMMemcpyThenSvmMemcpyCommandIsCalledAndBcs) {