mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-31 20:13:04 +08:00
Correct CSR used for hostptr allocations in builtins
Signed-off-by: Maciej Dziuban <maciej.dziuban@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
582bb3786d
commit
69ae9dc9c2
@@ -43,6 +43,7 @@ set(RUNTIME_SRCS_COMMAND_QUEUE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/local_work_size.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/resource_barrier.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/resource_barrier.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/transfer_direction.h
|
||||
)
|
||||
|
||||
if(SUPPORT_XEHP_AND_LATER)
|
||||
|
||||
@@ -144,9 +144,9 @@ CommandStreamReceiver *CommandQueue::getBcsForAuxTranslation() const {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(cl_command_type cmdType, const MultiDispatchInfo &dispatchInfo) const {
|
||||
const bool blitAllowed = blitEnqueueAllowed(cmdType, dispatchInfo.peekBuiltinOpParams());
|
||||
const bool blitPreferred = blitEnqueuePreferred(cmdType, dispatchInfo.peekBuiltinOpParams());
|
||||
CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(cl_command_type cmdType, TransferDirection transferDirection, bool imagesValidForBlit) const {
|
||||
const bool blitAllowed = blitEnqueueAllowed(cmdType, imagesValidForBlit);
|
||||
const bool blitPreferred = blitEnqueuePreferred(transferDirection);
|
||||
const bool blitRequired = isCopyOnly;
|
||||
const bool blit = blitAllowed && (blitPreferred || blitRequired);
|
||||
|
||||
@@ -725,7 +725,7 @@ bool CommandQueue::queueDependenciesClearRequired() const {
|
||||
return isOOQEnabled() || DebugManager.flags.OmitTimestampPacketDependencies.get();
|
||||
}
|
||||
|
||||
bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType, const BuiltinOpParams &params) const {
|
||||
bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType, bool imagesValidForBlit) const {
|
||||
if (bcsEngine == nullptr) {
|
||||
return false;
|
||||
}
|
||||
@@ -748,32 +748,16 @@ bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType, const BuiltinOpPa
|
||||
case CL_COMMAND_SVM_MEMCPY:
|
||||
return true;
|
||||
case CL_COMMAND_READ_IMAGE:
|
||||
return blitEnqueueImageAllowed(&params.srcOffset[0], &params.size[0], *static_cast<Image *>(params.srcMemObj));
|
||||
case CL_COMMAND_WRITE_IMAGE:
|
||||
return blitEnqueueImageAllowed(&params.dstOffset[0], &params.size[0], *static_cast<Image *>(params.dstMemObj));
|
||||
case CL_COMMAND_COPY_IMAGE:
|
||||
return blitEnqueueImageAllowed(&params.srcOffset[0], &params.size[0], *static_cast<Image *>(params.srcMemObj)) &&
|
||||
blitEnqueueImageAllowed(&params.dstOffset[0], &params.size[0], *static_cast<Image *>(params.dstMemObj));
|
||||
return imagesValidForBlit;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool CommandQueue::blitEnqueuePreferred(cl_command_type cmdType, const BuiltinOpParams &builtinOpParams) const {
|
||||
bool isLocalToLocal = false;
|
||||
|
||||
if (cmdType == CL_COMMAND_COPY_BUFFER &&
|
||||
builtinOpParams.srcMemObj->getGraphicsAllocation(device->getRootDeviceIndex())->isAllocatedInLocalMemoryPool() &&
|
||||
builtinOpParams.dstMemObj->getGraphicsAllocation(device->getRootDeviceIndex())->isAllocatedInLocalMemoryPool()) {
|
||||
isLocalToLocal = true;
|
||||
}
|
||||
if (cmdType == CL_COMMAND_SVM_MEMCPY &&
|
||||
builtinOpParams.srcSvmAlloc->isAllocatedInLocalMemoryPool() &&
|
||||
builtinOpParams.dstSvmAlloc->isAllocatedInLocalMemoryPool()) {
|
||||
isLocalToLocal = true;
|
||||
}
|
||||
|
||||
if (isLocalToLocal) {
|
||||
bool CommandQueue::blitEnqueuePreferred(TransferDirection transferDirection) const {
|
||||
if (transferDirection == TransferDirection::LocalToLocal) {
|
||||
if (DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.get() != -1) {
|
||||
return static_cast<bool>(DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.get());
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#pragma once
|
||||
#include "shared/source/helpers/engine_control.h"
|
||||
|
||||
#include "opencl/source/command_queue/transfer_direction.h"
|
||||
#include "opencl/source/event/event.h"
|
||||
#include "opencl/source/helpers/base_object.h"
|
||||
#include "opencl/source/helpers/dispatch_info.h"
|
||||
@@ -224,7 +225,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
MOCKABLE_VIRTUAL CommandStreamReceiver &getGpgpuCommandStreamReceiver() const;
|
||||
CommandStreamReceiver *getBcsCommandStreamReceiver() const;
|
||||
CommandStreamReceiver *getBcsForAuxTranslation() const;
|
||||
MOCKABLE_VIRTUAL CommandStreamReceiver &selectCsrForBuiltinOperation(cl_command_type cmdType, const MultiDispatchInfo &dispatchInfo) const;
|
||||
MOCKABLE_VIRTUAL CommandStreamReceiver &selectCsrForBuiltinOperation(cl_command_type cmdType, TransferDirection transferDirection, bool imagesValidForBlit) const;
|
||||
Device &getDevice() const noexcept;
|
||||
ClDevice &getClDevice() const { return *device; }
|
||||
Context &getContext() const { return *context; }
|
||||
@@ -353,8 +354,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
cl_uint numEventsInWaitList, const cl_event *eventWaitList);
|
||||
void providePerformanceHint(TransferProperties &transferProperties);
|
||||
bool queueDependenciesClearRequired() const;
|
||||
bool blitEnqueueAllowed(cl_command_type cmdType, const BuiltinOpParams &params) const;
|
||||
bool blitEnqueuePreferred(cl_command_type cmdType, const BuiltinOpParams &builtinOpParams) const;
|
||||
bool blitEnqueueAllowed(cl_command_type cmdType, bool imagesValidForBlit) const;
|
||||
bool blitEnqueuePreferred(TransferDirection transferDirection) const;
|
||||
MOCKABLE_VIRTUAL bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) const;
|
||||
void aubCaptureHook(bool &blocking, bool &clearAllDependencies, const MultiDispatchInfo &multiDispatchInfo);
|
||||
virtual bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const = 0;
|
||||
|
||||
@@ -31,6 +31,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBuffer(
|
||||
cl_event *event) {
|
||||
auto eBuiltInOpsType = EBuiltInOps::CopyBufferToBuffer;
|
||||
|
||||
GraphicsAllocation *srcBufferAlloc = srcBuffer->getGraphicsAllocation(device->getRootDeviceIndex());
|
||||
GraphicsAllocation *dstBufferAlloc = dstBuffer->getGraphicsAllocation(device->getRootDeviceIndex());
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_COPY_BUFFER, TransferDirectionHelper::fromGfxAllocToGfxAlloc(*srcBufferAlloc, *dstBufferAlloc), false);
|
||||
|
||||
if (forceStateless(std::max(srcBuffer->getSize(), dstBuffer->getSize()))) {
|
||||
eBuiltInOpsType = EBuiltInOps::CopyBufferToBufferStateless;
|
||||
}
|
||||
@@ -48,7 +52,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBuffer(
|
||||
MemObjSurface s2(dstBuffer);
|
||||
Surface *surfaces[] = {&s1, &s2};
|
||||
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_COPY_BUFFER, dispatchInfo);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER>(dispatchInfo, surfaces, eBuiltInOpsType, numEventsInWaitList, eventWaitList, event, false, csr);
|
||||
|
||||
return CL_SUCCESS;
|
||||
|
||||
@@ -34,6 +34,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferRect(
|
||||
cl_event *event) {
|
||||
auto eBuiltInOps = EBuiltInOps::CopyBufferRect;
|
||||
|
||||
GraphicsAllocation *srcBufferAlloc = srcBuffer->getGraphicsAllocation(device->getRootDeviceIndex());
|
||||
GraphicsAllocation *dstBufferAlloc = dstBuffer->getGraphicsAllocation(device->getRootDeviceIndex());
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_COPY_BUFFER_RECT, TransferDirectionHelper::fromGfxAllocToGfxAlloc(*srcBufferAlloc, *dstBufferAlloc), false);
|
||||
|
||||
if (forceStateless(std::max(srcBuffer->getSize(), dstBuffer->getSize()))) {
|
||||
eBuiltInOps = EBuiltInOps::CopyBufferRectStateless;
|
||||
}
|
||||
@@ -54,7 +58,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferRect(
|
||||
dc.dstSlicePitch = dstSlicePitch;
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_COPY_BUFFER_RECT, dispatchInfo);
|
||||
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, false, csr);
|
||||
|
||||
return CL_SUCCESS;
|
||||
|
||||
@@ -32,6 +32,11 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyImage(
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
|
||||
const bool validImages = blitEnqueueImageAllowed(srcOrigin, region, *srcImage) && blitEnqueueImageAllowed(dstOrigin, region, *dstImage);
|
||||
GraphicsAllocation *srcImageAlloc = srcImage->getGraphicsAllocation(getDevice().getRootDeviceIndex());
|
||||
GraphicsAllocation *dstImageAlloc = dstImage->getGraphicsAllocation(getDevice().getRootDeviceIndex());
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_COPY_IMAGE, TransferDirectionHelper::fromGfxAllocToGfxAlloc(*srcImageAlloc, *dstImageAlloc), validImages);
|
||||
|
||||
MemObjSurface srcImgSurf(srcImage);
|
||||
MemObjSurface dstImgSurf(dstImage);
|
||||
Surface *surfaces[] = {&srcImgSurf, &dstImgSurf};
|
||||
@@ -50,7 +55,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyImage(
|
||||
}
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_COPY_IMAGE, dispatchInfo);
|
||||
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_IMAGE>(dispatchInfo, surfaces, EBuiltInOps::CopyImageToImage3d, numEventsInWaitList, eventWaitList, event, false, csr);
|
||||
|
||||
return CL_SUCCESS;
|
||||
|
||||
@@ -41,6 +41,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
||||
bool isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, cmdType) : true;
|
||||
bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, cmdType, blockingRead, size, ptr,
|
||||
numEventsInWaitList, eventWaitList);
|
||||
GraphicsAllocation *srcBufferAlloc = buffer->getGraphicsAllocation(rootDeviceIndex);
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(cmdType, TransferDirectionHelper::fromGfxAllocToHost(*srcBufferAlloc), false);
|
||||
|
||||
InternalMemoryType memoryType = InternalMemoryType::NOT_SPECIFIED;
|
||||
//check if we are dealing with SVM pointer here for which we already have an allocation
|
||||
@@ -99,7 +101,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
||||
} else {
|
||||
surfaces[1] = &hostPtrSurf;
|
||||
if (size != 0) {
|
||||
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, true);
|
||||
bool status = csr.createAllocationForHostSurface(hostPtrSurf, true);
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
@@ -126,7 +128,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
||||
}
|
||||
}
|
||||
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, dispatchInfo);
|
||||
if (nullptr == mapAllocation) {
|
||||
notifyEnqueueReadBuffer(buffer, !!blockingRead, EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
|
||||
}
|
||||
|
||||
@@ -37,6 +37,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
|
||||
const cl_command_type cmdType = CL_COMMAND_READ_BUFFER_RECT;
|
||||
auto isMemTransferNeeded = true;
|
||||
|
||||
GraphicsAllocation *srcBufferAlloc = buffer->getGraphicsAllocation(getDevice().getRootDeviceIndex());
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(cmdType, TransferDirectionHelper::fromGfxAllocToHost(*srcBufferAlloc), false);
|
||||
|
||||
if (buffer->isMemObjZeroCopy()) {
|
||||
size_t bufferOffset;
|
||||
size_t hostOffset;
|
||||
@@ -63,7 +66,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
|
||||
if (region[0] != 0 &&
|
||||
region[1] != 0 &&
|
||||
region[2] != 0) {
|
||||
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, true);
|
||||
bool status = csr.createAllocationForHostSurface(hostPtrSurf, true);
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
@@ -87,7 +90,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
|
||||
dc.dstSlicePitch = hostSlicePitch;
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER_RECT, dispatchInfo);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, csr);
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
|
||||
@@ -42,6 +42,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
|
||||
cl_event *event) {
|
||||
cl_command_type cmdType = CL_COMMAND_READ_IMAGE;
|
||||
|
||||
const bool validImages = blitEnqueueImageAllowed(origin, region, *srcImage);
|
||||
GraphicsAllocation *srcImageAlloc = srcImage->getGraphicsAllocation(getDevice().getRootDeviceIndex());
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(cmdType, TransferDirectionHelper::fromGfxAllocToHost(*srcImageAlloc), validImages);
|
||||
|
||||
auto isMemTransferNeeded = true;
|
||||
if (srcImage->isMemObjZeroCopy()) {
|
||||
size_t hostOffset;
|
||||
@@ -72,7 +76,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
|
||||
if (region[0] != 0 &&
|
||||
region[1] != 0 &&
|
||||
region[2] != 0) {
|
||||
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, true);
|
||||
bool status = csr.createAllocationForHostSurface(hostPtrSurf, true);
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
@@ -99,7 +103,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
|
||||
auto eBuiltInOps = EBuiltInOps::CopyImage3dToBuffer;
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_IMAGE, dispatchInfo);
|
||||
if (nullptr == mapAllocation) {
|
||||
notifyEnqueueReadImage(srcImage, static_cast<bool>(blockingRead), EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
|
||||
}
|
||||
|
||||
@@ -126,7 +126,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMap(cl_bool blockingMap,
|
||||
dc.unifiedMemoryArgsRequireMemSync = externalAppCall;
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, dispatchInfo);
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_SVM_MAP, TransferDirectionHelper::fromGfxAllocToHost(*gpuAllocation), false);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, blocking, csr);
|
||||
|
||||
if (event) {
|
||||
@@ -210,7 +210,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMUnmap(void *svmPtr,
|
||||
dc.unifiedMemoryArgsRequireMemSync = externalAppCall;
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, dispatchInfo);
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_SVM_UNMAP, TransferDirectionHelper::fromHostToGfxAlloc(*gpuAllocation), false);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, false, csr);
|
||||
|
||||
if (event) {
|
||||
@@ -328,10 +328,13 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
||||
cl_command_type cmdType;
|
||||
|
||||
if (copyType == SvmToHost) {
|
||||
GraphicsAllocation *srcBufferAlloc = srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, TransferDirectionHelper::fromGfxAllocToHost(*srcBufferAlloc), false);
|
||||
|
||||
GeneralSurface srcSvmSurf(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
|
||||
HostPtrSurface dstHostPtrSurf(dstPtr, size);
|
||||
if (size != 0) {
|
||||
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(dstHostPtrSurf, true);
|
||||
bool status = csr.createAllocationForHostSurface(dstHostPtrSurf, true);
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
@@ -343,16 +346,18 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
||||
|
||||
dispatchInfo.setBuiltinOpParams(operationParams);
|
||||
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, dispatchInfo);
|
||||
notifyEnqueueSVMMemcpy(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex), !!blockingCopy, EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
|
||||
|
||||
} else if (copyType == HostToSvm) {
|
||||
GraphicsAllocation *dstBufferAlloc = dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER, TransferDirectionHelper::fromHostToGfxAlloc(*dstBufferAlloc), false);
|
||||
|
||||
HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcPtr), size);
|
||||
GeneralSurface dstSvmSurf(dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
|
||||
cmdType = CL_COMMAND_WRITE_BUFFER;
|
||||
if (size != 0) {
|
||||
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(srcHostPtrSurf, false);
|
||||
bool status = csr.createAllocationForHostSurface(srcHostPtrSurf, false);
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
@@ -364,10 +369,13 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
||||
surfaces[1] = &srcHostPtrSurf;
|
||||
|
||||
dispatchInfo.setBuiltinOpParams(operationParams);
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER, dispatchInfo);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
|
||||
|
||||
} else if (copyType == SvmToSvm) {
|
||||
GraphicsAllocation *srcBufferAlloc = srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
|
||||
GraphicsAllocation *dstBufferAlloc = dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_SVM_MEMCPY, TransferDirectionHelper::fromGfxAllocToGfxAlloc(*srcBufferAlloc, *dstBufferAlloc), false);
|
||||
|
||||
GeneralSurface srcSvmSurf(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
|
||||
GeneralSurface dstSvmSurf(dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
|
||||
setOperationParams(operationParams, size, srcPtr, srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex),
|
||||
@@ -376,15 +384,15 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
||||
surfaces[1] = &dstSvmSurf;
|
||||
|
||||
dispatchInfo.setBuiltinOpParams(operationParams);
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_SVM_MEMCPY, dispatchInfo);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_SVM_MEMCPY>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
|
||||
|
||||
} else {
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_SVM_MEMCPY, TransferDirection::HostToHost, false);
|
||||
|
||||
HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcPtr), size);
|
||||
HostPtrSurface dstHostPtrSurf(dstPtr, size);
|
||||
cmdType = CL_COMMAND_WRITE_BUFFER;
|
||||
if (size != 0) {
|
||||
auto &csr = getGpgpuCommandStreamReceiver();
|
||||
bool status = csr.createAllocationForHostSurface(srcHostPtrSurf, false);
|
||||
status &= csr.createAllocationForHostSurface(dstHostPtrSurf, true);
|
||||
if (!status) {
|
||||
@@ -398,7 +406,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
||||
surfaces[1] = &dstHostPtrSurf;
|
||||
|
||||
dispatchInfo.setBuiltinOpParams(operationParams);
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER, dispatchInfo);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
|
||||
}
|
||||
if (event) {
|
||||
|
||||
@@ -38,6 +38,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
|
||||
auto isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, cmdType) : true;
|
||||
bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, cmdType, blockingWrite, size, const_cast<void *>(ptr),
|
||||
numEventsInWaitList, eventWaitList);
|
||||
GraphicsAllocation *dstBufferAlloc = buffer->getGraphicsAllocation(rootDeviceIndex);
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(cmdType, TransferDirectionHelper::fromHostToGfxAlloc(*dstBufferAlloc), false);
|
||||
|
||||
InternalMemoryType memoryType = InternalMemoryType::NOT_SPECIFIED;
|
||||
//check if we are dealing with SVM pointer here for which we already have an allocation
|
||||
@@ -94,7 +96,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
|
||||
} else {
|
||||
surfaces[1] = &hostPtrSurf;
|
||||
if (size != 0) {
|
||||
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, false);
|
||||
bool status = csr.createAllocationForHostSurface(hostPtrSurf, false);
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
@@ -113,7 +115,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
|
||||
dc.transferAllocation = mapAllocation ? mapAllocation : hostPtrSurf.getAllocation();
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER, dispatchInfo);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
|
||||
@@ -36,6 +36,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
|
||||
const cl_command_type cmdType = CL_COMMAND_WRITE_BUFFER_RECT;
|
||||
auto isMemTransferNeeded = true;
|
||||
|
||||
GraphicsAllocation *dstBufferAlloc = buffer->getGraphicsAllocation(device->getRootDeviceIndex());
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(cmdType, TransferDirectionHelper::fromHostToGfxAlloc(*dstBufferAlloc), false);
|
||||
|
||||
if (buffer->isMemObjZeroCopy()) {
|
||||
size_t bufferOffset;
|
||||
size_t hostOffset;
|
||||
@@ -62,7 +65,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
|
||||
if (region[0] != 0 &&
|
||||
region[1] != 0 &&
|
||||
region[2] != 0) {
|
||||
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, false);
|
||||
bool status = csr.createAllocationForHostSurface(hostPtrSurf, false);
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
@@ -86,7 +89,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
|
||||
dc.dstSlicePitch = bufferSlicePitch;
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER_RECT, dispatchInfo);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
|
||||
@@ -37,6 +37,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
|
||||
auto cmdType = CL_COMMAND_WRITE_IMAGE;
|
||||
auto isMemTransferNeeded = true;
|
||||
|
||||
const bool validImages = blitEnqueueImageAllowed(origin, region, *dstImage);
|
||||
GraphicsAllocation *dstImageAlloc = dstImage->getGraphicsAllocation(getDevice().getRootDeviceIndex());
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(cmdType, TransferDirectionHelper::fromHostToGfxAlloc(*dstImageAlloc), validImages);
|
||||
|
||||
if (dstImage->isMemObjZeroCopy()) {
|
||||
size_t hostOffset;
|
||||
Image::calculateHostPtrOffset(&hostOffset, origin, region, inputRowPitch, inputSlicePitch, dstImage->getImageDesc().image_type, dstImage->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes);
|
||||
@@ -65,7 +69,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
|
||||
if (region[0] != 0 &&
|
||||
region[1] != 0 &&
|
||||
region[2] != 0) {
|
||||
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, false);
|
||||
bool status = csr.createAllocationForHostSurface(hostPtrSurf, false);
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
@@ -92,7 +96,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
|
||||
auto eBuiltInOps = EBuiltInOps::CopyBufferToImage3d;
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_IMAGE, dispatchInfo);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite == CL_TRUE, csr);
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
|
||||
53
opencl/source/command_queue/transfer_direction.h
Normal file
53
opencl/source/command_queue/transfer_direction.h
Normal file
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/memory_manager/graphics_allocation.h"
|
||||
|
||||
namespace NEO {
|
||||
// Classifies a memory transfer by whether its source and destination are
// allocated in the local memory pool. "Host" denotes an endpoint that is NOT
// in the local memory pool (see TransferDirectionHelper::create for the
// exact mapping from the two locality flags).
enum class TransferDirection {
    HostToHost,   // neither endpoint is in the local memory pool
    HostToLocal,  // only the destination is in the local memory pool
    LocalToHost,  // only the source is in the local memory pool
    LocalToLocal, // both endpoints are in the local memory pool
};
|
||||
|
||||
struct TransferDirectionHelper {
|
||||
static inline TransferDirection fromGfxAllocToHost(const GraphicsAllocation &src) {
|
||||
const bool srcLocal = src.isAllocatedInLocalMemoryPool();
|
||||
return create(srcLocal, false);
|
||||
}
|
||||
|
||||
static inline TransferDirection fromHostToGfxAlloc(const GraphicsAllocation &dst) {
|
||||
const bool dstLocal = dst.isAllocatedInLocalMemoryPool();
|
||||
return create(false, dstLocal);
|
||||
}
|
||||
|
||||
static inline TransferDirection fromGfxAllocToGfxAlloc(const GraphicsAllocation &src, const GraphicsAllocation &dst) {
|
||||
const bool srcLocal = src.isAllocatedInLocalMemoryPool();
|
||||
const bool dstLocal = dst.isAllocatedInLocalMemoryPool();
|
||||
return create(srcLocal, dstLocal);
|
||||
}
|
||||
|
||||
static inline TransferDirection create(bool srcLocal, bool dstLocal) {
|
||||
if (srcLocal) {
|
||||
if (dstLocal) {
|
||||
return TransferDirection::LocalToLocal;
|
||||
} else {
|
||||
return TransferDirection::LocalToHost;
|
||||
}
|
||||
} else {
|
||||
if (dstLocal) {
|
||||
return TransferDirection::HostToLocal;
|
||||
} else {
|
||||
return TransferDirection::HostToHost;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
} // namespace NEO
|
||||
Reference in New Issue
Block a user