Add minimal transfer size for BCS split

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2022-08-30 14:10:57 +00:00
committed by Compute-Runtime-Automation
parent 74bf623ddb
commit a3dedcc7ee
13 changed files with 68 additions and 38 deletions

View File

@@ -358,7 +358,8 @@ class CommandQueueHw : public CommandQueue {
cl_int enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &bcsCsr);
bool isSplitEnqueueBlitSupported();
bool isSplitEnqueueBlitNeeded(TransferDirection transferDirection, CommandStreamReceiver &csr);
bool isSplitEnqueueBlitNeeded(TransferDirection transferDirection, size_t transferSize, CommandStreamReceiver &csr);
size_t getTotalSizeFromRectRegion(const size_t *region);
template <uint32_t cmdType>
cl_int enqueueBlitSplit(MultiDispatchInfo &dispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &csr);

View File

@@ -1115,9 +1115,12 @@ bool CommandQueueHw<GfxFamily>::isSplitEnqueueBlitSupported() {
}
template <typename GfxFamily>
bool CommandQueueHw<GfxFamily>::isSplitEnqueueBlitNeeded(TransferDirection transferDirection, CommandStreamReceiver &csr) {
bool CommandQueueHw<GfxFamily>::isSplitEnqueueBlitNeeded(TransferDirection transferDirection, size_t transferSize, CommandStreamReceiver &csr) {
constexpr size_t minimalSizeForBcsSplit = 16 * MemoryConstants::megaByte;
auto bcsSplit = isSplitEnqueueBlitSupported() &&
csr.getOsContext().getEngineType() == aub_stream::EngineType::ENGINE_BCS &&
transferSize >= minimalSizeForBcsSplit &&
(transferDirection == TransferDirection::HostToLocal ||
transferDirection == TransferDirection::LocalToHost);
@@ -1128,6 +1131,14 @@ bool CommandQueueHw<GfxFamily>::isSplitEnqueueBlitNeeded(TransferDirection trans
return bcsSplit;
}
// Collapses a three-component region into a single element count.
// region[0] is always used as-is; region[1] and region[2] only contribute
// when they are non-zero, i.e. a zero in those slots is treated as 1.
// NOTE(review): the result is in whatever units `region` is expressed in
// (presumably bytes for buffer rects, possibly pixels for images) - confirm
// against the callers that compare it with a byte threshold.
template <typename GfxFamily>
size_t CommandQueueHw<GfxFamily>::getTotalSizeFromRectRegion(const size_t *region) {
    size_t total = region[0];
    for (int dim = 1; dim <= 2; ++dim) {
        // A zero extent in the higher dimensions must not zero out the total.
        if (region[dim] != 0) {
            total *= region[dim];
        }
    }
    return total;
}
template <typename GfxFamily>
template <uint32_t cmdType>
cl_int CommandQueueHw<GfxFamily>::enqueueBlitSplit(MultiDispatchInfo &dispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &csr) {

View File

@@ -45,7 +45,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBuffer(
dc.srcOffset = {srcOffset, 0, 0};
dc.dstOffset = {dstOffset, 0, 0};
dc.size = {size, 0, 0};
dc.bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, csr);
dc.bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, size, csr);
MultiDispatchInfo dispatchInfo(dc);

View File

@@ -56,7 +56,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferRect(
dc.srcSlicePitch = srcSlicePitch;
dc.dstRowPitch = dstRowPitch;
dc.dstSlicePitch = dstSlicePitch;
dc.bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, csr);
dc.bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, getTotalSizeFromRectRegion(region), csr);
MultiDispatchInfo dispatchInfo(dc);
return dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, false, csr);

View File

@@ -52,7 +52,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyImage(
if (isMipMapped(dstImage->getImageDesc())) {
dc.dstMipLevel = findMipLevel(dstImage->getImageDesc().image_type, dstOrigin);
}
dc.bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, csr);
dc.bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, getTotalSizeFromRectRegion(region), csr);
MultiDispatchInfo dispatchInfo(dc);

View File

@@ -82,7 +82,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
GeneralSurface mapSurface;
Surface *surfaces[] = {&bufferSurf, nullptr};
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, csr);
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, size, csr);
if (mapAllocation) {
surfaces[1] = &mapSurface;

View File

@@ -66,7 +66,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
GeneralSurface mapSurface;
Surface *surfaces[] = {&srcBufferSurf, nullptr};
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, csr);
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, getTotalSizeFromRectRegion(region), csr);
if (region[0] != 0 && region[1] != 0 && region[2] != 0) {
if (mapAllocation) {

View File

@@ -63,7 +63,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
GeneralSurface mapSurface;
Surface *surfaces[] = {&srcImgSurf, nullptr};
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, csr);
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, getTotalSizeFromRectRegion(region), csr);
bool tempAllocFallback = false;
if (mapAllocation) {

View File

@@ -123,7 +123,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMap(cl_bool blockingMap,
dc.srcOffset = {svmOffset, 0, 0};
dc.size = {size, 0, 0};
dc.unifiedMemoryArgsRequireMemSync = externalAppCall;
dc.bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, csr);
dc.bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, size, csr);
MultiDispatchInfo dispatchInfo(dc);
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, blocking, csr);
@@ -210,7 +210,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMUnmap(void *svmPtr,
dc.srcOffset = {svmOperation->offset, 0, 0};
dc.size = {svmOperation->regionSize, 0, 0};
dc.unifiedMemoryArgsRequireMemSync = externalAppCall;
dc.bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, csr);
dc.bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, svmOperation->regionSize, csr);
MultiDispatchInfo dispatchInfo(dc);
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, false, csr);
@@ -367,7 +367,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
GeneralSurface srcSvmSurf(srcAllocation);
HostPtrSurface dstHostPtrSurf(dstGpuPtr, size);
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, csr);
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, size, csr);
if (size != 0) {
bool status = selectCsrForHostPtrAllocation(bcsSplit, csr).createAllocationForHostSurface(dstHostPtrSurf, true);
@@ -393,7 +393,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcGpuPtr), size, true);
GeneralSurface dstSvmSurf(dstAllocation);
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, csr);
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, size, csr);
if (size != 0) {
bool status = selectCsrForHostPtrAllocation(bcsSplit, csr).createAllocationForHostSurface(srcHostPtrSurf, false);
@@ -420,7 +420,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
surfaces[0] = &srcSvmSurf;
surfaces[1] = &dstSvmSurf;
operationParams.bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, csr);
operationParams.bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, size, csr);
dispatchInfo.setBuiltinOpParams(operationParams);
dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_SVM_MEMCPY>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
} else {
@@ -430,7 +430,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcGpuPtr), size);
HostPtrSurface dstHostPtrSurf(dstGpuPtr, size);
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, csr);
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, size, csr);
if (size != 0) {
bool status = selectCsrForHostPtrAllocation(bcsSplit, csr).createAllocationForHostSurface(srcHostPtrSurf, false);

View File

@@ -70,7 +70,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
GeneralSurface mapSurface;
Surface *surfaces[] = {&bufferSurf, nullptr};
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, csr);
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, size, csr);
if (mapAllocation) {
surfaces[1] = &mapSurface;

View File

@@ -70,7 +70,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
GeneralSurface mapSurface;
Surface *surfaces[] = {&dstBufferSurf, nullptr};
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, csr);
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, getTotalSizeFromRectRegion(region), csr);
if (region[0] != 0 && region[1] != 0 && region[2] != 0) {
if (mapAllocation) {

View File

@@ -59,7 +59,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
GeneralSurface mapSurface;
Surface *surfaces[] = {&dstImgSurf, nullptr};
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, csr);
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, getTotalSizeFromRectRegion(region), csr);
if (mapAllocation) {
surfaces[1] = &mapSurface;