mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-26 23:33:20 +08:00
Add minimal transfer size for BCS split
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
74bf623ddb
commit
a3dedcc7ee
@@ -358,7 +358,8 @@ class CommandQueueHw : public CommandQueue {
|
||||
cl_int enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &bcsCsr);
|
||||
|
||||
bool isSplitEnqueueBlitSupported();
|
||||
bool isSplitEnqueueBlitNeeded(TransferDirection transferDirection, CommandStreamReceiver &csr);
|
||||
bool isSplitEnqueueBlitNeeded(TransferDirection transferDirection, size_t transferSize, CommandStreamReceiver &csr);
|
||||
size_t getTotalSizeFromRectRegion(const size_t *region);
|
||||
|
||||
template <uint32_t cmdType>
|
||||
cl_int enqueueBlitSplit(MultiDispatchInfo &dispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &csr);
|
||||
|
||||
@@ -1115,9 +1115,12 @@ bool CommandQueueHw<GfxFamily>::isSplitEnqueueBlitSupported() {
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool CommandQueueHw<GfxFamily>::isSplitEnqueueBlitNeeded(TransferDirection transferDirection, CommandStreamReceiver &csr) {
|
||||
bool CommandQueueHw<GfxFamily>::isSplitEnqueueBlitNeeded(TransferDirection transferDirection, size_t transferSize, CommandStreamReceiver &csr) {
|
||||
constexpr size_t minimalSizeForBcsSplit = 16 * MemoryConstants::megaByte;
|
||||
|
||||
auto bcsSplit = isSplitEnqueueBlitSupported() &&
|
||||
csr.getOsContext().getEngineType() == aub_stream::EngineType::ENGINE_BCS &&
|
||||
transferSize >= minimalSizeForBcsSplit &&
|
||||
(transferDirection == TransferDirection::HostToLocal ||
|
||||
transferDirection == TransferDirection::LocalToHost);
|
||||
|
||||
@@ -1128,6 +1131,14 @@ bool CommandQueueHw<GfxFamily>::isSplitEnqueueBlitNeeded(TransferDirection trans
|
||||
return bcsSplit;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t CommandQueueHw<GfxFamily>::getTotalSizeFromRectRegion(const size_t *region) {
|
||||
auto size = region[0];
|
||||
size *= (region[1] == 0 ? 1 : region[1]);
|
||||
size *= (region[2] == 0 ? 1 : region[2]);
|
||||
return size;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
template <uint32_t cmdType>
|
||||
cl_int CommandQueueHw<GfxFamily>::enqueueBlitSplit(MultiDispatchInfo &dispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &csr) {
|
||||
|
||||
@@ -45,7 +45,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBuffer(
|
||||
dc.srcOffset = {srcOffset, 0, 0};
|
||||
dc.dstOffset = {dstOffset, 0, 0};
|
||||
dc.size = {size, 0, 0};
|
||||
dc.bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, csr);
|
||||
dc.bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, size, csr);
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
|
||||
|
||||
@@ -56,7 +56,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferRect(
|
||||
dc.srcSlicePitch = srcSlicePitch;
|
||||
dc.dstRowPitch = dstRowPitch;
|
||||
dc.dstSlicePitch = dstSlicePitch;
|
||||
dc.bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, csr);
|
||||
dc.bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, getTotalSizeFromRectRegion(region), csr);
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
return dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, false, csr);
|
||||
|
||||
@@ -52,7 +52,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyImage(
|
||||
if (isMipMapped(dstImage->getImageDesc())) {
|
||||
dc.dstMipLevel = findMipLevel(dstImage->getImageDesc().image_type, dstOrigin);
|
||||
}
|
||||
dc.bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, csr);
|
||||
dc.bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, getTotalSizeFromRectRegion(region), csr);
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
|
||||
|
||||
@@ -82,7 +82,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
||||
GeneralSurface mapSurface;
|
||||
Surface *surfaces[] = {&bufferSurf, nullptr};
|
||||
|
||||
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, csr);
|
||||
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, size, csr);
|
||||
|
||||
if (mapAllocation) {
|
||||
surfaces[1] = &mapSurface;
|
||||
|
||||
@@ -66,7 +66,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
|
||||
GeneralSurface mapSurface;
|
||||
Surface *surfaces[] = {&srcBufferSurf, nullptr};
|
||||
|
||||
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, csr);
|
||||
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, getTotalSizeFromRectRegion(region), csr);
|
||||
|
||||
if (region[0] != 0 && region[1] != 0 && region[2] != 0) {
|
||||
if (mapAllocation) {
|
||||
|
||||
@@ -63,7 +63,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
|
||||
GeneralSurface mapSurface;
|
||||
Surface *surfaces[] = {&srcImgSurf, nullptr};
|
||||
|
||||
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, csr);
|
||||
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, getTotalSizeFromRectRegion(region), csr);
|
||||
|
||||
bool tempAllocFallback = false;
|
||||
if (mapAllocation) {
|
||||
|
||||
@@ -123,7 +123,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMap(cl_bool blockingMap,
|
||||
dc.srcOffset = {svmOffset, 0, 0};
|
||||
dc.size = {size, 0, 0};
|
||||
dc.unifiedMemoryArgsRequireMemSync = externalAppCall;
|
||||
dc.bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, csr);
|
||||
dc.bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, size, csr);
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, blocking, csr);
|
||||
@@ -210,7 +210,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMUnmap(void *svmPtr,
|
||||
dc.srcOffset = {svmOperation->offset, 0, 0};
|
||||
dc.size = {svmOperation->regionSize, 0, 0};
|
||||
dc.unifiedMemoryArgsRequireMemSync = externalAppCall;
|
||||
dc.bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, csr);
|
||||
dc.bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, svmOperation->regionSize, csr);
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, false, csr);
|
||||
@@ -367,7 +367,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
||||
GeneralSurface srcSvmSurf(srcAllocation);
|
||||
HostPtrSurface dstHostPtrSurf(dstGpuPtr, size);
|
||||
|
||||
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, csr);
|
||||
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, size, csr);
|
||||
|
||||
if (size != 0) {
|
||||
bool status = selectCsrForHostPtrAllocation(bcsSplit, csr).createAllocationForHostSurface(dstHostPtrSurf, true);
|
||||
@@ -393,7 +393,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
||||
HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcGpuPtr), size, true);
|
||||
GeneralSurface dstSvmSurf(dstAllocation);
|
||||
|
||||
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, csr);
|
||||
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, size, csr);
|
||||
|
||||
if (size != 0) {
|
||||
bool status = selectCsrForHostPtrAllocation(bcsSplit, csr).createAllocationForHostSurface(srcHostPtrSurf, false);
|
||||
@@ -420,7 +420,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
||||
surfaces[0] = &srcSvmSurf;
|
||||
surfaces[1] = &dstSvmSurf;
|
||||
|
||||
operationParams.bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, csr);
|
||||
operationParams.bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, size, csr);
|
||||
dispatchInfo.setBuiltinOpParams(operationParams);
|
||||
dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_SVM_MEMCPY>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
|
||||
} else {
|
||||
@@ -430,7 +430,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
||||
HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcGpuPtr), size);
|
||||
HostPtrSurface dstHostPtrSurf(dstGpuPtr, size);
|
||||
|
||||
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, csr);
|
||||
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, size, csr);
|
||||
|
||||
if (size != 0) {
|
||||
bool status = selectCsrForHostPtrAllocation(bcsSplit, csr).createAllocationForHostSurface(srcHostPtrSurf, false);
|
||||
|
||||
@@ -70,7 +70,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
|
||||
GeneralSurface mapSurface;
|
||||
Surface *surfaces[] = {&bufferSurf, nullptr};
|
||||
|
||||
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, csr);
|
||||
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, size, csr);
|
||||
|
||||
if (mapAllocation) {
|
||||
surfaces[1] = &mapSurface;
|
||||
|
||||
@@ -70,7 +70,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
|
||||
GeneralSurface mapSurface;
|
||||
Surface *surfaces[] = {&dstBufferSurf, nullptr};
|
||||
|
||||
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, csr);
|
||||
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, getTotalSizeFromRectRegion(region), csr);
|
||||
|
||||
if (region[0] != 0 && region[1] != 0 && region[2] != 0) {
|
||||
if (mapAllocation) {
|
||||
|
||||
@@ -59,7 +59,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
|
||||
GeneralSurface mapSurface;
|
||||
Surface *surfaces[] = {&dstImgSurf, nullptr};
|
||||
|
||||
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, csr);
|
||||
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, getTotalSizeFromRectRegion(region), csr);
|
||||
|
||||
if (mapAllocation) {
|
||||
surfaces[1] = &mapSurface;
|
||||
|
||||
Reference in New Issue
Block a user