performance: use staging buffer when writing to an image

Related-To: NEO-12968

Also, don't import USM/mapped allocations for image
operations.

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek
2024-10-30 11:04:33 +00:00
committed by Compute-Runtime-Automation
parent 0f2f3c3764
commit cf58be4142
11 changed files with 274 additions and 32 deletions

View File

@@ -1633,4 +1633,12 @@ bool CommandQueue::isValidForStagingBufferCopy(Device &device, void *dstPtr, con
return stagingBufferManager->isValidForCopy(device, dstPtr, srcPtr, size, hasDependencies, osContextId);
}
// Reports whether an image write of `size` bytes may be routed through a staging buffer.
// Returns false when the context provides no staging buffer manager; otherwise the
// decision is delegated to the manager, which is given this queue's device and `size`.
bool CommandQueue::isValidForStagingWriteImage(size_t size) {
    if (auto manager = context->getStagingBufferManager()) {
        return manager->isValidForStagingWriteImage(this->getDevice(), size);
    }
    return false;
}
} // namespace NEO

View File

@@ -390,6 +390,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
cl_int enqueueStagingBufferMemcpy(cl_bool blockingCopy, void *dstPtr, const void *srcPtr, size_t size, cl_event *event);
bool isValidForStagingBufferCopy(Device &device, void *dstPtr, const void *srcPtr, size_t size, bool hasDependencies);
bool isValidForStagingWriteImage(size_t size);
protected:
void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -66,6 +66,16 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, getTotalSizeFromRectRegion(region), csr);
bool tempAllocFallback = false;
if (!mapAllocation) {
InternalMemoryType memoryType = InternalMemoryType::notSpecified;
bool isCpuCopyAllowed = false;
cl_int retVal = getContext().tryGetExistingHostPtrAllocation(ptr, hostPtrSize, device->getRootDeviceIndex(), mapAllocation, memoryType, isCpuCopyAllowed);
if (retVal != CL_SUCCESS) {
return retVal;
}
}
if (mapAllocation) {
surfaces[1] = &mapSurface;
mapSurface.setGraphicsAllocation(mapAllocation);

View File

@@ -10,6 +10,7 @@
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/helpers/basic_math.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/utilities/staging_buffer_manager.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
@@ -61,6 +62,35 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, getTotalSizeFromRectRegion(region), csr);
StagingBufferTracker stagingBufferTracker{};
if (!mapAllocation) {
InternalMemoryType memoryType = InternalMemoryType::notSpecified;
bool isCpuCopyAllowed = false;
cl_int retVal = getContext().tryGetExistingHostPtrAllocation(srcPtr, hostPtrSize, device->getRootDeviceIndex(), mapAllocation, memoryType, isCpuCopyAllowed);
if (retVal != CL_SUCCESS) {
return retVal;
}
if (!mapAllocation && this->isValidForStagingWriteImage(hostPtrSize)) {
auto allocatedSize = hostPtrSize;
auto [heapAllocator, stagingBuffer] = getContext().getStagingBufferManager()->requestStagingBuffer(allocatedSize, &csr);
auto stagingBufferPtr = addrToPtr(stagingBuffer);
if (stagingBufferPtr != nullptr) {
stagingBufferTracker = StagingBufferTracker{heapAllocator, stagingBuffer, allocatedSize, 0};
memcpy(stagingBufferPtr, srcPtr, hostPtrSize);
srcPtr = stagingBufferPtr;
mapAllocation = getContext().getSVMAllocsManager()->getSVMAlloc(srcPtr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
UNRECOVERABLE_IF(mapAllocation == nullptr);
}
}
if (mapAllocation) {
mapAllocation->setAubWritable(true, GraphicsAllocation::defaultBank);
mapAllocation->setTbxWritable(true, GraphicsAllocation::defaultBank);
}
}
if (mapAllocation) {
surfaces[1] = &mapSurface;
mapSurface.setGraphicsAllocation(mapAllocation);
@@ -104,6 +134,11 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
MultiDispatchInfo dispatchInfo(dc);
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite == CL_TRUE, csr);
if (stagingBufferTracker.chunkAddress != 0) {
stagingBufferTracker.taskCountToWait = csr.peekTaskCount();
getContext().getStagingBufferManager()->trackChunk(stagingBufferTracker);
}
if (dispatchResult != CL_SUCCESS) {
return dispatchResult;
}