mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-01 04:23:00 +08:00
performance: use staging buffer when writing to an image
Related-To: NEO-12968
Also, don't import usm/mapped allocations for image operations.
Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
0f2f3c3764
commit
cf58be4142
@@ -1633,4 +1633,12 @@ bool CommandQueue::isValidForStagingBufferCopy(Device &device, void *dstPtr, con
|
||||
return stagingBufferManager->isValidForCopy(device, dstPtr, srcPtr, size, hasDependencies, osContextId);
|
||||
}
|
||||
|
||||
bool CommandQueue::isValidForStagingWriteImage(size_t size) {
|
||||
auto stagingBufferManager = context->getStagingBufferManager();
|
||||
if (!stagingBufferManager) {
|
||||
return false;
|
||||
}
|
||||
return stagingBufferManager->isValidForStagingWriteImage(this->getDevice(), size);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -390,6 +390,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
|
||||
cl_int enqueueStagingBufferMemcpy(cl_bool blockingCopy, void *dstPtr, const void *srcPtr, size_t size, cl_event *event);
|
||||
bool isValidForStagingBufferCopy(Device &device, void *dstPtr, const void *srcPtr, size_t size, bool hasDependencies);
|
||||
// Returns false when the context has no staging buffer manager; otherwise
// forwards the query, with this queue's device and `size`, to that manager.
bool isValidForStagingWriteImage(size_t size);
|
||||
|
||||
protected:
|
||||
void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -66,6 +66,16 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
|
||||
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, getTotalSizeFromRectRegion(region), csr);
|
||||
|
||||
bool tempAllocFallback = false;
|
||||
|
||||
if (!mapAllocation) {
|
||||
InternalMemoryType memoryType = InternalMemoryType::notSpecified;
|
||||
bool isCpuCopyAllowed = false;
|
||||
cl_int retVal = getContext().tryGetExistingHostPtrAllocation(ptr, hostPtrSize, device->getRootDeviceIndex(), mapAllocation, memoryType, isCpuCopyAllowed);
|
||||
if (retVal != CL_SUCCESS) {
|
||||
return retVal;
|
||||
}
|
||||
}
|
||||
|
||||
if (mapAllocation) {
|
||||
surfaces[1] = &mapSurface;
|
||||
mapSurface.setGraphicsAllocation(mapAllocation);
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/helpers/basic_math.h"
|
||||
#include "shared/source/memory_manager/graphics_allocation.h"
|
||||
#include "shared/source/utilities/staging_buffer_manager.h"
|
||||
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/helpers/hardware_commands_helper.h"
|
||||
@@ -61,6 +62,35 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
|
||||
|
||||
auto bcsSplit = this->isSplitEnqueueBlitNeeded(csrSelectionArgs.direction, getTotalSizeFromRectRegion(region), csr);
|
||||
|
||||
StagingBufferTracker stagingBufferTracker{};
|
||||
if (!mapAllocation) {
|
||||
InternalMemoryType memoryType = InternalMemoryType::notSpecified;
|
||||
bool isCpuCopyAllowed = false;
|
||||
cl_int retVal = getContext().tryGetExistingHostPtrAllocation(srcPtr, hostPtrSize, device->getRootDeviceIndex(), mapAllocation, memoryType, isCpuCopyAllowed);
|
||||
if (retVal != CL_SUCCESS) {
|
||||
return retVal;
|
||||
}
|
||||
|
||||
if (!mapAllocation && this->isValidForStagingWriteImage(hostPtrSize)) {
|
||||
auto allocatedSize = hostPtrSize;
|
||||
auto [heapAllocator, stagingBuffer] = getContext().getStagingBufferManager()->requestStagingBuffer(allocatedSize, &csr);
|
||||
auto stagingBufferPtr = addrToPtr(stagingBuffer);
|
||||
if (stagingBufferPtr != nullptr) {
|
||||
stagingBufferTracker = StagingBufferTracker{heapAllocator, stagingBuffer, allocatedSize, 0};
|
||||
memcpy(stagingBufferPtr, srcPtr, hostPtrSize);
|
||||
srcPtr = stagingBufferPtr;
|
||||
|
||||
mapAllocation = getContext().getSVMAllocsManager()->getSVMAlloc(srcPtr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
|
||||
UNRECOVERABLE_IF(mapAllocation == nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
if (mapAllocation) {
|
||||
mapAllocation->setAubWritable(true, GraphicsAllocation::defaultBank);
|
||||
mapAllocation->setTbxWritable(true, GraphicsAllocation::defaultBank);
|
||||
}
|
||||
}
|
||||
|
||||
if (mapAllocation) {
|
||||
surfaces[1] = &mapSurface;
|
||||
mapSurface.setGraphicsAllocation(mapAllocation);
|
||||
@@ -104,6 +134,11 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
|
||||
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite == CL_TRUE, csr);
|
||||
if (stagingBufferTracker.chunkAddress != 0) {
|
||||
stagingBufferTracker.taskCountToWait = csr.peekTaskCount();
|
||||
getContext().getStagingBufferManager()->trackChunk(stagingBufferTracker);
|
||||
}
|
||||
|
||||
if (dispatchResult != CL_SUCCESS) {
|
||||
return dispatchResult;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user