Reuse graphics allocations in readBufferRect/writeBufferRect
Related-To: NEO-6352
Signed-off-by: Maciej Dziuban <maciej.dziuban@intel.com>
This commit is contained in:
parent
9c1e7422b1
commit
a582f34c04
|
@ -959,4 +959,15 @@ void CommandQueue::waitForAllEngines(bool blockedQueue, PrintfHandler *printfHan
|
|||
}
|
||||
}
|
||||
|
||||
void *CommandQueue::convertAddressWithOffsetToGpuVa(void *ptr, InternalMemoryType memoryType, GraphicsAllocation &allocation) {
|
||||
// If this is device or shared USM pointer, it is already a gpuVA and we don't have to do anything.
|
||||
// Otherwise, we assume this is a cpuVA and we have to convert to gpuVA, while preserving offset from allocation start.
|
||||
const bool isCpuPtr = (memoryType != DEVICE_UNIFIED_MEMORY) && (memoryType != SHARED_UNIFIED_MEMORY);
|
||||
if (isCpuPtr) {
|
||||
size_t dstOffset = ptrDiff(ptr, allocation.getUnderlyingBuffer());
|
||||
ptr = reinterpret_cast<void *>(allocation.getGpuAddress() + dstOffset);
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
|
|
@ -321,6 +321,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
|||
return requiresCacheFlushAfterWalker;
|
||||
}
|
||||
|
||||
static void *convertAddressWithOffsetToGpuVa(void *ptr, InternalMemoryType memoryType, GraphicsAllocation &allocation);
|
||||
|
||||
void updateBcsTaskCount(aub_stream::EngineType bcsEngineType, uint32_t newBcsTaskCount);
|
||||
uint32_t peekBcsTaskCount(aub_stream::EngineType bcsEngineType) const;
|
||||
|
||||
|
|
|
@ -85,11 +85,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
|||
if (mapAllocation) {
|
||||
surfaces[1] = &mapSurface;
|
||||
mapSurface.setGraphicsAllocation(mapAllocation);
|
||||
//get offset between base cpu ptr of map allocation and dst ptr
|
||||
if ((memoryType != DEVICE_UNIFIED_MEMORY) && (memoryType != SHARED_UNIFIED_MEMORY)) {
|
||||
size_t dstOffset = ptrDiff(dstPtr, mapAllocation->getUnderlyingBuffer());
|
||||
dstPtr = reinterpret_cast<void *>(mapAllocation->getGpuAddress() + dstOffset);
|
||||
}
|
||||
dstPtr = convertAddressWithOffsetToGpuVa(dstPtr, memoryType, *mapAllocation);
|
||||
} else {
|
||||
surfaces[1] = &hostPtrSurf;
|
||||
if (size != 0) {
|
||||
|
|
|
@ -51,26 +51,38 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
|
|||
numEventsInWaitList, eventWaitList, event);
|
||||
}
|
||||
|
||||
const size_t hostPtrSize = Buffer::calculateHostPtrSize(hostOrigin, region, hostRowPitch, hostSlicePitch);
|
||||
const uint32_t rootDeviceIndex = getDevice().getRootDeviceIndex();
|
||||
InternalMemoryType memoryType = InternalMemoryType::NOT_SPECIFIED;
|
||||
GraphicsAllocation *mapAllocation = nullptr;
|
||||
bool isCpuCopyAllowed = false;
|
||||
getContext().tryGetExistingHostPtrAllocation(ptr, hostPtrSize, rootDeviceIndex, mapAllocation, memoryType, isCpuCopyAllowed);
|
||||
|
||||
auto eBuiltInOps = EBuiltInOps::CopyBufferRect;
|
||||
if (forceStateless(buffer->getSize())) {
|
||||
eBuiltInOps = EBuiltInOps::CopyBufferRectStateless;
|
||||
}
|
||||
|
||||
size_t hostPtrSize = Buffer::calculateHostPtrSize(hostOrigin, region, hostRowPitch, hostSlicePitch);
|
||||
void *dstPtr = ptr;
|
||||
|
||||
MemObjSurface bufferSurf(buffer);
|
||||
MemObjSurface srcBufferSurf(buffer);
|
||||
HostPtrSurface hostPtrSurf(dstPtr, hostPtrSize);
|
||||
Surface *surfaces[] = {&bufferSurf, &hostPtrSurf};
|
||||
GeneralSurface mapSurface;
|
||||
Surface *surfaces[] = {&srcBufferSurf, nullptr};
|
||||
|
||||
if (region[0] != 0 &&
|
||||
region[1] != 0 &&
|
||||
region[2] != 0) {
|
||||
bool status = csr.createAllocationForHostSurface(hostPtrSurf, true);
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
if (region[0] != 0 && region[1] != 0 && region[2] != 0) {
|
||||
if (mapAllocation) {
|
||||
surfaces[1] = &mapSurface;
|
||||
mapSurface.setGraphicsAllocation(mapAllocation);
|
||||
dstPtr = convertAddressWithOffsetToGpuVa(dstPtr, memoryType, *mapAllocation);
|
||||
} else {
|
||||
surfaces[1] = &hostPtrSurf;
|
||||
bool status = csr.createAllocationForHostSurface(hostPtrSurf, true);
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
dstPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddress());
|
||||
}
|
||||
dstPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddress());
|
||||
}
|
||||
|
||||
void *alignedDstPtr = alignDown(dstPtr, 4);
|
||||
|
|
|
@ -77,11 +77,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
|
|||
if (mapAllocation) {
|
||||
surfaces[1] = &mapSurface;
|
||||
mapSurface.setGraphicsAllocation(mapAllocation);
|
||||
//get offset between base cpu ptr of map allocation and dst ptr
|
||||
if ((memoryType != DEVICE_UNIFIED_MEMORY) && (memoryType != SHARED_UNIFIED_MEMORY)) {
|
||||
size_t srcOffset = ptrDiff(srcPtr, mapAllocation->getUnderlyingBuffer());
|
||||
srcPtr = reinterpret_cast<void *>(mapAllocation->getGpuAddress() + srcOffset);
|
||||
}
|
||||
srcPtr = convertAddressWithOffsetToGpuVa(srcPtr, memoryType, *mapAllocation);
|
||||
} else {
|
||||
surfaces[1] = &hostPtrSurf;
|
||||
if (size != 0) {
|
||||
|
|
|
@ -51,26 +51,38 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
|
|||
numEventsInWaitList, eventWaitList, event);
|
||||
}
|
||||
|
||||
const size_t hostPtrSize = Buffer::calculateHostPtrSize(hostOrigin, region, hostRowPitch, hostSlicePitch);
|
||||
const uint32_t rootDeviceIndex = getDevice().getRootDeviceIndex();
|
||||
InternalMemoryType memoryType = InternalMemoryType::NOT_SPECIFIED;
|
||||
GraphicsAllocation *mapAllocation = nullptr;
|
||||
bool isCpuCopyAllowed = false;
|
||||
getContext().tryGetExistingHostPtrAllocation(ptr, hostPtrSize, rootDeviceIndex, mapAllocation, memoryType, isCpuCopyAllowed);
|
||||
|
||||
auto eBuiltInOps = EBuiltInOps::CopyBufferRect;
|
||||
if (forceStateless(buffer->getSize())) {
|
||||
eBuiltInOps = EBuiltInOps::CopyBufferRectStateless;
|
||||
}
|
||||
|
||||
size_t hostPtrSize = Buffer::calculateHostPtrSize(hostOrigin, region, hostRowPitch, hostSlicePitch);
|
||||
void *srcPtr = const_cast<void *>(ptr);
|
||||
|
||||
MemObjSurface dstBufferSurf(buffer);
|
||||
HostPtrSurface hostPtrSurf(srcPtr, hostPtrSize, true);
|
||||
Surface *surfaces[] = {&dstBufferSurf, &hostPtrSurf};
|
||||
GeneralSurface mapSurface;
|
||||
Surface *surfaces[] = {&dstBufferSurf, nullptr};
|
||||
|
||||
if (region[0] != 0 &&
|
||||
region[1] != 0 &&
|
||||
region[2] != 0) {
|
||||
bool status = csr.createAllocationForHostSurface(hostPtrSurf, false);
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
if (region[0] != 0 && region[1] != 0 && region[2] != 0) {
|
||||
if (mapAllocation) {
|
||||
surfaces[1] = &mapSurface;
|
||||
mapSurface.setGraphicsAllocation(mapAllocation);
|
||||
srcPtr = convertAddressWithOffsetToGpuVa(srcPtr, memoryType, *mapAllocation);
|
||||
} else {
|
||||
surfaces[1] = &hostPtrSurf;
|
||||
bool status = csr.createAllocationForHostSurface(hostPtrSurf, false);
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
srcPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddress());
|
||||
}
|
||||
srcPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddress());
|
||||
}
|
||||
|
||||
void *alignedSrcPtr = alignDown(srcPtr, 4);
|
||||
|
|
|
@ -6,7 +6,8 @@
|
|||
*/
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/helpers/constants.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/unit_test/utilities/base_object_utils.h"
|
||||
|
||||
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
|
||||
#include "opencl/source/event/event.h"
|
||||
|
@ -698,3 +699,48 @@ HWTEST_F(EnqueueReadBufferRectStatefulTest, WhenReadingBufferRectStatefulThenSuc
|
|||
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
}
|
||||
|
||||
// Checks that enqueueReadBufferRect does not create a new host-surface allocation when the
// host pointer was obtained from enqueueMapBuffer: the CSR's createAllocationForHostSurfaceCalled
// counter must grow for an ordinary heap pointer and stay unchanged for the mapped pointer.
HWTEST_F(EnqueueReadBufferRectHw, givenHostPtrIsFromMappedBufferWhenReadBufferRectIsCalledThenReuseGraphicsAllocation) {
    DebugManagerStateRestore restore{};
    // NOTE(review): presumably needed so that mapping a buffer produces a separate map allocation - confirm.
    DebugManager.flags.DisableZeroCopyForBuffers.set(1);

    MockCommandQueueHw<FamilyType> queue(context.get(), device.get(), nullptr);
    auto &csr = device->getUltCommandStreamReceiver<FamilyType>();

    BufferDefaults::context = context.get();
    auto bufferForMap = clUniquePtr(BufferHelper<>::create());
    auto bufferForRead = clUniquePtr(BufferHelper<>::create());

    // Map one buffer; this alone must not go through createAllocationForHostSurface.
    cl_int retVal{};
    void *mappedPtr = queue.enqueueMapBuffer(bufferForMap.get(), CL_TRUE, CL_MAP_READ, 0, bufferForMap->getSize(), 0, nullptr, nullptr, retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(nullptr, mappedPtr);
    EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);

    // The map operation must have been recorded together with a graphics allocation.
    MapOperationsHandler *mapOperationsHandler = context->getMapOperationsStorage().getHandlerIfExists(bufferForMap.get());
    // Fatal assertion: the handler is dereferenced on the next statement.
    ASSERT_NE(nullptr, mapOperationsHandler);
    MapInfo mapInfo{};
    EXPECT_TRUE(mapOperationsHandler->find(mappedPtr, mapInfo));
    EXPECT_NE(nullptr, mapInfo.graphicsAllocation);

    // Baseline: a plain heap pointer requires a new host-surface allocation.
    auto unmappedPtr = std::make_unique<char[]>(bufferForRead->getSize());
    retVal = queue.enqueueReadBufferRect(bufferForRead.get(), CL_TRUE,
                                         bufferOrigin, hostOrigin,
                                         region,
                                         bufferRowPitch, bufferSlicePitch,
                                         hostRowPitch, hostSlicePitch,
                                         unmappedPtr.get(),
                                         0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled);

    // Mapped pointer: the counter must not change, i.e. no new allocation was created.
    retVal = queue.enqueueReadBufferRect(bufferForRead.get(), CL_TRUE,
                                         bufferOrigin, hostOrigin,
                                         region,
                                         bufferRowPitch, bufferSlicePitch,
                                         hostRowPitch, hostSlicePitch,
                                         mappedPtr,
                                         0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled);
}
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
*/
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/unit_test/utilities/base_object_utils.h"
|
||||
|
||||
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
|
||||
#include "opencl/source/event/event.h"
|
||||
|
@ -697,3 +699,48 @@ HWTEST_F(EnqueueWriteBufferRectStatefulTest, WhenWritingBufferRectStatefulThenSu
|
|||
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
}
|
||||
|
||||
// Checks that enqueueWriteBufferRect does not create a new host-surface allocation when the
// host pointer was obtained from enqueueMapBuffer: the CSR's createAllocationForHostSurfaceCalled
// counter must grow for an ordinary heap pointer and stay unchanged for the mapped pointer.
HWTEST_F(EnqueueWriteBufferRectHw, givenHostPtrIsFromMappedBufferWhenWriteBufferRectIsCalledThenReuseGraphicsAllocation) {
    DebugManagerStateRestore restore{};
    // NOTE(review): presumably needed so that mapping a buffer produces a separate map allocation - confirm.
    DebugManager.flags.DisableZeroCopyForBuffers.set(1);

    MockCommandQueueHw<FamilyType> queue(context.get(), device.get(), nullptr);
    auto &csr = device->getUltCommandStreamReceiver<FamilyType>();

    BufferDefaults::context = context.get();
    auto bufferForMap = clUniquePtr(BufferHelper<>::create());
    auto bufferForWrite = clUniquePtr(BufferHelper<>::create());

    // Map one buffer; this alone must not go through createAllocationForHostSurface.
    cl_int retVal{};
    void *mappedPtr = queue.enqueueMapBuffer(bufferForMap.get(), CL_TRUE, CL_MAP_READ, 0, bufferForMap->getSize(), 0, nullptr, nullptr, retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(nullptr, mappedPtr);
    EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);

    // The map operation must have been recorded together with a graphics allocation.
    MapOperationsHandler *mapOperationsHandler = context->getMapOperationsStorage().getHandlerIfExists(bufferForMap.get());
    // Fatal assertion: the handler is dereferenced on the next statement.
    ASSERT_NE(nullptr, mapOperationsHandler);
    MapInfo mapInfo{};
    EXPECT_TRUE(mapOperationsHandler->find(mappedPtr, mapInfo));
    EXPECT_NE(nullptr, mapInfo.graphicsAllocation);

    // Baseline: a plain heap pointer requires a new host-surface allocation.
    auto unmappedPtr = std::make_unique<char[]>(bufferForWrite->getSize());
    retVal = queue.enqueueWriteBufferRect(bufferForWrite.get(), CL_TRUE,
                                          bufferOrigin, hostOrigin,
                                          region,
                                          bufferRowPitch, bufferSlicePitch,
                                          hostRowPitch, hostSlicePitch,
                                          unmappedPtr.get(),
                                          0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled);

    // Mapped pointer: the counter must not change, i.e. no new allocation was created.
    retVal = queue.enqueueWriteBufferRect(bufferForWrite.get(), CL_TRUE,
                                          bufferOrigin, hostOrigin,
                                          region,
                                          bufferRowPitch, bufferSlicePitch,
                                          hostRowPitch, hostSlicePitch,
                                          mappedPtr,
                                          0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled);
}
|
||||
|
|
Loading…
Reference in New Issue