performance: introduce staging read for cl_buffer

Related-To: NEO-14026

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek
2025-03-10 19:29:45 +00:00
committed by Compute-Runtime-Automation
parent b2b3b55b19
commit dacbce7f01
12 changed files with 272 additions and 29 deletions

View File

@@ -2542,7 +2542,8 @@ cl_int CL_API_CALL clEnqueueWriteBuffer(cl_command_queue commandQueue,
}
if (pCommandQueue->isValidForStagingTransfer(pBuffer, ptr, cb, CL_COMMAND_WRITE_BUFFER, blockingWrite, numEventsInWaitList > 0)) {
retVal = pCommandQueue->enqueueStagingWriteBuffer(
retVal = pCommandQueue->enqueueStagingBufferTransfer(
CL_COMMAND_WRITE_BUFFER,
pBuffer,
blockingWrite,
offset,

View File

@@ -140,6 +140,9 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
virtual cl_int enqueueReadBuffer(Buffer *buffer, cl_bool blockingRead, size_t offset, size_t size, void *ptr,
GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList,
const cl_event *eventWaitList, cl_event *event) = 0;
// Buffer-read entry point that receives the command stream receiver (csr) to use as an explicit
// argument. The remaining parameters mirror enqueueReadBuffer declared next to it.
virtual cl_int enqueueReadBufferImpl(Buffer *buffer, cl_bool blockingRead, size_t offset, size_t size,
void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList,
const cl_event *eventWaitList, cl_event *event, CommandStreamReceiver &csr) = 0;
virtual cl_int enqueueReadImage(Image *srcImage, cl_bool blockingRead, const size_t *origin, const size_t *region,
size_t rowPitch, size_t slicePitch, void *ptr, GraphicsAllocation *mapAllocation,
@@ -402,7 +405,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
cl_int enqueueStagingBufferMemcpy(cl_bool blockingCopy, void *dstPtr, const void *srcPtr, size_t size, cl_event *event);
cl_int enqueueStagingImageTransfer(cl_command_type commandType, Image *dstImage, cl_bool blockingCopy, const size_t *globalOrigin, const size_t *globalRegion,
size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, cl_event *event);
cl_int enqueueStagingWriteBuffer(Buffer *buffer, cl_bool blockingCopy, size_t offset, size_t size, const void *ptr, cl_event *event);
// Staging-buffer transfer between `buffer` and the host memory at `ptr`. `commandType` selects the
// direction: CL_COMMAND_READ_BUFFER is handled as a read, any other value as a write.
cl_int enqueueStagingBufferTransfer(cl_command_type commandType, Buffer *buffer, cl_bool blockingCopy, size_t offset, size_t size, const void *ptr, cl_event *event);
bool isValidForStagingBufferCopy(Device &device, void *dstPtr, const void *srcPtr, size_t size, bool hasDependencies);
bool isValidForStagingTransfer(MemObj *memObj, const void *ptr, size_t size, cl_command_type commandType, bool isBlocking, bool hasDependencies);

View File

@@ -243,6 +243,16 @@ class CommandQueueHw : public CommandQueue {
const cl_event *eventWaitList,
cl_event *event) override;
// Override of CommandQueue::enqueueReadBufferImpl: same arguments as enqueueReadBuffer plus the
// command stream receiver (csr) chosen by the caller.
cl_int enqueueReadBufferImpl(Buffer *buffer,
cl_bool blockingRead,
size_t offset,
size_t size,
void *ptr,
GraphicsAllocation *mapAllocation,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event, CommandStreamReceiver &csr) override;
cl_int enqueueReadBufferRect(Buffer *buffer,
cl_bool blockingRead,
const size_t *bufferOrigin,

View File

@@ -83,8 +83,9 @@ cl_int CommandQueue::enqueueStagingImageTransfer(cl_command_type commandType, Im
return postStagingTransferSync(ret, event, profilingEvent, isSingleTransfer, blockingCopy, commandType);
}
cl_int CommandQueue::enqueueStagingWriteBuffer(Buffer *buffer, cl_bool blockingCopy, size_t offset, size_t size, const void *ptr, cl_event *event) {
CsrSelectionArgs csrSelectionArgs{CL_COMMAND_WRITE_BUFFER, {}, buffer, this->getDevice().getRootDeviceIndex(), &size};
cl_int CommandQueue::enqueueStagingBufferTransfer(cl_command_type commandType, Buffer *buffer, cl_bool blockingCopy, size_t offset, size_t size, const void *ptr, cl_event *event) {
auto isRead = commandType == CL_COMMAND_READ_BUFFER;
CsrSelectionArgs csrSelectionArgs{commandType, isRead ? buffer : nullptr, isRead ? nullptr : buffer, this->getDevice().getRootDeviceIndex(), &size};
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
cl_event profilingEvent = nullptr;
@@ -94,14 +95,26 @@ cl_int CommandQueue::enqueueStagingWriteBuffer(Buffer *buffer, cl_bool blockingC
auto isLastTransfer = (offset + size == chunkOffset + chunkSize);
isSingleTransfer = isFirstTransfer && isLastTransfer;
cl_event *outEvent = assignEventForStaging(event, &profilingEvent, isFirstTransfer, isLastTransfer);
auto ret = this->enqueueWriteBufferImpl(buffer, false, chunkOffset, chunkSize, stagingBuffer, nullptr, 0, nullptr, outEvent, csr);
cl_int ret = 0;
if (isRead) {
ret = this->enqueueReadBufferImpl(buffer, false, chunkOffset, chunkSize, stagingBuffer, nullptr, 0, nullptr, outEvent, csr);
} else {
ret = this->enqueueWriteBufferImpl(buffer, false, chunkOffset, chunkSize, stagingBuffer, nullptr, 0, nullptr, outEvent, csr);
}
ret |= this->flush();
return ret;
};
auto stagingBufferManager = this->context->getStagingBufferManager();
auto ret = stagingBufferManager->performBufferTransfer(ptr, offset, size, chunkWrite, &csr, false);
return postStagingTransferSync(ret, event, profilingEvent, isSingleTransfer, blockingCopy, CL_COMMAND_WRITE_BUFFER);
auto ret = stagingBufferManager->performBufferTransfer(ptr, offset, size, chunkWrite, &csr, isRead);
if (isRead && context->isProvidingPerformanceHints()) {
context->providePerformanceHintForMemoryTransfer(commandType, true, static_cast<cl_mem>(buffer), ptr);
if (!isL3Capable(ptr, size)) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, ptr, size, MemoryConstants::pageSize, MemoryConstants::pageSize);
}
}
return postStagingTransferSync(ret, event, profilingEvent, isSingleTransfer, blockingCopy, commandType);
}
/*

View File

@@ -34,11 +34,28 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event) {
const cl_command_type cmdType = CL_COMMAND_READ_BUFFER;
CsrSelectionArgs csrSelectionArgs{cmdType, buffer, {}, device->getRootDeviceIndex(), &size};
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
return enqueueReadBufferImpl(buffer, blockingRead, offset, size, ptr, mapAllocation, numEventsInWaitList, eventWaitList, event, csr);
}
template <typename GfxFamily>
cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferImpl(
Buffer *buffer,
cl_bool blockingRead,
size_t offset,
size_t size,
void *ptr,
GraphicsAllocation *mapAllocation,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event, CommandStreamReceiver &csr) {
const cl_command_type cmdType = CL_COMMAND_READ_BUFFER;
CsrSelectionArgs csrSelectionArgs{cmdType, buffer, {}, device->getRootDeviceIndex(), &size};
if (nullptr == mapAllocation) {
notifyEnqueueReadBuffer(buffer, !!blockingRead, EngineHelpers::isBcs(csr.getOsContext().getEngineType()));

View File

@@ -21,10 +21,13 @@ using ClEnqueueReadBufferTests = ApiTests;
namespace ULT {
TEST_F(ClEnqueueReadBufferTests, GivenCorrectArgumentsWhenReadingBufferThenSuccessIsReturned) {
MockBuffer buffer{};
MockContext context{};
MockGraphicsAllocation allocation{};
MockBuffer buffer{&context, allocation};
MockCommandQueue commandQueue{context};
auto data = 1;
auto retVal = clEnqueueReadBuffer(
pCommandQueue,
&commandQueue,
&buffer,
false,
0,

View File

@@ -855,3 +855,160 @@ HWTEST_F(EnqueueReadBufferHw, givenHostPtrIsFromMappedBufferWhenReadBufferIsCall
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled);
}
// Fixture shared by the staging-buffer read tests. Set-up is gated by REQUIRE_SVM_OR_SKIP and
// tear-down of the base fixture is performed only when the default hardware info has ftrSvm set.
struct ReadBufferStagingBufferTest : public EnqueueReadBufferHw {
    void SetUp() override {
        REQUIRE_SVM_OR_SKIP(defaultHwInfo);
        EnqueueReadBufferHw::SetUp();
    }

    void TearDown() override {
        // Mirror the SetUp gate: without ftrSvm the base fixture is left untouched.
        if (defaultHwInfo->capabilityTable.ftrSvm) {
            EnqueueReadBufferHw::TearDown();
        }
    }

    // Size used by the tests to build transfers that span several chunks.
    static constexpr size_t chunkSize = 2 * MemoryConstants::megaByte;
    // Host destination for the small (single cache line) reads.
    unsigned char ptr[MemoryConstants::cacheLineSize];
    MockBuffer buffer;
    cl_queue_properties props = {};
};
// Reads the whole fixture buffer through the staging path and expects one read enqueue, a flush,
// and no host-surface allocation created on the CSR.
HWTEST_F(ReadBufferStagingBufferTest, whenEnqueueStagingReadBufferCalledThenReturnSuccess) {
    MockCommandQueueHw<FamilyType> queue(context.get(), device.get(), &props);

    const auto status = queue.enqueueStagingBufferTransfer(CL_COMMAND_READ_BUFFER, &buffer, false, 0, buffer.getSize(), ptr, nullptr);

    EXPECT_TRUE(queue.flushCalled);
    EXPECT_EQ(status, CL_SUCCESS);
    EXPECT_EQ(1ul, queue.enqueueReadBufferCounter);

    auto &ultCsr = device->getUltCommandStreamReceiver<FamilyType>();
    EXPECT_EQ(0u, ultCsr.createAllocationForHostSurfaceCalled);
}
// After a regular (non-staging) read to `ptr`, the staging validity check for the same host
// pointer is expected to pass once, fail on the repeated query, and pass again after the event's
// execution status has been updated.
HWTEST_F(ReadBufferStagingBufferTest, whenHostPtrRegisteredThenDontUseStagingUntilEventCompleted) {
    DebugManagerStateRestore restorer;
    debugManager.flags.EnableCopyWithStagingBuffers.set(1);
    MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);

    // Initialise the handle so that a failed enqueue can never leave garbage behind.
    cl_event event = nullptr;
    auto retVal = mockCommandQueueHw.enqueueReadBuffer(&buffer,
                                                       CL_FALSE,
                                                       0,
                                                       MemoryConstants::cacheLineSize,
                                                       ptr,
                                                       nullptr,
                                                       0,
                                                       nullptr,
                                                       &event);
    // Fatal assertions: the event is dereferenced below, so continuing after a failure is unsafe.
    ASSERT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, event);
    auto pEvent = castToObjectOrAbort<Event>(event);

    EXPECT_TRUE(mockCommandQueueHw.isValidForStagingTransfer(&buffer, ptr, MemoryConstants::cacheLineSize, CL_COMMAND_READ_BUFFER, false, false));
    EXPECT_FALSE(mockCommandQueueHw.isValidForStagingTransfer(&buffer, ptr, MemoryConstants::cacheLineSize, CL_COMMAND_READ_BUFFER, false, false));

    pEvent->updateExecutionStatus();
    EXPECT_TRUE(mockCommandQueueHw.isValidForStagingTransfer(&buffer, ptr, MemoryConstants::cacheLineSize, CL_COMMAND_READ_BUFFER, false, false));

    pEvent->release();
}
// Queries staging validity for the same host pointer three times: twice back to back and once
// more after finish() has been called on the queue.
HWTEST_F(ReadBufferStagingBufferTest, whenHostPtrRegisteredThenDontUseStagingUntilFinishCalled) {
    DebugManagerStateRestore restorer;
    debugManager.flags.EnableCopyWithStagingBuffers.set(1);
    MockCommandQueueHw<FamilyType> queue(context.get(), device.get(), &props);

    // Single place for the repeated query so all three checks use identical arguments.
    auto canUseStaging = [&]() {
        return queue.isValidForStagingTransfer(&buffer, ptr, MemoryConstants::cacheLineSize, CL_COMMAND_READ_BUFFER, false, false);
    };

    EXPECT_TRUE(canUseStaging());
    EXPECT_FALSE(canUseStaging());

    queue.finish();
    EXPECT_TRUE(canUseStaging());
}
// Reads a buffer four times the size of chunkSize through the staging path and expects the
// transfer to be issued as four read enqueues without creating a host-surface allocation.
HWTEST_F(ReadBufferStagingBufferTest, whenEnqueueStagingReadBufferCalledWithLargeSizeThenSplitTransfer) {
    constexpr size_t transferSize = chunkSize * 4;
    // Owning array: released automatically on every exit path, including fatal assertions.
    auto hostPtr = std::make_unique<unsigned char[]>(transferSize);
    MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);

    cl_int retVal = CL_SUCCESS;
    // Named differently from the fixture's `buffer` member to avoid shadowing it.
    std::unique_ptr<Buffer> largeBuffer(Buffer::create(context.get(),
                                                       0,
                                                       transferSize,
                                                       nullptr,
                                                       retVal));
    // The buffer is handed to the queue below, so creation must have succeeded to go on.
    ASSERT_EQ(retVal, CL_SUCCESS);
    ASSERT_NE(nullptr, largeBuffer.get());

    auto res = mockCommandQueueHw.enqueueStagingBufferTransfer(CL_COMMAND_READ_BUFFER, largeBuffer.get(), false, 0, transferSize, hostPtr.get(), nullptr);

    EXPECT_TRUE(mockCommandQueueHw.flushCalled);
    EXPECT_EQ(res, CL_SUCCESS);
    EXPECT_EQ(4ul, mockCommandQueueHw.enqueueReadBufferCounter);
    auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
    EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
}
// A staging read that requests an output event must report CL_COMMAND_READ_BUFFER both as the
// last command seen by the queue hook and as the command type of the returned event.
HWTEST_F(ReadBufferStagingBufferTest, whenEnqueueStagingReadBufferCalledWithEventThenReturnValidEvent) {
    constexpr cl_command_type expectedLastCmd = CL_COMMAND_READ_BUFFER;
    MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);

    cl_event event = nullptr;
    auto res = mockCommandQueueHw.enqueueStagingBufferTransfer(CL_COMMAND_READ_BUFFER, &buffer, false, 0, MemoryConstants::cacheLineSize, ptr, &event);
    // Fatal assertions: the event is dereferenced below.
    ASSERT_EQ(res, CL_SUCCESS);
    ASSERT_NE(nullptr, event);

    auto pEvent = castToObjectOrAbort<Event>(event);
    EXPECT_EQ(expectedLastCmd, mockCommandQueueHw.lastCommandType);
    EXPECT_EQ(expectedLastCmd, pEvent->getCommandType());

    clReleaseEvent(event);
}
// On a queue with out-of-order execution enabled, a single-chunk staging read must leave
// CL_COMMAND_READ_BUFFER as the last command seen by the queue hook and as the event's type.
HWTEST_F(ReadBufferStagingBufferTest, givenOutOfOrderQueueWhenEnqueueStagingReadBufferCalledWithSingleTransferThenNoBarrierEnqueued) {
    constexpr cl_command_type expectedLastCmd = CL_COMMAND_READ_BUFFER;
    MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);
    mockCommandQueueHw.setOoqEnabled();

    cl_event event = nullptr;
    auto res = mockCommandQueueHw.enqueueStagingBufferTransfer(CL_COMMAND_READ_BUFFER, &buffer, false, 0, MemoryConstants::cacheLineSize, ptr, &event);
    // Fatal assertions: the event is dereferenced below.
    ASSERT_EQ(res, CL_SUCCESS);
    ASSERT_NE(nullptr, event);

    auto pEvent = castToObjectOrAbort<Event>(event);
    EXPECT_EQ(expectedLastCmd, mockCommandQueueHw.lastCommandType);
    EXPECT_EQ(expectedLastCmd, pEvent->getCommandType());

    clReleaseEvent(event);
}
// With profiling enabled on the queue, the event returned by a staging read must have profiling
// enabled and must not use the CPU profiling path.
HWTEST_F(ReadBufferStagingBufferTest, givenCmdQueueWithProfilingWhenEnqueueStagingReadBufferThenTimestampsSetCorrectly) {
    MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);
    mockCommandQueueHw.setProfilingEnabled();

    cl_event event = nullptr;
    auto res = mockCommandQueueHw.enqueueStagingBufferTransfer(CL_COMMAND_READ_BUFFER, &buffer, false, 0, MemoryConstants::cacheLineSize, ptr, &event);
    // Fatal assertions: the event is dereferenced below.
    ASSERT_EQ(res, CL_SUCCESS);
    ASSERT_NE(nullptr, event);

    auto pEvent = castToObjectOrAbort<Event>(event);
    EXPECT_FALSE(pEvent->isCPUProfilingPath());
    EXPECT_TRUE(pEvent->isProfilingEnabled());

    clReleaseEvent(event);
}
// The error returned by the per-chunk read enqueue must surface from the staging transfer.
HWTEST_F(ReadBufferStagingBufferTest, whenEnqueueStagingReadBufferFailedThenPropagateErrorCode) {
    MockCommandQueueHw<FamilyType> failingQueue(context.get(), device.get(), &props);
    // Disable forwarding to the base implementation so the mocked read reports a failure.
    failingQueue.enqueueReadBufferCallBase = false;

    const auto status = failingQueue.enqueueStagingBufferTransfer(CL_COMMAND_READ_BUFFER, &buffer, false, 0, MemoryConstants::cacheLineSize, ptr, nullptr);

    EXPECT_EQ(status, CL_INVALID_OPERATION);
    EXPECT_EQ(1ul, failingQueue.enqueueReadBufferCounter);
}
// The staging validity check for a small non-blocking read must match the product helper's
// isStagingBuffersEnabled() answer.
HWTEST_F(ReadBufferStagingBufferTest, whenIsValidForStagingTransferCalledThenReturnCorrectValue) {
    MockCommandQueueHw<FamilyType> queue(context.get(), device.get(), &props);
    const auto expected = device->getProductHelper().isStagingBuffersEnabled();

    constexpr size_t hostSize = 16;
    unsigned char hostMem[hostSize];
    const auto actual = queue.isValidForStagingTransfer(&buffer, hostMem, hostSize, CL_COMMAND_READ_BUFFER, false, false);

    EXPECT_EQ(expected, actual);
}
// With the DoCpuCopyOnReadBuffer debug flag set, a blocking read must not be reported as valid
// for a staging transfer.
HWTEST_F(ReadBufferStagingBufferTest, whenIsValidForStagingTransferCalledAndCpuCopyAllowedThenReturnCorrectValue) {
    DebugManagerStateRestore dbgRestore;
    debugManager.flags.DoCpuCopyOnReadBuffer.set(1);
    MockCommandQueueHw<FamilyType> queue(context.get(), device.get(), &props);

    constexpr size_t hostSize = 16;
    unsigned char hostMem[hostSize];
    const bool isBlocking = true;
    const bool stagingAllowed = queue.isValidForStagingTransfer(&buffer, hostMem, hostSize, CL_COMMAND_READ_BUFFER, isBlocking, false);

    EXPECT_FALSE(stagingAllowed);
}

View File

@@ -652,7 +652,7 @@ struct WriteBufferStagingBufferTest : public EnqueueWriteBufferHw {
HWTEST_F(WriteBufferStagingBufferTest, whenEnqueueStagingWriteBufferCalledThenReturnSuccess) {
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);
auto res = mockCommandQueueHw.enqueueStagingWriteBuffer(&buffer, false, 0, buffer.getSize(), ptr, nullptr);
auto res = mockCommandQueueHw.enqueueStagingBufferTransfer(CL_COMMAND_WRITE_BUFFER, &buffer, false, 0, buffer.getSize(), ptr, nullptr);
EXPECT_TRUE(mockCommandQueueHw.flushCalled);
EXPECT_EQ(res, CL_SUCCESS);
EXPECT_EQ(1ul, mockCommandQueueHw.enqueueWriteBufferCounter);
@@ -708,7 +708,7 @@ HWTEST_F(WriteBufferStagingBufferTest, whenEnqueueStagingWriteBufferCalledWithLa
chunkSize * 4,
nullptr,
retVal));
auto res = mockCommandQueueHw.enqueueStagingWriteBuffer(buffer.get(), false, 0, chunkSize * 4, hostPtr, nullptr);
auto res = mockCommandQueueHw.enqueueStagingBufferTransfer(CL_COMMAND_WRITE_BUFFER, buffer.get(), false, 0, chunkSize * 4, hostPtr, nullptr);
EXPECT_TRUE(mockCommandQueueHw.flushCalled);
EXPECT_EQ(retVal, CL_SUCCESS);
EXPECT_EQ(res, CL_SUCCESS);
@@ -723,7 +723,7 @@ HWTEST_F(WriteBufferStagingBufferTest, whenEnqueueStagingWriteBufferCalledWithEv
constexpr cl_command_type expectedLastCmd = CL_COMMAND_WRITE_BUFFER;
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);
cl_event event;
auto res = mockCommandQueueHw.enqueueStagingWriteBuffer(&buffer, false, 0, MemoryConstants::cacheLineSize, ptr, &event);
auto res = mockCommandQueueHw.enqueueStagingBufferTransfer(CL_COMMAND_WRITE_BUFFER, &buffer, false, 0, MemoryConstants::cacheLineSize, ptr, &event);
EXPECT_EQ(res, CL_SUCCESS);
auto pEvent = (Event *)event;
@@ -738,7 +738,7 @@ HWTEST_F(WriteBufferStagingBufferTest, givenOutOfOrderQueueWhenEnqueueStagingWri
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);
mockCommandQueueHw.setOoqEnabled();
cl_event event;
auto res = mockCommandQueueHw.enqueueStagingWriteBuffer(&buffer, false, 0, MemoryConstants::cacheLineSize, ptr, &event);
auto res = mockCommandQueueHw.enqueueStagingBufferTransfer(CL_COMMAND_WRITE_BUFFER, &buffer, false, 0, MemoryConstants::cacheLineSize, ptr, &event);
EXPECT_EQ(res, CL_SUCCESS);
auto pEvent = (Event *)event;
@@ -752,7 +752,7 @@ HWTEST_F(WriteBufferStagingBufferTest, givenCmdQueueWithProfilingWhenEnqueueStag
cl_event event;
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);
mockCommandQueueHw.setProfilingEnabled();
auto res = mockCommandQueueHw.enqueueStagingWriteBuffer(&buffer, false, 0, MemoryConstants::cacheLineSize, ptr, &event);
auto res = mockCommandQueueHw.enqueueStagingBufferTransfer(CL_COMMAND_WRITE_BUFFER, &buffer, false, 0, MemoryConstants::cacheLineSize, ptr, &event);
EXPECT_EQ(res, CL_SUCCESS);
auto pEvent = (Event *)event;
@@ -765,7 +765,7 @@ HWTEST_F(WriteBufferStagingBufferTest, givenCmdQueueWithProfilingWhenEnqueueStag
HWTEST_F(WriteBufferStagingBufferTest, whenEnqueueStagingWriteBufferFailedThenPropagateErrorCode) {
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);
mockCommandQueueHw.enqueueWriteBufferCallBase = false;
auto res = mockCommandQueueHw.enqueueStagingWriteBuffer(&buffer, false, 0, MemoryConstants::cacheLineSize, ptr, nullptr);
auto res = mockCommandQueueHw.enqueueStagingBufferTransfer(CL_COMMAND_WRITE_BUFFER, &buffer, false, 0, MemoryConstants::cacheLineSize, ptr, nullptr);
EXPECT_EQ(res, CL_INVALID_OPERATION);
EXPECT_EQ(1ul, mockCommandQueueHw.enqueueWriteBufferCounter);

View File

@@ -100,6 +100,26 @@ TEST_P(PerformanceHintEnqueueReadBufferTest, GivenHostPtrAndSizeAlignmentsWhenEn
alignedFree(ptr);
}
// Issues a staging read with an optionally misaligned host address and/or size (driven by the
// test parameters) and checks which performance hints the context reported.
TEST_P(PerformanceHintEnqueueReadBufferTest, GivenHostPtrAndSizeAlignmentsWhenEnqueueStagingReadBufferIsCalledThenContextProvidesHintsAboutAlignments) {
    REQUIRE_SVM_OR_SKIP(pPlatform->getClDevice(0));

    // Two cache lines so that a one-byte offset still leaves room for a full cache-line read.
    void *allocation = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize);
    const uintptr_t hostAddress = reinterpret_cast<uintptr_t>(allocation) + (alignedAddress ? 0u : 1u);
    const size_t transferSize = MemoryConstants::cacheLineSize - (alignedSize ? 0u : 1u);

    pCmdQ->enqueueStagingBufferTransfer(CL_COMMAND_READ_BUFFER, buffer, CL_FALSE,
                                        0, transferSize, reinterpret_cast<void *>(hostAddress), nullptr);

    // The copy-data hint is expected regardless of alignment.
    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA], static_cast<cl_mem>(buffer), hostAddress);
    EXPECT_TRUE(containsHint(expectedHint, userData));

    // The alignment hint is expected only when the address or the size is misaligned.
    const bool fullyAligned = alignedSize && alignedAddress;
    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS], hostAddress, transferSize, MemoryConstants::pageSize, MemoryConstants::pageSize);
    EXPECT_EQ(!fullyAligned, containsHint(expectedHint, userData));

    alignedFree(allocation);
}
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingReadAndNotSharedMemWhenEnqueueReadBufferRectIsCallingThenContextProvidesProperHint) {
size_t bufferOrigin[] = {0, 0, 0};

View File

@@ -190,6 +190,12 @@ class MockCommandQueue : public CommandQueue {
GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList,
const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; }
// Stub: ignores every argument and reports success.
cl_int enqueueReadBufferImpl(Buffer *buffer, cl_bool blockingRead, size_t offset, size_t cb,
                             void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList,
                             const cl_event *eventWaitList, cl_event *event, CommandStreamReceiver &csr) override { return CL_SUCCESS; }
cl_int enqueueReadImage(Image *srcImage, cl_bool blockingRead, const size_t *origin, const size_t *region,
size_t rowPitch, size_t slicePitch, void *ptr,
GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList,
@@ -433,6 +439,16 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
return CL_INVALID_OPERATION;
}
// Counts the call and records whether it was blocking, then either forwards to the base
// implementation or, when enqueueReadBufferCallBase is cleared, returns CL_INVALID_OPERATION.
cl_int enqueueReadBufferImpl(Buffer *buffer, cl_bool blockingRead, size_t offset, size_t size, void *ptr, GraphicsAllocation *mapAllocation,
                             cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, CommandStreamReceiver &csr) override {
    ++enqueueReadBufferCounter;
    blockingReadBuffer = (blockingRead == CL_TRUE);

    if (!enqueueReadBufferCallBase) {
        return CL_INVALID_OPERATION;
    }
    return BaseClass::enqueueReadBufferImpl(buffer, blockingRead, offset, size, ptr, mapAllocation, numEventsInWaitList, eventWaitList, event, csr);
}
void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo) override {
kernelParams = dispatchInfo.peekBuiltinOpParams();
lastCommandType = commandType;
@@ -540,8 +556,11 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
bool enqueueReadImageCallBase = true;
size_t enqueueWriteBufferCounter = 0;
bool enqueueWriteBufferCallBase = true;
size_t enqueueReadBufferCounter = 0;
bool enqueueReadBufferCallBase = true;
size_t requestedCmdStreamSize = 0;
bool blockingWriteBuffer = false;
bool blockingReadBuffer = false;
bool storeMultiDispatchInfo = false;
bool notifyEnqueueReadBufferCalled = false;
bool notifyEnqueueReadImageCalled = false;

View File

@@ -171,10 +171,7 @@ StagingTransferStatus StagingBufferManager::performImageTransfer(const void *ptr
}
}
if (isRead) {
auto numOfSubmittedTransfers = numOfChunks + (remainder != 0 ? 1 : 0);
result.waitStatus = drainAndReleaseStagingQueue(stagingQueue, std::min(numOfSubmittedTransfers, maxInFlightReads));
}
result.waitStatus = drainAndReleaseStagingQueue(isRead, stagingQueue, numOfChunks + (remainder != 0 ? 1 : 0));
return result;
}
@@ -203,6 +200,7 @@ StagingTransferStatus StagingBufferManager::performBufferTransfer(const void *pt
}
}
result.waitStatus = drainAndReleaseStagingQueue(isRead, stagingQueue, copiesNum + (remainder != 0 ? 1 : 0));
return result;
}
@@ -236,15 +234,17 @@ WaitStatus StagingBufferManager::copyStagingToHost(const std::pair<UserData, Sta
* Waits for all pending transfers to finish.
* Releases staging buffers back to pool for reuse.
*/
WaitStatus StagingBufferManager::drainAndReleaseStagingQueue(const StagingQueue &stagingQueue, size_t numOfTransfers) const {
WaitStatus StagingBufferManager::drainAndReleaseStagingQueue(bool isRead, const StagingQueue &stagingQueue, size_t numOfSubmittedTransfers) const {
if (isRead) {
StagingBufferTracker tracker{};
for (auto i = 0u; i < numOfTransfers; i++) {
for (auto i = 0u; i < std::min(numOfSubmittedTransfers, maxInFlightReads); i++) {
auto status = copyStagingToHost(stagingQueue[i], tracker);
if (status == WaitStatus::gpuHang) {
return status;
}
tracker.freeChunk();
}
}
return WaitStatus::ready;
}

View File

@@ -106,7 +106,7 @@ class StagingBufferManager : NEO::NonCopyableAndNonMovableClass {
StagingTransferStatus performChunkTransfer(size_t chunkTransferId, bool isRead, const UserData &userData, StagingQueue &currentStagingBuffers, CommandStreamReceiver *csr, Func &func, Args... args);
WaitStatus copyStagingToHost(const std::pair<UserData, StagingBufferTracker> &transfer, StagingBufferTracker &tracker) const;
WaitStatus drainAndReleaseStagingQueue(const StagingQueue &stagingQueue, size_t numOfTransfers) const;
WaitStatus drainAndReleaseStagingQueue(bool isRead, const StagingQueue &stagingQueue, size_t numOfSubmittedTransfers) const;
bool isValidForStaging(const Device &device, const void *ptr, size_t size, bool hasDependencies);