mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-22 10:17:01 +08:00
performance: introduce staging read for cl_buffer
Related-To: NEO-14026
Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
b2b3b55b19
commit
dacbce7f01
@@ -2542,7 +2542,8 @@ cl_int CL_API_CALL clEnqueueWriteBuffer(cl_command_queue commandQueue,
|
||||
}
|
||||
|
||||
if (pCommandQueue->isValidForStagingTransfer(pBuffer, ptr, cb, CL_COMMAND_WRITE_BUFFER, blockingWrite, numEventsInWaitList > 0)) {
|
||||
retVal = pCommandQueue->enqueueStagingWriteBuffer(
|
||||
retVal = pCommandQueue->enqueueStagingBufferTransfer(
|
||||
CL_COMMAND_WRITE_BUFFER,
|
||||
pBuffer,
|
||||
blockingWrite,
|
||||
offset,
|
||||
|
||||
@@ -140,6 +140,9 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
virtual cl_int enqueueReadBuffer(Buffer *buffer, cl_bool blockingRead, size_t offset, size_t size, void *ptr,
|
||||
GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList, cl_event *event) = 0;
|
||||
virtual cl_int enqueueReadBufferImpl(Buffer *buffer, cl_bool blockingRead, size_t offset, size_t size,
|
||||
void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList, cl_event *event, CommandStreamReceiver &csr) = 0;
|
||||
|
||||
virtual cl_int enqueueReadImage(Image *srcImage, cl_bool blockingRead, const size_t *origin, const size_t *region,
|
||||
size_t rowPitch, size_t slicePitch, void *ptr, GraphicsAllocation *mapAllocation,
|
||||
@@ -402,7 +405,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
cl_int enqueueStagingBufferMemcpy(cl_bool blockingCopy, void *dstPtr, const void *srcPtr, size_t size, cl_event *event);
|
||||
cl_int enqueueStagingImageTransfer(cl_command_type commandType, Image *dstImage, cl_bool blockingCopy, const size_t *globalOrigin, const size_t *globalRegion,
|
||||
size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, cl_event *event);
|
||||
cl_int enqueueStagingWriteBuffer(Buffer *buffer, cl_bool blockingCopy, size_t offset, size_t size, const void *ptr, cl_event *event);
|
||||
cl_int enqueueStagingBufferTransfer(cl_command_type commandType, Buffer *buffer, cl_bool blockingCopy, size_t offset, size_t size, const void *ptr, cl_event *event);
|
||||
|
||||
bool isValidForStagingBufferCopy(Device &device, void *dstPtr, const void *srcPtr, size_t size, bool hasDependencies);
|
||||
bool isValidForStagingTransfer(MemObj *memObj, const void *ptr, size_t size, cl_command_type commandType, bool isBlocking, bool hasDependencies);
|
||||
|
||||
@@ -243,6 +243,16 @@ class CommandQueueHw : public CommandQueue {
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override;
|
||||
|
||||
cl_int enqueueReadBufferImpl(Buffer *buffer,
|
||||
cl_bool blockingRead,
|
||||
size_t offset,
|
||||
size_t size,
|
||||
void *ptr,
|
||||
GraphicsAllocation *mapAllocation,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event, CommandStreamReceiver &csr) override;
|
||||
|
||||
cl_int enqueueReadBufferRect(Buffer *buffer,
|
||||
cl_bool blockingRead,
|
||||
const size_t *bufferOrigin,
|
||||
|
||||
@@ -83,8 +83,9 @@ cl_int CommandQueue::enqueueStagingImageTransfer(cl_command_type commandType, Im
|
||||
return postStagingTransferSync(ret, event, profilingEvent, isSingleTransfer, blockingCopy, commandType);
|
||||
}
|
||||
|
||||
cl_int CommandQueue::enqueueStagingWriteBuffer(Buffer *buffer, cl_bool blockingCopy, size_t offset, size_t size, const void *ptr, cl_event *event) {
|
||||
CsrSelectionArgs csrSelectionArgs{CL_COMMAND_WRITE_BUFFER, {}, buffer, this->getDevice().getRootDeviceIndex(), &size};
|
||||
cl_int CommandQueue::enqueueStagingBufferTransfer(cl_command_type commandType, Buffer *buffer, cl_bool blockingCopy, size_t offset, size_t size, const void *ptr, cl_event *event) {
|
||||
auto isRead = commandType == CL_COMMAND_READ_BUFFER;
|
||||
CsrSelectionArgs csrSelectionArgs{commandType, isRead ? buffer : nullptr, isRead ? nullptr : buffer, this->getDevice().getRootDeviceIndex(), &size};
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
||||
cl_event profilingEvent = nullptr;
|
||||
|
||||
@@ -94,14 +95,26 @@ cl_int CommandQueue::enqueueStagingWriteBuffer(Buffer *buffer, cl_bool blockingC
|
||||
auto isLastTransfer = (offset + size == chunkOffset + chunkSize);
|
||||
isSingleTransfer = isFirstTransfer && isLastTransfer;
|
||||
cl_event *outEvent = assignEventForStaging(event, &profilingEvent, isFirstTransfer, isLastTransfer);
|
||||
|
||||
auto ret = this->enqueueWriteBufferImpl(buffer, false, chunkOffset, chunkSize, stagingBuffer, nullptr, 0, nullptr, outEvent, csr);
|
||||
cl_int ret = 0;
|
||||
if (isRead) {
|
||||
ret = this->enqueueReadBufferImpl(buffer, false, chunkOffset, chunkSize, stagingBuffer, nullptr, 0, nullptr, outEvent, csr);
|
||||
} else {
|
||||
ret = this->enqueueWriteBufferImpl(buffer, false, chunkOffset, chunkSize, stagingBuffer, nullptr, 0, nullptr, outEvent, csr);
|
||||
}
|
||||
ret |= this->flush();
|
||||
return ret;
|
||||
};
|
||||
auto stagingBufferManager = this->context->getStagingBufferManager();
|
||||
auto ret = stagingBufferManager->performBufferTransfer(ptr, offset, size, chunkWrite, &csr, false);
|
||||
return postStagingTransferSync(ret, event, profilingEvent, isSingleTransfer, blockingCopy, CL_COMMAND_WRITE_BUFFER);
|
||||
auto ret = stagingBufferManager->performBufferTransfer(ptr, offset, size, chunkWrite, &csr, isRead);
|
||||
|
||||
if (isRead && context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHintForMemoryTransfer(commandType, true, static_cast<cl_mem>(buffer), ptr);
|
||||
if (!isL3Capable(ptr, size)) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, ptr, size, MemoryConstants::pageSize, MemoryConstants::pageSize);
|
||||
}
|
||||
}
|
||||
|
||||
return postStagingTransferSync(ret, event, profilingEvent, isSingleTransfer, blockingCopy, commandType);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -34,11 +34,28 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
|
||||
const cl_command_type cmdType = CL_COMMAND_READ_BUFFER;
|
||||
|
||||
CsrSelectionArgs csrSelectionArgs{cmdType, buffer, {}, device->getRootDeviceIndex(), &size};
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
||||
return enqueueReadBufferImpl(buffer, blockingRead, offset, size, ptr, mapAllocation, numEventsInWaitList, eventWaitList, event, csr);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferImpl(
|
||||
Buffer *buffer,
|
||||
cl_bool blockingRead,
|
||||
size_t offset,
|
||||
size_t size,
|
||||
void *ptr,
|
||||
GraphicsAllocation *mapAllocation,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event, CommandStreamReceiver &csr) {
|
||||
|
||||
const cl_command_type cmdType = CL_COMMAND_READ_BUFFER;
|
||||
|
||||
CsrSelectionArgs csrSelectionArgs{cmdType, buffer, {}, device->getRootDeviceIndex(), &size};
|
||||
|
||||
if (nullptr == mapAllocation) {
|
||||
notifyEnqueueReadBuffer(buffer, !!blockingRead, EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
|
||||
|
||||
@@ -21,10 +21,13 @@ using ClEnqueueReadBufferTests = ApiTests;
|
||||
namespace ULT {
|
||||
|
||||
TEST_F(ClEnqueueReadBufferTests, GivenCorrectArgumentsWhenReadingBufferThenSuccessIsReturned) {
|
||||
MockBuffer buffer{};
|
||||
MockContext context{};
|
||||
MockGraphicsAllocation allocation{};
|
||||
MockBuffer buffer{&context, allocation};
|
||||
MockCommandQueue commandQueue{context};
|
||||
auto data = 1;
|
||||
auto retVal = clEnqueueReadBuffer(
|
||||
pCommandQueue,
|
||||
&commandQueue,
|
||||
&buffer,
|
||||
false,
|
||||
0,
|
||||
|
||||
@@ -855,3 +855,160 @@ HWTEST_F(EnqueueReadBufferHw, givenHostPtrIsFromMappedBufferWhenReadBufferIsCall
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled);
|
||||
}
|
||||
|
||||
struct ReadBufferStagingBufferTest : public EnqueueReadBufferHw {
|
||||
void SetUp() override {
|
||||
REQUIRE_SVM_OR_SKIP(defaultHwInfo);
|
||||
EnqueueReadBufferHw::SetUp();
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
if (defaultHwInfo->capabilityTable.ftrSvm == false) {
|
||||
return;
|
||||
}
|
||||
EnqueueReadBufferHw::TearDown();
|
||||
}
|
||||
constexpr static size_t chunkSize = MemoryConstants::megaByte * 2;
|
||||
|
||||
unsigned char ptr[MemoryConstants::cacheLineSize];
|
||||
MockBuffer buffer;
|
||||
cl_queue_properties props = {};
|
||||
};
|
||||
|
||||
// Reading the whole mock buffer through the staging path is expected to succeed with a single
// read enqueue, a flush, and no host-surface allocation created for the user pointer.
HWTEST_F(ReadBufferStagingBufferTest, whenEnqueueStagingReadBufferCalledThenReturnSuccess) {
    MockCommandQueueHw<FamilyType> cmdQueue(context.get(), device.get(), &props);
    const auto bytesToRead = buffer.getSize();

    auto status = cmdQueue.enqueueStagingBufferTransfer(CL_COMMAND_READ_BUFFER, &buffer, false, 0, bytesToRead, ptr, nullptr);

    EXPECT_TRUE(cmdQueue.flushCalled);
    EXPECT_EQ(status, CL_SUCCESS);
    EXPECT_EQ(1ul, cmdQueue.enqueueReadBufferCounter);

    auto &ultCsr = device->getUltCommandStreamReceiver<FamilyType>();
    EXPECT_EQ(0u, ultCsr.createAllocationForHostSurfaceCalled);
}
|
||||
|
||||
// With staging copies force-enabled, the first validity query for a host pointer is accepted and a
// repeated query for the same pointer is rejected until the previously returned event has its
// execution status updated.
HWTEST_F(ReadBufferStagingBufferTest, whenHostPtrRegisteredThenDontUseStagingUntilEventCompleted) {
    DebugManagerStateRestore restorer;
    debugManager.flags.EnableCopyWithStagingBuffers.set(1);
    MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);

    // Start from a null handle so a failed enqueue can never leave an indeterminate value behind.
    cl_event event = nullptr;
    auto retVal = mockCommandQueueHw.enqueueReadBuffer(&buffer,
                                                       CL_FALSE,
                                                       0,
                                                       MemoryConstants::cacheLineSize,
                                                       ptr,
                                                       nullptr,
                                                       0,
                                                       nullptr,
                                                       &event);
    // Stop here on failure: the handle is only meaningful after a successful enqueue.
    ASSERT_EQ(CL_SUCCESS, retVal);
    auto pEvent = castToObjectOrAbort<Event>(event);

    EXPECT_TRUE(mockCommandQueueHw.isValidForStagingTransfer(&buffer, ptr, MemoryConstants::cacheLineSize, CL_COMMAND_READ_BUFFER, false, false));
    EXPECT_FALSE(mockCommandQueueHw.isValidForStagingTransfer(&buffer, ptr, MemoryConstants::cacheLineSize, CL_COMMAND_READ_BUFFER, false, false));

    pEvent->updateExecutionStatus();
    EXPECT_TRUE(mockCommandQueueHw.isValidForStagingTransfer(&buffer, ptr, MemoryConstants::cacheLineSize, CL_COMMAND_READ_BUFFER, false, false));

    pEvent->release();
}
|
||||
|
||||
// Same host pointer queried repeatedly: accepted once, rejected on the repeat, and accepted
// again after finish() has been called on the queue.
HWTEST_F(ReadBufferStagingBufferTest, whenHostPtrRegisteredThenDontUseStagingUntilFinishCalled) {
    DebugManagerStateRestore restorer;
    debugManager.flags.EnableCopyWithStagingBuffers.set(1);
    MockCommandQueueHw<FamilyType> cmdQueue(context.get(), device.get(), &props);

    // Every query in this test uses identical arguments, so wrap the call once.
    auto stagingAllowed = [&]() {
        return cmdQueue.isValidForStagingTransfer(&buffer, ptr, MemoryConstants::cacheLineSize, CL_COMMAND_READ_BUFFER, false, false);
    };

    EXPECT_TRUE(stagingAllowed());
    EXPECT_FALSE(stagingAllowed());

    cmdQueue.finish();
    EXPECT_TRUE(stagingAllowed());
}
|
||||
|
||||
// A read of four chunks' worth of data is expected to be split into four read enqueues,
// still without creating a host-surface allocation for the user pointer.
HWTEST_F(ReadBufferStagingBufferTest, whenEnqueueStagingReadBufferCalledWithLargeSizeThenSplitTransfer) {
    constexpr size_t transferSize = chunkSize * 4;
    // Owned host destination; released automatically on every exit path.
    auto hostPtr = std::make_unique<unsigned char[]>(transferSize);
    MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);
    auto retVal = CL_SUCCESS;
    std::unique_ptr<Buffer> buffer = std::unique_ptr<Buffer>(Buffer::create(context.get(),
                                                                            0,
                                                                            transferSize,
                                                                            nullptr,
                                                                            retVal));
    // Verify buffer creation before the buffer is handed to the queue.
    ASSERT_EQ(retVal, CL_SUCCESS);
    ASSERT_NE(nullptr, buffer.get());

    auto res = mockCommandQueueHw.enqueueStagingBufferTransfer(CL_COMMAND_READ_BUFFER, buffer.get(), false, 0, transferSize, hostPtr.get(), nullptr);
    EXPECT_TRUE(mockCommandQueueHw.flushCalled);
    EXPECT_EQ(res, CL_SUCCESS);
    EXPECT_EQ(4ul, mockCommandQueueHw.enqueueReadBufferCounter);
    auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
    EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
}
|
||||
|
||||
// When the caller asks for an event, the staged read must hand back an event whose command type
// is CL_COMMAND_READ_BUFFER, and the last command seen by the queue hook must match it.
HWTEST_F(ReadBufferStagingBufferTest, whenEnqueueStagingReadBufferCalledWithEventThenReturnValidEvent) {
    constexpr cl_command_type expectedLastCmd = CL_COMMAND_READ_BUFFER;
    MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);
    // Null-initialized so the handle is never indeterminate if the enqueue fails.
    cl_event event = nullptr;
    auto res = mockCommandQueueHw.enqueueStagingBufferTransfer(CL_COMMAND_READ_BUFFER, &buffer, false, 0, MemoryConstants::cacheLineSize, ptr, &event);
    // The event is only dereferenced after a successful enqueue.
    ASSERT_EQ(res, CL_SUCCESS);

    // Validated handle conversion instead of an unchecked C-style cast.
    auto pEvent = castToObjectOrAbort<Event>(event);
    EXPECT_EQ(expectedLastCmd, mockCommandQueueHw.lastCommandType);
    EXPECT_EQ(expectedLastCmd, pEvent->getCommandType());

    clReleaseEvent(event);
}
|
||||
|
||||
// On an out-of-order queue a single-chunk staged read must leave CL_COMMAND_READ_BUFFER as the
// last command observed by the queue hook (i.e. nothing else was enqueued after the read) and
// return an event of the same command type.
HWTEST_F(ReadBufferStagingBufferTest, givenOutOfOrderQueueWhenEnqueueStagingReadBufferCalledWithSingleTransferThenNoBarrierEnqueued) {
    constexpr cl_command_type expectedLastCmd = CL_COMMAND_READ_BUFFER;
    MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);
    mockCommandQueueHw.setOoqEnabled();
    // Null-initialized so the handle is never indeterminate if the enqueue fails.
    cl_event event = nullptr;
    auto res = mockCommandQueueHw.enqueueStagingBufferTransfer(CL_COMMAND_READ_BUFFER, &buffer, false, 0, MemoryConstants::cacheLineSize, ptr, &event);
    // The event is only dereferenced after a successful enqueue.
    ASSERT_EQ(res, CL_SUCCESS);

    // Validated handle conversion instead of an unchecked C-style cast.
    auto pEvent = castToObjectOrAbort<Event>(event);
    EXPECT_EQ(expectedLastCmd, mockCommandQueueHw.lastCommandType);
    EXPECT_EQ(expectedLastCmd, pEvent->getCommandType());

    clReleaseEvent(event);
}
|
||||
|
||||
// With profiling enabled on the queue, the event returned from a staged read must have profiling
// enabled and must not be on the CPU profiling path.
HWTEST_F(ReadBufferStagingBufferTest, givenCmdQueueWithProfilingWhenEnqueueStagingReadBufferThenTimestampsSetCorrectly) {
    // Null-initialized so the handle is never indeterminate if the enqueue fails.
    cl_event event = nullptr;
    MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);
    mockCommandQueueHw.setProfilingEnabled();
    auto res = mockCommandQueueHw.enqueueStagingBufferTransfer(CL_COMMAND_READ_BUFFER, &buffer, false, 0, MemoryConstants::cacheLineSize, ptr, &event);
    // The event is only dereferenced after a successful enqueue.
    ASSERT_EQ(res, CL_SUCCESS);

    // Validated handle conversion instead of an unchecked C-style cast.
    auto pEvent = castToObjectOrAbort<Event>(event);
    EXPECT_FALSE(pEvent->isCPUProfilingPath());
    EXPECT_TRUE(pEvent->isProfilingEnabled());

    clReleaseEvent(event);
}
|
||||
|
||||
// When the underlying read enqueue is forced to fail (the mock returns CL_INVALID_OPERATION
// instead of calling the base implementation), the staging transfer must report that same error
// after exactly one attempt.
HWTEST_F(ReadBufferStagingBufferTest, whenEnqueueStagingReadBufferFailedThenPropagateErrorCode) {
    MockCommandQueueHw<FamilyType> cmdQueue(context.get(), device.get(), &props);
    cmdQueue.enqueueReadBufferCallBase = false;

    const size_t bytesToRead = MemoryConstants::cacheLineSize;
    auto status = cmdQueue.enqueueStagingBufferTransfer(CL_COMMAND_READ_BUFFER, &buffer, false, 0, bytesToRead, ptr, nullptr);

    EXPECT_EQ(status, CL_INVALID_OPERATION);
    EXPECT_EQ(1ul, cmdQueue.enqueueReadBufferCounter);
}
|
||||
|
||||
// For a non-blocking read without dependencies, the queue's staging decision must equal what the
// product helper reports for staging-buffer support.
HWTEST_F(ReadBufferStagingBufferTest, whenIsValidForStagingTransferCalledThenReturnCorrectValue) {
    MockCommandQueueHw<FamilyType> cmdQueue(context.get(), device.get(), &props);
    const auto expectedDecision = device->getProductHelper().isStagingBuffersEnabled();
    // Small local host destination, distinct from the fixture's cache-line-sized one.
    unsigned char hostMem[16];

    const auto actualDecision = cmdQueue.isValidForStagingTransfer(&buffer, hostMem, 16, CL_COMMAND_READ_BUFFER, false, false);
    EXPECT_EQ(expectedDecision, actualDecision);
}
|
||||
|
||||
// With the DoCpuCopyOnReadBuffer debug flag set, a blocking read must not be routed through
// staging buffers.
HWTEST_F(ReadBufferStagingBufferTest, whenIsValidForStagingTransferCalledAndCpuCopyAllowedThenReturnCorrectValue) {
    DebugManagerStateRestore dbgRestore;
    debugManager.flags.DoCpuCopyOnReadBuffer.set(1);
    MockCommandQueueHw<FamilyType> cmdQueue(context.get(), device.get(), &props);
    // Small local host destination, distinct from the fixture's cache-line-sized one.
    unsigned char hostMem[16];

    const auto stagingChosen = cmdQueue.isValidForStagingTransfer(&buffer, hostMem, 16, CL_COMMAND_READ_BUFFER, true, false);
    EXPECT_FALSE(stagingChosen);
}
|
||||
@@ -652,7 +652,7 @@ struct WriteBufferStagingBufferTest : public EnqueueWriteBufferHw {
|
||||
|
||||
HWTEST_F(WriteBufferStagingBufferTest, whenEnqueueStagingWriteBufferCalledThenReturnSuccess) {
|
||||
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);
|
||||
auto res = mockCommandQueueHw.enqueueStagingWriteBuffer(&buffer, false, 0, buffer.getSize(), ptr, nullptr);
|
||||
auto res = mockCommandQueueHw.enqueueStagingBufferTransfer(CL_COMMAND_WRITE_BUFFER, &buffer, false, 0, buffer.getSize(), ptr, nullptr);
|
||||
EXPECT_TRUE(mockCommandQueueHw.flushCalled);
|
||||
EXPECT_EQ(res, CL_SUCCESS);
|
||||
EXPECT_EQ(1ul, mockCommandQueueHw.enqueueWriteBufferCounter);
|
||||
@@ -708,7 +708,7 @@ HWTEST_F(WriteBufferStagingBufferTest, whenEnqueueStagingWriteBufferCalledWithLa
|
||||
chunkSize * 4,
|
||||
nullptr,
|
||||
retVal));
|
||||
auto res = mockCommandQueueHw.enqueueStagingWriteBuffer(buffer.get(), false, 0, chunkSize * 4, hostPtr, nullptr);
|
||||
auto res = mockCommandQueueHw.enqueueStagingBufferTransfer(CL_COMMAND_WRITE_BUFFER, buffer.get(), false, 0, chunkSize * 4, hostPtr, nullptr);
|
||||
EXPECT_TRUE(mockCommandQueueHw.flushCalled);
|
||||
EXPECT_EQ(retVal, CL_SUCCESS);
|
||||
EXPECT_EQ(res, CL_SUCCESS);
|
||||
@@ -723,7 +723,7 @@ HWTEST_F(WriteBufferStagingBufferTest, whenEnqueueStagingWriteBufferCalledWithEv
|
||||
constexpr cl_command_type expectedLastCmd = CL_COMMAND_WRITE_BUFFER;
|
||||
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);
|
||||
cl_event event;
|
||||
auto res = mockCommandQueueHw.enqueueStagingWriteBuffer(&buffer, false, 0, MemoryConstants::cacheLineSize, ptr, &event);
|
||||
auto res = mockCommandQueueHw.enqueueStagingBufferTransfer(CL_COMMAND_WRITE_BUFFER, &buffer, false, 0, MemoryConstants::cacheLineSize, ptr, &event);
|
||||
EXPECT_EQ(res, CL_SUCCESS);
|
||||
|
||||
auto pEvent = (Event *)event;
|
||||
@@ -738,7 +738,7 @@ HWTEST_F(WriteBufferStagingBufferTest, givenOutOfOrderQueueWhenEnqueueStagingWri
|
||||
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);
|
||||
mockCommandQueueHw.setOoqEnabled();
|
||||
cl_event event;
|
||||
auto res = mockCommandQueueHw.enqueueStagingWriteBuffer(&buffer, false, 0, MemoryConstants::cacheLineSize, ptr, &event);
|
||||
auto res = mockCommandQueueHw.enqueueStagingBufferTransfer(CL_COMMAND_WRITE_BUFFER, &buffer, false, 0, MemoryConstants::cacheLineSize, ptr, &event);
|
||||
EXPECT_EQ(res, CL_SUCCESS);
|
||||
|
||||
auto pEvent = (Event *)event;
|
||||
@@ -752,7 +752,7 @@ HWTEST_F(WriteBufferStagingBufferTest, givenCmdQueueWithProfilingWhenEnqueueStag
|
||||
cl_event event;
|
||||
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);
|
||||
mockCommandQueueHw.setProfilingEnabled();
|
||||
auto res = mockCommandQueueHw.enqueueStagingWriteBuffer(&buffer, false, 0, MemoryConstants::cacheLineSize, ptr, &event);
|
||||
auto res = mockCommandQueueHw.enqueueStagingBufferTransfer(CL_COMMAND_WRITE_BUFFER, &buffer, false, 0, MemoryConstants::cacheLineSize, ptr, &event);
|
||||
EXPECT_EQ(res, CL_SUCCESS);
|
||||
|
||||
auto pEvent = (Event *)event;
|
||||
@@ -765,7 +765,7 @@ HWTEST_F(WriteBufferStagingBufferTest, givenCmdQueueWithProfilingWhenEnqueueStag
|
||||
HWTEST_F(WriteBufferStagingBufferTest, whenEnqueueStagingWriteBufferFailedThenPropagateErrorCode) {
|
||||
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context.get(), device.get(), &props);
|
||||
mockCommandQueueHw.enqueueWriteBufferCallBase = false;
|
||||
auto res = mockCommandQueueHw.enqueueStagingWriteBuffer(&buffer, false, 0, MemoryConstants::cacheLineSize, ptr, nullptr);
|
||||
auto res = mockCommandQueueHw.enqueueStagingBufferTransfer(CL_COMMAND_WRITE_BUFFER, &buffer, false, 0, MemoryConstants::cacheLineSize, ptr, nullptr);
|
||||
|
||||
EXPECT_EQ(res, CL_INVALID_OPERATION);
|
||||
EXPECT_EQ(1ul, mockCommandQueueHw.enqueueWriteBufferCounter);
|
||||
|
||||
@@ -100,6 +100,26 @@ TEST_P(PerformanceHintEnqueueReadBufferTest, GivenHostPtrAndSizeAlignmentsWhenEn
|
||||
alignedFree(ptr);
|
||||
}
|
||||
|
||||
// Parameterized over (alignedAddress, alignedSize): a staged read must always emit the
// "requires copy data" hint, and must emit the alignment-restriction hint exactly when the host
// address or the size was deliberately misaligned.
TEST_P(PerformanceHintEnqueueReadBufferTest, GivenHostPtrAndSizeAlignmentsWhenEnqueueStagingReadBufferIsCalledThenContextProvidesHintsAboutAlignments) {
    REQUIRE_SVM_OR_SKIP(pPlatform->getClDevice(0));

    // Two cache lines are allocated so the address can be bumped by one byte and stay in bounds.
    void *hostMemory = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize);
    const bool misalignAddress = !alignedAddress;
    const bool misalignSize = !alignedSize;

    uintptr_t readAddress = reinterpret_cast<uintptr_t>(hostMemory);
    size_t readSize = MemoryConstants::cacheLineSize;
    if (misalignAddress) {
        ++readAddress;
    }
    if (misalignSize) {
        --readSize;
    }

    pCmdQ->enqueueStagingBufferTransfer(CL_COMMAND_READ_BUFFER, buffer, CL_FALSE,
                                        0, readSize, reinterpret_cast<void *>(readAddress), nullptr);

    // The copy-data hint is expected regardless of alignment.
    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA], static_cast<cl_mem>(buffer), readAddress);
    EXPECT_TRUE(containsHint(expectedHint, userData));

    // The alignment hint is expected only when at least one of address/size is misaligned.
    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS], readAddress, readSize, MemoryConstants::pageSize, MemoryConstants::pageSize);
    EXPECT_EQ(!(alignedSize && alignedAddress), containsHint(expectedHint, userData));

    alignedFree(hostMemory);
}
|
||||
|
||||
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingReadAndNotSharedMemWhenEnqueueReadBufferRectIsCallingThenContextProvidesProperHint) {
|
||||
|
||||
size_t bufferOrigin[] = {0, 0, 0};
|
||||
|
||||
@@ -190,6 +190,12 @@ class MockCommandQueue : public CommandQueue {
|
||||
GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; }
|
||||
|
||||
// Stub of the CSR-aware read entry point: ignores every argument and reports success.
cl_int enqueueReadBufferImpl(Buffer *buffer, cl_bool blockingRead, size_t offset, size_t cb,
                             void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList,
                             const cl_event *eventWaitList, cl_event *event, CommandStreamReceiver &csr) override { return CL_SUCCESS; }
|
||||
|
||||
cl_int enqueueReadImage(Image *srcImage, cl_bool blockingRead, const size_t *origin, const size_t *region,
|
||||
size_t rowPitch, size_t slicePitch, void *ptr,
|
||||
GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList,
|
||||
@@ -433,6 +439,16 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
||||
return CL_INVALID_OPERATION;
|
||||
}
|
||||
|
||||
// Instrumented override: counts each call, records whether the read was requested as blocking,
// then either forwards to the real implementation or fails with CL_INVALID_OPERATION, depending
// on enqueueReadBufferCallBase.
cl_int enqueueReadBufferImpl(Buffer *buffer, cl_bool blockingRead, size_t offset, size_t size, void *ptr, GraphicsAllocation *mapAllocation,
                             cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, CommandStreamReceiver &csr) override {
    ++enqueueReadBufferCounter;
    blockingReadBuffer = (blockingRead == CL_TRUE);

    // Tests clear the flag to simulate a failing enqueue.
    if (!enqueueReadBufferCallBase) {
        return CL_INVALID_OPERATION;
    }
    return BaseClass::enqueueReadBufferImpl(buffer, blockingRead, offset, size, ptr, mapAllocation, numEventsInWaitList, eventWaitList, event, csr);
}
|
||||
|
||||
void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo) override {
|
||||
kernelParams = dispatchInfo.peekBuiltinOpParams();
|
||||
lastCommandType = commandType;
|
||||
@@ -540,8 +556,11 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
||||
bool enqueueReadImageCallBase = true;
|
||||
size_t enqueueWriteBufferCounter = 0;
|
||||
bool enqueueWriteBufferCallBase = true;
|
||||
size_t enqueueReadBufferCounter = 0;
|
||||
bool enqueueReadBufferCallBase = true;
|
||||
size_t requestedCmdStreamSize = 0;
|
||||
bool blockingWriteBuffer = false;
|
||||
bool blockingReadBuffer = false;
|
||||
bool storeMultiDispatchInfo = false;
|
||||
bool notifyEnqueueReadBufferCalled = false;
|
||||
bool notifyEnqueueReadImageCalled = false;
|
||||
|
||||
@@ -171,10 +171,7 @@ StagingTransferStatus StagingBufferManager::performImageTransfer(const void *ptr
|
||||
}
|
||||
}
|
||||
|
||||
if (isRead) {
|
||||
auto numOfSubmittedTransfers = numOfChunks + (remainder != 0 ? 1 : 0);
|
||||
result.waitStatus = drainAndReleaseStagingQueue(stagingQueue, std::min(numOfSubmittedTransfers, maxInFlightReads));
|
||||
}
|
||||
result.waitStatus = drainAndReleaseStagingQueue(isRead, stagingQueue, numOfChunks + (remainder != 0 ? 1 : 0));
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -203,6 +200,7 @@ StagingTransferStatus StagingBufferManager::performBufferTransfer(const void *pt
|
||||
}
|
||||
}
|
||||
|
||||
result.waitStatus = drainAndReleaseStagingQueue(isRead, stagingQueue, copiesNum + (remainder != 0 ? 1 : 0));
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -236,15 +234,17 @@ WaitStatus StagingBufferManager::copyStagingToHost(const std::pair<UserData, Sta
|
||||
* Waits for all pending transfers to finish.
|
||||
* Releases staging buffers back to pool for reuse.
|
||||
*/
|
||||
WaitStatus StagingBufferManager::drainAndReleaseStagingQueue(const StagingQueue &stagingQueue, size_t numOfTransfers) const {
|
||||
WaitStatus StagingBufferManager::drainAndReleaseStagingQueue(bool isRead, const StagingQueue &stagingQueue, size_t numOfSubmittedTransfers) const {
|
||||
if (isRead) {
|
||||
StagingBufferTracker tracker{};
|
||||
for (auto i = 0u; i < numOfTransfers; i++) {
|
||||
for (auto i = 0u; i < std::min(numOfSubmittedTransfers, maxInFlightReads); i++) {
|
||||
auto status = copyStagingToHost(stagingQueue[i], tracker);
|
||||
if (status == WaitStatus::gpuHang) {
|
||||
return status;
|
||||
}
|
||||
tracker.freeChunk();
|
||||
}
|
||||
}
|
||||
return WaitStatus::ready;
|
||||
}
|
||||
|
||||
|
||||
@@ -106,7 +106,7 @@ class StagingBufferManager : NEO::NonCopyableAndNonMovableClass {
|
||||
StagingTransferStatus performChunkTransfer(size_t chunkTransferId, bool isRead, const UserData &userData, StagingQueue &currentStagingBuffers, CommandStreamReceiver *csr, Func &func, Args... args);
|
||||
|
||||
WaitStatus copyStagingToHost(const std::pair<UserData, StagingBufferTracker> &transfer, StagingBufferTracker &tracker) const;
|
||||
WaitStatus drainAndReleaseStagingQueue(const StagingQueue &stagingQueue, size_t numOfTransfers) const;
|
||||
WaitStatus drainAndReleaseStagingQueue(bool isRead, const StagingQueue &stagingQueue, size_t numOfSubmittedTransfers) const;
|
||||
|
||||
bool isValidForStaging(const Device &device, const void *ptr, size_t size, bool hasDependencies);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user