Add support for zero-copy r/w buffer

Change-Id: Ie9f3f2211d107eb338bd97692d36e9c7d7a0feab
This commit is contained in:
mplewka
2018-01-11 15:42:55 +01:00
parent 79e4b2c104
commit 2c2bbbcdbb
13 changed files with 694 additions and 17 deletions

2
Jenkinsfile vendored
View File

@ -2,4 +2,4 @@
neoDependenciesRev='730226-753'
strategy='EQUAL'
allowedF=49
allowedCD=369
allowedCD=368

View File

@ -136,6 +136,8 @@ void *CommandQueueHw<GfxFamily>::cpuDataTransferHandler(MemObj *memObj,
memcpy_s(bufferStorage, size, ptr, size);
eventCompleted = true;
break;
case CL_COMMAND_MARKER:
break;
default:
err.set(CL_INVALID_OPERATION);
}

View File

@ -46,11 +46,31 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
cl_event *event) {
cl_int retVal = CL_SUCCESS;
auto isMemTransferNeeded = buffer->checkIfMemoryTransferIsRequired(offset, ptr, CL_COMMAND_READ_BUFFER);
if ((DebugManager.flags.DoCpuCopyOnReadBuffer.get() ||
buffer->isReadWriteOnCpuAllowed(blockingRead, numEventsInWaitList, ptr, size)) &&
context->getDevice(0)->getDeviceInfo().cpuCopyAllowed) {
if (!isMemTransferNeeded) {
cpuDataTransferHandler(buffer,
CL_COMMAND_MARKER,
CL_TRUE,
offset,
size,
ptr,
numEventsInWaitList,
eventWaitList,
event,
retVal);
if (event) {
auto pEvent = castToObjectOrAbort<Event>(*event);
pEvent->setCmdType(CL_COMMAND_READ_BUFFER);
}
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(buffer), ptr);
}
return retVal;
}
cpuDataTransferHandler(buffer,
CL_COMMAND_READ_BUFFER,
CL_TRUE,
@ -63,9 +83,28 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
retVal);
return retVal;
}
MultiDispatchInfo dispatchInfo;
if (!isMemTransferNeeded) {
NullSurface s;
Surface *surfaces[] = {&s};
enqueueHandler<CL_COMMAND_MARKER>(
surfaces,
blockingRead == CL_TRUE,
dispatchInfo,
numEventsInWaitList,
eventWaitList,
event);
if (event) {
auto pEvent = castToObjectOrAbort<Event>(*event);
pEvent->setCmdType(CL_COMMAND_READ_BUFFER);
}
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(buffer), ptr);
}
return CL_SUCCESS;
}
auto &builder = BuiltIns::getInstance().getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer,
this->getContext(), this->getDevice());
builder.takeOwnership(this->context);
@ -87,7 +126,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, ptr, size, MemoryConstants::pageSize, MemoryConstants::pageSize);
}
}
enqueueHandler<CL_COMMAND_READ_BUFFER>(
surfaces,
blockingRead == CL_TRUE,
@ -95,8 +133,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
numEventsInWaitList,
eventWaitList,
event);
builder.releaseOwnership();
return CL_SUCCESS;
}
}
} // namespace OCLRT

View File

@ -45,11 +45,31 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
cl_event *event) {
cl_int retVal = CL_SUCCESS;
auto isMemTransferNeeded = buffer->checkIfMemoryTransferIsRequired(offset, ptr, CL_COMMAND_WRITE_BUFFER);
if ((DebugManager.flags.DoCpuCopyOnWriteBuffer.get() ||
buffer->isReadWriteOnCpuAllowed(blockingWrite, numEventsInWaitList, const_cast<void *>(ptr), size)) &&
context->getDevice(0)->getDeviceInfo().cpuCopyAllowed) {
if (!isMemTransferNeeded) {
cpuDataTransferHandler(buffer,
CL_COMMAND_MARKER,
CL_TRUE,
offset,
size,
const_cast<void *>(ptr),
numEventsInWaitList,
eventWaitList,
event,
retVal);
if (event) {
auto pEvent = castToObjectOrAbort<Event>(*event);
pEvent->setCmdType(CL_COMMAND_WRITE_BUFFER);
}
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(buffer), ptr);
}
return retVal;
}
cpuDataTransferHandler(buffer,
CL_COMMAND_WRITE_BUFFER,
CL_TRUE,
@ -62,9 +82,28 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
retVal);
return retVal;
}
MultiDispatchInfo dispatchInfo;
if (!isMemTransferNeeded) {
NullSurface s;
Surface *surfaces[] = {&s};
enqueueHandler<CL_COMMAND_MARKER>(
surfaces,
blockingWrite == CL_TRUE,
dispatchInfo,
numEventsInWaitList,
eventWaitList,
event);
if (event) {
auto pEvent = castToObjectOrAbort<Event>(*event);
pEvent->setCmdType(CL_COMMAND_WRITE_BUFFER);
}
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(buffer), ptr);
}
return CL_SUCCESS;
}
auto &builder = BuiltIns::getInstance().getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer,
this->getContext(), this->getDevice());

View File

@ -42,10 +42,12 @@ const char *DriverDiagnostics::hintFormat[] = {
"Performance hint: Subbuffer created from buffer %p shares the same memory with buffer.", //SUBBUFFER_SHARES_MEMORY
"Performance hint: clSVMAlloc with pointer %p and size %u meets alignment restrictions.", //CL_SVM_ALLOC_MEETS_ALIGNMENT_RESTRICTIONS
"Performance hint: clEnqueueReadBuffer call on a buffer %p with pointer %p will require driver to copy the data.Consider using clEnqueueMapBuffer with buffer that shares the same physical memory with CPU.", //CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA
"Performance hint: clEnqueueReadBuffer call on a buffer %p with pointer %p will not require any data copy as the buffer shares the same physical memory with CPU.", //CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA
"Performance hint: Pointer %p and size %u passed to clEnqueueReadBuffer doesn't meet alignment restrictions. Size should be aligned to %u bytes and pointer should be aligned to %u. Driver needs to disable L3 caching.", //CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS
"Performance hint: clEnqueueReadBufferRect call on a buffer %p with pointer %p will require driver to copy the data.Consider using clEnqueueMapBuffer with buffer that shares the same physical memory with CPU.", //CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA
"Performance hint: Pointer %p and size %u passed to clEnqueueReadBufferRect doesn't meet alignment restrictions. Size should be aligned to %u bytes and pointer should be aligned to %u. Driver needs to disable L3 caching.", //CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_MEET_ALIGNMENT_RESTRICTIONS
"Performance hint: clEnqueueWriteBuffer call on a buffer %p require driver to copy the data. Consider using clEnqueueMapBuffer with buffer that shares the same physical memory with CPU.", //CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA
"Performance hint: clEnqueueWriteBuffer call on a buffer %p with pointer %p will not require any data copy as the buffer shares the same physical memory with CPU.", //CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA
"Performance hint: clEnqueueWriteBufferRect call on a buffer %p require driver to copy the data. Consider using clEnqueueMapBuffer with buffer that shares the same physical memory with CPU.", //CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA
"Performance hint: Pointer %p and size %u passed to clEnqueueReadImage doesn't meet alignment restrictions. Size should be aligned to %u bytes and pointer should be aligned to %u. Driver needs to disable L3 caching.", //CL_ENQUEUE_READ_IMAGE_DOESNT_MEET_ALIGNMENT_RESTRICTIONS
"Performance hint: clEnqueueWriteImage call on an image %p require driver to copy the data.", //CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA
@ -63,4 +65,4 @@ const char *DriverDiagnostics::hintFormat[] = {
"Performance hint: Kernel %s private memory usage is too high and exhausts register space, additional surface needs to be allocated of size %u, consider reducing amount of private memory used, avoid using private memory arrays.", //PRIVATE_MEMORY_USAGE_TOO_HIGH
"Performance hint: Kernel %s submission requires coherency with CPU; this will impact performance." //KERNEL_REQUIRES_COHERENCY
};
}
} // namespace OCLRT

View File

@ -35,10 +35,12 @@ enum PerformanceHints {
SUBBUFFER_SHARES_MEMORY,
CL_SVM_ALLOC_MEETS_ALIGNMENT_RESTRICTIONS,
CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA,
CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA,
CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS,
CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA,
CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_MEET_ALIGNMENT_RESTRICTIONS,
CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA,
CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA,
CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA,
CL_ENQUEUE_READ_IMAGE_DOESNT_MEET_ALIGNMENT_RESTRICTIONS,
CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA,
@ -68,4 +70,4 @@ class DriverDiagnostics {
protected:
cl_diagnostics_verbose_level verboseLevel;
};
}
} // namespace OCLRT

View File

@ -350,4 +350,10 @@ void MemObj::destroyGraphicsAllocation(GraphicsAllocation *allocation, bool asyn
}
memoryManager->freeGraphicsMemory(allocation);
}
// Reports whether a transfer command on this object has to move any data.
//
// offset  - byte offset applied to the address returned by getCpuAddressForMemoryTransfer()
// ptr     - host pointer supplied with the command
// cmdType - OpenCL command type being enqueued
//
// Returns false only when cmdType is CL_COMMAND_READ_BUFFER or
// CL_COMMAND_WRITE_BUFFER and ptr equals the offset CPU transfer address;
// returns true in every other case.
bool MemObj::checkIfMemoryTransferIsRequired(size_t offset, const void *ptr, cl_command_type cmdType) {
    // Computed unconditionally, before the command type is inspected.
    const auto transferAddress = ptrOffset(this->getCpuAddressForMemoryTransfer(), offset);

    if (cmdType != CL_COMMAND_READ_BUFFER && cmdType != CL_COMMAND_WRITE_BUFFER) {
        return true;
    }
    return transferAddress != ptr;
}
} // namespace OCLRT

View File

@ -121,6 +121,7 @@ class MemObj : public BaseObject<_cl_mem> {
void waitForCsrCompletion();
void destroyGraphicsAllocation(GraphicsAllocation *allocation, bool asyncDestroy);
bool checkIfMemoryTransferIsRequired(size_t offset, const void *ptr, cl_command_type cmdType);
protected:
void getOsSpecificMemObjectInfo(const cl_mem_info &paramName, size_t *srcParamSize, void **srcParam);

View File

@ -22,6 +22,7 @@
#include "runtime/command_queue/command_queue.h"
#include "runtime/event/event.h"
#include "unit_tests/helpers/debug_manager_state_restore.h"
#include "unit_tests/fixtures/hello_world_fixture.h"
#include "unit_tests/fixtures/buffer_fixture.h"
#include "gtest/gtest.h"
@ -108,3 +109,165 @@ TEST_F(EnqueueReadBuffer, eventReturnedShouldBeMaxOfInputEventsAndCmdQPlus1) {
delete pEvent;
}
TEST_F(EnqueueReadBuffer, givenInOrderQueueAndEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrWithEventsWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    // Scenario: blocking read on pCmdQ with DoCpuCopyOnReadBuffer set to true,
    // where the destination pointer is the buffer's own CPU transfer address.
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(true);

    const uint32_t queueTaskLevel = 17;
    pCmdQ->taskLevel = queueTaskLevel;

    // Wait list: one event below (8) and one above (19) the queue's task level.
    const uint32_t lowerTaskLevel = 8;
    const uint32_t higherTaskLevel = 19;
    Event lowerEvent(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, lowerTaskLevel, 4);
    Event higherEvent(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, higherTaskLevel, 10);
    cl_event waitList[] = {&lowerEvent, &higherEvent};
    const cl_uint waitListSize = sizeof(waitList) / sizeof(waitList[0]);

    cl_event outEvent = nullptr;
    auto zeroCopyBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create());
    void *bufferCpuAddress = zeroCopyBuffer->getCpuAddressForMemoryTransfer();

    const cl_int status = pCmdQ->enqueueReadBuffer(zeroCopyBuffer.get(),
                                                   CL_TRUE,
                                                   0,
                                                   sizeof(cl_float),
                                                   bufferCpuAddress,
                                                   waitListSize,
                                                   waitList,
                                                   &outEvent);

    EXPECT_EQ(CL_SUCCESS, status);
    ASSERT_NE(nullptr, outEvent);

    // Expected: the returned event sits at level 19 while the queue remains at 17.
    auto *returnedEvent = static_cast<Event *>(outEvent);
    EXPECT_EQ(19u, returnedEvent->taskLevel);
    EXPECT_EQ(17u, pCmdQ->taskLevel);

    returnedEvent->release();
}
TEST_F(EnqueueReadBuffer, givenOutOfOrderQueueAndEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrWithEventsWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    // Scenario: blocking read on a queue created with
    // CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, DoCpuCopyOnReadBuffer set to true,
    // destination pointer equal to the buffer's own CPU transfer address.
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(true);

    std::unique_ptr<CommandQueue> outOfOrderQueue(createCommandQueue(pDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));

    const uint32_t queueTaskLevel = 17;
    outOfOrderQueue->taskLevel = queueTaskLevel;

    // Wait list: one event below (8) and one above (19) the queue's task level.
    const uint32_t lowerTaskLevel = 8;
    const uint32_t higherTaskLevel = 19;
    Event lowerEvent(outOfOrderQueue.get(), CL_COMMAND_NDRANGE_KERNEL, lowerTaskLevel, 4);
    Event higherEvent(outOfOrderQueue.get(), CL_COMMAND_NDRANGE_KERNEL, higherTaskLevel, 10);
    cl_event waitList[] = {&lowerEvent, &higherEvent};
    const cl_uint waitListSize = sizeof(waitList) / sizeof(waitList[0]);

    cl_event outEvent = nullptr;
    auto zeroCopyBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create());
    void *bufferCpuAddress = zeroCopyBuffer->getCpuAddressForMemoryTransfer();

    const cl_int status = outOfOrderQueue->enqueueReadBuffer(zeroCopyBuffer.get(),
                                                             CL_TRUE,
                                                             0,
                                                             sizeof(cl_float),
                                                             bufferCpuAddress,
                                                             waitListSize,
                                                             waitList,
                                                             &outEvent);

    EXPECT_EQ(CL_SUCCESS, status);
    ASSERT_NE(nullptr, outEvent);

    // Expected: the returned event sits at level 19 while the queue remains at 17.
    auto *returnedEvent = static_cast<Event *>(outEvent);
    EXPECT_EQ(19u, returnedEvent->taskLevel);
    EXPECT_EQ(17u, outOfOrderQueue->taskLevel);

    returnedEvent->release();
}
TEST_F(EnqueueReadBuffer, givenInOrderQueueAndDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrWithEventsWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    // Scenario: blocking read on pCmdQ with DoCpuCopyOnReadBuffer set to false,
    // where the destination pointer is the buffer's own CPU transfer address.
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(false);

    const uint32_t queueTaskLevel = 17;
    pCmdQ->taskLevel = queueTaskLevel;

    // Wait list: one event below (8) and one above (19) the queue's task level.
    const uint32_t lowerTaskLevel = 8;
    const uint32_t higherTaskLevel = 19;
    Event lowerEvent(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, lowerTaskLevel, 4);
    Event higherEvent(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, higherTaskLevel, 10);
    cl_event waitList[] = {&lowerEvent, &higherEvent};
    const cl_uint waitListSize = sizeof(waitList) / sizeof(waitList[0]);

    cl_event outEvent = nullptr;
    auto zeroCopyBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create());
    void *bufferCpuAddress = zeroCopyBuffer->getCpuAddressForMemoryTransfer();

    const cl_int status = pCmdQ->enqueueReadBuffer(zeroCopyBuffer.get(),
                                                   CL_TRUE,
                                                   0,
                                                   sizeof(cl_float),
                                                   bufferCpuAddress,
                                                   waitListSize,
                                                   waitList,
                                                   &outEvent);

    EXPECT_EQ(CL_SUCCESS, status);
    ASSERT_NE(nullptr, outEvent);

    // NOTE: despite the test name, the assertions below expect the queue's task
    // level to move from 17 to 19, matching the level of the returned event.
    auto *returnedEvent = static_cast<Event *>(outEvent);
    EXPECT_EQ(19u, returnedEvent->taskLevel);
    EXPECT_EQ(19u, pCmdQ->taskLevel);

    returnedEvent->release();
}
TEST_F(EnqueueReadBuffer, givenOutOfOrderQueueAndDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrWithEventsWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    // Scenario: blocking read on a queue created with
    // CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, DoCpuCopyOnReadBuffer set to false,
    // destination pointer equal to the buffer's own CPU transfer address.
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(false);

    std::unique_ptr<CommandQueue> outOfOrderQueue(createCommandQueue(pDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));

    const uint32_t queueTaskLevel = 17;
    outOfOrderQueue->taskLevel = queueTaskLevel;

    // Wait list: one event below (8) and one above (19) the queue's task level.
    const uint32_t lowerTaskLevel = 8;
    const uint32_t higherTaskLevel = 19;
    Event lowerEvent(outOfOrderQueue.get(), CL_COMMAND_NDRANGE_KERNEL, lowerTaskLevel, 4);
    Event higherEvent(outOfOrderQueue.get(), CL_COMMAND_NDRANGE_KERNEL, higherTaskLevel, 10);
    cl_event waitList[] = {&lowerEvent, &higherEvent};
    const cl_uint waitListSize = sizeof(waitList) / sizeof(waitList[0]);

    cl_event outEvent = nullptr;
    auto zeroCopyBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create());
    void *bufferCpuAddress = zeroCopyBuffer->getCpuAddressForMemoryTransfer();

    const cl_int status = outOfOrderQueue->enqueueReadBuffer(zeroCopyBuffer.get(),
                                                             CL_TRUE,
                                                             0,
                                                             sizeof(cl_float),
                                                             bufferCpuAddress,
                                                             waitListSize,
                                                             waitList,
                                                             &outEvent);

    EXPECT_EQ(CL_SUCCESS, status);
    ASSERT_NE(nullptr, outEvent);

    // NOTE: despite the test name, the assertions below expect the queue's task
    // level to move from 17 to 19, matching the level of the returned event.
    auto *returnedEvent = static_cast<Event *>(outEvent);
    EXPECT_EQ(19u, returnedEvent->taskLevel);
    EXPECT_EQ(19u, outOfOrderQueue->taskLevel);

    returnedEvent->release();
}

View File

@ -26,6 +26,7 @@
#include "runtime/helpers/dispatch_info.h"
#include "unit_tests/command_queue/enqueue_fixture.h"
#include "unit_tests/command_queue/enqueue_read_buffer_fixture.h"
#include "unit_tests/helpers/debug_manager_state_restore.h"
#include "test.h"
using namespace OCLRT;
@ -406,3 +407,78 @@ HWTEST_F(EnqueueReadBufferTypeTest, givenNotAlignedPointerAndAlignedSizeWhenRead
EXPECT_EQ(CacheSettings::l3CacheOn, csr.latestSentStatelessMocsConfig);
EXPECT_FALSE(csr.disableL3Cache);
}
HWTEST_F(EnqueueReadBufferTypeTest, givenOOQWithEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrWhenReadBufferIsExecutedThenTaskLevelNotIncreased) {
    // Scenario: non-blocking read without events on an out-of-order queue,
    // DoCpuCopyOnReadBuffer set to true, destination equal to srcBuffer's own
    // CPU transfer address.
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(true);

    cl_int status = CL_SUCCESS;
    std::unique_ptr<CommandQueue> outOfOrderQueue(createCommandQueue(pDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));
    void *bufferCpuAddress = srcBuffer->getCpuAddressForMemoryTransfer();
    // status has not been reassigned yet, so this only re-checks its initial value.
    EXPECT_EQ(status, CL_SUCCESS);

    status = outOfOrderQueue->enqueueReadBuffer(srcBuffer,
                                                CL_FALSE,
                                                0,
                                                MemoryConstants::cacheLineSize,
                                                bufferCpuAddress,
                                                0,
                                                nullptr,
                                                nullptr);

    EXPECT_EQ(CL_SUCCESS, status);
    // The queue's task level is expected to still read 0 after the enqueue.
    EXPECT_EQ(outOfOrderQueue->taskLevel, 0u);
}
HWTEST_F(EnqueueReadBufferTypeTest, givenOOQWithDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrWhenReadBufferIsExecutedThenTaskLevelNotIncreased) {
    // Scenario: non-blocking read without events on an out-of-order queue,
    // DoCpuCopyOnReadBuffer set to false, destination equal to srcBuffer's own
    // CPU transfer address.
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(false);

    cl_int status = CL_SUCCESS;
    std::unique_ptr<CommandQueue> outOfOrderQueue(createCommandQueue(pDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));
    void *bufferCpuAddress = srcBuffer->getCpuAddressForMemoryTransfer();
    // status has not been reassigned yet, so this only re-checks its initial value.
    EXPECT_EQ(status, CL_SUCCESS);

    status = outOfOrderQueue->enqueueReadBuffer(srcBuffer,
                                                CL_FALSE,
                                                0,
                                                MemoryConstants::cacheLineSize,
                                                bufferCpuAddress,
                                                0,
                                                nullptr,
                                                nullptr);

    EXPECT_EQ(CL_SUCCESS, status);
    // The queue's task level is expected to still read 0 after the enqueue.
    EXPECT_EQ(outOfOrderQueue->taskLevel, 0u);
}
HWTEST_F(EnqueueReadBufferTypeTest, givenInOrderQueueAndEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    // Scenario: non-blocking read without events on pCmdQ, DoCpuCopyOnReadBuffer
    // set to true, destination equal to srcBuffer's own CPU transfer address.
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(true);

    cl_int status = CL_SUCCESS;
    void *bufferCpuAddress = srcBuffer->getCpuAddressForMemoryTransfer();
    // status has not been reassigned yet, so this only re-checks its initial value.
    EXPECT_EQ(status, CL_SUCCESS);

    status = pCmdQ->enqueueReadBuffer(srcBuffer,
                                      CL_FALSE,
                                      0,
                                      MemoryConstants::cacheLineSize,
                                      bufferCpuAddress,
                                      0,
                                      nullptr,
                                      nullptr);

    EXPECT_EQ(CL_SUCCESS, status);
    // The queue's task level is expected to still read 0 after the enqueue.
    EXPECT_EQ(pCmdQ->taskLevel, 0u);
}
HWTEST_F(EnqueueReadBufferTypeTest, givenInOrderQueueAndDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    // Scenario: non-blocking read without events on pCmdQ, DoCpuCopyOnReadBuffer
    // set to false, destination equal to srcBuffer's own CPU transfer address.
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(false);

    cl_int status = CL_SUCCESS;
    void *bufferCpuAddress = srcBuffer->getCpuAddressForMemoryTransfer();
    // status has not been reassigned yet, so this only re-checks its initial value.
    EXPECT_EQ(status, CL_SUCCESS);

    status = pCmdQ->enqueueReadBuffer(srcBuffer,
                                      CL_FALSE,
                                      0,
                                      MemoryConstants::cacheLineSize,
                                      bufferCpuAddress,
                                      0,
                                      nullptr,
                                      nullptr);

    EXPECT_EQ(CL_SUCCESS, status);
    // The queue's task level is expected to still read 0 after the enqueue.
    EXPECT_EQ(pCmdQ->taskLevel, 0u);
}

View File

@ -23,6 +23,7 @@
#include "runtime/command_queue/command_queue.h"
#include "runtime/event/event.h"
#include "unit_tests/command_queue/buffer_operations_fixture.h"
#include "unit_tests/helpers/debug_manager_state_restore.h"
#include "unit_tests/fixtures/buffer_fixture.h"
#include "gtest/gtest.h"
#include <memory>
@ -107,4 +108,167 @@ TEST_F(EnqueueWriteBufferTypeTest, eventReturnedShouldBeMaxOfInputEventsAndCmdQP
EXPECT_LE(19u, pEvent->taskLevel);
delete pEvent;
}
TEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrWithEventsWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    // Scenario: blocking write on pCmdQ with DoCpuCopyOnWriteBuffer set to true,
    // where the source pointer is the buffer's own CPU transfer address.
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.DoCpuCopyOnWriteBuffer.set(true);

    const uint32_t queueTaskLevel = 17;
    pCmdQ->taskLevel = queueTaskLevel;

    // Wait list: one event below (8) and one above (19) the queue's task level.
    const uint32_t lowerTaskLevel = 8;
    const uint32_t higherTaskLevel = 19;
    Event lowerEvent(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, lowerTaskLevel, 4);
    Event higherEvent(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, higherTaskLevel, 10);
    cl_event waitList[] = {&lowerEvent, &higherEvent};
    const cl_uint waitListSize = sizeof(waitList) / sizeof(waitList[0]);

    cl_event outEvent = nullptr;
    auto zeroCopyBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create());
    void *bufferCpuAddress = zeroCopyBuffer->getCpuAddressForMemoryTransfer();

    const cl_int status = pCmdQ->enqueueWriteBuffer(zeroCopyBuffer.get(),
                                                    CL_TRUE,
                                                    0,
                                                    sizeof(cl_float),
                                                    bufferCpuAddress,
                                                    waitListSize,
                                                    waitList,
                                                    &outEvent);

    EXPECT_EQ(CL_SUCCESS, status);
    ASSERT_NE(nullptr, outEvent);

    // Expected: the returned event sits at level 19 while the queue remains at 17.
    auto *returnedEvent = static_cast<Event *>(outEvent);
    EXPECT_EQ(19u, returnedEvent->taskLevel);
    EXPECT_EQ(17u, pCmdQ->taskLevel);

    returnedEvent->release();
}
TEST_F(EnqueueWriteBufferTypeTest, givenOutOfOrderQueueAndEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrWithEventsWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    // Scenario: blocking write on a queue created with
    // CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, DoCpuCopyOnWriteBuffer set to true,
    // source pointer equal to the buffer's own CPU transfer address.
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.DoCpuCopyOnWriteBuffer.set(true);

    std::unique_ptr<CommandQueue> outOfOrderQueue(createCommandQueue(pDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));

    const uint32_t queueTaskLevel = 17;
    outOfOrderQueue->taskLevel = queueTaskLevel;

    // Wait list: one event below (8) and one above (19) the queue's task level.
    const uint32_t lowerTaskLevel = 8;
    const uint32_t higherTaskLevel = 19;
    Event lowerEvent(outOfOrderQueue.get(), CL_COMMAND_NDRANGE_KERNEL, lowerTaskLevel, 4);
    Event higherEvent(outOfOrderQueue.get(), CL_COMMAND_NDRANGE_KERNEL, higherTaskLevel, 10);
    cl_event waitList[] = {&lowerEvent, &higherEvent};
    const cl_uint waitListSize = sizeof(waitList) / sizeof(waitList[0]);

    cl_event outEvent = nullptr;
    auto zeroCopyBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create());
    void *bufferCpuAddress = zeroCopyBuffer->getCpuAddressForMemoryTransfer();

    const cl_int status = outOfOrderQueue->enqueueWriteBuffer(zeroCopyBuffer.get(),
                                                              CL_TRUE,
                                                              0,
                                                              sizeof(cl_float),
                                                              bufferCpuAddress,
                                                              waitListSize,
                                                              waitList,
                                                              &outEvent);

    EXPECT_EQ(CL_SUCCESS, status);
    ASSERT_NE(nullptr, outEvent);

    // Expected: the returned event sits at level 19 while the queue remains at 17.
    auto *returnedEvent = static_cast<Event *>(outEvent);
    EXPECT_EQ(19u, returnedEvent->taskLevel);
    EXPECT_EQ(17u, outOfOrderQueue->taskLevel);

    returnedEvent->release();
}
TEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrWithEventsWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    // Scenario: blocking write on pCmdQ with DoCpuCopyOnWriteBuffer set to false,
    // where the source pointer is the buffer's own CPU transfer address.
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.DoCpuCopyOnWriteBuffer.set(false);

    const uint32_t queueTaskLevel = 17;
    pCmdQ->taskLevel = queueTaskLevel;

    // Wait list: one event below (8) and one above (19) the queue's task level.
    const uint32_t lowerTaskLevel = 8;
    const uint32_t higherTaskLevel = 19;
    Event lowerEvent(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, lowerTaskLevel, 4);
    Event higherEvent(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, higherTaskLevel, 10);
    cl_event waitList[] = {&lowerEvent, &higherEvent};
    const cl_uint waitListSize = sizeof(waitList) / sizeof(waitList[0]);

    cl_event outEvent = nullptr;
    auto zeroCopyBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create());
    void *bufferCpuAddress = zeroCopyBuffer->getCpuAddressForMemoryTransfer();

    const cl_int status = pCmdQ->enqueueWriteBuffer(zeroCopyBuffer.get(),
                                                    CL_TRUE,
                                                    0,
                                                    sizeof(cl_float),
                                                    bufferCpuAddress,
                                                    waitListSize,
                                                    waitList,
                                                    &outEvent);

    EXPECT_EQ(CL_SUCCESS, status);
    ASSERT_NE(nullptr, outEvent);

    // NOTE: despite the test name, the assertions below expect the queue's task
    // level to move from 17 to 19, matching the level of the returned event.
    auto *returnedEvent = static_cast<Event *>(outEvent);
    EXPECT_EQ(19u, returnedEvent->taskLevel);
    EXPECT_EQ(19u, pCmdQ->taskLevel);

    returnedEvent->release();
}
TEST_F(EnqueueWriteBufferTypeTest, givenOutOfOrderQueueAndDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrWithEventsWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    // Scenario: blocking write on a queue created with
    // CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, DoCpuCopyOnWriteBuffer set to false,
    // source pointer equal to the buffer's own CPU transfer address.
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.DoCpuCopyOnWriteBuffer.set(false);

    std::unique_ptr<CommandQueue> outOfOrderQueue(createCommandQueue(pDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));

    const uint32_t queueTaskLevel = 17;
    outOfOrderQueue->taskLevel = queueTaskLevel;

    // Wait list: one event below (8) and one above (19) the queue's task level.
    const uint32_t lowerTaskLevel = 8;
    const uint32_t higherTaskLevel = 19;
    Event lowerEvent(outOfOrderQueue.get(), CL_COMMAND_NDRANGE_KERNEL, lowerTaskLevel, 4);
    Event higherEvent(outOfOrderQueue.get(), CL_COMMAND_NDRANGE_KERNEL, higherTaskLevel, 10);
    cl_event waitList[] = {&lowerEvent, &higherEvent};
    const cl_uint waitListSize = sizeof(waitList) / sizeof(waitList[0]);

    cl_event outEvent = nullptr;
    auto zeroCopyBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create());
    void *bufferCpuAddress = zeroCopyBuffer->getCpuAddressForMemoryTransfer();

    const cl_int status = outOfOrderQueue->enqueueWriteBuffer(zeroCopyBuffer.get(),
                                                              CL_TRUE,
                                                              0,
                                                              sizeof(cl_float),
                                                              bufferCpuAddress,
                                                              waitListSize,
                                                              waitList,
                                                              &outEvent);

    EXPECT_EQ(CL_SUCCESS, status);
    ASSERT_NE(nullptr, outEvent);

    // NOTE: despite the test name, the assertions below expect the queue's task
    // level to move from 17 to 19, matching the level of the returned event.
    auto *returnedEvent = static_cast<Event *>(outEvent);
    EXPECT_EQ(19u, returnedEvent->taskLevel);
    EXPECT_EQ(19u, outOfOrderQueue->taskLevel);

    returnedEvent->release();
}

View File

@ -25,6 +25,7 @@
#include "runtime/gen_common/reg_configs.h"
#include "runtime/helpers/dispatch_info.h"
#include "unit_tests/command_queue/enqueue_fixture.h"
#include "unit_tests/helpers/debug_manager_state_restore.h"
#include "test.h"
using namespace OCLRT;
@ -325,3 +326,77 @@ HWTEST_F(EnqueueWriteBufferTypeTest, MediaVFEState) {
// Generically validate this command
FamilyType::PARSE::template validateCommand<MEDIA_VFE_STATE *>(cmdList.begin(), itorCmd);
}
HWTEST_F(EnqueueWriteBufferTypeTest, givenOOQWithEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrWhenWriteBufferIsExecutedThenTaskLevelNotIncreased) {
    // Scenario: non-blocking write without events on an out-of-order queue,
    // DoCpuCopyOnWriteBuffer set to true, source equal to srcBuffer's own
    // CPU transfer address.
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.DoCpuCopyOnWriteBuffer.set(true);

    cl_int status = CL_SUCCESS;
    std::unique_ptr<CommandQueue> outOfOrderQueue(createCommandQueue(pDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));
    void *bufferCpuAddress = srcBuffer->getCpuAddressForMemoryTransfer();
    // status has not been reassigned yet, so this only re-checks its initial value.
    EXPECT_EQ(status, CL_SUCCESS);

    status = outOfOrderQueue->enqueueWriteBuffer(srcBuffer,
                                                 CL_FALSE,
                                                 0,
                                                 MemoryConstants::cacheLineSize,
                                                 bufferCpuAddress,
                                                 0,
                                                 nullptr,
                                                 nullptr);

    EXPECT_EQ(CL_SUCCESS, status);
    // The queue's task level is expected to still read 0 after the enqueue.
    EXPECT_EQ(outOfOrderQueue->taskLevel, 0u);
}
HWTEST_F(EnqueueWriteBufferTypeTest, givenOOQWithDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrWhenWriteBufferIsExecutedThenTaskLevelNotIncreased) {
    // Scenario: non-blocking write without events on an out-of-order queue,
    // DoCpuCopyOnWriteBuffer set to false, source equal to srcBuffer's own
    // CPU transfer address.
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.DoCpuCopyOnWriteBuffer.set(false);

    cl_int status = CL_SUCCESS;
    std::unique_ptr<CommandQueue> outOfOrderQueue(createCommandQueue(pDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));
    void *bufferCpuAddress = srcBuffer->getCpuAddressForMemoryTransfer();
    // status has not been reassigned yet, so this only re-checks its initial value.
    EXPECT_EQ(status, CL_SUCCESS);

    status = outOfOrderQueue->enqueueWriteBuffer(srcBuffer,
                                                 CL_FALSE,
                                                 0,
                                                 MemoryConstants::cacheLineSize,
                                                 bufferCpuAddress,
                                                 0,
                                                 nullptr,
                                                 nullptr);

    EXPECT_EQ(CL_SUCCESS, status);
    // The queue's task level is expected to still read 0 after the enqueue.
    EXPECT_EQ(outOfOrderQueue->taskLevel, 0u);
}
HWTEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    // Scenario: non-blocking write without events on pCmdQ, DoCpuCopyOnWriteBuffer
    // set to true, source equal to srcBuffer's own CPU transfer address.
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.DoCpuCopyOnWriteBuffer.set(true);

    cl_int status = CL_SUCCESS;
    void *bufferCpuAddress = srcBuffer->getCpuAddressForMemoryTransfer();
    // status has not been reassigned yet, so this only re-checks its initial value.
    EXPECT_EQ(status, CL_SUCCESS);

    status = pCmdQ->enqueueWriteBuffer(srcBuffer,
                                       CL_FALSE,
                                       0,
                                       MemoryConstants::cacheLineSize,
                                       bufferCpuAddress,
                                       0,
                                       nullptr,
                                       nullptr);

    EXPECT_EQ(CL_SUCCESS, status);
    // The queue's task level is expected to still read 0 after the enqueue.
    EXPECT_EQ(pCmdQ->taskLevel, 0u);
}
HWTEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    // Scenario: non-blocking write without events on pCmdQ, DoCpuCopyOnWriteBuffer
    // set to false, source equal to srcBuffer's own CPU transfer address.
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.DoCpuCopyOnWriteBuffer.set(false);

    cl_int status = CL_SUCCESS;
    void *bufferCpuAddress = srcBuffer->getCpuAddressForMemoryTransfer();
    // status has not been reassigned yet, so this only re-checks its initial value.
    EXPECT_EQ(status, CL_SUCCESS);

    status = pCmdQ->enqueueWriteBuffer(srcBuffer,
                                       CL_FALSE,
                                       0,
                                       MemoryConstants::cacheLineSize,
                                       bufferCpuAddress,
                                       0,
                                       nullptr,
                                       nullptr);

    EXPECT_EQ(CL_SUCCESS, status);
    // The queue's task level is expected to still read 0 after the enqueue.
    EXPECT_EQ(pCmdQ->taskLevel, 0u);
}

View File

@ -23,28 +23,32 @@
#include "runtime/memory_manager/svm_memory_manager.h"
#include "driver_diagnostics_tests.h"
#include "unit_tests/helpers/debug_manager_state_restore.h"
#include "unit_tests/fixtures/buffer_fixture.h"
using namespace OCLRT;
TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingReadWhenEnqueueReadBufferIsCallingWithCPUCopyThenContextProvidesProperHint) {
buffer->forceDisallowCPUCopy = false;
void *ptr = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize);
pCmdQ->enqueueReadBuffer(
buffer,
CL_TRUE,
0,
MemoryConstants::cacheLineSize,
address,
ptr,
0,
nullptr,
nullptr);
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA], static_cast<cl_mem>(buffer), address);
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA], static_cast<cl_mem>(buffer), ptr);
EXPECT_TRUE(containsHint(expectedHint, userData));
alignedFree(ptr);
}
TEST_P(PerformanceHintEnqueueReadBufferTest, GivenHostPtrAndSizeAlignmentsWhenEnqueueReadBufferIsCallingThenContextProvidesHintsAboutAlignments) {
uintptr_t addressForReadBuffer = (uintptr_t)address;
void *ptr = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize);
uintptr_t addressForReadBuffer = (uintptr_t)ptr;
size_t sizeForReadBuffer = MemoryConstants::cacheLineSize;
if (!alignedAddress) {
addressForReadBuffer++;
@ -63,6 +67,7 @@ TEST_P(PerformanceHintEnqueueReadBufferTest, GivenHostPtrAndSizeAlignmentsWhenEn
EXPECT_TRUE(containsHint(expectedHint, userData));
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS], addressForReadBuffer, sizeForReadBuffer, MemoryConstants::pageSize, MemoryConstants::pageSize);
EXPECT_EQ(!(alignedSize && alignedAddress), containsHint(expectedHint, userData));
alignedFree(ptr);
}
TEST_P(PerformanceHintEnqueueReadBufferTest, GivenHostPtrAndSizeAlignmentsWhenEnqueueReadBufferRectIsCallingThenContextProvidesHintsAboutAlignments) {
@ -99,7 +104,25 @@ TEST_P(PerformanceHintEnqueueReadBufferTest, GivenHostPtrAndSizeAlignmentsWhenEn
EXPECT_EQ(!(alignedSize && alignedAddress), containsHint(expectedHint, userData));
}
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingWriteWhenEnqueueWriteBufferIsCallingWithoutCPUCopyThenContextProvidesProperHint) {
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingWriteAndBufferDoesntShareMemWithCPUWhenEnqueueWriteBufferIsCallingWithoutCPUCopyThenContextProvidesRequiedCopyHint) {
    // Non-blocking write from a freshly allocated host block, with
    // forceDisallowCPUCopy set on the buffer.
    buffer->forceDisallowCPUCopy = true;

    const size_t transferSize = MemoryConstants::cacheLineSize;
    void *hostPtr = alignedMalloc(2 * transferSize, transferSize);

    pCmdQ->enqueueWriteBuffer(buffer, CL_FALSE, 0, transferSize, hostPtr, 0, nullptr, nullptr);

    // The "requires copy" write hint format takes only the buffer handle.
    snprintf(expectedHint,
             DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA],
             static_cast<cl_mem>(buffer));
    EXPECT_TRUE(containsHint(expectedHint, userData));

    alignedFree(hostPtr);
}
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingWriteAndBufferSharesMemWithCPUWhenEnqueueWriteBufferIsCallingWithoutCPUCopyThenContextProvidesCopyDoenstRequiedHint) {
buffer->forceDisallowCPUCopy = true;
pCmdQ->enqueueWriteBuffer(
@ -111,11 +134,29 @@ TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingWriteWhenEnqueueWriteBu
0,
nullptr,
nullptr);
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA], static_cast<cl_mem>(buffer));
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA], static_cast<cl_mem>(buffer), address);
EXPECT_TRUE(containsHint(expectedHint, userData));
}
TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingWriteWhenEnqueueWriteBufferIsCallingWithCPUCopyThenContextProvidesProperHint) {
TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingWriteAndBufferDoesntShareMemWithCPUWhenEnqueueWriteBufferIsCallingWithCPUCopyThenContextProvidesRequiedCopyHint) {
    // Blocking write from a freshly allocated host block, with
    // forceDisallowCPUCopy cleared on the buffer.
    buffer->forceDisallowCPUCopy = false;

    const size_t transferSize = MemoryConstants::cacheLineSize;
    void *hostPtr = alignedMalloc(2 * transferSize, transferSize);

    pCmdQ->enqueueWriteBuffer(buffer, CL_TRUE, 0, transferSize, hostPtr, 0, nullptr, nullptr);

    // The "requires copy" write hint format takes only the buffer handle.
    snprintf(expectedHint,
             DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA],
             static_cast<cl_mem>(buffer));
    EXPECT_TRUE(containsHint(expectedHint, userData));

    alignedFree(hostPtr);
}
TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingWriteAndBufferSharesMemWithCPUWhenEnqueueWriteBufferIsCallingWithCPUCopyThenContextProvidesCopyDoenstRequiedHint) {
buffer->forceDisallowCPUCopy = false;
pCmdQ->enqueueWriteBuffer(
@ -127,7 +168,75 @@ TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingWriteWhenEnqueueWriteBuffe
0,
nullptr,
nullptr);
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA], static_cast<cl_mem>(buffer));
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA], static_cast<cl_mem>(buffer), address);
EXPECT_TRUE(containsHint(expectedHint, userData));
}
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingReadAndBufferDoesntShareMemWithCPUWhenEnqueueReadBufferIsCallingWithoutCPUCopyThenContextProvidesRequiedCopyHint) {
    // Non-blocking read into a freshly allocated host block, with
    // forceDisallowCPUCopy set on the buffer.
    buffer->forceDisallowCPUCopy = true;

    const size_t transferSize = MemoryConstants::cacheLineSize;
    void *hostPtr = alignedMalloc(2 * transferSize, transferSize);

    pCmdQ->enqueueReadBuffer(buffer, CL_FALSE, 0, transferSize, hostPtr, 0, nullptr, nullptr);

    // The "requires copy" read hint is formatted with the buffer handle and the host pointer.
    snprintf(expectedHint,
             DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA],
             static_cast<cl_mem>(buffer),
             hostPtr);
    EXPECT_TRUE(containsHint(expectedHint, userData));

    alignedFree(hostPtr);
}
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingReadAndBufferSharesMemWithCPUWhenEnqueueReadBufferIsCallingWithoutCPUCopyThenContextProvidesCopyDoenstRequiedHint) {
    // Non-blocking read into `address`, with forceDisallowCPUCopy set on the buffer.
    // NOTE(review): per the test name `address` is the memory the buffer shares
    // with the CPU; its setup is not visible here, confirm against the fixture.
    buffer->forceDisallowCPUCopy = true;

    const size_t transferSize = MemoryConstants::cacheLineSize;
    pCmdQ->enqueueReadBuffer(buffer, CL_FALSE, 0, transferSize, address, 0, nullptr, nullptr);

    // The "doesn't require copy" read hint is formatted with the buffer handle and the pointer.
    snprintf(expectedHint,
             DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA],
             static_cast<cl_mem>(buffer),
             address);
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingReadAndBufferDoesntShareMemWithCPUWhenEnqueueReadBufferIsCallingWithCPUCopyThenContextProvidesRequiedCopyHint) {
    // Blocking read into a freshly allocated host block, with
    // forceDisallowCPUCopy cleared on the buffer.
    buffer->forceDisallowCPUCopy = false;

    const size_t transferSize = MemoryConstants::cacheLineSize;
    void *hostPtr = alignedMalloc(2 * transferSize, transferSize);

    pCmdQ->enqueueReadBuffer(buffer, CL_TRUE, 0, transferSize, hostPtr, 0, nullptr, nullptr);

    // The "requires copy" read hint is formatted with the buffer handle and the host pointer.
    snprintf(expectedHint,
             DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA],
             static_cast<cl_mem>(buffer),
             hostPtr);
    EXPECT_TRUE(containsHint(expectedHint, userData));

    alignedFree(hostPtr);
}
TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingReadAndBufferSharesMemWithCPUWhenEnqueueReadBufferIsCallingWithCPUCopyThenContextProvidesCopyDoenstRequiedHint) {
    // Blocking read into `address`, with forceDisallowCPUCopy cleared on the buffer.
    // NOTE(review): per the test name `address` is the memory the buffer shares
    // with the CPU; its setup is not visible here, confirm against the fixture.
    buffer->forceDisallowCPUCopy = false;

    const size_t transferSize = MemoryConstants::cacheLineSize;
    pCmdQ->enqueueReadBuffer(buffer, CL_TRUE, 0, transferSize, address, 0, nullptr, nullptr);

    // The "doesn't require copy" read hint is formatted with the buffer handle and the pointer.
    snprintf(expectedHint,
             DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA],
             static_cast<cl_mem>(buffer),
             address);
    EXPECT_TRUE(containsHint(expectedHint, userData));
}