mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Add support for zero-copy r/w buffer
Change-Id: Ie9f3f2211d107eb338bd97692d36e9c7d7a0feab
This commit is contained in:
2
Jenkinsfile
vendored
2
Jenkinsfile
vendored
@ -2,4 +2,4 @@
|
||||
neoDependenciesRev='730226-753'
|
||||
strategy='EQUAL'
|
||||
allowedF=49
|
||||
allowedCD=369
|
||||
allowedCD=368
|
||||
|
@ -136,6 +136,8 @@ void *CommandQueueHw<GfxFamily>::cpuDataTransferHandler(MemObj *memObj,
|
||||
memcpy_s(bufferStorage, size, ptr, size);
|
||||
eventCompleted = true;
|
||||
break;
|
||||
case CL_COMMAND_MARKER:
|
||||
break;
|
||||
default:
|
||||
err.set(CL_INVALID_OPERATION);
|
||||
}
|
||||
|
@ -46,11 +46,31 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
||||
cl_event *event) {
|
||||
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
|
||||
auto isMemTransferNeeded = buffer->checkIfMemoryTransferIsRequired(offset, ptr, CL_COMMAND_READ_BUFFER);
|
||||
if ((DebugManager.flags.DoCpuCopyOnReadBuffer.get() ||
|
||||
buffer->isReadWriteOnCpuAllowed(blockingRead, numEventsInWaitList, ptr, size)) &&
|
||||
context->getDevice(0)->getDeviceInfo().cpuCopyAllowed) {
|
||||
if (!isMemTransferNeeded) {
|
||||
cpuDataTransferHandler(buffer,
|
||||
CL_COMMAND_MARKER,
|
||||
CL_TRUE,
|
||||
offset,
|
||||
size,
|
||||
ptr,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event,
|
||||
retVal);
|
||||
if (event) {
|
||||
auto pEvent = castToObjectOrAbort<Event>(*event);
|
||||
pEvent->setCmdType(CL_COMMAND_READ_BUFFER);
|
||||
}
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(buffer), ptr);
|
||||
}
|
||||
return retVal;
|
||||
}
|
||||
cpuDataTransferHandler(buffer,
|
||||
CL_COMMAND_READ_BUFFER,
|
||||
CL_TRUE,
|
||||
@ -63,9 +83,28 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
||||
retVal);
|
||||
return retVal;
|
||||
}
|
||||
|
||||
MultiDispatchInfo dispatchInfo;
|
||||
if (!isMemTransferNeeded) {
|
||||
NullSurface s;
|
||||
Surface *surfaces[] = {&s};
|
||||
enqueueHandler<CL_COMMAND_MARKER>(
|
||||
surfaces,
|
||||
blockingRead == CL_TRUE,
|
||||
dispatchInfo,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
if (event) {
|
||||
auto pEvent = castToObjectOrAbort<Event>(*event);
|
||||
pEvent->setCmdType(CL_COMMAND_READ_BUFFER);
|
||||
}
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(buffer), ptr);
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
auto &builder = BuiltIns::getInstance().getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer,
|
||||
this->getContext(), this->getDevice());
|
||||
builder.takeOwnership(this->context);
|
||||
@ -87,7 +126,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, ptr, size, MemoryConstants::pageSize, MemoryConstants::pageSize);
|
||||
}
|
||||
}
|
||||
|
||||
enqueueHandler<CL_COMMAND_READ_BUFFER>(
|
||||
surfaces,
|
||||
blockingRead == CL_TRUE,
|
||||
@ -95,8 +133,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
|
||||
builder.releaseOwnership();
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
}
|
||||
} // namespace OCLRT
|
||||
|
@ -45,11 +45,31 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
|
||||
cl_event *event) {
|
||||
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
|
||||
auto isMemTransferNeeded = buffer->checkIfMemoryTransferIsRequired(offset, ptr, CL_COMMAND_WRITE_BUFFER);
|
||||
if ((DebugManager.flags.DoCpuCopyOnWriteBuffer.get() ||
|
||||
buffer->isReadWriteOnCpuAllowed(blockingWrite, numEventsInWaitList, const_cast<void *>(ptr), size)) &&
|
||||
context->getDevice(0)->getDeviceInfo().cpuCopyAllowed) {
|
||||
if (!isMemTransferNeeded) {
|
||||
cpuDataTransferHandler(buffer,
|
||||
CL_COMMAND_MARKER,
|
||||
CL_TRUE,
|
||||
offset,
|
||||
size,
|
||||
const_cast<void *>(ptr),
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event,
|
||||
retVal);
|
||||
if (event) {
|
||||
auto pEvent = castToObjectOrAbort<Event>(*event);
|
||||
pEvent->setCmdType(CL_COMMAND_WRITE_BUFFER);
|
||||
}
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(buffer), ptr);
|
||||
}
|
||||
return retVal;
|
||||
}
|
||||
cpuDataTransferHandler(buffer,
|
||||
CL_COMMAND_WRITE_BUFFER,
|
||||
CL_TRUE,
|
||||
@ -62,9 +82,28 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
|
||||
retVal);
|
||||
return retVal;
|
||||
}
|
||||
|
||||
MultiDispatchInfo dispatchInfo;
|
||||
if (!isMemTransferNeeded) {
|
||||
NullSurface s;
|
||||
Surface *surfaces[] = {&s};
|
||||
enqueueHandler<CL_COMMAND_MARKER>(
|
||||
surfaces,
|
||||
blockingWrite == CL_TRUE,
|
||||
dispatchInfo,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event);
|
||||
if (event) {
|
||||
auto pEvent = castToObjectOrAbort<Event>(*event);
|
||||
pEvent->setCmdType(CL_COMMAND_WRITE_BUFFER);
|
||||
}
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(buffer), ptr);
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
auto &builder = BuiltIns::getInstance().getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer,
|
||||
this->getContext(), this->getDevice());
|
||||
|
||||
|
@ -42,10 +42,12 @@ const char *DriverDiagnostics::hintFormat[] = {
|
||||
"Performance hint: Subbuffer created from buffer %p shares the same memory with buffer.", //SUBBUFFER_SHARES_MEMORY
|
||||
"Performance hint: clSVMAlloc with pointer %p and size %u meets alignment restrictions.", //CL_SVM_ALLOC_MEETS_ALIGNMENT_RESTRICTIONS
|
||||
"Performance hint: clEnqueueReadBuffer call on a buffer %p with pointer %p will require driver to copy the data.Consider using clEnqueueMapBuffer with buffer that shares the same physical memory with CPU.", //CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA
|
||||
"Performance hint: clEnqueueReadBuffer call on a buffer %p with pointer %p will not require any data copy as the buffer shares the same physical memory with CPU.", //CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA
|
||||
"Performance hint: Pointer %p and size %u passed to clEnqueueReadBuffer doesn't meet alignment restrictions. Size should be aligned to %u bytes and pointer should be aligned to %u. Driver needs to disable L3 caching.", //CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS
|
||||
"Performance hint: clEnqueueReadBufferRect call on a buffer %p with pointer %p will require driver to copy the data.Consider using clEnqueueMapBuffer with buffer that shares the same physical memory with CPU.", //CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA
|
||||
"Performance hint: Pointer %p and size %u passed to clEnqueueReadBufferRect doesn't meet alignment restrictions. Size should be aligned to %u bytes and pointer should be aligned to %u. Driver needs to disable L3 caching.", //CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_MEET_ALIGNMENT_RESTRICTIONS
|
||||
"Performance hint: clEnqueueWriteBuffer call on a buffer %p require driver to copy the data. Consider using clEnqueueMapBuffer with buffer that shares the same physical memory with CPU.", //CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA
|
||||
"Performance hint: clEnqueueWriteBuffer call on a buffer %p with pointer %p will not require any data copy as the buffer shares the same physical memory with CPU.", //CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA
|
||||
"Performance hint: clEnqueueWriteBufferRect call on a buffer %p require driver to copy the data. Consider using clEnqueueMapBuffer with buffer that shares the same physical memory with CPU.", //CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA
|
||||
"Performance hint: Pointer %p and size %u passed to clEnqueueReadImage doesn't meet alignment restrictions. Size should be aligned to %u bytes and pointer should be aligned to %u. Driver needs to disable L3 caching.", //CL_ENQUEUE_READ_IMAGE_DOESNT_MEET_ALIGNMENT_RESTRICTIONS
|
||||
"Performance hint: clEnqueueWriteImage call on an image %p require driver to copy the data.", //CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA
|
||||
@ -63,4 +65,4 @@ const char *DriverDiagnostics::hintFormat[] = {
|
||||
"Performance hint: Kernel %s private memory usage is too high and exhausts register space, additional surface needs to be allocated of size %u, consider reducing amount of private memory used, avoid using private memory arrays.", //PRIVATE_MEMORY_USAGE_TOO_HIGH
|
||||
"Performance hint: Kernel %s submission requires coherency with CPU; this will impact performance." //KERNEL_REQUIRES_COHERENCY
|
||||
};
|
||||
}
|
||||
} // namespace OCLRT
|
||||
|
@ -35,10 +35,12 @@ enum PerformanceHints {
|
||||
SUBBUFFER_SHARES_MEMORY,
|
||||
CL_SVM_ALLOC_MEETS_ALIGNMENT_RESTRICTIONS,
|
||||
CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA,
|
||||
CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA,
|
||||
CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS,
|
||||
CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA,
|
||||
CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_MEET_ALIGNMENT_RESTRICTIONS,
|
||||
CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA,
|
||||
CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA,
|
||||
CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA,
|
||||
CL_ENQUEUE_READ_IMAGE_DOESNT_MEET_ALIGNMENT_RESTRICTIONS,
|
||||
CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA,
|
||||
@ -68,4 +70,4 @@ class DriverDiagnostics {
|
||||
protected:
|
||||
cl_diagnostics_verbose_level verboseLevel;
|
||||
};
|
||||
}
|
||||
} // namespace OCLRT
|
||||
|
@ -350,4 +350,10 @@ void MemObj::destroyGraphicsAllocation(GraphicsAllocation *allocation, bool asyn
|
||||
}
|
||||
memoryManager->freeGraphicsMemory(allocation);
|
||||
}
|
||||
|
||||
bool MemObj::checkIfMemoryTransferIsRequired(size_t offset, const void *ptr, cl_command_type cmdType) {
|
||||
auto bufferStorage = ptrOffset(this->getCpuAddressForMemoryTransfer(), offset);
|
||||
auto isMemTransferNeeded = !((bufferStorage == ptr) && (cmdType == CL_COMMAND_READ_BUFFER || cmdType == CL_COMMAND_WRITE_BUFFER));
|
||||
return isMemTransferNeeded;
|
||||
}
|
||||
} // namespace OCLRT
|
||||
|
@ -121,6 +121,7 @@ class MemObj : public BaseObject<_cl_mem> {
|
||||
|
||||
void waitForCsrCompletion();
|
||||
void destroyGraphicsAllocation(GraphicsAllocation *allocation, bool asyncDestroy);
|
||||
bool checkIfMemoryTransferIsRequired(size_t offset, const void *ptr, cl_command_type cmdType);
|
||||
|
||||
protected:
|
||||
void getOsSpecificMemObjectInfo(const cl_mem_info &paramName, size_t *srcParamSize, void **srcParam);
|
||||
|
@ -22,6 +22,7 @@
|
||||
|
||||
#include "runtime/command_queue/command_queue.h"
|
||||
#include "runtime/event/event.h"
|
||||
#include "unit_tests/helpers/debug_manager_state_restore.h"
|
||||
#include "unit_tests/fixtures/hello_world_fixture.h"
|
||||
#include "unit_tests/fixtures/buffer_fixture.h"
|
||||
#include "gtest/gtest.h"
|
||||
@ -108,3 +109,165 @@ TEST_F(EnqueueReadBuffer, eventReturnedShouldBeMaxOfInputEventsAndCmdQPlus1) {
|
||||
|
||||
delete pEvent;
|
||||
}
|
||||
// Blocking read on the fixture queue with DoCpuCopyOnReadBuffer forced on, where the destination
// pointer is the buffer's own CPU transfer address and the wait list holds two events
// (task levels 8 and 19). The returned event must report task level 19 while the queue
// task level stays at the 17 it was preset to.
TEST_F(EnqueueReadBuffer, givenInOrderQueueAndEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrWithEventsWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(true);

    uint32_t presetQueueLevel = 17;
    pCmdQ->taskLevel = presetQueueLevel;

    uint32_t lowerEventLevel = 8;
    uint32_t higherEventLevel = 19;
    Event lowerEvent(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, lowerEventLevel, 4);
    Event higherEvent(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, higherEventLevel, 10);

    cl_event waitList[] = {&lowerEvent, &higherEvent};
    cl_uint waitListLength = sizeof(waitList) / sizeof(waitList[0]);

    cl_bool blocking = CL_TRUE;
    size_t transferSize = sizeof(cl_float);
    cl_event returnedEvent = nullptr;

    auto srcBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create());
    // Destination is the buffer's own storage, so source and destination coincide.
    void *bufferStorage = srcBuffer->getCpuAddressForMemoryTransfer();

    cl_int status = pCmdQ->enqueueReadBuffer(srcBuffer.get(), blocking, 0, transferSize, bufferStorage,
                                             waitListLength, waitList, &returnedEvent);

    EXPECT_EQ(CL_SUCCESS, status);
    ASSERT_NE(nullptr, returnedEvent);

    auto pEvent = (Event *)returnedEvent;
    EXPECT_EQ(19u, pEvent->taskLevel);
    EXPECT_EQ(17u, pCmdQ->taskLevel);

    pEvent->release();
}
|
||||
// Same scenario as the in-order variant, but on a locally created out-of-order queue:
// DoCpuCopyOnReadBuffer is forced on, the destination pointer is the buffer's own CPU transfer
// address, and two events (task levels 8 and 19) are in the wait list. The returned event must
// report task level 19 while the queue task level stays at its preset 17.
TEST_F(EnqueueReadBuffer, givenOutOfOrderQueueAndEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrWithEventsWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(true);
    std::unique_ptr<CommandQueue> pCmdOOQ(createCommandQueue(pDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));

    uint32_t presetQueueLevel = 17;
    pCmdOOQ->taskLevel = presetQueueLevel;

    uint32_t lowerEventLevel = 8;
    uint32_t higherEventLevel = 19;
    Event lowerEvent(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, lowerEventLevel, 4);
    Event higherEvent(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, higherEventLevel, 10);

    cl_event waitList[] = {&lowerEvent, &higherEvent};
    cl_uint waitListLength = sizeof(waitList) / sizeof(waitList[0]);

    cl_bool blocking = CL_TRUE;
    size_t transferSize = sizeof(cl_float);
    cl_event returnedEvent = nullptr;

    auto srcBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create());
    // Destination is the buffer's own storage, so source and destination coincide.
    void *bufferStorage = srcBuffer->getCpuAddressForMemoryTransfer();

    cl_int status = pCmdOOQ->enqueueReadBuffer(srcBuffer.get(), blocking, 0, transferSize, bufferStorage,
                                               waitListLength, waitList, &returnedEvent);

    EXPECT_EQ(CL_SUCCESS, status);
    ASSERT_NE(nullptr, returnedEvent);

    auto pEvent = (Event *)returnedEvent;
    EXPECT_EQ(19u, pEvent->taskLevel);
    EXPECT_EQ(17u, pCmdOOQ->taskLevel);

    pEvent->release();
}
|
||||
// Blocking read on the fixture queue with DoCpuCopyOnReadBuffer forced off, where the destination
// pointer is the buffer's own CPU transfer address and the wait list holds two events
// (task levels 8 and 19). Both the returned event and the queue must end up at task level 19.
// NOTE: despite the "ShouldNotBeIncreased" suffix in the test name, the queue is preset to 17
// and the assertions below expect it to become 19.
TEST_F(EnqueueReadBuffer, givenInOrderQueueAndDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrWithEventsWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(false);

    uint32_t presetQueueLevel = 17;
    pCmdQ->taskLevel = presetQueueLevel;

    uint32_t lowerEventLevel = 8;
    uint32_t higherEventLevel = 19;
    Event lowerEvent(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, lowerEventLevel, 4);
    Event higherEvent(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, higherEventLevel, 10);

    cl_event waitList[] = {&lowerEvent, &higherEvent};
    cl_uint waitListLength = sizeof(waitList) / sizeof(waitList[0]);

    cl_bool blocking = CL_TRUE;
    size_t transferSize = sizeof(cl_float);
    cl_event returnedEvent = nullptr;

    auto srcBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create());
    // Destination is the buffer's own storage, so source and destination coincide.
    void *bufferStorage = srcBuffer->getCpuAddressForMemoryTransfer();

    cl_int status = pCmdQ->enqueueReadBuffer(srcBuffer.get(), blocking, 0, transferSize, bufferStorage,
                                             waitListLength, waitList, &returnedEvent);

    EXPECT_EQ(CL_SUCCESS, status);
    ASSERT_NE(nullptr, returnedEvent);

    auto pEvent = (Event *)returnedEvent;
    EXPECT_EQ(19u, pEvent->taskLevel);
    EXPECT_EQ(19u, pCmdQ->taskLevel);

    pEvent->release();
}
|
||||
// Blocking read on a locally created out-of-order queue with DoCpuCopyOnReadBuffer forced off,
// where the destination pointer is the buffer's own CPU transfer address and the wait list holds
// two events (task levels 8 and 19). Both the returned event and the queue must end up at 19.
// NOTE: despite the "ShouldNotBeIncreased" suffix in the test name, the queue is preset to 17
// and the assertions below expect it to become 19.
TEST_F(EnqueueReadBuffer, givenOutOfOrderQueueAndDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrWithEventsWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(false);
    std::unique_ptr<CommandQueue> pCmdOOQ(createCommandQueue(pDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));

    uint32_t presetQueueLevel = 17;
    pCmdOOQ->taskLevel = presetQueueLevel;

    uint32_t lowerEventLevel = 8;
    uint32_t higherEventLevel = 19;
    Event lowerEvent(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, lowerEventLevel, 4);
    Event higherEvent(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, higherEventLevel, 10);

    cl_event waitList[] = {&lowerEvent, &higherEvent};
    cl_uint waitListLength = sizeof(waitList) / sizeof(waitList[0]);

    cl_bool blocking = CL_TRUE;
    size_t transferSize = sizeof(cl_float);
    cl_event returnedEvent = nullptr;

    auto srcBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create());
    // Destination is the buffer's own storage, so source and destination coincide.
    void *bufferStorage = srcBuffer->getCpuAddressForMemoryTransfer();

    cl_int status = pCmdOOQ->enqueueReadBuffer(srcBuffer.get(), blocking, 0, transferSize, bufferStorage,
                                               waitListLength, waitList, &returnedEvent);

    EXPECT_EQ(CL_SUCCESS, status);
    ASSERT_NE(nullptr, returnedEvent);

    auto pEvent = (Event *)returnedEvent;
    EXPECT_EQ(19u, pEvent->taskLevel);
    EXPECT_EQ(19u, pCmdOOQ->taskLevel);

    pEvent->release();
}
|
@ -26,6 +26,7 @@
|
||||
#include "runtime/helpers/dispatch_info.h"
|
||||
#include "unit_tests/command_queue/enqueue_fixture.h"
|
||||
#include "unit_tests/command_queue/enqueue_read_buffer_fixture.h"
|
||||
#include "unit_tests/helpers/debug_manager_state_restore.h"
|
||||
#include "test.h"
|
||||
|
||||
using namespace OCLRT;
|
||||
@ -406,3 +407,78 @@ HWTEST_F(EnqueueReadBufferTypeTest, givenNotAlignedPointerAndAlignedSizeWhenRead
|
||||
EXPECT_EQ(CacheSettings::l3CacheOn, csr.latestSentStatelessMocsConfig);
|
||||
EXPECT_FALSE(csr.disableL3Cache);
|
||||
}
|
||||
|
||||
// Non-blocking read, without events, on a locally created out-of-order queue with
// DoCpuCopyOnReadBuffer forced on. The destination pointer is the fixture buffer's own CPU
// transfer address. The call must succeed and the queue task level must still read 0.
HWTEST_F(EnqueueReadBufferTypeTest, givenOOQWithEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrWhenReadBufferIsExecutedThenTaskLevelNotIncreased) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(true);
    std::unique_ptr<CommandQueue> pCmdOOQ(createCommandQueue(pDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));

    // Destination is the buffer's own storage, so source and destination coincide.
    void *ptr = srcBuffer->getCpuAddressForMemoryTransfer();

    // The status is checked only after the call under test has produced it.
    cl_int retVal = pCmdOOQ->enqueueReadBuffer(srcBuffer,
                                               CL_FALSE,
                                               0,
                                               MemoryConstants::cacheLineSize,
                                               ptr,
                                               0,
                                               nullptr,
                                               nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0u, pCmdOOQ->taskLevel);
}
|
||||
// Non-blocking read, without events, on a locally created out-of-order queue with
// DoCpuCopyOnReadBuffer forced off. The destination pointer is the fixture buffer's own CPU
// transfer address. The call must succeed and the queue task level must still read 0.
HWTEST_F(EnqueueReadBufferTypeTest, givenOOQWithDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrWhenReadBufferIsExecutedThenTaskLevelNotIncreased) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(false);
    std::unique_ptr<CommandQueue> pCmdOOQ(createCommandQueue(pDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));

    // Destination is the buffer's own storage, so source and destination coincide.
    void *ptr = srcBuffer->getCpuAddressForMemoryTransfer();

    // The status is checked only after the call under test has produced it.
    cl_int retVal = pCmdOOQ->enqueueReadBuffer(srcBuffer,
                                               CL_FALSE,
                                               0,
                                               MemoryConstants::cacheLineSize,
                                               ptr,
                                               0,
                                               nullptr,
                                               nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0u, pCmdOOQ->taskLevel);
}
|
||||
// Non-blocking read, without events, on the fixture queue with DoCpuCopyOnReadBuffer forced on.
// The destination pointer is the fixture buffer's own CPU transfer address. The call must
// succeed and the queue task level must still read 0.
HWTEST_F(EnqueueReadBufferTypeTest, givenInOrderQueueAndEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(true);

    // Destination is the buffer's own storage, so source and destination coincide.
    void *ptr = srcBuffer->getCpuAddressForMemoryTransfer();

    // The status is checked only after the call under test has produced it.
    cl_int retVal = pCmdQ->enqueueReadBuffer(srcBuffer,
                                             CL_FALSE,
                                             0,
                                             MemoryConstants::cacheLineSize,
                                             ptr,
                                             0,
                                             nullptr,
                                             nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0u, pCmdQ->taskLevel);
}
|
||||
// Non-blocking read, without events, on the fixture queue with DoCpuCopyOnReadBuffer forced off.
// The destination pointer is the fixture buffer's own CPU transfer address. The call must
// succeed and the queue task level must still read 0.
HWTEST_F(EnqueueReadBufferTypeTest, givenInOrderQueueAndDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(false);

    // Destination is the buffer's own storage, so source and destination coincide.
    void *ptr = srcBuffer->getCpuAddressForMemoryTransfer();

    // The status is checked only after the call under test has produced it.
    cl_int retVal = pCmdQ->enqueueReadBuffer(srcBuffer,
                                             CL_FALSE,
                                             0,
                                             MemoryConstants::cacheLineSize,
                                             ptr,
                                             0,
                                             nullptr,
                                             nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0u, pCmdQ->taskLevel);
}
|
@ -23,6 +23,7 @@
|
||||
#include "runtime/command_queue/command_queue.h"
|
||||
#include "runtime/event/event.h"
|
||||
#include "unit_tests/command_queue/buffer_operations_fixture.h"
|
||||
#include "unit_tests/helpers/debug_manager_state_restore.h"
|
||||
#include "unit_tests/fixtures/buffer_fixture.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include <memory>
|
||||
@ -107,4 +108,167 @@ TEST_F(EnqueueWriteBufferTypeTest, eventReturnedShouldBeMaxOfInputEventsAndCmdQP
|
||||
EXPECT_LE(19u, pEvent->taskLevel);
|
||||
|
||||
delete pEvent;
|
||||
}
|
||||
|
||||
// Blocking write on the fixture queue with DoCpuCopyOnWriteBuffer forced on, where the source
// pointer is the buffer's own CPU transfer address and the wait list holds two events
// (task levels 8 and 19). The returned event must report task level 19 while the queue
// task level stays at the 17 it was preset to.
TEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrWithEventsWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.DoCpuCopyOnWriteBuffer.set(true);

    uint32_t presetQueueLevel = 17;
    pCmdQ->taskLevel = presetQueueLevel;

    uint32_t lowerEventLevel = 8;
    uint32_t higherEventLevel = 19;
    Event lowerEvent(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, lowerEventLevel, 4);
    Event higherEvent(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, higherEventLevel, 10);

    cl_event waitList[] = {&lowerEvent, &higherEvent};
    cl_uint waitListLength = sizeof(waitList) / sizeof(waitList[0]);

    cl_bool blockingWrite = CL_TRUE;
    size_t transferSize = sizeof(cl_float);
    cl_event returnedEvent = nullptr;

    auto srcBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create());
    // Source is the buffer's own storage, so source and destination coincide.
    void *bufferStorage = srcBuffer->getCpuAddressForMemoryTransfer();

    cl_int status = pCmdQ->enqueueWriteBuffer(srcBuffer.get(), blockingWrite, 0, transferSize, bufferStorage,
                                              waitListLength, waitList, &returnedEvent);

    EXPECT_EQ(CL_SUCCESS, status);
    ASSERT_NE(nullptr, returnedEvent);

    auto pEvent = (Event *)returnedEvent;
    EXPECT_EQ(19u, pEvent->taskLevel);
    EXPECT_EQ(17u, pCmdQ->taskLevel);

    pEvent->release();
}
|
||||
// Blocking write on a locally created out-of-order queue with DoCpuCopyOnWriteBuffer forced on,
// where the source pointer is the buffer's own CPU transfer address and the wait list holds two
// events (task levels 8 and 19). The returned event must report task level 19 while the queue
// task level stays at its preset 17.
TEST_F(EnqueueWriteBufferTypeTest, givenOutOfOrderQueueAndEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrWithEventsWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.DoCpuCopyOnWriteBuffer.set(true);
    std::unique_ptr<CommandQueue> pCmdOOQ(createCommandQueue(pDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));

    uint32_t presetQueueLevel = 17;
    pCmdOOQ->taskLevel = presetQueueLevel;

    uint32_t lowerEventLevel = 8;
    uint32_t higherEventLevel = 19;
    Event lowerEvent(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, lowerEventLevel, 4);
    Event higherEvent(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, higherEventLevel, 10);

    cl_event waitList[] = {&lowerEvent, &higherEvent};
    cl_uint waitListLength = sizeof(waitList) / sizeof(waitList[0]);

    cl_bool blockingWrite = CL_TRUE;
    size_t transferSize = sizeof(cl_float);
    cl_event returnedEvent = nullptr;

    auto srcBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create());
    // Source is the buffer's own storage, so source and destination coincide.
    void *bufferStorage = srcBuffer->getCpuAddressForMemoryTransfer();

    cl_int status = pCmdOOQ->enqueueWriteBuffer(srcBuffer.get(), blockingWrite, 0, transferSize, bufferStorage,
                                                waitListLength, waitList, &returnedEvent);

    EXPECT_EQ(CL_SUCCESS, status);
    ASSERT_NE(nullptr, returnedEvent);

    auto pEvent = (Event *)returnedEvent;
    EXPECT_EQ(19u, pEvent->taskLevel);
    EXPECT_EQ(17u, pCmdOOQ->taskLevel);

    pEvent->release();
}
|
||||
// Blocking write on the fixture queue with DoCpuCopyOnWriteBuffer forced off, where the source
// pointer is the buffer's own CPU transfer address and the wait list holds two events
// (task levels 8 and 19). Both the returned event and the queue must end up at task level 19.
// NOTE: despite the "ShouldNotBeIncreased" suffix in the test name, the queue is preset to 17
// and the assertions below expect it to become 19.
TEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrWithEventsWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.DoCpuCopyOnWriteBuffer.set(false);

    uint32_t presetQueueLevel = 17;
    pCmdQ->taskLevel = presetQueueLevel;

    uint32_t lowerEventLevel = 8;
    uint32_t higherEventLevel = 19;
    Event lowerEvent(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, lowerEventLevel, 4);
    Event higherEvent(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, higherEventLevel, 10);

    cl_event waitList[] = {&lowerEvent, &higherEvent};
    cl_uint waitListLength = sizeof(waitList) / sizeof(waitList[0]);

    cl_bool blockingWrite = CL_TRUE;
    size_t transferSize = sizeof(cl_float);
    cl_event returnedEvent = nullptr;

    auto srcBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create());
    // Source is the buffer's own storage, so source and destination coincide.
    void *bufferStorage = srcBuffer->getCpuAddressForMemoryTransfer();

    cl_int status = pCmdQ->enqueueWriteBuffer(srcBuffer.get(), blockingWrite, 0, transferSize, bufferStorage,
                                              waitListLength, waitList, &returnedEvent);

    EXPECT_EQ(CL_SUCCESS, status);
    ASSERT_NE(nullptr, returnedEvent);

    auto pEvent = (Event *)returnedEvent;
    EXPECT_EQ(19u, pEvent->taskLevel);
    EXPECT_EQ(19u, pCmdQ->taskLevel);

    pEvent->release();
}
|
||||
// Blocking write on a locally created out-of-order queue with DoCpuCopyOnWriteBuffer forced off,
// where the source pointer is the buffer's own CPU transfer address and the wait list holds two
// events (task levels 8 and 19). Both the returned event and the queue must end up at 19.
// NOTE: despite the "ShouldNotBeIncreased" suffix in the test name, the queue is preset to 17
// and the assertions below expect it to become 19.
TEST_F(EnqueueWriteBufferTypeTest, givenOutOfOrderQueueAndDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrWithEventsWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.DoCpuCopyOnWriteBuffer.set(false);
    std::unique_ptr<CommandQueue> pCmdOOQ(createCommandQueue(pDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));

    uint32_t presetQueueLevel = 17;
    pCmdOOQ->taskLevel = presetQueueLevel;

    uint32_t lowerEventLevel = 8;
    uint32_t higherEventLevel = 19;
    Event lowerEvent(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, lowerEventLevel, 4);
    Event higherEvent(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, higherEventLevel, 10);

    cl_event waitList[] = {&lowerEvent, &higherEvent};
    cl_uint waitListLength = sizeof(waitList) / sizeof(waitList[0]);

    cl_bool blockingWrite = CL_TRUE;
    size_t transferSize = sizeof(cl_float);
    cl_event returnedEvent = nullptr;

    auto srcBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create());
    // Source is the buffer's own storage, so source and destination coincide.
    void *bufferStorage = srcBuffer->getCpuAddressForMemoryTransfer();

    cl_int status = pCmdOOQ->enqueueWriteBuffer(srcBuffer.get(), blockingWrite, 0, transferSize, bufferStorage,
                                                waitListLength, waitList, &returnedEvent);

    EXPECT_EQ(CL_SUCCESS, status);
    ASSERT_NE(nullptr, returnedEvent);

    auto pEvent = (Event *)returnedEvent;
    EXPECT_EQ(19u, pEvent->taskLevel);
    EXPECT_EQ(19u, pCmdOOQ->taskLevel);

    pEvent->release();
}
|
@ -25,6 +25,7 @@
|
||||
#include "runtime/gen_common/reg_configs.h"
|
||||
#include "runtime/helpers/dispatch_info.h"
|
||||
#include "unit_tests/command_queue/enqueue_fixture.h"
|
||||
#include "unit_tests/helpers/debug_manager_state_restore.h"
|
||||
#include "test.h"
|
||||
|
||||
using namespace OCLRT;
|
||||
@ -325,3 +326,77 @@ HWTEST_F(EnqueueWriteBufferTypeTest, MediaVFEState) {
|
||||
// Generically validate this command
|
||||
FamilyType::PARSE::template validateCommand<MEDIA_VFE_STATE *>(cmdList.begin(), itorCmd);
|
||||
}
|
||||
// Non-blocking write, without events, on a locally created out-of-order queue with
// DoCpuCopyOnWriteBuffer forced on. The source pointer is the fixture buffer's own CPU
// transfer address. The call must succeed and the queue task level must still read 0.
HWTEST_F(EnqueueWriteBufferTypeTest, givenOOQWithEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrWhenWriteBufferIsExecutedThenTaskLevelNotIncreased) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.DoCpuCopyOnWriteBuffer.set(true);
    std::unique_ptr<CommandQueue> pCmdOOQ(createCommandQueue(pDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));

    // Source is the buffer's own storage, so source and destination coincide.
    void *ptr = srcBuffer->getCpuAddressForMemoryTransfer();

    // The status is checked only after the call under test has produced it.
    cl_int retVal = pCmdOOQ->enqueueWriteBuffer(srcBuffer,
                                                CL_FALSE,
                                                0,
                                                MemoryConstants::cacheLineSize,
                                                ptr,
                                                0,
                                                nullptr,
                                                nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0u, pCmdOOQ->taskLevel);
}
|
||||
// Non-blocking write, without events, on a locally created out-of-order queue with
// DoCpuCopyOnWriteBuffer forced off. The source pointer is the fixture buffer's own CPU
// transfer address. The call must succeed and the queue task level must still read 0.
HWTEST_F(EnqueueWriteBufferTypeTest, givenOOQWithDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrWhenWriteBufferIsExecutedThenTaskLevelNotIncreased) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.DoCpuCopyOnWriteBuffer.set(false);
    std::unique_ptr<CommandQueue> pCmdOOQ(createCommandQueue(pDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));

    // Source is the buffer's own storage, so source and destination coincide.
    void *ptr = srcBuffer->getCpuAddressForMemoryTransfer();

    // The status is checked only after the call under test has produced it.
    cl_int retVal = pCmdOOQ->enqueueWriteBuffer(srcBuffer,
                                                CL_FALSE,
                                                0,
                                                MemoryConstants::cacheLineSize,
                                                ptr,
                                                0,
                                                nullptr,
                                                nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0u, pCmdOOQ->taskLevel);
}
|
||||
// Non-blocking write, without events, on the fixture queue with DoCpuCopyOnWriteBuffer forced on.
// The source pointer is the fixture buffer's own CPU transfer address. The call must succeed
// and the queue task level must still read 0.
HWTEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.DoCpuCopyOnWriteBuffer.set(true);

    // Source is the buffer's own storage, so source and destination coincide.
    void *ptr = srcBuffer->getCpuAddressForMemoryTransfer();

    // The status is checked only after the call under test has produced it.
    cl_int retVal = pCmdQ->enqueueWriteBuffer(srcBuffer,
                                              CL_FALSE,
                                              0,
                                              MemoryConstants::cacheLineSize,
                                              ptr,
                                              0,
                                              nullptr,
                                              nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0u, pCmdQ->taskLevel);
}
|
||||
// Non-blocking write, without events, on the fixture queue with DoCpuCopyOnWriteBuffer forced off.
// The source pointer is the fixture buffer's own CPU transfer address. The call must succeed
// and the queue task level must still read 0.
HWTEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.DoCpuCopyOnWriteBuffer.set(false);

    // Source is the buffer's own storage, so source and destination coincide.
    void *ptr = srcBuffer->getCpuAddressForMemoryTransfer();

    // The status is checked only after the call under test has produced it.
    cl_int retVal = pCmdQ->enqueueWriteBuffer(srcBuffer,
                                              CL_FALSE,
                                              0,
                                              MemoryConstants::cacheLineSize,
                                              ptr,
                                              0,
                                              nullptr,
                                              nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0u, pCmdQ->taskLevel);
}
|
@ -23,28 +23,32 @@
|
||||
#include "runtime/memory_manager/svm_memory_manager.h"
|
||||
#include "driver_diagnostics_tests.h"
|
||||
#include "unit_tests/helpers/debug_manager_state_restore.h"
|
||||
#include "unit_tests/fixtures/buffer_fixture.h"
|
||||
|
||||
using namespace OCLRT;
|
||||
|
||||
// Performs a blocking read of one cache line from `buffer` into separately
// allocated host memory with forceDisallowCPUCopy cleared, then expects the
// CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA hint (formatted with the buffer
// handle and the destination pointer) to be present in userData.
TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingReadWhenEnqueueReadBufferIsCallingWithCPUCopyThenContextProvidesProperHint) {

    buffer->forceDisallowCPUCopy = false;

    // Destination is a fresh allocation, distinct from the fixture's `address`.
    void *ptr = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize);

    // Exactly one host pointer is passed, matching every other enqueue call in this file.
    pCmdQ->enqueueReadBuffer(
        buffer,
        CL_TRUE,
        0,
        MemoryConstants::cacheLineSize,
        ptr,
        0,
        nullptr,
        nullptr);

    // The expected hint is built once, from the pointer that was actually used.
    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA], static_cast<cl_mem>(buffer), ptr);
    EXPECT_TRUE(containsHint(expectedHint, userData));

    alignedFree(ptr);
}
|
||||
|
||||
TEST_P(PerformanceHintEnqueueReadBufferTest, GivenHostPtrAndSizeAlignmentsWhenEnqueueReadBufferIsCallingThenContextProvidesHintsAboutAlignments) {
|
||||
|
||||
uintptr_t addressForReadBuffer = (uintptr_t)address;
|
||||
void *ptr = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize);
|
||||
uintptr_t addressForReadBuffer = (uintptr_t)ptr;
|
||||
size_t sizeForReadBuffer = MemoryConstants::cacheLineSize;
|
||||
if (!alignedAddress) {
|
||||
addressForReadBuffer++;
|
||||
@ -63,6 +67,7 @@ TEST_P(PerformanceHintEnqueueReadBufferTest, GivenHostPtrAndSizeAlignmentsWhenEn
|
||||
EXPECT_TRUE(containsHint(expectedHint, userData));
|
||||
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS], addressForReadBuffer, sizeForReadBuffer, MemoryConstants::pageSize, MemoryConstants::pageSize);
|
||||
EXPECT_EQ(!(alignedSize && alignedAddress), containsHint(expectedHint, userData));
|
||||
alignedFree(ptr);
|
||||
}
|
||||
|
||||
TEST_P(PerformanceHintEnqueueReadBufferTest, GivenHostPtrAndSizeAlignmentsWhenEnqueueReadBufferRectIsCallingThenContextProvidesHintsAboutAlignments) {
|
||||
@ -99,7 +104,25 @@ TEST_P(PerformanceHintEnqueueReadBufferTest, GivenHostPtrAndSizeAlignmentsWhenEn
|
||||
EXPECT_EQ(!(alignedSize && alignedAddress), containsHint(expectedHint, userData));
|
||||
}
|
||||
|
||||
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingWriteWhenEnqueueWriteBufferIsCallingWithoutCPUCopyThenContextProvidesProperHint) {
|
||||
// Non-blocking write of one cache line from freshly allocated host memory with
// forceDisallowCPUCopy set; the CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA hint
// for `buffer` is expected in userData.
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingWriteAndBufferDoesntShareMemWithCPUWhenEnqueueWriteBufferIsCallingWithoutCPUCopyThenContextProvidesRequiedCopyHint) {
    const auto transferSize = MemoryConstants::cacheLineSize;

    buffer->forceDisallowCPUCopy = true;

    // Host source memory that is unrelated to the buffer under test.
    void *hostSource = alignedMalloc(2 * transferSize, transferSize);

    // NOTE(review): the enqueue is non-blocking and hostSource is freed below
    // without an explicit finish - presumably safe under this fixture; confirm.
    pCmdQ->enqueueWriteBuffer(buffer, CL_FALSE, 0, transferSize, hostSource, 0, nullptr, nullptr);

    // This hint format is given only the buffer handle.
    const auto &requiresCopyFormat = DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA];
    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, requiresCopyFormat, static_cast<cl_mem>(buffer));

    EXPECT_TRUE(containsHint(expectedHint, userData));

    alignedFree(hostSource);
}
|
||||
|
||||
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingWriteAndBufferSharesMemWithCPUWhenEnqueueWriteBufferIsCallingWithoutCPUCopyThenContextProvidesCopyDoenstRequiedHint) {
|
||||
|
||||
buffer->forceDisallowCPUCopy = true;
|
||||
pCmdQ->enqueueWriteBuffer(
|
||||
@ -111,11 +134,29 @@ TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingWriteWhenEnqueueWriteBu
|
||||
0,
|
||||
nullptr,
|
||||
nullptr);
|
||||
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA], static_cast<cl_mem>(buffer));
|
||||
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA], static_cast<cl_mem>(buffer), address);
|
||||
EXPECT_TRUE(containsHint(expectedHint, userData));
|
||||
}
|
||||
|
||||
TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingWriteWhenEnqueueWriteBufferIsCallingWithCPUCopyThenContextProvidesProperHint) {
|
||||
// Blocking write of one cache line from freshly allocated host memory with
// forceDisallowCPUCopy cleared; the CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA
// hint for `buffer` is expected in userData.
TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingWriteAndBufferDoesntShareMemWithCPUWhenEnqueueWriteBufferIsCallingWithCPUCopyThenContextProvidesRequiedCopyHint) {
    const auto lineSize = MemoryConstants::cacheLineSize;

    buffer->forceDisallowCPUCopy = false;

    // Source data lives in its own allocation, not in the fixture's `address`.
    void *srcMemory = alignedMalloc(2 * lineSize, lineSize);

    pCmdQ->enqueueWriteBuffer(buffer, CL_TRUE, 0, lineSize, srcMemory, 0, nullptr, nullptr);

    // Build the expected text; this format is given only the buffer handle.
    snprintf(expectedHint,
             DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA],
             static_cast<cl_mem>(buffer));

    EXPECT_TRUE(containsHint(expectedHint, userData));

    alignedFree(srcMemory);
}
|
||||
|
||||
TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingWriteAndBufferSharesMemWithCPUWhenEnqueueWriteBufferIsCallingWithCPUCopyThenContextProvidesCopyDoenstRequiedHint) {
|
||||
|
||||
buffer->forceDisallowCPUCopy = false;
|
||||
pCmdQ->enqueueWriteBuffer(
|
||||
@ -127,7 +168,75 @@ TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingWriteWhenEnqueueWriteBuffe
|
||||
0,
|
||||
nullptr,
|
||||
nullptr);
|
||||
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA], static_cast<cl_mem>(buffer));
|
||||
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA], static_cast<cl_mem>(buffer), address);
|
||||
EXPECT_TRUE(containsHint(expectedHint, userData));
|
||||
}
|
||||
|
||||
// Non-blocking read of one cache line into freshly allocated host memory with
// forceDisallowCPUCopy set; the CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA hint,
// formatted with the buffer handle and the destination pointer, is expected in
// userData.
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingReadAndBufferDoesntShareMemWithCPUWhenEnqueueReadBufferIsCallingWithoutCPUCopyThenContextProvidesRequiedCopyHint) {
    const auto readSize = MemoryConstants::cacheLineSize;

    buffer->forceDisallowCPUCopy = true;

    // Destination memory that is independent of the buffer under test.
    void *destination = alignedMalloc(2 * readSize, readSize);

    // NOTE(review): the enqueue is non-blocking and destination is freed below
    // without an explicit finish - presumably safe under this fixture; confirm.
    pCmdQ->enqueueReadBuffer(buffer, CL_FALSE, 0, readSize, destination, 0, nullptr, nullptr);

    const auto &requiresCopyFormat = DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA];
    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, requiresCopyFormat, static_cast<cl_mem>(buffer), destination);

    EXPECT_TRUE(containsHint(expectedHint, userData));

    alignedFree(destination);
}
|
||||
|
||||
// Non-blocking read of one cache line into the fixture's `address` with
// forceDisallowCPUCopy set; the CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA
// hint, formatted with the buffer handle and `address`, is expected in userData.
// NOTE(review): per the test name, `address` is presumably memory shared with
// the buffer - confirm against the fixture.
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingReadAndBufferSharesMemWithCPUWhenEnqueueReadBufferIsCallingWithoutCPUCopyThenContextProvidesCopyDoenstRequiedHint) {
    const auto readSize = MemoryConstants::cacheLineSize;

    buffer->forceDisallowCPUCopy = true;

    pCmdQ->enqueueReadBuffer(buffer, CL_FALSE, 0, readSize, address, 0, nullptr, nullptr);

    const auto &noCopyFormat = DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA];
    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, noCopyFormat, static_cast<cl_mem>(buffer), address);

    EXPECT_TRUE(containsHint(expectedHint, userData));
}
|
||||
|
||||
// Blocking read of one cache line into freshly allocated host memory with
// forceDisallowCPUCopy cleared; the CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA
// hint, formatted with the buffer handle and the destination pointer, is
// expected in userData.
TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingReadAndBufferDoesntShareMemWithCPUWhenEnqueueReadBufferIsCallingWithCPUCopyThenContextProvidesRequiedCopyHint) {
    const auto lineSize = MemoryConstants::cacheLineSize;

    buffer->forceDisallowCPUCopy = false;

    // Destination lives in its own allocation, not in the fixture's `address`.
    void *dstMemory = alignedMalloc(2 * lineSize, lineSize);

    pCmdQ->enqueueReadBuffer(buffer, CL_TRUE, 0, lineSize, dstMemory, 0, nullptr, nullptr);

    snprintf(expectedHint,
             DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA],
             static_cast<cl_mem>(buffer),
             dstMemory);

    EXPECT_TRUE(containsHint(expectedHint, userData));

    alignedFree(dstMemory);
}
|
||||
|
||||
// Blocking read of one cache line into the fixture's `address` with
// forceDisallowCPUCopy cleared; the
// CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA hint, formatted with the
// buffer handle and `address`, is expected in userData.
// NOTE(review): per the test name, `address` is presumably memory shared with
// the buffer - confirm against the fixture.
TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingReadAndBufferSharesMemWithCPUWhenEnqueueReadBufferIsCallingWithCPUCopyThenContextProvidesCopyDoenstRequiedHint) {
    const auto lineSize = MemoryConstants::cacheLineSize;

    buffer->forceDisallowCPUCopy = false;

    pCmdQ->enqueueReadBuffer(buffer, CL_TRUE, 0, lineSize, address, 0, nullptr, nullptr);

    snprintf(expectedHint,
             DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA],
             static_cast<cl_mem>(buffer),
             address);

    EXPECT_TRUE(containsHint(expectedHint, userData));
}
|
||||
|
||||
|
Reference in New Issue
Block a user