Hint for map/unmap when queue is blocked
Change-Id: I8b2c79fac77325e26e2ead663177cef09b17e00b
This commit is contained in:
parent
1f35f51b5e
commit
e738e6aa48
|
@ -429,6 +429,9 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||||
|
|
||||||
bool mapDcFlushRequired = false;
|
bool mapDcFlushRequired = false;
|
||||||
bool isSpecialCommandQueue = false;
|
bool isSpecialCommandQueue = false;
|
||||||
|
|
||||||
|
private:
|
||||||
|
void providePerformanceHint(TransferProperties &transferProperties);
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef CommandQueue *(*CommandQueueCreateFunc)(
|
typedef CommandQueue *(*CommandQueueCreateFunc)(
|
||||||
|
|
|
@ -94,22 +94,12 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie
|
||||||
switch (transferProperties.cmdType) {
|
switch (transferProperties.cmdType) {
|
||||||
case CL_COMMAND_MAP_BUFFER:
|
case CL_COMMAND_MAP_BUFFER:
|
||||||
if (!transferProperties.memObj->isMemObjZeroCopy()) {
|
if (!transferProperties.memObj->isMemObjZeroCopy()) {
|
||||||
if (context->isProvidingPerformanceHints()) {
|
|
||||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(transferProperties.memObj));
|
|
||||||
}
|
|
||||||
transferProperties.memObj->transferDataToHostPtr({{transferProperties.memObj->getSize(), 0, 0}}, {{0, 0, 0}});
|
transferProperties.memObj->transferDataToHostPtr({{transferProperties.memObj->getSize(), 0, 0}}, {{0, 0, 0}});
|
||||||
eventCompleted = true;
|
eventCompleted = true;
|
||||||
} else {
|
|
||||||
if (context->isProvidingPerformanceHints()) {
|
|
||||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(transferProperties.memObj));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case CL_COMMAND_MAP_IMAGE:
|
case CL_COMMAND_MAP_IMAGE:
|
||||||
if (!image->isMemObjZeroCopy()) {
|
if (!image->isMemObjZeroCopy()) {
|
||||||
if (context->isProvidingPerformanceHints()) {
|
|
||||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA, static_cast<cl_mem>(transferProperties.memObj));
|
|
||||||
}
|
|
||||||
auto &imgDesc = image->getImageDesc();
|
auto &imgDesc = image->getImageDesc();
|
||||||
std::array<size_t, 3> copySize = {{getValidParam(imgDesc.image_width),
|
std::array<size_t, 3> copySize = {{getValidParam(imgDesc.image_width),
|
||||||
getValidParam(imgDesc.image_height),
|
getValidParam(imgDesc.image_height),
|
||||||
|
@ -119,18 +109,12 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie
|
||||||
GetInfoHelper::set(transferProperties.retRowPitchPtr, image->getHostPtrRowPitch());
|
GetInfoHelper::set(transferProperties.retRowPitchPtr, image->getHostPtrRowPitch());
|
||||||
eventCompleted = true;
|
eventCompleted = true;
|
||||||
} else {
|
} else {
|
||||||
if (context->isProvidingPerformanceHints()) {
|
|
||||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(transferProperties.memObj));
|
|
||||||
}
|
|
||||||
GetInfoHelper::set(transferProperties.retSlicePitchPtr, image->getImageDesc().image_slice_pitch);
|
GetInfoHelper::set(transferProperties.retSlicePitchPtr, image->getImageDesc().image_slice_pitch);
|
||||||
GetInfoHelper::set(transferProperties.retRowPitchPtr, image->getImageDesc().image_row_pitch);
|
GetInfoHelper::set(transferProperties.retRowPitchPtr, image->getImageDesc().image_row_pitch);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case CL_COMMAND_UNMAP_MEM_OBJECT:
|
case CL_COMMAND_UNMAP_MEM_OBJECT:
|
||||||
if (!transferProperties.memObj->isMemObjZeroCopy()) {
|
if (!transferProperties.memObj->isMemObjZeroCopy()) {
|
||||||
if (context->isProvidingPerformanceHints()) {
|
|
||||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA, transferProperties.ptr, static_cast<cl_mem>(transferProperties.memObj));
|
|
||||||
}
|
|
||||||
std::array<size_t, 3> copySize = {{transferProperties.memObj->getSize(), 0, 0}};
|
std::array<size_t, 3> copySize = {{transferProperties.memObj->getSize(), 0, 0}};
|
||||||
if (image) {
|
if (image) {
|
||||||
auto imgDesc = image->getImageDesc();
|
auto imgDesc = image->getImageDesc();
|
||||||
|
@ -140,24 +124,14 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie
|
||||||
}
|
}
|
||||||
transferProperties.memObj->transferDataFromHostPtr(copySize, {{0, 0, 0}});
|
transferProperties.memObj->transferDataFromHostPtr(copySize, {{0, 0, 0}});
|
||||||
eventCompleted = true;
|
eventCompleted = true;
|
||||||
} else {
|
|
||||||
if (context->isProvidingPerformanceHints()) {
|
|
||||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA, transferProperties.ptr);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
transferProperties.memObj->decMapCount();
|
transferProperties.memObj->decMapCount();
|
||||||
break;
|
break;
|
||||||
case CL_COMMAND_READ_BUFFER:
|
case CL_COMMAND_READ_BUFFER:
|
||||||
if (context->isProvidingPerformanceHints()) {
|
|
||||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(transferProperties.memObj), transferProperties.ptr);
|
|
||||||
}
|
|
||||||
memcpy_s(transferProperties.ptr, *transferProperties.sizePtr, ptrOffset(transferProperties.memObj->getCpuAddressForMemoryTransfer(), *transferProperties.offsetPtr), *transferProperties.sizePtr);
|
memcpy_s(transferProperties.ptr, *transferProperties.sizePtr, ptrOffset(transferProperties.memObj->getCpuAddressForMemoryTransfer(), *transferProperties.offsetPtr), *transferProperties.sizePtr);
|
||||||
eventCompleted = true;
|
eventCompleted = true;
|
||||||
break;
|
break;
|
||||||
case CL_COMMAND_WRITE_BUFFER:
|
case CL_COMMAND_WRITE_BUFFER:
|
||||||
if (context->isProvidingPerformanceHints()) {
|
|
||||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(transferProperties.memObj), transferProperties.ptr);
|
|
||||||
}
|
|
||||||
memcpy_s(ptrOffset(transferProperties.memObj->getCpuAddressForMemoryTransfer(), *transferProperties.offsetPtr), *transferProperties.sizePtr, transferProperties.ptr, *transferProperties.sizePtr);
|
memcpy_s(ptrOffset(transferProperties.memObj->getCpuAddressForMemoryTransfer(), *transferProperties.offsetPtr), *transferProperties.sizePtr, transferProperties.ptr, *transferProperties.sizePtr);
|
||||||
eventCompleted = true;
|
eventCompleted = true;
|
||||||
break;
|
break;
|
||||||
|
@ -166,7 +140,6 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie
|
||||||
default:
|
default:
|
||||||
err.set(CL_INVALID_OPERATION);
|
err.set(CL_INVALID_OPERATION);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (eventBuilder.getEvent()) {
|
if (eventBuilder.getEvent()) {
|
||||||
eventBuilder.getEvent()->setEndTimeStamp();
|
eventBuilder.getEvent()->setEndTimeStamp();
|
||||||
eventBuilder.getEvent()->updateTaskCount(this->taskCount);
|
eventBuilder.getEvent()->updateTaskCount(this->taskCount);
|
||||||
|
@ -178,6 +151,10 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (context->isProvidingPerformanceHints()) {
|
||||||
|
providePerformanceHint(transferProperties);
|
||||||
|
}
|
||||||
|
|
||||||
if (transferProperties.cmdType == CL_COMMAND_MAP_BUFFER || transferProperties.cmdType == CL_COMMAND_MAP_IMAGE) {
|
if (transferProperties.cmdType == CL_COMMAND_MAP_BUFFER || transferProperties.cmdType == CL_COMMAND_MAP_IMAGE) {
|
||||||
size_t mapPtrOffset;
|
size_t mapPtrOffset;
|
||||||
if (image) {
|
if (image) {
|
||||||
|
@ -192,4 +169,35 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie
|
||||||
|
|
||||||
return returnPtr; // only map returns pointer
|
return returnPtr; // only map returns pointer
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reports to the context the performance hint that matches a CPU-side data
// transfer command.
//
// Map and unmap commands report a "requires copy" hint (BAD level) when the
// memory object is not zero-copy and a "doesn't require copy" hint (GOOD
// level) otherwise. Read-buffer and write-buffer commands always report the
// "requires copy" hint. Any other command type reports nothing.
//
// This function does not check context->isProvidingPerformanceHints() itself.
void CommandQueue::providePerformanceHint(TransferProperties &transferProperties) {
    auto *memObj = transferProperties.memObj;

    switch (transferProperties.cmdType) {
    case CL_COMMAND_MAP_BUFFER:
        if (memObj->isMemObjZeroCopy()) {
            context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(memObj));
        } else {
            context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(memObj));
        }
        break;

    case CL_COMMAND_MAP_IMAGE:
        if (memObj->isMemObjZeroCopy()) {
            context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(memObj));
        } else {
            context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA, static_cast<cl_mem>(memObj));
        }
        break;

    case CL_COMMAND_UNMAP_MEM_OBJECT:
        // The unmap hints carry the mapped pointer; only the "requires copy"
        // variant also carries the memory object handle.
        if (memObj->isMemObjZeroCopy()) {
            context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA, transferProperties.ptr);
        } else {
            context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA, transferProperties.ptr, static_cast<cl_mem>(memObj));
        }
        break;

    case CL_COMMAND_READ_BUFFER:
        context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(memObj), transferProperties.ptr);
        break;

    case CL_COMMAND_WRITE_BUFFER:
        context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(memObj), transferProperties.ptr);
        break;

    default:
        // No hint is defined here for other command types.
        break;
    }
}
|
||||||
} // namespace OCLRT
|
} // namespace OCLRT
|
||||||
|
|
|
@ -467,6 +467,35 @@ TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueMapBufferIsCal
|
||||||
alignedFree(address);
|
alignedFree(address);
|
||||||
delete buffer;
|
delete buffer;
|
||||||
}
|
}
|
||||||
|
// Maps a USE_HOST_PTR buffer while the queue is blocked by a user event and
// checks that, once the event completes, exactly one of the two map-buffer
// hints was reported: "doesn't require copy" for the zero-copy variant,
// "requires copy" otherwise.
TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagAndBlockingEventWhenEnqueueMapBufferIsCallingThenContextProvidesProperHint) {
    const bool zeroCopyBuffer = GetParam();

    UserEvent userEvent(context);
    cl_event waitListEvent = &userEvent;

    // The non-zero-copy variant uses a size one byte larger than a cache line;
    // the EXPECT_EQ below confirms the buffer really is the requested variant.
    const size_t sizeForBuffer = MemoryConstants::cacheLineSize + (zeroCopyBuffer ? 0u : 1u);

    void *address = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize);
    std::unique_ptr<Buffer> buffer(Buffer::create(context, CL_MEM_USE_HOST_PTR, sizeForBuffer, address, retVal));

    EXPECT_EQ(buffer->isMemObjZeroCopy(), zeroCopyBuffer);

    // The map waits on the user event, so the queue reports itself blocked
    // until the event is set to CL_COMPLETE.
    pCmdQ->enqueueMapBuffer(buffer.get(), CL_FALSE, 0, 0, MemoryConstants::cacheLineSize, 1, &waitListEvent, nullptr, retVal);
    EXPECT_TRUE(pCmdQ->isQueueBlocked());

    userEvent.setStatus(CL_COMPLETE);
    EXPECT_FALSE(pCmdQ->isQueueBlocked());

    const auto bufferHandle = static_cast<cl_mem>(buffer.get());

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA], bufferHandle);
    EXPECT_EQ(zeroCopyBuffer, containsHint(expectedHint, userData));

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA], bufferHandle);
    EXPECT_EQ(!zeroCopyBuffer, containsHint(expectedHint, userData));

    alignedFree(address);
}
|
||||||
|
|
||||||
TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueMapImageIsCallingThenContextProvidesProperHint) {
|
TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueMapImageIsCallingThenContextProvidesProperHint) {
|
||||||
|
|
||||||
|
@ -506,6 +535,44 @@ TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueMapImageIsCall
|
||||||
delete image;
|
delete image;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Maps an image while the queue is blocked by a user event, completes the
// event, unmaps the image, and checks that exactly one of the two map-image
// hints was reported: "doesn't require copy" for the zero-copy variant,
// "requires copy" otherwise.
TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagAndBlockingEventWhenEnqueueMapImageIsCallingThenContextProvidesProperHint) {
    std::unique_ptr<Image> image(ImageHelper<ImageReadOnly<Image1dDefaults>>::create(context));
    const bool isZeroCopyImage = GetParam();

    size_t mapOrigin[] = {0, 0, 0};
    size_t mapRegion[] = {1, 1, 1};

    // The non-zero-copy variant swaps in a USE_HOST_PTR image; the EXPECT_EQ
    // below confirms the image really is the requested variant.
    if (!isZeroCopyImage) {
        image.reset(ImageHelper<ImageUseHostPtr<Image1dDefaults>>::create(context));
    }
    EXPECT_EQ(isZeroCopyImage, image->isMemObjZeroCopy());

    UserEvent userEvent(context);
    cl_event waitListEvent = &userEvent;

    // The map waits on the user event, so the queue reports itself blocked.
    void *mappedPtr = pCmdQ->enqueueMapImage(image.get(), CL_FALSE, 0,
                                             mapOrigin, mapRegion,
                                             nullptr, nullptr,
                                             1, &waitListEvent, nullptr,
                                             retVal);
    EXPECT_TRUE(pCmdQ->isQueueBlocked());

    userEvent.setStatus(CL_COMPLETE);
    pCmdQ->enqueueUnmapMemObject(image.get(), mappedPtr, 0, nullptr, nullptr);

    const auto imageHandle = static_cast<cl_mem>(image.get());

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA], imageHandle);
    EXPECT_EQ(isZeroCopyImage, containsHint(expectedHint, userData));

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA], imageHandle);
    EXPECT_EQ(!isZeroCopyImage, containsHint(expectedHint, userData));
}
|
||||||
|
|
||||||
TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueUnmapIsCallingWithBufferThenContextProvidesProperHint) {
|
TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueUnmapIsCallingWithBufferThenContextProvidesProperHint) {
|
||||||
|
|
||||||
Buffer *buffer;
|
Buffer *buffer;
|
||||||
|
@ -532,6 +599,37 @@ TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueUnmapIsCalling
|
||||||
delete buffer;
|
delete buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Maps a USE_HOST_PTR buffer on a queue blocked by a user event, enqueues the
// unmap while the queue is still blocked, then completes the event and checks
// that exactly one of the two unmap hints was reported: "requires copy" for
// the non-zero-copy variant, "doesn't require copy" otherwise.
TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyAndBlockedEventFlagWhenEnqueueUnmapIsCallingWithBufferThenContextProvidesProperHint) {
    const bool zeroCopyBuffer = GetParam();

    UserEvent userEvent(context);
    cl_event waitListEvent = &userEvent;

    // The non-zero-copy variant uses a size one byte larger than a cache line;
    // the EXPECT_EQ below confirms the buffer really is the requested variant.
    const size_t sizeForBuffer = MemoryConstants::cacheLineSize + (zeroCopyBuffer ? 0u : 1u);

    void *address = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize);
    std::unique_ptr<Buffer> buffer(Buffer::create(context, CL_MEM_USE_HOST_PTR, sizeForBuffer, address, retVal));
    EXPECT_EQ(buffer->isMemObjZeroCopy(), zeroCopyBuffer);

    void *mappedPtr = pCmdQ->enqueueMapBuffer(buffer.get(), CL_FALSE, 0, 0, MemoryConstants::cacheLineSize, 1, &waitListEvent, nullptr, retVal);
    EXPECT_TRUE(pCmdQ->isQueueBlocked());

    // The unmap is enqueued before the user event is completed.
    pCmdQ->enqueueUnmapMemObject(buffer.get(), mappedPtr, 0, nullptr, nullptr);
    userEvent.setStatus(CL_COMPLETE);
    EXPECT_FALSE(pCmdQ->isQueueBlocked());

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA], mappedPtr, static_cast<cl_mem>(buffer.get()));
    EXPECT_EQ(!zeroCopyBuffer, containsHint(expectedHint, userData));

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA], mappedPtr);
    EXPECT_EQ(zeroCopyBuffer, containsHint(expectedHint, userData));

    alignedFree(address);
}
|
||||||
|
|
||||||
TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueUnmapIsCallingWithImageThenContextProvidesProperHint) {
|
TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueUnmapIsCallingWithImageThenContextProvidesProperHint) {
|
||||||
|
|
||||||
Image *image;
|
Image *image;
|
||||||
|
|
Loading…
Reference in New Issue