Map/unmap enqueue fixes [1/n]: Unify Buffer and Image paths

Change-Id: I59bf18072c15367ff6caec5dbdc1350ea2d93281
This commit is contained in:
Dunajski, Bartosz
2018-02-08 22:59:03 +01:00
parent 6bb83fb95a
commit 72b78d15ee
27 changed files with 469 additions and 581 deletions

View File

@@ -20,7 +20,7 @@
set(RUNTIME_SRCS_COMMAND_QUEUE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/cpu_data_transfer_handler.h
${CMAKE_CURRENT_SOURCE_DIR}/cpu_data_transfer_handler.cpp
${CMAKE_CURRENT_SOURCE_DIR}/command_queue.cpp
${CMAKE_CURRENT_SOURCE_DIR}/command_queue.h
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw.h
@@ -38,8 +38,6 @@ set(RUNTIME_SRCS_COMMAND_QUEUE
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_fill_buffer.h
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_fill_image.h
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel.h
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_map_buffer.h
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_map_image.h
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_svm.h
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_marker.h
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_migrate_mem_objects.h

View File

@@ -27,6 +27,7 @@
#include "runtime/device/device.h"
#include "runtime/device_queue/device_queue.h"
#include "runtime/event/event.h"
#include "runtime/event/event_builder.h"
#include "runtime/helpers/aligned_memory.h"
#include "runtime/helpers/array_count.h"
#include "runtime/helpers/get_info.h"
@@ -493,21 +494,16 @@ bool CommandQueue::sendPerfCountersConfig() {
return getPerfCounters()->sendPmRegsCfgCommands(perfConfigurationData, &perfCountersRegsCfgHandle, &perfCountersRegsCfgPending);
}
cl_int CommandQueue::enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) {
cl_int CommandQueue::enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest) {
auto image = castToObject<Image>(memObj);
if (image) {
auto mappedRegion = image->getMappedRegion();
size_t region[] = {mappedRegion[0] ? mappedRegion[0] : 1,
mappedRegion[1] ? mappedRegion[1] : 1,
mappedRegion[2] ? mappedRegion[2] : 1};
auto retVal = enqueueWriteImage(image, CL_FALSE, image->getMappedOrigin(), region, image->getHostPtrRowPitch(), image->getHostPtrSlicePitch(),
mappedPtr, numEventsInWaitList, eventWaitList, event);
auto retVal = enqueueWriteImage(image, CL_FALSE, image->getMappedOrigin(), image->getMappedRegion(), image->getHostPtrRowPitch(), image->getHostPtrSlicePitch(),
mappedPtr, eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, eventsRequest.outEvent);
bool mustCallFinish = true;
if (!(image->getFlags() & CL_MEM_USE_HOST_PTR)) {
mustCallFinish = true;
} else {
mustCallFinish = (CommandQueue::getTaskLevelFromWaitList(this->taskLevel, numEventsInWaitList, eventWaitList) != Event::eventNotReady);
mustCallFinish = (CommandQueue::getTaskLevelFromWaitList(this->taskLevel, eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList) != Event::eventNotReady);
}
if (mustCallFinish) {
finish(true);
@@ -520,10 +516,161 @@ cl_int CommandQueue::enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr,
auto writePtr = ptrOffset(mappedPtr, buffer->getMappedOffset());
return enqueueWriteBuffer(buffer, CL_TRUE, buffer->getMappedOffset(), buffer->getMappedSize(), writePtr,
numEventsInWaitList, eventWaitList, event);
eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, eventsRequest.outEvent);
}
return CL_INVALID_MEM_OBJECT;
}
/**
 * Maps a memory object by enqueueing a read of the requested range into
 * host-side memory and returning a pointer into that memory.
 *
 * The destination is the user's host pointer when the object was created with
 * CL_MEM_USE_HOST_PTR; otherwise a system-memory allocation of the object's
 * size is created on first use and cached on the object.
 *
 * @param transferProperties memObj plus offset/size (buffers: one element each;
 *                           images: three-element origin/region), the blocking
 *                           flag and optional row/slice pitch outputs.
 * @param eventsRequest      wait list and optional output event, forwarded to
 *                           the read enqueue.
 * @param errcodeRet         receives the status of the read enqueue,
 *                           CL_OUT_OF_HOST_MEMORY when the staging allocation
 *                           fails, or CL_INVALID_MEM_OBJECT when memObj is
 *                           neither a Buffer nor an Image.
 * @return pointer to the mapped range, or nullptr on any failure.
 *
 * Mapping state on the object (mapped size/offset, origin/region, map count and
 * mapped pointer) is recorded only when the read was enqueued successfully, so
 * a failed map leaves no stale mapping information behind.
 */
void *CommandQueue::enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet) {
    auto memoryManager = device->getMemoryManager();
    auto memObj = transferProperties.memObj;
    auto offset = transferProperties.offset;
    auto size = transferProperties.size;

    void *baseMapPtr = nullptr;
    if (memObj->getFlags() & CL_MEM_USE_HOST_PTR) {
        baseMapPtr = memObj->getHostPtr();
    } else {
        // Ownership is held only while the cached staging allocation is looked up or created.
        TakeOwnershipWrapper<MemObj> memObjOwnership(*memObj);
        if (!memObj->getAllocatedMappedPtr()) {
            auto memory = memoryManager->allocateSystemMemory(memObj->getSize(), MemoryConstants::pageSize);
            if (!memory) {
                errcodeRet = CL_OUT_OF_HOST_MEMORY;
                return nullptr;
            }
            memObj->setAllocatedMappedPtr(memory);
        }
        baseMapPtr = memObj->getAllocatedMappedPtr();
    }

    void *returnPtr = nullptr;
    if (auto buffer = castToObject<Buffer>(memObj)) {
        returnPtr = ptrOffset(baseMapPtr, *offset);
        errcodeRet = enqueueReadBuffer(buffer, transferProperties.blocking, *offset, *size, returnPtr,
                                       eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, eventsRequest.outEvent);
        if (errcodeRet == CL_SUCCESS) {
            buffer->setMappedSize(*size);
            buffer->setMappedOffset(*offset);
        }
    } else if (auto image = castToObject<Image>(memObj)) {
        size_t slicePitch = image->getHostPtrSlicePitch();
        size_t rowPitch = image->getHostPtrRowPitch();
        GetInfoHelper::set(transferProperties.retSlicePitch, slicePitch);
        GetInfoHelper::set(transferProperties.retRowPitch, rowPitch);

        size_t mapOffset = image->getSurfaceFormatInfo().ImageElementSizeInBytes * offset[0] +
                           rowPitch * offset[1] +
                           slicePitch * offset[2];
        returnPtr = ptrOffset(baseMapPtr, mapOffset);

        // Zero-sized dimensions are promoted to 1 before the region is used and stored.
        size_t mappedRegion[3] = {size[0] ? size[0] : 1,
                                  size[1] ? size[1] : 1,
                                  size[2] ? size[2] : 1};
        errcodeRet = enqueueReadImage(image, transferProperties.blocking, offset, mappedRegion, rowPitch, slicePitch, returnPtr,
                                      eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, eventsRequest.outEvent);
        if (errcodeRet == CL_SUCCESS) {
            image->setMappedOrigin(offset);
            image->setMappedRegion(mappedRegion);
        }
    } else {
        // Neither a Buffer nor an Image: nothing can be read.
        errcodeRet = CL_INVALID_MEM_OBJECT;
        return nullptr;
    }

    if (errcodeRet != CL_SUCCESS) {
        return nullptr;
    }
    memObj->incMapCount();
    memObj->setMappedPtr(returnPtr);
    return returnPtr;
}
/**
 * Common entry point for mapping buffers and images.
 *
 * Dispatches to the CPU transfer handler when the memory object reports that
 * mapping on the CPU is allowed, and to the read-enqueue path otherwise.
 *
 * @return the mapped pointer produced by the selected path.
 */
void *CommandQueue::enqueueMapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet) {
    const bool useCpuPath = transferProperties.memObj->mappingOnCpuAllowed();
    return useCpuPath ? cpuDataTransferHandler(transferProperties, eventsRequest, errcodeRet)
                      : enqueueReadMemObjForMap(transferProperties, eventsRequest, errcodeRet);
}
/**
 * Common unmap entry point working on pre-packed transfer/event descriptors.
 *
 * When CPU mapping is not allowed for the object, the mapped data is written
 * back through enqueueWriteMemObjForUnmap. Otherwise the CPU transfer handler
 * performs the unmap; its returned pointer is discarded and only the status
 * it reports is returned.
 */
cl_int CommandQueue::enqueueUnmapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest) {
    if (!transferProperties.memObj->mappingOnCpuAllowed()) {
        return enqueueWriteMemObjForUnmap(transferProperties.memObj, transferProperties.ptr, eventsRequest);
    }

    cl_int status;
    cpuDataTransferHandler(transferProperties, eventsRequest, status);
    return status;
}
/**
 * Maps a buffer range.
 *
 * Packs the arguments into TransferProperties (command type
 * CL_COMMAND_MAP_BUFFER, no host pointer, no pitch outputs) and EventsRequest
 * and forwards to enqueueMapMemObject. mapFlags is not consulted here.
 *
 * @param errcodeRet receives the status reported by enqueueMapMemObject.
 * @return the mapped pointer returned by enqueueMapMemObject.
 */
void *CommandQueue::enqueueMapBuffer(Buffer *buffer, cl_bool blockingMap,
                                     cl_map_flags mapFlags, size_t offset,
                                     size_t size, cl_uint numEventsInWaitList,
                                     const cl_event *eventWaitList, cl_event *event,
                                     cl_int &errcodeRet) {
    const bool isBlocking = (blockingMap != CL_FALSE);

    // offset/size are passed by address; they live until this call returns.
    TransferProperties mapProperties(buffer, CL_COMMAND_MAP_BUFFER, isBlocking, &offset, &size, nullptr, nullptr, nullptr);
    EventsRequest mapEvents(numEventsInWaitList, eventWaitList, event);

    return enqueueMapMemObject(mapProperties, mapEvents, errcodeRet);
}
/**
 * Maps an image region.
 *
 * Packs origin/region (with constness cast away to fit TransferProperties),
 * the pitch output pointers and the event arguments, then forwards to
 * enqueueMapMemObject with command type CL_COMMAND_MAP_IMAGE. mapFlags is not
 * consulted here.
 *
 * @param errcodeRet receives the status reported by enqueueMapMemObject.
 * @return the mapped pointer returned by enqueueMapMemObject.
 */
void *CommandQueue::enqueueMapImage(Image *image, cl_bool blockingMap,
                                    cl_map_flags mapFlags, const size_t *origin,
                                    const size_t *region, size_t *imageRowPitch,
                                    size_t *imageSlicePitch,
                                    cl_uint numEventsInWaitList,
                                    const cl_event *eventWaitList, cl_event *event,
                                    cl_int &errcodeRet) {
    const bool isBlocking = (blockingMap != CL_FALSE);
    auto mutableOrigin = const_cast<size_t *>(origin);
    auto mutableRegion = const_cast<size_t *>(region);

    TransferProperties mapProperties(image, CL_COMMAND_MAP_IMAGE, isBlocking,
                                     mutableOrigin, mutableRegion, nullptr,
                                     imageRowPitch, imageSlicePitch);
    EventsRequest mapEvents(numEventsInWaitList, eventWaitList, event);

    return enqueueMapMemObject(mapProperties, mapEvents, errcodeRet);
}
/**
 * Unmaps a previously mapped memory object.
 *
 * Packs the arguments into a non-blocking CL_COMMAND_UNMAP_MEM_OBJECT
 * TransferProperties (no offset, size or pitch pointers; mappedPtr as the host
 * pointer) and forwards to the descriptor-based overload.
 */
cl_int CommandQueue::enqueueUnmapMemObject(MemObj *memObj, void *mappedPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) {
    TransferProperties unmapProperties(memObj, CL_COMMAND_UNMAP_MEM_OBJECT, false,
                                       nullptr, nullptr, mappedPtr, nullptr, nullptr);
    EventsRequest unmapEvents(numEventsInWaitList, eventWaitList, event);

    const cl_int status = enqueueUnmapMemObject(unmapProperties, unmapEvents);
    return status;
}
// Records a map/unmap operation as a command attached to an event instead of
// executing it immediately, and makes that event the queue's new virtualEvent.
//
// eventWaitList / numEventsInWaitlist: events added as parents of the new event.
// opType:               operation kind stored in the CommandMapUnmap.
// memObj:               memory object the stored command refers to (dereferenced, must be non-null).
// externalEventBuilder: if it already holds an event, that event is reused and
//                       gets an extra internal reference; otherwise an internal
//                       VirtualEvent is created for the bookkeeping.
void CommandQueue::enqueueBlockedMapUnmapOperation(const cl_event *eventWaitList,
size_t numEventsInWaitlist,
MapOperationType opType,
MemObj *memObj,
EventBuilder &externalEventBuilder) {
auto &commandStreamReceiver = device->getCommandStreamReceiver();
EventBuilder internalEventBuilder;
EventBuilder *eventBuilder;
// check if event will be exposed externally
if (externalEventBuilder.getEvent()) {
// extra internal reference for the externally visible event
externalEventBuilder.getEvent()->incRefInternal();
eventBuilder = &externalEventBuilder;
} else {
// it will be an internal event
internalEventBuilder.create<VirtualEvent>(this, context);
eventBuilder = &internalEventBuilder;
}
//store task data in event
auto cmd = std::unique_ptr<Command>(new CommandMapUnmap(opType, *memObj, commandStreamReceiver, *this));
eventBuilder->getEvent()->setCommand(std::move(cmd));
//bind output event with input events
eventBuilder->addParentEvents(ArrayRef<const cl_event>(eventWaitList, numEventsInWaitlist));
// the queue's current virtual event is added as a parent as well
eventBuilder->addParentEvent(this->virtualEvent);
eventBuilder->finalize();
// the previous virtual event (if any) stops being the queue's current one and
// the queue drops its internal reference to it
if (this->virtualEvent) {
this->virtualEvent->setCurrentCmdQVirtualEvent(false);
this->virtualEvent->decRefInternal();
}
// the newly bound event becomes the queue's virtual event
this->virtualEvent = eventBuilder->getEvent();
}
} // namespace OCLRT

View File

@@ -35,6 +35,7 @@ class Buffer;
class LinearStream;
class Context;
class Device;
class EventBuilder;
class Image;
class IndirectHeap;
class Kernel;
@@ -111,25 +112,20 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
return CL_SUCCESS;
}
// Base-class default for mapping a buffer: performs no mapping, reports
// CL_SUCCESS through errcodeRet and returns a null pointer.
// The return value is spelled nullptr rather than the status macro CL_SUCCESS,
// which only converted to a null pointer because it expands to literal 0.
virtual void *enqueueMapBuffer(Buffer *buffer, cl_bool blockingMap,
                               cl_map_flags mapFlags, size_t offset,
                               size_t size, cl_uint numEventsInWaitList,
                               const cl_event *eventWaitList, cl_event *event,
                               cl_int &errcodeRet) {
    errcodeRet = CL_SUCCESS;
    return nullptr;
}
MOCKABLE_VIRTUAL void *enqueueMapBuffer(Buffer *buffer, cl_bool blockingMap,
cl_map_flags mapFlags, size_t offset,
size_t size, cl_uint numEventsInWaitList,
const cl_event *eventWaitList, cl_event *event,
cl_int &errcodeRet);
// Base-class default for mapping an image: performs no mapping, reports
// CL_SUCCESS through errcodeRet and returns a null pointer.
// The return value is spelled nullptr rather than the status macro CL_SUCCESS,
// which only converted to a null pointer because it expands to literal 0.
virtual void *enqueueMapImage(cl_mem image, cl_bool blockingMap,
                              cl_map_flags mapFlags, const size_t *origin,
                              const size_t *region, size_t *imageRowPitch,
                              size_t *imageSlicePitch,
                              cl_uint numEventsInWaitList,
                              const cl_event *eventWaitList, cl_event *event,
                              cl_int &errcodeRet) {
    errcodeRet = CL_SUCCESS;
    return nullptr;
}
MOCKABLE_VIRTUAL void *enqueueMapImage(Image *image, cl_bool blockingMap,
cl_map_flags mapFlags, const size_t *origin,
const size_t *region, size_t *imageRowPitch,
size_t *imageSlicePitch, cl_uint numEventsInWaitList,
const cl_event *eventWaitList, cl_event *event, cl_int &errcodeRet);
MOCKABLE_VIRTUAL cl_int enqueueUnmapMemObject(MemObj *memObj, void *mappedPtr, cl_uint numEventsInWaitList,
const cl_event *eventWaitList, cl_event *event);
virtual cl_int enqueueSVMMap(cl_bool blockingMap, cl_map_flags mapFlags,
void *svmPtr, size_t size,
@@ -227,13 +223,6 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
return CL_SUCCESS;
}
// Base-class default for unmapping a memory object: ignores all arguments,
// performs no work and returns CL_SUCCESS.
virtual cl_int enqueueUnmapMemObject(MemObj *memObj, void *mappedPtr,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event) {
return CL_SUCCESS;
}
virtual cl_int enqueueWriteBuffer(Buffer *buffer, cl_bool blockingWrite,
size_t offset, size_t cb, const void *ptr,
cl_uint numEventsInWaitList,
@@ -309,6 +298,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
cl_event *oclEvent,
cl_uint cmdType);
void *cpuDataTransferHandler(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &retVal);
// Base-class default: does nothing with dcFlush and returns CL_SUCCESS.
virtual cl_int finish(bool dcFlush) { return CL_SUCCESS; }
// Base-class default: performs no work and returns CL_SUCCESS.
virtual cl_int flush() { return CL_SUCCESS; }
@@ -390,6 +381,12 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
return throttle;
}
void enqueueBlockedMapUnmapOperation(const cl_event *eventWaitList,
size_t numEventsInWaitlist,
MapOperationType opType,
MemObj *memObj,
EventBuilder &externalEventBuilder);
// taskCount of last task
uint32_t taskCount;
@@ -404,7 +401,13 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
Event *virtualEvent;
protected:
cl_int enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event);
void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
cl_int enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest);
void *enqueueMapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
cl_int enqueueUnmapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest);
// Base-class default: empty body, leaves every reference argument untouched.
// CommandQueueHw declares an override of this hook.
virtual void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType){};
Context *context;
Device *device;

View File

@@ -136,26 +136,6 @@ class CommandQueueHw : public CommandQueue {
const cl_event *eventWaitList,
cl_event *event) override;
void *enqueueMapBuffer(Buffer *buffer, cl_bool blockingMap, cl_map_flags mapFlags,
size_t offset, size_t size, cl_uint numEventsInWaitList,
const cl_event *eventWaitList, cl_event *event, cl_int &errcodeRet) override;
void *enqueueMapSharedBuffer(Buffer *buffer, cl_bool blockingMap, cl_map_flags mapFlags,
size_t offset, size_t size, cl_uint numEventsInWaitList,
const cl_event *eventWaitList, cl_event *event, cl_int &errcodeRet);
void *enqueueMapImage(cl_mem image,
cl_bool blockingMap,
cl_map_flags mapFlags,
const size_t *origin,
const size_t *region,
size_t *imageRowPitch,
size_t *imageSlicePitch,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event,
cl_int &errcodeRet) override;
cl_int enqueueSVMMap(cl_bool blockingMap,
cl_map_flags mapFlags,
void *svmPtr,
@@ -249,29 +229,6 @@ class CommandQueueHw : public CommandQueue {
const cl_event *eventWaitList,
cl_event *event) override;
// Unmaps a memory object.
// Objects that allow tiling or have a sharing handler are written back through
// enqueueWriteMemObjForUnmap; every other object goes through the CPU transfer
// handler as a non-blocking CL_COMMAND_UNMAP_MEM_OBJECT with zero offset/size,
// and the status the handler reports is returned.
cl_int enqueueUnmapMemObject(MemObj *memObj,
                             void *mappedPtr,
                             cl_uint numEventsInWaitList,
                             const cl_event *eventWaitList,
                             cl_event *event) override {
    const bool needsWriteBack = memObj->allowTiling() || memObj->peekSharingHandler();
    if (needsWriteBack) {
        return enqueueWriteMemObjForUnmap(memObj, mappedPtr, numEventsInWaitList, eventWaitList, event);
    }

    cl_int status;
    cpuDataTransferHandler(memObj, CL_COMMAND_UNMAP_MEM_OBJECT, CL_FALSE, 0, 0, mappedPtr,
                           numEventsInWaitList, eventWaitList, event, status);
    return status;
}
cl_int enqueueWriteBuffer(Buffer *buffer,
cl_bool blockingWrite,
size_t offset,
@@ -381,29 +338,12 @@ class CommandQueueHw : public CommandQueue {
EventBuilder &externalEventBuilder,
std::unique_ptr<PrintfHandler> printfHandler);
void addMapUnmapToWaitlistEventsDependencies(const cl_event *eventWaitList,
size_t numEventsInWaitlist,
MapOperationType opType,
MemObj *memObj,
EventBuilder &externalEventBuilder);
void *cpuDataTransferHandler(MemObj *memObj,
cl_command_type cmdType,
cl_bool blocking,
size_t offset,
size_t size,
void *ptr,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event,
cl_int &retVal);
protected:
MOCKABLE_VIRTUAL void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo);
private:
bool isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType);
void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType);
void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType) override;
void forceDispatchScheduler(OCLRT::MultiDispatchInfo &multiDispatchInfo);
static void computeOffsetsValueForRectCommands(size_t *bufferOffset,
size_t *hostOffset,

View File

@@ -30,8 +30,6 @@
#include "runtime/command_queue/enqueue_fill_buffer.h"
#include "runtime/command_queue/enqueue_fill_image.h"
#include "runtime/command_queue/enqueue_kernel.h"
#include "runtime/command_queue/enqueue_map_buffer.h"
#include "runtime/command_queue/enqueue_map_image.h"
#include "runtime/command_queue/enqueue_svm.h"
#include "runtime/command_queue/enqueue_marker.h"
#include "runtime/command_queue/enqueue_migrate_mem_objects.h"
@@ -41,6 +39,5 @@
#include "runtime/command_queue/enqueue_write_buffer.h"
#include "runtime/command_queue/enqueue_write_buffer_rect.h"
#include "runtime/command_queue/enqueue_write_image.h"
#include "runtime/command_queue/cpu_data_transfer_handler.h"
#include "runtime/command_queue/finish.h"
#include "runtime/command_queue/flush.h"

View File

@@ -20,77 +20,77 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "runtime/command_queue/command_queue_hw.h"
#include "runtime/command_queue/command_queue.h"
#include "runtime/device/device.h"
#include "runtime/context/context.h"
#include "runtime/event/event_builder.h"
#include "runtime/helpers/get_info.h"
#include "runtime/mem_obj/buffer.h"
#include "runtime/mem_obj/image.h"
namespace OCLRT {
template <typename GfxFamily>
void *CommandQueueHw<GfxFamily>::cpuDataTransferHandler(MemObj *memObj,
cl_command_type cmdType,
cl_bool blocking,
size_t offset,
size_t size,
void *ptr,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event,
cl_int &retVal) {
void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &retVal) {
EventBuilder eventBuilder;
bool eventCompleted = false;
ErrorCodeHelper err(&retVal, CL_SUCCESS);
if (event) {
eventBuilder.create<Event>(this, cmdType, Event::eventNotReady, Event::eventNotReady);
auto memObj = transferProperties.memObj;
auto image = castToObject<Image>(memObj);
auto cmdType = transferProperties.cmdType;
auto size = transferProperties.size;
auto offset = transferProperties.offset;
if (eventsRequest.outEvent) {
eventBuilder.create<Event>(this, transferProperties.cmdType, Event::eventNotReady, Event::eventNotReady);
eventBuilder.getEvent()->setQueueTimeStamp();
eventBuilder.getEvent()->setCPUProfilingPath(true);
*event = eventBuilder.getEvent();
*eventsRequest.outEvent = eventBuilder.getEvent();
}
TakeOwnershipWrapper<Device> deviceOwnership(*device);
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
TakeOwnershipWrapper<CommandQueue> queueOwnership(*this);
auto blockQueue = false;
auto taskLevel = 0u;
obtainTaskLevelAndBlockedStatus(taskLevel, numEventsInWaitList, eventWaitList, blockQueue, cmdType);
obtainTaskLevelAndBlockedStatus(taskLevel, eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, blockQueue, transferProperties.cmdType);
DBG_LOG(LogTaskCounts, __FUNCTION__, "taskLevel", taskLevel);
if (event) {
if (eventsRequest.outEvent) {
eventBuilder.getEvent()->taskLevel = taskLevel;
}
if (blockQueue &&
(cmdType == CL_COMMAND_MAP_BUFFER || cmdType == CL_COMMAND_UNMAP_MEM_OBJECT)) {
(transferProperties.cmdType == CL_COMMAND_MAP_BUFFER ||
transferProperties.cmdType == CL_COMMAND_MAP_IMAGE ||
transferProperties.cmdType == CL_COMMAND_UNMAP_MEM_OBJECT)) {
addMapUnmapToWaitlistEventsDependencies(eventWaitList,
static_cast<size_t>(numEventsInWaitList),
cmdType == CL_COMMAND_MAP_BUFFER ? MAP : UNMAP,
memObj,
eventBuilder);
enqueueBlockedMapUnmapOperation(eventsRequest.eventWaitList,
static_cast<size_t>(eventsRequest.numEventsInWaitList),
transferProperties.cmdType == CL_COMMAND_UNMAP_MEM_OBJECT ? UNMAP : MAP,
transferProperties.memObj,
eventBuilder);
}
queueOwnership.unlock();
deviceOwnership.unlock();
// read/write buffers are always blocking
if (!blockQueue || blocking) {
err.set(Event::waitForEvents(numEventsInWaitList, eventWaitList));
if (!blockQueue || transferProperties.blocking) {
err.set(Event::waitForEvents(eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList));
if (eventBuilder.getEvent()) {
eventBuilder.getEvent()->setSubmitTimeStamp();
}
//wait for the completion of previous commands
if (cmdType != CL_COMMAND_UNMAP_MEM_OBJECT) {
if (!memObj->isMemObjZeroCopy() || blocking) {
if (!memObj->isMemObjZeroCopy() || transferProperties.blocking) {
finish(true);
eventCompleted = true;
}
}
auto bufferStorage = ptrOffset(memObj->getCpuAddressForMemoryTransfer(), offset);
if (eventBuilder.getEvent()) {
eventBuilder.getEvent()->setStartTimeStamp();
}
@@ -108,32 +108,52 @@ void *CommandQueueHw<GfxFamily>::cpuDataTransferHandler(MemObj *memObj,
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(memObj));
}
}
memObj->incMapCount();
break;
case CL_COMMAND_MAP_IMAGE:
if (!image->isMemObjZeroCopy()) {
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA, static_cast<cl_mem>(image));
}
image->transferDataToHostPtr();
GetInfoHelper::set(transferProperties.retSlicePitch, image->getHostPtrSlicePitch());
GetInfoHelper::set(transferProperties.retRowPitch, image->getHostPtrRowPitch());
eventCompleted = true;
} else {
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(image));
}
GetInfoHelper::set(transferProperties.retSlicePitch, image->getImageDesc().image_slice_pitch);
GetInfoHelper::set(transferProperties.retRowPitch, image->getImageDesc().image_row_pitch);
}
image->incMapCount();
break;
case CL_COMMAND_UNMAP_MEM_OBJECT:
if (!memObj->isMemObjZeroCopy()) {
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA, ptr, static_cast<cl_mem>(memObj));
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA, transferProperties.ptr, static_cast<cl_mem>(memObj));
}
memObj->transferDataFromHostPtrToMemoryStorage();
eventCompleted = true;
} else {
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA, ptr);
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA, transferProperties.ptr);
}
}
memObj->decMapCount();
break;
case CL_COMMAND_READ_BUFFER:
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(memObj), ptr);
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(memObj), transferProperties.ptr);
}
memcpy_s(ptr, size, bufferStorage, size);
memcpy_s(transferProperties.ptr, *size, ptrOffset(memObj->getCpuAddressForMemoryTransfer(), *offset), *size);
eventCompleted = true;
break;
case CL_COMMAND_WRITE_BUFFER:
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(memObj), ptr);
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(memObj), transferProperties.ptr);
}
memcpy_s(bufferStorage, size, ptr, size);
memcpy_s(ptrOffset(memObj->getCpuAddressForMemoryTransfer(), *offset), *size, transferProperties.ptr, *size);
eventCompleted = true;
break;
case CL_COMMAND_MARKER:
@@ -154,11 +174,22 @@ void *CommandQueueHw<GfxFamily>::cpuDataTransferHandler(MemObj *memObj,
}
if (cmdType == CL_COMMAND_MAP_BUFFER) {
return memObj->setAndReturnMappedPtr(offset);
return memObj->setAndReturnMappedPtr(*offset);
}
if (cmdType == CL_COMMAND_UNMAP_MEM_OBJECT) {
err.set(ptr == memObj->getMappedPtr() ? CL_SUCCESS : CL_INVALID_VALUE);
if (cmdType == CL_COMMAND_MAP_IMAGE) {
size_t mapOffset =
image->getSurfaceFormatInfo().ImageElementSizeInBytes * offset[0] +
image->getImageDesc().image_row_pitch * offset[1] +
image->getImageDesc().image_slice_pitch * offset[2];
void *ptrToReturn = nullptr;
if (image->isMemObjZeroCopy()) {
ptrToReturn = ptrOffset(image->getCpuAddress(), mapOffset);
} else {
ptrToReturn = ptrOffset(image->getHostPtr(), mapOffset);
}
image->setMappedPtr(ptrToReturn);
return ptrToReturn;
}
return nullptr; // only map returns pointer

View File

@@ -650,41 +650,6 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
this->virtualEvent = eventBuilder->getEvent();
}
// Records a map/unmap operation as a command attached to an event instead of
// executing it immediately, and makes that event the queue's new virtualEvent.
//
// eventWaitList / numEventsInWaitlist: events added as parents of the new event.
// opType:               operation kind stored in the CommandMapUnmap.
// memObj:               memory object the stored command refers to (dereferenced, must be non-null).
// externalEventBuilder: if it already holds an event, that event is reused and
//                       gets an extra internal reference; otherwise an internal
//                       VirtualEvent is created for the bookkeeping.
template <typename GfxFamily>
void CommandQueueHw<GfxFamily>::addMapUnmapToWaitlistEventsDependencies(const cl_event *eventWaitList,
size_t numEventsInWaitlist,
MapOperationType opType,
MemObj *memObj,
EventBuilder &externalEventBuilder) {
auto &commandStreamReceiver = device->getCommandStreamReceiver();
EventBuilder internalEventBuilder;
EventBuilder *eventBuilder;
// check if event will be exposed externally
if (externalEventBuilder.getEvent()) {
// extra internal reference for the externally visible event
externalEventBuilder.getEvent()->incRefInternal();
eventBuilder = &externalEventBuilder;
} else {
// it will be an internal event
internalEventBuilder.create<VirtualEvent>(this, context);
eventBuilder = &internalEventBuilder;
}
//store task data in event
auto cmd = std::unique_ptr<Command>(new CommandMapUnmap(opType, *memObj, commandStreamReceiver, *this));
eventBuilder->getEvent()->setCommand(std::move(cmd));
//bind output event with input events
eventBuilder->addParentEvents(ArrayRef<const cl_event>(eventWaitList, numEventsInWaitlist));
// the queue's current virtual event is added as a parent as well
eventBuilder->addParentEvent(this->virtualEvent);
eventBuilder->finalize();
// the previous virtual event (if any) stops being the queue's current one and
// the queue drops its internal reference to it
if (this->virtualEvent) {
this->virtualEvent->setCurrentCmdQVirtualEvent(false);
this->virtualEvent->decRefInternal();
}
// the newly bound event becomes the queue's virtual event
this->virtualEvent = eventBuilder->getEvent();
}
template <typename GfxFamily>
void CommandQueueHw<GfxFamily>::computeOffsetsValueForRectCommands(size_t *bufferOffset,
size_t *hostOffset,

View File

@@ -1,62 +0,0 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "hw_cmds.h"
#include "runtime/command_queue/command_queue_hw.h"
namespace OCLRT {
/**
 * Maps a buffer range.
 *
 * Buffers with a sharing handler are mapped through enqueueMapSharedBuffer;
 * all other buffers go through the CPU transfer handler as a
 * CL_COMMAND_MAP_BUFFER with no host pointer.
 *
 * @param errcodeRet receives the status reported by the selected path.
 * @return the mapped pointer returned by the selected path.
 */
template <typename GfxFamily>
void *CommandQueueHw<GfxFamily>::enqueueMapBuffer(Buffer *buffer, cl_bool blockingMap, cl_map_flags mapFlags,
                                                  size_t offset, size_t size, cl_uint numEventsInWaitList,
                                                  const cl_event *eventWaitList, cl_event *event, cl_int &errcodeRet) {
    if (buffer->peekSharingHandler()) {
        return enqueueMapSharedBuffer(buffer, blockingMap, mapFlags, offset, size,
                                      numEventsInWaitList, eventWaitList, event, errcodeRet);
    }

    // Buffer is a MemObj, so the ordinary derived-to-base conversion is used
    // instead of a reinterpret_cast, which bypasses the type system.
    MemObj *memObj = buffer;
    return cpuDataTransferHandler(memObj, CL_COMMAND_MAP_BUFFER,
                                  blockingMap, offset, size, nullptr,
                                  numEventsInWaitList, eventWaitList, event, errcodeRet);
}
/**
 * Maps a shared buffer by reading its contents into system memory.
 *
 * When the buffer has no mapped pointer yet, system memory sized to the
 * buffer's graphics allocation is allocated and stored as the buffer's
 * allocated mapped pointer. The requested range is then read into that memory
 * with enqueueReadBuffer.
 *
 * @param errcodeRet receives the status of enqueueReadBuffer.
 * @return pointer to the mapped range on success (map count, mapped size and
 *         mapped offset are updated), nullptr when the read enqueue fails.
 */
template <typename GfxFamily>
void *CommandQueueHw<GfxFamily>::enqueueMapSharedBuffer(Buffer *buffer, cl_bool blockingMap, cl_map_flags mapFlags,
                                                        size_t offset, size_t size, cl_uint numEventsInWaitList,
                                                        const cl_event *eventWaitList, cl_event *event, cl_int &errcodeRet) {
    auto hostMemoryManager = device->getMemoryManager();

    const bool alreadyMapped = buffer->getMappedPtr() ? true : false;
    if (!alreadyMapped) {
        auto stagingMemory = hostMemoryManager->allocateSystemMemory(buffer->getGraphicsAllocation()->getUnderlyingBufferSize(), 0);
        buffer->setAllocatedMappedPtr(stagingMemory);
    }

    auto mappedRange = ptrOffset(buffer->getMappedPtr(), offset);
    errcodeRet = enqueueReadBuffer(buffer, blockingMap, offset, size, mappedRange,
                                   numEventsInWaitList, eventWaitList, event);

    if (errcodeRet == CL_SUCCESS) {
        // Mapping bookkeeping is recorded only after the read was enqueued successfully.
        buffer->incMapCount();
        buffer->setMappedSize(size);
        buffer->setMappedOffset(offset);
        return mappedRange;
    }
    return nullptr;
}
} // namespace OCLRT

View File

@@ -1,169 +0,0 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "hw_cmds.h"
#include "runtime/command_queue/command_queue_hw.h"
#include "runtime/command_stream/command_stream_receiver.h"
#include "runtime/helpers/kernel_commands.h"
#include "runtime/mem_obj/image.h"
#include "runtime/built_ins/built_ins.h"
#include "runtime/helpers/get_info.h"
#include <new>
namespace OCLRT {
// Maps an image for host access and returns a pointer positioned at `origin`.
//
// Two paths exist:
//  1. Images that allow tiling or have a sharing handler are read into a host
//     buffer with enqueueReadImage; the result of that call decides success.
//  2. All other images are mapped on the CPU: the queue is drained with
//     finish(true) when it is not blocked, data is transferred to the host
//     pointer if the image is not zero-copy, and a pointer into the image's
//     CPU address / host pointer is returned.
//
// Parameters:
//  image               - cl_mem handle, cast to Image via castToObject. The result
//                        is dereferenced without a null check.
//                        NOTE(review): presumably validated by the API layer - confirm.
//  blockingMap         - forwarded to enqueueReadImage in path 1; in path 2 it makes
//                        the call wait on the wait list when the queue is blocked.
//  mapFlags            - not read anywhere in this function.
//  origin, region      - indexed as three-element arrays ([0]..[2]).
//  imageRowPitch,
//  imageSlicePitch     - optional outputs receiving the pitches of the mapped data.
//  numEventsInWaitList,
//  eventWaitList       - events this map depends on.
//  event               - optional output; receives the event created for this map
//                        (path 2) or is forwarded to enqueueReadImage (path 1).
//  errcodeRet          - output status. Path 1 stores the enqueueReadImage result;
//                        path 2 always ends with CL_SUCCESS.
//
// Returns the mapped pointer, or nullptr when enqueueReadImage fails in path 1.
template <typename GfxFamily>
void *CommandQueueHw<GfxFamily>::enqueueMapImage(cl_mem image, cl_bool blockingMap,
cl_map_flags mapFlags, const size_t *origin,
const size_t *region, size_t *imageRowPitch,
size_t *imageSlicePitch, cl_uint numEventsInWaitList,
const cl_event *eventWaitList, cl_event *event,
cl_int &errcodeRet) {
auto pImage = castToObject<Image>(image);
void *ptrToReturn = nullptr;
// Report whether this map needs a data copy: zero-copy images get the "good"
// hint, everything else gets the "bad" hint.
if (context->isProvidingPerformanceHints()) {
if (pImage->isMemObjZeroCopy()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA, image);
} else {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA, image);
}
}
// Path 1: tiled or shared images are mapped by reading them into host memory.
if (pImage->allowTiling() || pImage->peekSharingHandler()) {
// Image ownership wrapper scoped to this branch
// (presumably released when it goes out of scope - confirm).
TakeOwnershipWrapper<Image> imageOwnership(*pImage);
// Report the host-pointer pitches to the caller through GetInfoHelper
// (presumably tolerates a null destination pointer - confirm).
size_t slicePitch = pImage->getHostPtrSlicePitch();
GetInfoHelper retSlice(imageSlicePitch, sizeof(size_t), nullptr);
retSlice.set<size_t>(slicePitch);
size_t rowPitch = pImage->getHostPtrRowPitch();
GetInfoHelper retRowPitch(imageRowPitch, sizeof(size_t), nullptr);
retRowPitch.set<size_t>(rowPitch);
auto memoryManager = device->getMemoryManager();
// Region used for the read: any zero component is replaced with 1.
size_t Region[] = {region[0] ? region[0] : 1,
region[1] ? region[1] : 1,
region[2] ? region[2] : 1};
if (pImage->getFlags() & CL_MEM_USE_HOST_PTR) {
// Read straight into the user's host pointer, offset to the requested origin
// using the element size and the image descriptor pitches.
size_t offset =
pImage->getSurfaceFormatInfo().ImageElementSizeInBytes * origin[0] +
pImage->getImageDesc().image_row_pitch * origin[1] +
pImage->getImageDesc().image_slice_pitch * origin[2];
auto mappedPtr = ptrOffset(pImage->getHostPtr(), offset);
pImage->setMappedPtr(mappedPtr);
} else if (!pImage->getAllocatedMappedPtr()) {
// No staging buffer yet: allocate one of the image's size, page aligned.
// NOTE(review): the allocation result is not checked for nullptr.
// NOTE(review): only the *allocated* mapped pointer is set here, while the
// read below uses getMappedPtr() - confirm the two are kept in sync.
auto memory = memoryManager->allocateSystemMemory(pImage->getSize(), MemoryConstants::pageSize);
pImage->setAllocatedMappedPtr(memory);
}
errcodeRet = enqueueReadImage(pImage, blockingMap, origin, Region, rowPitch, slicePitch, pImage->getMappedPtr(),
numEventsInWaitList, eventWaitList, event);
if (errcodeRet == CL_SUCCESS) {
// Record what was mapped. The caller's original `region` is stored, not the
// normalized `Region` used for the read; the C-style casts drop const.
pImage->setMappedOrigin((size_t *)origin);
pImage->setMappedRegion((size_t *)region);
return pImage->getMappedPtr();
} else {
return nullptr;
}
}
// Path 2: CPU-side map.
EventBuilder eventBuilder;
// Device ownership is taken before queue ownership; both are released
// explicitly below (queue first, then device).
TakeOwnershipWrapper<Device> deviceOwnership(*device);
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
auto blockQueue = false;
auto taskLevel = 0u;
// Fills taskLevel and blockQueue (both passed by name as outputs) based on the wait list.
obtainTaskLevelAndBlockedStatus(taskLevel, numEventsInWaitList, eventWaitList, blockQueue, CL_COMMAND_MAP_IMAGE);
if (event) {
// Caller asked for an event: create it in the not-ready state and stamp queue time.
eventBuilder.create<Event>(this, CL_COMMAND_MAP_IMAGE, taskLevel, Event::eventNotReady);
*event = eventBuilder.getEvent();
eventBuilder.getEvent()->setQueueTimeStamp();
}
if (blockQueue) {
// Queue is blocked: register this MAP operation against the wait-list events
// (presumably so it is carried out once they complete - confirm).
addMapUnmapToWaitlistEventsDependencies(eventWaitList,
static_cast<size_t>(numEventsInWaitList),
MAP,
pImage,
eventBuilder);
}
queueOwnership.unlock();
deviceOwnership.unlock();
if (blockingMap && blockQueue) {
// NOTE(review): the status stored here is overwritten with CL_SUCCESS at the
// end of the function, so a failed wait is not reported - confirm intent.
errcodeRet = this->virtualEvent->waitForEvents(numEventsInWaitList, eventWaitList);
}
if (!blockQueue) {
// Not blocked: perform the map now and complete the event (if any) in place.
if (eventBuilder.getEvent()) {
eventBuilder.getEvent()->setSubmitTimeStamp();
}
finish(true);
if (eventBuilder.getEvent()) {
eventBuilder.getEvent()->setStartTimeStamp();
}
// Non-zero-copy images need their contents transferred to the host pointer.
if (!pImage->isMemObjZeroCopy()) {
pImage->transferDataToHostPtr();
}
if (eventBuilder.getEvent()) {
eventBuilder.getEvent()->setStatus(CL_COMPLETE);
eventBuilder.getEvent()->updateTaskCount(this->taskCount);
eventBuilder.getEvent()->setEndTimeStamp();
}
}
// Report pitches: image descriptor pitches for zero-copy images, host-pointer
// pitches otherwise.
if (imageSlicePitch) {
if (pImage->isMemObjZeroCopy()) {
*imageSlicePitch = pImage->getImageDesc().image_slice_pitch;
} else {
*imageSlicePitch = pImage->getHostPtrSlicePitch();
}
}
if (imageRowPitch) {
if (pImage->isMemObjZeroCopy()) {
*imageRowPitch = pImage->getImageDesc().image_row_pitch;
} else {
*imageRowPitch = pImage->getHostPtrRowPitch();
}
}
// Byte offset of `origin`, computed from the image descriptor pitches in both cases.
// NOTE(review): for non-zero-copy images the reported pitches are the host-pointer
// pitches while this offset uses the descriptor pitches - confirm they agree.
size_t offset =
pImage->getSurfaceFormatInfo().ImageElementSizeInBytes * origin[0] +
pImage->getImageDesc().image_row_pitch * origin[1] +
pImage->getImageDesc().image_slice_pitch * origin[2];
// Base address: the image's CPU address when zero-copy, its host pointer otherwise.
if (pImage->isMemObjZeroCopy()) {
ptrToReturn = ptrOffset(pImage->getCpuAddress(), offset);
} else {
ptrToReturn = ptrOffset(pImage->getHostPtr(), offset);
}
// This path always reports success.
errcodeRet = CL_SUCCESS;
pImage->setMappedPtr(ptrToReturn);
return ptrToReturn;
}
} // namespace OCLRT

View File

@@ -51,16 +51,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
buffer->isReadWriteOnCpuAllowed(blockingRead, numEventsInWaitList, ptr, size)) &&
context->getDevice(0)->getDeviceInfo().cpuCopyAllowed) {
if (!isMemTransferNeeded) {
cpuDataTransferHandler(buffer,
CL_COMMAND_MARKER,
CL_TRUE,
offset,
size,
ptr,
numEventsInWaitList,
eventWaitList,
event,
retVal);
TransferProperties transferProperties(buffer, CL_COMMAND_MARKER, true, &offset, &size, ptr, nullptr, nullptr);
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
cpuDataTransferHandler(transferProperties, eventsRequest, retVal);
if (event) {
auto pEvent = castToObjectOrAbort<Event>(*event);
pEvent->setCmdType(CL_COMMAND_READ_BUFFER);
@@ -71,16 +64,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
}
return retVal;
}
cpuDataTransferHandler(buffer,
CL_COMMAND_READ_BUFFER,
CL_TRUE,
offset,
size,
ptr,
numEventsInWaitList,
eventWaitList,
event,
retVal);
TransferProperties transferProperties(buffer, CL_COMMAND_READ_BUFFER, true, &offset, &size, ptr, nullptr, nullptr);
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
cpuDataTransferHandler(transferProperties, eventsRequest, retVal);
return retVal;
}
MultiDispatchInfo dispatchInfo;

View File

@@ -50,16 +50,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
buffer->isReadWriteOnCpuAllowed(blockingWrite, numEventsInWaitList, const_cast<void *>(ptr), size)) &&
context->getDevice(0)->getDeviceInfo().cpuCopyAllowed) {
if (!isMemTransferNeeded) {
cpuDataTransferHandler(buffer,
CL_COMMAND_MARKER,
CL_TRUE,
offset,
size,
const_cast<void *>(ptr),
numEventsInWaitList,
eventWaitList,
event,
retVal);
TransferProperties transferProperties(buffer, CL_COMMAND_MARKER, true, &offset, &size, const_cast<void *>(ptr), nullptr, nullptr);
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
cpuDataTransferHandler(transferProperties, eventsRequest, retVal);
if (event) {
auto pEvent = castToObjectOrAbort<Event>(*event);
pEvent->setCmdType(CL_COMMAND_WRITE_BUFFER);
@@ -70,16 +64,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
}
return retVal;
}
cpuDataTransferHandler(buffer,
CL_COMMAND_WRITE_BUFFER,
CL_TRUE,
offset,
size,
const_cast<void *>(ptr),
numEventsInWaitList,
eventWaitList,
event,
retVal);
TransferProperties transferProperties(buffer, CL_COMMAND_WRITE_BUFFER, true, &offset, &size, const_cast<void *>(ptr), nullptr, nullptr);
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
cpuDataTransferHandler(transferProperties, eventsRequest, retVal);
return retVal;
}
MultiDispatchInfo dispatchInfo;