mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-24 12:23:05 +08:00
Map/unmap enqueue fixes [1/n]: Unify Buffer and Image paths
Change-Id: I59bf18072c15367ff6caec5dbdc1350ea2d93281
This commit is contained in:
@@ -20,7 +20,7 @@
|
||||
|
||||
set(RUNTIME_SRCS_COMMAND_QUEUE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cpu_data_transfer_handler.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cpu_data_transfer_handler.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_queue.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_queue.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw.h
|
||||
@@ -38,8 +38,6 @@ set(RUNTIME_SRCS_COMMAND_QUEUE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_fill_buffer.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_fill_image.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_map_buffer.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_map_image.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_svm.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_marker.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_migrate_mem_objects.h
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#include "runtime/device/device.h"
|
||||
#include "runtime/device_queue/device_queue.h"
|
||||
#include "runtime/event/event.h"
|
||||
#include "runtime/event/event_builder.h"
|
||||
#include "runtime/helpers/aligned_memory.h"
|
||||
#include "runtime/helpers/array_count.h"
|
||||
#include "runtime/helpers/get_info.h"
|
||||
@@ -493,21 +494,16 @@ bool CommandQueue::sendPerfCountersConfig() {
|
||||
return getPerfCounters()->sendPmRegsCfgCommands(perfConfigurationData, &perfCountersRegsCfgHandle, &perfCountersRegsCfgPending);
|
||||
}
|
||||
|
||||
cl_int CommandQueue::enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) {
|
||||
cl_int CommandQueue::enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest) {
|
||||
auto image = castToObject<Image>(memObj);
|
||||
if (image) {
|
||||
auto mappedRegion = image->getMappedRegion();
|
||||
size_t region[] = {mappedRegion[0] ? mappedRegion[0] : 1,
|
||||
mappedRegion[1] ? mappedRegion[1] : 1,
|
||||
mappedRegion[2] ? mappedRegion[2] : 1};
|
||||
|
||||
auto retVal = enqueueWriteImage(image, CL_FALSE, image->getMappedOrigin(), region, image->getHostPtrRowPitch(), image->getHostPtrSlicePitch(),
|
||||
mappedPtr, numEventsInWaitList, eventWaitList, event);
|
||||
auto retVal = enqueueWriteImage(image, CL_FALSE, image->getMappedOrigin(), image->getMappedRegion(), image->getHostPtrRowPitch(), image->getHostPtrSlicePitch(),
|
||||
mappedPtr, eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, eventsRequest.outEvent);
|
||||
bool mustCallFinish = true;
|
||||
if (!(image->getFlags() & CL_MEM_USE_HOST_PTR)) {
|
||||
mustCallFinish = true;
|
||||
} else {
|
||||
mustCallFinish = (CommandQueue::getTaskLevelFromWaitList(this->taskLevel, numEventsInWaitList, eventWaitList) != Event::eventNotReady);
|
||||
mustCallFinish = (CommandQueue::getTaskLevelFromWaitList(this->taskLevel, eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList) != Event::eventNotReady);
|
||||
}
|
||||
if (mustCallFinish) {
|
||||
finish(true);
|
||||
@@ -520,10 +516,161 @@ cl_int CommandQueue::enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr,
|
||||
auto writePtr = ptrOffset(mappedPtr, buffer->getMappedOffset());
|
||||
|
||||
return enqueueWriteBuffer(buffer, CL_TRUE, buffer->getMappedOffset(), buffer->getMappedSize(), writePtr,
|
||||
numEventsInWaitList, eventWaitList, event);
|
||||
eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, eventsRequest.outEvent);
|
||||
}
|
||||
|
||||
return CL_INVALID_MEM_OBJECT;
|
||||
}
|
||||
|
||||
/*
 * Maps a buffer or image by enqueueing a read of the requested range into
 * host-visible memory and returning a pointer to the start of that range.
 *
 * The destination is the object's host pointer when it was created with
 * CL_MEM_USE_HOST_PTR; otherwise a staging allocation covering the whole
 * object is created on first use and cached on the MemObj.
 *
 * transferProperties: memObj, blocking flag, offset/size (one element for a
 *                     buffer, three-element origin/region for an image) and
 *                     the optional row/slice pitch outputs.
 * eventsRequest:      wait list and optional output event, forwarded to the
 *                     read enqueue.
 * errcodeRet:         receives the status of the operation.
 *
 * Returns the mapped pointer on success, nullptr on any failure. Map
 * bookkeeping on the MemObj (mapped range, map count, mapped pointer) is only
 * updated on success.
 */
void *CommandQueue::enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet) {
    auto memoryManager = device->getMemoryManager();

    auto memObj = transferProperties.memObj;
    auto offset = transferProperties.offset;
    auto size = transferProperties.size;
    void *returnPtr = nullptr;
    void *baseMapPtr = nullptr;

    if (memObj->getFlags() & CL_MEM_USE_HOST_PTR) {
        baseMapPtr = memObj->getHostPtr();
    } else {
        // Ownership is held only while the staging allocation is looked up or created.
        TakeOwnershipWrapper<MemObj> memObjOwnership(*memObj);
        if (!memObj->getAllocatedMappedPtr()) {
            auto memory = memoryManager->allocateSystemMemory(memObj->getSize(), MemoryConstants::pageSize);
            if (!memory) {
                // Without a staging allocation there is nowhere to read into.
                errcodeRet = CL_OUT_OF_HOST_MEMORY;
                return nullptr;
            }
            memObj->setAllocatedMappedPtr(memory);
        }
        baseMapPtr = memObj->getAllocatedMappedPtr();
    }

    if (auto buffer = castToObject<Buffer>(memObj)) {
        returnPtr = ptrOffset(baseMapPtr, *offset);
        errcodeRet = enqueueReadBuffer(buffer, transferProperties.blocking, *offset, *size, returnPtr,
                                       eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, eventsRequest.outEvent);

        if (errcodeRet == CL_SUCCESS) {
            buffer->setMappedSize(*size);
            buffer->setMappedOffset(*offset);
        }
    } else if (auto image = castToObject<Image>(memObj)) {
        size_t slicePitch = image->getHostPtrSlicePitch();
        size_t rowPitch = image->getHostPtrRowPitch();

        GetInfoHelper::set(transferProperties.retSlicePitch, slicePitch);
        GetInfoHelper::set(transferProperties.retRowPitch, rowPitch);

        size_t mapOffset = image->getSurfaceFormatInfo().ImageElementSizeInBytes * offset[0] +
                           rowPitch * offset[1] +
                           slicePitch * offset[2];
        returnPtr = ptrOffset(baseMapPtr, mapOffset);

        // A zero extent in any dimension is treated as 1.
        size_t mappedRegion[3] = {size[0] ? size[0] : 1,
                                  size[1] ? size[1] : 1,
                                  size[2] ? size[2] : 1};

        errcodeRet = enqueueReadImage(image, transferProperties.blocking, offset, mappedRegion, rowPitch, slicePitch, returnPtr,
                                      eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, eventsRequest.outEvent);

        if (errcodeRet == CL_SUCCESS) {
            image->setMappedOrigin(const_cast<size_t *>(offset));
            image->setMappedRegion(mappedRegion);
        }
    } else {
        // Neither a Buffer nor an Image: nothing this path can map.
        errcodeRet = CL_INVALID_MEM_OBJECT;
        return nullptr;
    }

    if (errcodeRet != CL_SUCCESS) {
        return nullptr;
    }

    memObj->incMapCount();
    memObj->setMappedPtr(returnPtr);
    return returnPtr;
}
|
||||
|
||||
/*
 * Common entry point for mapping a MemObj. Objects that report
 * mappingOnCpuAllowed() go through cpuDataTransferHandler(); every other
 * object goes through enqueueReadMemObjForMap().
 *
 * Returns whatever the selected path returns; errcodeRet is filled in by
 * that path.
 */
void *CommandQueue::enqueueMapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet) {
    const bool cpuPathAllowed = transferProperties.memObj->mappingOnCpuAllowed();
    if (!cpuPathAllowed) {
        return enqueueReadMemObjForMap(transferProperties, eventsRequest, errcodeRet);
    }
    return cpuDataTransferHandler(transferProperties, eventsRequest, errcodeRet);
}
|
||||
|
||||
/*
 * Common entry point for unmapping a MemObj. Objects that report
 * mappingOnCpuAllowed() are handled by cpuDataTransferHandler(); every other
 * object is written back with enqueueWriteMemObjForUnmap().
 *
 * Returns the status produced by the selected path.
 */
cl_int CommandQueue::enqueueUnmapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest) {
    // On the CPU path the status arrives only through an out-parameter, so
    // start from a defined value instead of returning an indeterminate one.
    cl_int retVal = CL_SUCCESS;
    if (transferProperties.memObj->mappingOnCpuAllowed()) {
        cpuDataTransferHandler(transferProperties, eventsRequest, retVal);
    } else {
        retVal = enqueueWriteMemObjForUnmap(transferProperties.memObj, transferProperties.ptr, eventsRequest);
    }
    return retVal;
}
|
||||
|
||||
/*
 * Buffer flavour of the map call: packs the arguments into
 * TransferProperties / EventsRequest and forwards to enqueueMapMemObject().
 * mapFlags is accepted but not consulted here. The offset and size are
 * passed by address of the local parameters, so the properties object must
 * not outlive this call.
 */
void *CommandQueue::enqueueMapBuffer(Buffer *buffer, cl_bool blockingMap,
                                     cl_map_flags mapFlags, size_t offset,
                                     size_t size, cl_uint numEventsInWaitList,
                                     const cl_event *eventWaitList, cl_event *event,
                                     cl_int &errcodeRet) {
    const bool isBlocking = (blockingMap != CL_FALSE);

    TransferProperties mapProperties(buffer, CL_COMMAND_MAP_BUFFER, isBlocking,
                                     &offset, &size, nullptr, nullptr, nullptr);
    EventsRequest mapEvents(numEventsInWaitList, eventWaitList, event);

    return enqueueMapMemObject(mapProperties, mapEvents, errcodeRet);
}
|
||||
|
||||
/*
 * Image flavour of the map call: packs origin/region and the pitch output
 * pointers into TransferProperties and forwards to enqueueMapMemObject().
 * mapFlags is accepted but not consulted here. Constness of origin/region
 * is cast away when they are handed to TransferProperties.
 */
void *CommandQueue::enqueueMapImage(Image *image, cl_bool blockingMap,
                                    cl_map_flags mapFlags, const size_t *origin,
                                    const size_t *region, size_t *imageRowPitch,
                                    size_t *imageSlicePitch,
                                    cl_uint numEventsInWaitList,
                                    const cl_event *eventWaitList, cl_event *event,
                                    cl_int &errcodeRet) {
    const bool isBlocking = (blockingMap != CL_FALSE);
    size_t *mutableOrigin = const_cast<size_t *>(origin);
    size_t *mutableRegion = const_cast<size_t *>(region);

    TransferProperties mapProperties(image, CL_COMMAND_MAP_IMAGE, isBlocking,
                                     mutableOrigin, mutableRegion, nullptr,
                                     imageRowPitch, imageSlicePitch);
    EventsRequest mapEvents(numEventsInWaitList, eventWaitList, event);

    return enqueueMapMemObject(mapProperties, mapEvents, errcodeRet);
}
|
||||
|
||||
/*
 * Unmap call taking raw API-style arguments: wraps them in
 * TransferProperties (non-blocking, no offset/size, mappedPtr as the
 * transfer pointer) and an EventsRequest, then forwards to the
 * TransferProperties-based overload.
 */
cl_int CommandQueue::enqueueUnmapMemObject(MemObj *memObj, void *mappedPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) {
    TransferProperties unmapProperties(memObj, CL_COMMAND_UNMAP_MEM_OBJECT, false,
                                       nullptr, nullptr, mappedPtr, nullptr, nullptr);
    EventsRequest unmapEvents(numEventsInWaitList, eventWaitList, event);

    const cl_int status = enqueueUnmapMemObject(unmapProperties, unmapEvents);
    return status;
}
|
||||
|
||||
void CommandQueue::enqueueBlockedMapUnmapOperation(const cl_event *eventWaitList,
|
||||
size_t numEventsInWaitlist,
|
||||
MapOperationType opType,
|
||||
MemObj *memObj,
|
||||
EventBuilder &externalEventBuilder) {
|
||||
auto &commandStreamReceiver = device->getCommandStreamReceiver();
|
||||
|
||||
EventBuilder internalEventBuilder;
|
||||
EventBuilder *eventBuilder;
|
||||
// check if event will be exposed externally
|
||||
if (externalEventBuilder.getEvent()) {
|
||||
externalEventBuilder.getEvent()->incRefInternal();
|
||||
eventBuilder = &externalEventBuilder;
|
||||
} else {
|
||||
// it will be an internal event
|
||||
internalEventBuilder.create<VirtualEvent>(this, context);
|
||||
eventBuilder = &internalEventBuilder;
|
||||
}
|
||||
|
||||
//store task data in event
|
||||
auto cmd = std::unique_ptr<Command>(new CommandMapUnmap(opType, *memObj, commandStreamReceiver, *this));
|
||||
eventBuilder->getEvent()->setCommand(std::move(cmd));
|
||||
|
||||
//bind output event with input events
|
||||
eventBuilder->addParentEvents(ArrayRef<const cl_event>(eventWaitList, numEventsInWaitlist));
|
||||
eventBuilder->addParentEvent(this->virtualEvent);
|
||||
eventBuilder->finalize();
|
||||
|
||||
if (this->virtualEvent) {
|
||||
this->virtualEvent->setCurrentCmdQVirtualEvent(false);
|
||||
this->virtualEvent->decRefInternal();
|
||||
}
|
||||
this->virtualEvent = eventBuilder->getEvent();
|
||||
}
|
||||
|
||||
} // namespace OCLRT
|
||||
|
||||
@@ -35,6 +35,7 @@ class Buffer;
|
||||
class LinearStream;
|
||||
class Context;
|
||||
class Device;
|
||||
class EventBuilder;
|
||||
class Image;
|
||||
class IndirectHeap;
|
||||
class Kernel;
|
||||
@@ -111,25 +112,20 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
// Base-class placeholder for mapping a buffer: reports CL_SUCCESS through
// errcodeRet and returns a null pointer. Meant to be overridden.
virtual void *enqueueMapBuffer(Buffer *buffer, cl_bool blockingMap,
                               cl_map_flags mapFlags, size_t offset,
                               size_t size, cl_uint numEventsInWaitList,
                               const cl_event *eventWaitList, cl_event *event,
                               cl_int &errcodeRet) {
    errcodeRet = CL_SUCCESS;
    // The function returns a pointer, so spell the null result as nullptr
    // rather than reusing the integer status macro.
    return nullptr;
}
|
||||
MOCKABLE_VIRTUAL void *enqueueMapBuffer(Buffer *buffer, cl_bool blockingMap,
|
||||
cl_map_flags mapFlags, size_t offset,
|
||||
size_t size, cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList, cl_event *event,
|
||||
cl_int &errcodeRet);
|
||||
|
||||
// Base-class placeholder for mapping an image: reports CL_SUCCESS through
// errcodeRet and returns a null pointer. Meant to be overridden.
virtual void *enqueueMapImage(cl_mem image, cl_bool blockingMap,
                              cl_map_flags mapFlags, const size_t *origin,
                              const size_t *region, size_t *imageRowPitch,
                              size_t *imageSlicePitch,
                              cl_uint numEventsInWaitList,
                              const cl_event *eventWaitList, cl_event *event,
                              cl_int &errcodeRet) {
    errcodeRet = CL_SUCCESS;
    // The function returns a pointer, so spell the null result as nullptr
    // rather than reusing the integer status macro.
    return nullptr;
}
|
||||
MOCKABLE_VIRTUAL void *enqueueMapImage(Image *image, cl_bool blockingMap,
|
||||
cl_map_flags mapFlags, const size_t *origin,
|
||||
const size_t *region, size_t *imageRowPitch,
|
||||
size_t *imageSlicePitch, cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList, cl_event *event, cl_int &errcodeRet);
|
||||
|
||||
MOCKABLE_VIRTUAL cl_int enqueueUnmapMemObject(MemObj *memObj, void *mappedPtr, cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList, cl_event *event);
|
||||
|
||||
virtual cl_int enqueueSVMMap(cl_bool blockingMap, cl_map_flags mapFlags,
|
||||
void *svmPtr, size_t size,
|
||||
@@ -227,13 +223,6 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
// Base-class placeholder for unmapping a MemObj: performs no work and
// reports CL_SUCCESS. Meant to be overridden.
virtual cl_int enqueueUnmapMemObject(MemObj *memObj, void *mappedPtr, cl_uint numEventsInWaitList,
                                     const cl_event *eventWaitList, cl_event *event) {
    return CL_SUCCESS;
}
|
||||
|
||||
virtual cl_int enqueueWriteBuffer(Buffer *buffer, cl_bool blockingWrite,
|
||||
size_t offset, size_t cb, const void *ptr,
|
||||
cl_uint numEventsInWaitList,
|
||||
@@ -309,6 +298,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
cl_event *oclEvent,
|
||||
cl_uint cmdType);
|
||||
|
||||
void *cpuDataTransferHandler(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &retVal);
|
||||
|
||||
virtual cl_int finish(bool dcFlush) { return CL_SUCCESS; }
|
||||
|
||||
virtual cl_int flush() { return CL_SUCCESS; }
|
||||
@@ -390,6 +381,12 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
return throttle;
|
||||
}
|
||||
|
||||
void enqueueBlockedMapUnmapOperation(const cl_event *eventWaitList,
|
||||
size_t numEventsInWaitlist,
|
||||
MapOperationType opType,
|
||||
MemObj *memObj,
|
||||
EventBuilder &externalEventBuilder);
|
||||
|
||||
// taskCount of last task
|
||||
uint32_t taskCount;
|
||||
|
||||
@@ -404,7 +401,13 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
Event *virtualEvent;
|
||||
|
||||
protected:
|
||||
cl_int enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event);
|
||||
void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
|
||||
cl_int enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest);
|
||||
|
||||
void *enqueueMapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
|
||||
cl_int enqueueUnmapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest);
|
||||
|
||||
virtual void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType){};
|
||||
|
||||
Context *context;
|
||||
Device *device;
|
||||
|
||||
@@ -136,26 +136,6 @@ class CommandQueueHw : public CommandQueue {
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override;
|
||||
|
||||
void *enqueueMapBuffer(Buffer *buffer, cl_bool blockingMap, cl_map_flags mapFlags,
|
||||
size_t offset, size_t size, cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList, cl_event *event, cl_int &errcodeRet) override;
|
||||
|
||||
void *enqueueMapSharedBuffer(Buffer *buffer, cl_bool blockingMap, cl_map_flags mapFlags,
|
||||
size_t offset, size_t size, cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList, cl_event *event, cl_int &errcodeRet);
|
||||
|
||||
void *enqueueMapImage(cl_mem image,
|
||||
cl_bool blockingMap,
|
||||
cl_map_flags mapFlags,
|
||||
const size_t *origin,
|
||||
const size_t *region,
|
||||
size_t *imageRowPitch,
|
||||
size_t *imageSlicePitch,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event,
|
||||
cl_int &errcodeRet) override;
|
||||
|
||||
cl_int enqueueSVMMap(cl_bool blockingMap,
|
||||
cl_map_flags mapFlags,
|
||||
void *svmPtr,
|
||||
@@ -249,29 +229,6 @@ class CommandQueueHw : public CommandQueue {
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) override;
|
||||
|
||||
// Unmaps a MemObj. Objects that allow tiling or have a sharing handler are
// written back via enqueueWriteMemObjForUnmap(); all others are handled by
// cpuDataTransferHandler() as a non-blocking CL_COMMAND_UNMAP_MEM_OBJECT
// with zero offset and size.
cl_int enqueueUnmapMemObject(MemObj *memObj,
                             void *mappedPtr,
                             cl_uint numEventsInWaitList,
                             const cl_event *eventWaitList,
                             cl_event *event) override {
    const bool needsWriteBack = memObj->allowTiling() || memObj->peekSharingHandler();
    if (needsWriteBack) {
        return enqueueWriteMemObjForUnmap(memObj, mappedPtr, numEventsInWaitList, eventWaitList, event);
    }

    // The handler reports its status through the trailing out-parameter.
    cl_int retVal;
    cpuDataTransferHandler(memObj, CL_COMMAND_UNMAP_MEM_OBJECT, CL_FALSE, 0, 0, mappedPtr,
                           numEventsInWaitList, eventWaitList, event, retVal);
    return retVal;
}
|
||||
|
||||
cl_int enqueueWriteBuffer(Buffer *buffer,
|
||||
cl_bool blockingWrite,
|
||||
size_t offset,
|
||||
@@ -381,29 +338,12 @@ class CommandQueueHw : public CommandQueue {
|
||||
EventBuilder &externalEventBuilder,
|
||||
std::unique_ptr<PrintfHandler> printfHandler);
|
||||
|
||||
void addMapUnmapToWaitlistEventsDependencies(const cl_event *eventWaitList,
|
||||
size_t numEventsInWaitlist,
|
||||
MapOperationType opType,
|
||||
MemObj *memObj,
|
||||
EventBuilder &externalEventBuilder);
|
||||
|
||||
void *cpuDataTransferHandler(MemObj *memObj,
|
||||
cl_command_type cmdType,
|
||||
cl_bool blocking,
|
||||
size_t offset,
|
||||
size_t size,
|
||||
void *ptr,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event,
|
||||
cl_int &retVal);
|
||||
|
||||
protected:
|
||||
MOCKABLE_VIRTUAL void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo);
|
||||
|
||||
private:
|
||||
bool isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType);
|
||||
void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType);
|
||||
void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType) override;
|
||||
void forceDispatchScheduler(OCLRT::MultiDispatchInfo &multiDispatchInfo);
|
||||
static void computeOffsetsValueForRectCommands(size_t *bufferOffset,
|
||||
size_t *hostOffset,
|
||||
|
||||
@@ -30,8 +30,6 @@
|
||||
#include "runtime/command_queue/enqueue_fill_buffer.h"
|
||||
#include "runtime/command_queue/enqueue_fill_image.h"
|
||||
#include "runtime/command_queue/enqueue_kernel.h"
|
||||
#include "runtime/command_queue/enqueue_map_buffer.h"
|
||||
#include "runtime/command_queue/enqueue_map_image.h"
|
||||
#include "runtime/command_queue/enqueue_svm.h"
|
||||
#include "runtime/command_queue/enqueue_marker.h"
|
||||
#include "runtime/command_queue/enqueue_migrate_mem_objects.h"
|
||||
@@ -41,6 +39,5 @@
|
||||
#include "runtime/command_queue/enqueue_write_buffer.h"
|
||||
#include "runtime/command_queue/enqueue_write_buffer_rect.h"
|
||||
#include "runtime/command_queue/enqueue_write_image.h"
|
||||
#include "runtime/command_queue/cpu_data_transfer_handler.h"
|
||||
#include "runtime/command_queue/finish.h"
|
||||
#include "runtime/command_queue/flush.h"
|
||||
|
||||
@@ -20,77 +20,77 @@
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "runtime/command_queue/command_queue_hw.h"
|
||||
#include "runtime/command_queue/command_queue.h"
|
||||
#include "runtime/device/device.h"
|
||||
#include "runtime/context/context.h"
|
||||
#include "runtime/event/event_builder.h"
|
||||
#include "runtime/helpers/get_info.h"
|
||||
#include "runtime/mem_obj/buffer.h"
|
||||
#include "runtime/mem_obj/image.h"
|
||||
|
||||
namespace OCLRT {
|
||||
template <typename GfxFamily>
|
||||
void *CommandQueueHw<GfxFamily>::cpuDataTransferHandler(MemObj *memObj,
|
||||
cl_command_type cmdType,
|
||||
cl_bool blocking,
|
||||
size_t offset,
|
||||
size_t size,
|
||||
void *ptr,
|
||||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event,
|
||||
cl_int &retVal) {
|
||||
void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &retVal) {
|
||||
|
||||
EventBuilder eventBuilder;
|
||||
bool eventCompleted = false;
|
||||
ErrorCodeHelper err(&retVal, CL_SUCCESS);
|
||||
|
||||
if (event) {
|
||||
eventBuilder.create<Event>(this, cmdType, Event::eventNotReady, Event::eventNotReady);
|
||||
auto memObj = transferProperties.memObj;
|
||||
auto image = castToObject<Image>(memObj);
|
||||
auto cmdType = transferProperties.cmdType;
|
||||
auto size = transferProperties.size;
|
||||
auto offset = transferProperties.offset;
|
||||
|
||||
if (eventsRequest.outEvent) {
|
||||
eventBuilder.create<Event>(this, transferProperties.cmdType, Event::eventNotReady, Event::eventNotReady);
|
||||
eventBuilder.getEvent()->setQueueTimeStamp();
|
||||
eventBuilder.getEvent()->setCPUProfilingPath(true);
|
||||
*event = eventBuilder.getEvent();
|
||||
*eventsRequest.outEvent = eventBuilder.getEvent();
|
||||
}
|
||||
|
||||
TakeOwnershipWrapper<Device> deviceOwnership(*device);
|
||||
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
|
||||
TakeOwnershipWrapper<CommandQueue> queueOwnership(*this);
|
||||
|
||||
auto blockQueue = false;
|
||||
auto taskLevel = 0u;
|
||||
obtainTaskLevelAndBlockedStatus(taskLevel, numEventsInWaitList, eventWaitList, blockQueue, cmdType);
|
||||
obtainTaskLevelAndBlockedStatus(taskLevel, eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, blockQueue, transferProperties.cmdType);
|
||||
|
||||
DBG_LOG(LogTaskCounts, __FUNCTION__, "taskLevel", taskLevel);
|
||||
|
||||
if (event) {
|
||||
if (eventsRequest.outEvent) {
|
||||
eventBuilder.getEvent()->taskLevel = taskLevel;
|
||||
}
|
||||
|
||||
if (blockQueue &&
|
||||
(cmdType == CL_COMMAND_MAP_BUFFER || cmdType == CL_COMMAND_UNMAP_MEM_OBJECT)) {
|
||||
(transferProperties.cmdType == CL_COMMAND_MAP_BUFFER ||
|
||||
transferProperties.cmdType == CL_COMMAND_MAP_IMAGE ||
|
||||
transferProperties.cmdType == CL_COMMAND_UNMAP_MEM_OBJECT)) {
|
||||
|
||||
addMapUnmapToWaitlistEventsDependencies(eventWaitList,
|
||||
static_cast<size_t>(numEventsInWaitList),
|
||||
cmdType == CL_COMMAND_MAP_BUFFER ? MAP : UNMAP,
|
||||
memObj,
|
||||
eventBuilder);
|
||||
enqueueBlockedMapUnmapOperation(eventsRequest.eventWaitList,
|
||||
static_cast<size_t>(eventsRequest.numEventsInWaitList),
|
||||
transferProperties.cmdType == CL_COMMAND_UNMAP_MEM_OBJECT ? UNMAP : MAP,
|
||||
transferProperties.memObj,
|
||||
eventBuilder);
|
||||
}
|
||||
|
||||
queueOwnership.unlock();
|
||||
deviceOwnership.unlock();
|
||||
|
||||
// read/write buffers are always blocking
|
||||
if (!blockQueue || blocking) {
|
||||
err.set(Event::waitForEvents(numEventsInWaitList, eventWaitList));
|
||||
if (!blockQueue || transferProperties.blocking) {
|
||||
err.set(Event::waitForEvents(eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList));
|
||||
|
||||
if (eventBuilder.getEvent()) {
|
||||
eventBuilder.getEvent()->setSubmitTimeStamp();
|
||||
}
|
||||
//wait for the completness of previous commands
|
||||
if (cmdType != CL_COMMAND_UNMAP_MEM_OBJECT) {
|
||||
if (!memObj->isMemObjZeroCopy() || blocking) {
|
||||
if (!memObj->isMemObjZeroCopy() || transferProperties.blocking) {
|
||||
finish(true);
|
||||
eventCompleted = true;
|
||||
}
|
||||
}
|
||||
|
||||
auto bufferStorage = ptrOffset(memObj->getCpuAddressForMemoryTransfer(), offset);
|
||||
|
||||
if (eventBuilder.getEvent()) {
|
||||
eventBuilder.getEvent()->setStartTimeStamp();
|
||||
}
|
||||
@@ -108,32 +108,52 @@ void *CommandQueueHw<GfxFamily>::cpuDataTransferHandler(MemObj *memObj,
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(memObj));
|
||||
}
|
||||
}
|
||||
memObj->incMapCount();
|
||||
break;
|
||||
case CL_COMMAND_MAP_IMAGE:
|
||||
if (!image->isMemObjZeroCopy()) {
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA, static_cast<cl_mem>(image));
|
||||
}
|
||||
image->transferDataToHostPtr();
|
||||
GetInfoHelper::set(transferProperties.retSlicePitch, image->getHostPtrSlicePitch());
|
||||
GetInfoHelper::set(transferProperties.retRowPitch, image->getHostPtrRowPitch());
|
||||
eventCompleted = true;
|
||||
} else {
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(image));
|
||||
}
|
||||
GetInfoHelper::set(transferProperties.retSlicePitch, image->getImageDesc().image_slice_pitch);
|
||||
GetInfoHelper::set(transferProperties.retRowPitch, image->getImageDesc().image_row_pitch);
|
||||
}
|
||||
image->incMapCount();
|
||||
break;
|
||||
case CL_COMMAND_UNMAP_MEM_OBJECT:
|
||||
if (!memObj->isMemObjZeroCopy()) {
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA, ptr, static_cast<cl_mem>(memObj));
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA, transferProperties.ptr, static_cast<cl_mem>(memObj));
|
||||
}
|
||||
memObj->transferDataFromHostPtrToMemoryStorage();
|
||||
eventCompleted = true;
|
||||
} else {
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA, ptr);
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA, transferProperties.ptr);
|
||||
}
|
||||
}
|
||||
memObj->decMapCount();
|
||||
break;
|
||||
case CL_COMMAND_READ_BUFFER:
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(memObj), ptr);
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(memObj), transferProperties.ptr);
|
||||
}
|
||||
memcpy_s(ptr, size, bufferStorage, size);
|
||||
memcpy_s(transferProperties.ptr, *size, ptrOffset(memObj->getCpuAddressForMemoryTransfer(), *offset), *size);
|
||||
eventCompleted = true;
|
||||
break;
|
||||
case CL_COMMAND_WRITE_BUFFER:
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(memObj), ptr);
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(memObj), transferProperties.ptr);
|
||||
}
|
||||
memcpy_s(bufferStorage, size, ptr, size);
|
||||
memcpy_s(ptrOffset(memObj->getCpuAddressForMemoryTransfer(), *offset), *size, transferProperties.ptr, *size);
|
||||
eventCompleted = true;
|
||||
break;
|
||||
case CL_COMMAND_MARKER:
|
||||
@@ -154,11 +174,22 @@ void *CommandQueueHw<GfxFamily>::cpuDataTransferHandler(MemObj *memObj,
|
||||
}
|
||||
|
||||
if (cmdType == CL_COMMAND_MAP_BUFFER) {
|
||||
return memObj->setAndReturnMappedPtr(offset);
|
||||
return memObj->setAndReturnMappedPtr(*offset);
|
||||
}
|
||||
|
||||
if (cmdType == CL_COMMAND_UNMAP_MEM_OBJECT) {
|
||||
err.set(ptr == memObj->getMappedPtr() ? CL_SUCCESS : CL_INVALID_VALUE);
|
||||
if (cmdType == CL_COMMAND_MAP_IMAGE) {
|
||||
size_t mapOffset =
|
||||
image->getSurfaceFormatInfo().ImageElementSizeInBytes * offset[0] +
|
||||
image->getImageDesc().image_row_pitch * offset[1] +
|
||||
image->getImageDesc().image_slice_pitch * offset[2];
|
||||
void *ptrToReturn = nullptr;
|
||||
if (image->isMemObjZeroCopy()) {
|
||||
ptrToReturn = ptrOffset(image->getCpuAddress(), mapOffset);
|
||||
} else {
|
||||
ptrToReturn = ptrOffset(image->getHostPtr(), mapOffset);
|
||||
}
|
||||
image->setMappedPtr(ptrToReturn);
|
||||
return ptrToReturn;
|
||||
}
|
||||
|
||||
return nullptr; // only map returns pointer
|
||||
@@ -650,41 +650,6 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
|
||||
this->virtualEvent = eventBuilder->getEvent();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandQueueHw<GfxFamily>::addMapUnmapToWaitlistEventsDependencies(const cl_event *eventWaitList,
|
||||
size_t numEventsInWaitlist,
|
||||
MapOperationType opType,
|
||||
MemObj *memObj,
|
||||
EventBuilder &externalEventBuilder) {
|
||||
auto &commandStreamReceiver = device->getCommandStreamReceiver();
|
||||
|
||||
EventBuilder internalEventBuilder;
|
||||
EventBuilder *eventBuilder;
|
||||
// check if event will be exposed externally
|
||||
if (externalEventBuilder.getEvent()) {
|
||||
externalEventBuilder.getEvent()->incRefInternal();
|
||||
eventBuilder = &externalEventBuilder;
|
||||
} else {
|
||||
// it will be an internal event
|
||||
internalEventBuilder.create<VirtualEvent>(this, context);
|
||||
eventBuilder = &internalEventBuilder;
|
||||
}
|
||||
|
||||
//store task data in event
|
||||
auto cmd = std::unique_ptr<Command>(new CommandMapUnmap(opType, *memObj, commandStreamReceiver, *this));
|
||||
eventBuilder->getEvent()->setCommand(std::move(cmd));
|
||||
|
||||
//bind output event with input events
|
||||
eventBuilder->addParentEvents(ArrayRef<const cl_event>(eventWaitList, numEventsInWaitlist));
|
||||
eventBuilder->addParentEvent(this->virtualEvent);
|
||||
eventBuilder->finalize();
|
||||
|
||||
if (this->virtualEvent) {
|
||||
this->virtualEvent->setCurrentCmdQVirtualEvent(false);
|
||||
this->virtualEvent->decRefInternal();
|
||||
}
|
||||
this->virtualEvent = eventBuilder->getEvent();
|
||||
}
|
||||
template <typename GfxFamily>
|
||||
void CommandQueueHw<GfxFamily>::computeOffsetsValueForRectCommands(size_t *bufferOffset,
|
||||
size_t *hostOffset,
|
||||
|
||||
@@ -1,62 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "hw_cmds.h"
|
||||
#include "runtime/command_queue/command_queue_hw.h"
|
||||
|
||||
namespace OCLRT {
|
||||
template <typename GfxFamily>
|
||||
void *CommandQueueHw<GfxFamily>::enqueueMapBuffer(Buffer *buffer, cl_bool blockingMap, cl_map_flags mapFlags,
|
||||
size_t offset, size_t size, cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList, cl_event *event, cl_int &errcodeRet) {
|
||||
if (buffer->peekSharingHandler()) {
|
||||
return enqueueMapSharedBuffer(buffer, blockingMap, mapFlags, offset, size,
|
||||
numEventsInWaitList, eventWaitList, event, errcodeRet);
|
||||
}
|
||||
return cpuDataTransferHandler(reinterpret_cast<MemObj *>(buffer), CL_COMMAND_MAP_BUFFER,
|
||||
blockingMap, offset, size, nullptr,
|
||||
numEventsInWaitList, eventWaitList, event, errcodeRet);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void *CommandQueueHw<GfxFamily>::enqueueMapSharedBuffer(Buffer *buffer, cl_bool blockingMap, cl_map_flags mapFlags,
|
||||
size_t offset, size_t size, cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList, cl_event *event, cl_int &errcodeRet) {
|
||||
auto memoryManager = device->getMemoryManager();
|
||||
if (!buffer->getMappedPtr()) {
|
||||
auto memory = memoryManager->allocateSystemMemory(buffer->getGraphicsAllocation()->getUnderlyingBufferSize(), 0);
|
||||
buffer->setAllocatedMappedPtr(memory);
|
||||
}
|
||||
|
||||
auto returnPtr = ptrOffset(buffer->getMappedPtr(), offset);
|
||||
errcodeRet = enqueueReadBuffer(buffer, blockingMap, offset, size, returnPtr,
|
||||
numEventsInWaitList, eventWaitList, event);
|
||||
if (errcodeRet != CL_SUCCESS) {
|
||||
return nullptr;
|
||||
}
|
||||
buffer->incMapCount();
|
||||
buffer->setMappedSize(size);
|
||||
buffer->setMappedOffset(offset);
|
||||
return returnPtr;
|
||||
}
|
||||
} // namespace OCLRT
|
||||
@@ -1,169 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "hw_cmds.h"
|
||||
#include "runtime/command_queue/command_queue_hw.h"
|
||||
#include "runtime/command_stream/command_stream_receiver.h"
|
||||
#include "runtime/helpers/kernel_commands.h"
|
||||
#include "runtime/mem_obj/image.h"
|
||||
#include "runtime/built_ins/built_ins.h"
|
||||
#include "runtime/helpers/get_info.h"
|
||||
#include <new>
|
||||
|
||||
namespace OCLRT {
|
||||
|
||||
template <typename GfxFamily>
|
||||
void *CommandQueueHw<GfxFamily>::enqueueMapImage(cl_mem image, cl_bool blockingMap,
|
||||
cl_map_flags mapFlags, const size_t *origin,
|
||||
const size_t *region, size_t *imageRowPitch,
|
||||
size_t *imageSlicePitch, cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList, cl_event *event,
|
||||
cl_int &errcodeRet) {
|
||||
auto pImage = castToObject<Image>(image);
|
||||
void *ptrToReturn = nullptr;
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
if (pImage->isMemObjZeroCopy()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA, image);
|
||||
} else {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA, image);
|
||||
}
|
||||
}
|
||||
|
||||
if (pImage->allowTiling() || pImage->peekSharingHandler()) {
|
||||
TakeOwnershipWrapper<Image> imageOwnership(*pImage);
|
||||
size_t slicePitch = pImage->getHostPtrSlicePitch();
|
||||
GetInfoHelper retSlice(imageSlicePitch, sizeof(size_t), nullptr);
|
||||
retSlice.set<size_t>(slicePitch);
|
||||
|
||||
size_t rowPitch = pImage->getHostPtrRowPitch();
|
||||
GetInfoHelper retRowPitch(imageRowPitch, sizeof(size_t), nullptr);
|
||||
retRowPitch.set<size_t>(rowPitch);
|
||||
|
||||
auto memoryManager = device->getMemoryManager();
|
||||
|
||||
size_t Region[] = {region[0] ? region[0] : 1,
|
||||
region[1] ? region[1] : 1,
|
||||
region[2] ? region[2] : 1};
|
||||
|
||||
if (pImage->getFlags() & CL_MEM_USE_HOST_PTR) {
|
||||
size_t offset =
|
||||
pImage->getSurfaceFormatInfo().ImageElementSizeInBytes * origin[0] +
|
||||
pImage->getImageDesc().image_row_pitch * origin[1] +
|
||||
pImage->getImageDesc().image_slice_pitch * origin[2];
|
||||
auto mappedPtr = ptrOffset(pImage->getHostPtr(), offset);
|
||||
pImage->setMappedPtr(mappedPtr);
|
||||
} else if (!pImage->getAllocatedMappedPtr()) {
|
||||
auto memory = memoryManager->allocateSystemMemory(pImage->getSize(), MemoryConstants::pageSize);
|
||||
pImage->setAllocatedMappedPtr(memory);
|
||||
}
|
||||
|
||||
errcodeRet = enqueueReadImage(pImage, blockingMap, origin, Region, rowPitch, slicePitch, pImage->getMappedPtr(),
|
||||
numEventsInWaitList, eventWaitList, event);
|
||||
|
||||
if (errcodeRet == CL_SUCCESS) {
|
||||
pImage->setMappedOrigin((size_t *)origin);
|
||||
pImage->setMappedRegion((size_t *)region);
|
||||
return pImage->getMappedPtr();
|
||||
} else {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
EventBuilder eventBuilder;
|
||||
TakeOwnershipWrapper<Device> deviceOwnership(*device);
|
||||
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
|
||||
auto blockQueue = false;
|
||||
auto taskLevel = 0u;
|
||||
obtainTaskLevelAndBlockedStatus(taskLevel, numEventsInWaitList, eventWaitList, blockQueue, CL_COMMAND_MAP_IMAGE);
|
||||
|
||||
if (event) {
|
||||
eventBuilder.create<Event>(this, CL_COMMAND_MAP_IMAGE, taskLevel, Event::eventNotReady);
|
||||
*event = eventBuilder.getEvent();
|
||||
eventBuilder.getEvent()->setQueueTimeStamp();
|
||||
}
|
||||
|
||||
if (blockQueue) {
|
||||
addMapUnmapToWaitlistEventsDependencies(eventWaitList,
|
||||
static_cast<size_t>(numEventsInWaitList),
|
||||
MAP,
|
||||
pImage,
|
||||
eventBuilder);
|
||||
}
|
||||
|
||||
queueOwnership.unlock();
|
||||
deviceOwnership.unlock();
|
||||
|
||||
if (blockingMap && blockQueue) {
|
||||
errcodeRet = this->virtualEvent->waitForEvents(numEventsInWaitList, eventWaitList);
|
||||
}
|
||||
|
||||
if (!blockQueue) {
|
||||
if (eventBuilder.getEvent()) {
|
||||
eventBuilder.getEvent()->setSubmitTimeStamp();
|
||||
}
|
||||
|
||||
finish(true);
|
||||
if (eventBuilder.getEvent()) {
|
||||
eventBuilder.getEvent()->setStartTimeStamp();
|
||||
}
|
||||
|
||||
if (!pImage->isMemObjZeroCopy()) {
|
||||
pImage->transferDataToHostPtr();
|
||||
}
|
||||
if (eventBuilder.getEvent()) {
|
||||
eventBuilder.getEvent()->setStatus(CL_COMPLETE);
|
||||
eventBuilder.getEvent()->updateTaskCount(this->taskCount);
|
||||
eventBuilder.getEvent()->setEndTimeStamp();
|
||||
}
|
||||
}
|
||||
|
||||
if (imageSlicePitch) {
|
||||
if (pImage->isMemObjZeroCopy()) {
|
||||
*imageSlicePitch = pImage->getImageDesc().image_slice_pitch;
|
||||
} else {
|
||||
*imageSlicePitch = pImage->getHostPtrSlicePitch();
|
||||
}
|
||||
}
|
||||
|
||||
if (imageRowPitch) {
|
||||
if (pImage->isMemObjZeroCopy()) {
|
||||
*imageRowPitch = pImage->getImageDesc().image_row_pitch;
|
||||
} else {
|
||||
*imageRowPitch = pImage->getHostPtrRowPitch();
|
||||
}
|
||||
}
|
||||
|
||||
size_t offset =
|
||||
pImage->getSurfaceFormatInfo().ImageElementSizeInBytes * origin[0] +
|
||||
pImage->getImageDesc().image_row_pitch * origin[1] +
|
||||
pImage->getImageDesc().image_slice_pitch * origin[2];
|
||||
if (pImage->isMemObjZeroCopy()) {
|
||||
ptrToReturn = ptrOffset(pImage->getCpuAddress(), offset);
|
||||
} else {
|
||||
ptrToReturn = ptrOffset(pImage->getHostPtr(), offset);
|
||||
}
|
||||
errcodeRet = CL_SUCCESS;
|
||||
pImage->setMappedPtr(ptrToReturn);
|
||||
return ptrToReturn;
|
||||
}
|
||||
} // namespace OCLRT
|
||||
@@ -51,16 +51,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
||||
buffer->isReadWriteOnCpuAllowed(blockingRead, numEventsInWaitList, ptr, size)) &&
|
||||
context->getDevice(0)->getDeviceInfo().cpuCopyAllowed) {
|
||||
if (!isMemTransferNeeded) {
|
||||
cpuDataTransferHandler(buffer,
|
||||
CL_COMMAND_MARKER,
|
||||
CL_TRUE,
|
||||
offset,
|
||||
size,
|
||||
ptr,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event,
|
||||
retVal);
|
||||
TransferProperties transferProperties(buffer, CL_COMMAND_MARKER, true, &offset, &size, ptr, nullptr, nullptr);
|
||||
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
|
||||
cpuDataTransferHandler(transferProperties, eventsRequest, retVal);
|
||||
if (event) {
|
||||
auto pEvent = castToObjectOrAbort<Event>(*event);
|
||||
pEvent->setCmdType(CL_COMMAND_READ_BUFFER);
|
||||
@@ -71,16 +64,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
||||
}
|
||||
return retVal;
|
||||
}
|
||||
cpuDataTransferHandler(buffer,
|
||||
CL_COMMAND_READ_BUFFER,
|
||||
CL_TRUE,
|
||||
offset,
|
||||
size,
|
||||
ptr,
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event,
|
||||
retVal);
|
||||
TransferProperties transferProperties(buffer, CL_COMMAND_READ_BUFFER, true, &offset, &size, ptr, nullptr, nullptr);
|
||||
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
|
||||
cpuDataTransferHandler(transferProperties, eventsRequest, retVal);
|
||||
|
||||
return retVal;
|
||||
}
|
||||
MultiDispatchInfo dispatchInfo;
|
||||
|
||||
@@ -50,16 +50,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
|
||||
buffer->isReadWriteOnCpuAllowed(blockingWrite, numEventsInWaitList, const_cast<void *>(ptr), size)) &&
|
||||
context->getDevice(0)->getDeviceInfo().cpuCopyAllowed) {
|
||||
if (!isMemTransferNeeded) {
|
||||
cpuDataTransferHandler(buffer,
|
||||
CL_COMMAND_MARKER,
|
||||
CL_TRUE,
|
||||
offset,
|
||||
size,
|
||||
const_cast<void *>(ptr),
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event,
|
||||
retVal);
|
||||
TransferProperties transferProperties(buffer, CL_COMMAND_MARKER, true, &offset, &size, const_cast<void *>(ptr), nullptr, nullptr);
|
||||
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
|
||||
cpuDataTransferHandler(transferProperties, eventsRequest, retVal);
|
||||
|
||||
if (event) {
|
||||
auto pEvent = castToObjectOrAbort<Event>(*event);
|
||||
pEvent->setCmdType(CL_COMMAND_WRITE_BUFFER);
|
||||
@@ -70,16 +64,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
|
||||
}
|
||||
return retVal;
|
||||
}
|
||||
cpuDataTransferHandler(buffer,
|
||||
CL_COMMAND_WRITE_BUFFER,
|
||||
CL_TRUE,
|
||||
offset,
|
||||
size,
|
||||
const_cast<void *>(ptr),
|
||||
numEventsInWaitList,
|
||||
eventWaitList,
|
||||
event,
|
||||
retVal);
|
||||
TransferProperties transferProperties(buffer, CL_COMMAND_WRITE_BUFFER, true, &offset, &size, const_cast<void *>(ptr), nullptr, nullptr);
|
||||
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
|
||||
cpuDataTransferHandler(transferProperties, eventsRequest, retVal);
|
||||
|
||||
return retVal;
|
||||
}
|
||||
MultiDispatchInfo dispatchInfo;
|
||||
|
||||
Reference in New Issue
Block a user