/*
 * Copyright (c) 2018, Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
|
|
|
|
#include "runtime/command_queue/command_queue.h"
|
|
#include "runtime/command_queue/command_queue_hw.h"
|
|
#include "runtime/command_stream/command_stream_receiver.h"
|
|
#include "runtime/context/context.h"
|
|
#include "runtime/device/device.h"
|
|
#include "runtime/device_queue/device_queue.h"
|
|
#include "runtime/event/event.h"
|
|
#include "runtime/event/event_builder.h"
|
|
#include "runtime/helpers/aligned_memory.h"
|
|
#include "runtime/helpers/array_count.h"
|
|
#include "runtime/helpers/get_info.h"
|
|
#include "runtime/helpers/options.h"
|
|
#include "runtime/helpers/ptr_math.h"
|
|
#include "runtime/mem_obj/buffer.h"
|
|
#include "runtime/mem_obj/image.h"
|
|
#include "runtime/helpers/surface_formats.h"
|
|
#include "runtime/memory_manager/memory_manager.h"
|
|
#include "runtime/helpers/string.h"
|
|
#include "CL/cl_ext.h"
|
|
#include "runtime/utilities/api_intercept.h"
|
|
#include "runtime/helpers/convert_color.h"
|
|
#include "runtime/helpers/queue_helpers.h"
|
|
#include <map>
|
|
|
|
namespace OCLRT {
|
|
|
|
// Global table of create functions
|
|
// One create function per render core family: CommandQueue::create() indexes
// this table with device->getRenderCoreFamily(). The table is value-initialized
// here; entries are presumably registered elsewhere (not visible in this file).
CommandQueueCreateFunc commandQueueFactory[IGFX_MAX_CORE] = {};
|
|
|
|
// Builds a command queue by dispatching to the create function registered in
// commandQueueFactory for the device's render core family.
//   context/device/properties - forwarded unchanged to the create function
//   retVal                    - always set to CL_SUCCESS by this function
// Returns whatever the selected create function returns.
CommandQueue *CommandQueue::create(Context *context,
                                   Device *device,
                                   const cl_queue_properties *properties,
                                   cl_int &retVal) {
    retVal = CL_SUCCESS;

    const auto coreFamily = device->getRenderCoreFamily();
    auto createQueue = commandQueueFactory[coreFamily];
    // An empty table slot is only reported through the debug-break macro.
    DEBUG_BREAK_IF(createQueue == nullptr);

    return createQueue(context, device, properties);
}
|
|
|
|
// Default constructor: delegates to the main constructor with no context,
// no device and no queue properties (nullptr rather than a literal 0 for the
// properties pointer).
CommandQueue::CommandQueue() : CommandQueue(nullptr, nullptr, nullptr) {
}
|
|
|
|
// Main constructor.
//   context    - owning context, may be null; when present an internal
//                reference is taken on it
//   deviceId   - device this queue targets, may be null
//   properties - queue property list, translated by getCmdQueueProperties
// The queue starts with zeroed task count/level, MEDIUM priority and throttle,
// performance counters disabled, no command stream and no indirect heaps.
CommandQueue::CommandQueue(Context *context,
                           Device *deviceId,
                           const cl_queue_properties *properties)
    : taskCount(0),
      taskLevel(0),
      virtualEvent(nullptr),
      context(context),
      device(deviceId),
      priority(QueuePriority::MEDIUM),
      throttle(QueueThrottle::MEDIUM),
      perfCountersEnabled(false),
      perfCountersConfig(UINT32_MAX),
      perfCountersUserRegistersNumber(0),
      perfConfigurationData(nullptr),
      perfCountersRegsCfgHandle(0),
      perfCountersRegsCfgPending(false),
      commandStream(nullptr) {
    if (context != nullptr) {
        context->incRefInternal();
    }

    // Heaps are created on demand by getIndirectHeap().
    for (auto &heapSlot : indirectHeap) {
        heapSlot = nullptr;
    }

    commandQueueProperties = getCmdQueueProperties<cl_command_queue_properties>(properties);
    flushStamp.reset(new FlushStampTracker(true));
}
|
|
|
|
// Destructor. In order:
//   1. detaches and releases the queue's virtual event, if any;
//   2. when a device is attached, hands the command stream allocation and all
//      indirect heap allocations to the memory manager as reusable
//      allocations, deletes the stream/heap objects, frees the perf counter
//      configuration and shuts the counters down if they were enabled;
//   3. drops the internal context reference unless this is the special queue.
CommandQueue::~CommandQueue() {
    if (virtualEvent != nullptr) {
        // The virtual event must belong to this queue or to no queue at all.
        const auto eventQueue = this->virtualEvent->getCommandQueue();
        UNRECOVERABLE_IF(eventQueue != this && eventQueue != nullptr);
        virtualEvent->setCurrentCmdQVirtualEvent(false);
        virtualEvent->decRefInternal();
    }

    if (device != nullptr) {
        auto memoryManager = device->getMemoryManager();
        DEBUG_BREAK_IF(memoryManager == nullptr);

        if (commandStream != nullptr) {
            if (auto streamAllocation = commandStream->getGraphicsAllocation()) {
                memoryManager->storeAllocation(std::unique_ptr<GraphicsAllocation>(streamAllocation), REUSABLE_ALLOCATION);
                commandStream->replaceGraphicsAllocation(nullptr);
            }
        }
        delete commandStream;

        for (auto *heap : indirectHeap) {
            if (heap == nullptr) {
                continue;
            }
            if (auto heapAllocation = heap->getGraphicsAllocation()) {
                memoryManager->storeAllocation(std::unique_ptr<GraphicsAllocation>(heapAllocation), REUSABLE_ALLOCATION);
            }
            delete heap;
        }

        // delete on a null pointer is a no-op, so no guard is needed.
        delete perfConfigurationData;

        if (this->perfCountersEnabled) {
            device->getPerformanceCounters()->shutdown();
        }
    }

    // A normal queue holds an internal reference on its context; the special
    // queue is owned by the context, so there is nothing to release for it.
    if (context != nullptr && !isSpecialCommandQueue) {
        context->decRefInternal();
    }
}
|
|
|
|
uint32_t CommandQueue::getHwTag() const {
|
|
uint32_t tag = *getHwTagAddress();
|
|
return tag;
|
|
}
|
|
|
|
volatile uint32_t *CommandQueue::getHwTagAddress() const {
|
|
DEBUG_BREAK_IF(!this->device);
|
|
auto &commandStreamReceiver = device->getCommandStreamReceiver();
|
|
auto tag_address = commandStreamReceiver.getTagAddress();
|
|
commandStreamReceiver.makeCoherent((void *)tag_address, sizeof(tag_address));
|
|
return tag_address;
|
|
}
|
|
|
|
bool CommandQueue::isCompleted(uint32_t taskCount) const {
|
|
uint32_t tag = getHwTag();
|
|
DEBUG_BREAK_IF(tag == Event::eventNotReady);
|
|
return tag >= taskCount;
|
|
}
|
|
|
|
// Waits, through the command stream receiver, for taskCountToWait (with
// flushStampToWait passed along for the KMD-notify fallback) and then records
// it in latestTaskCountWaited.
void CommandQueue::waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait) {
    WAIT_ENTER()

    DBG_LOG(LogTaskCounts, __FUNCTION__, "Waiting for taskCount:", taskCountToWait);
    DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "Current taskCount:", getHwTag());

    auto &csr = device->getCommandStreamReceiver();
    csr.waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait);

    // After the wait the hardware tag is expected to have caught up.
    DEBUG_BREAK_IF(getHwTag() < taskCountToWait);
    latestTaskCountWaited = taskCountToWait;
    WAIT_LEAVE()
}
|
|
|
|
bool CommandQueue::isQueueBlocked() {
|
|
TakeOwnershipWrapper<CommandQueue> takeOwnershipWrapper(*this);
|
|
//check if we have user event and if so, if it is in blocked state.
|
|
if (this->virtualEvent) {
|
|
if (this->virtualEvent->peekExecutionStatus() <= CL_COMPLETE) {
|
|
UNRECOVERABLE_IF(this->virtualEvent == nullptr);
|
|
|
|
if (this->virtualEvent->isStatusCompletedByTermination() == false) {
|
|
taskCount = this->virtualEvent->peekTaskCount();
|
|
flushStamp->setStamp(this->virtualEvent->flushStamp->peekStamp());
|
|
taskLevel = this->virtualEvent->taskLevel;
|
|
// If this isn't an OOQ, update the taskLevel for the queue
|
|
if (!isOOQEnabled()) {
|
|
taskLevel++;
|
|
}
|
|
} else {
|
|
//at this point we may reset queue TaskCount, since all command previous to this were aborted
|
|
taskCount = 0;
|
|
flushStamp->setStamp(0);
|
|
taskLevel = getDevice().getCommandStreamReceiver().peekTaskLevel();
|
|
}
|
|
|
|
DebugManager.log(DebugManager.flags.EventsDebugEnable.get(), "isQueueBlocked taskLevel change from", taskLevel, "to new from virtualEvent", this->virtualEvent, "new tasklevel", this->virtualEvent->taskLevel.load());
|
|
|
|
//close the access to virtual event, driver added only 1 ref count.
|
|
this->virtualEvent->decRefInternal();
|
|
this->virtualEvent = nullptr;
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Answers a queue info query by forwarding every argument, together with this
// queue, to getQueueInfo<CommandQueue>; its result is returned unchanged.
cl_int CommandQueue::getCommandQueueInfo(cl_command_queue_info paramName,
                                         size_t paramValueSize,
                                         void *paramValue,
                                         size_t *paramValueSizeRet) {
    const cl_int status = getQueueInfo<CommandQueue>(this, paramName, paramValueSize, paramValue, paramValueSizeRet);
    return status;
}
|
|
|
|
uint32_t CommandQueue::getTaskLevelFromWaitList(uint32_t taskLevel,
|
|
cl_uint numEventsInWaitList,
|
|
const cl_event *eventWaitList) {
|
|
for (auto iEvent = 0u; iEvent < numEventsInWaitList; ++iEvent) {
|
|
auto pEvent = (Event *)(eventWaitList[iEvent]);
|
|
uint32_t eventTaskLevel = pEvent->taskLevel;
|
|
taskLevel = std::max(taskLevel, eventTaskLevel);
|
|
}
|
|
return taskLevel;
|
|
}
|
|
|
|
size_t CommandQueue::getInstructionHeapReservedBlockSize() const {
|
|
return alignUp(device->getCommandStreamReceiver().getInstructionHeapCmdStreamReceiverReservedSize(),
|
|
MemoryConstants::cacheLineSize);
|
|
}
|
|
|
|
// Returns the indirect heap of the given type with a usable backing
// allocation, creating or re-backing it when needed.
//   heapType        - which heap slot to use
//   minRequiredSize - space the caller needs; if the current heap has less
//                     available, its allocation is handed back to the memory
//                     manager as reusable and a new one is obtained
// A new backing allocation is first looked for among reusable allocations and
// only then freshly allocated. The INSTRUCTION heap additionally gets the
// command stream receiver's reserved block initialized and is cache-line
// aligned afterwards.
IndirectHeap &CommandQueue::getIndirectHeap(IndirectHeap::Type heapType,
                                            size_t minRequiredSize) {
    DEBUG_BREAK_IF(static_cast<uint32_t>(heapType) >= ARRAY_COUNT(indirectHeap));
    auto &heap = indirectHeap[heapType];

    DEBUG_BREAK_IF(nullptr == device);
    auto memoryManager = device->getMemoryManager();
    DEBUG_BREAK_IF(nullptr == memoryManager);

    GraphicsAllocation *heapMemory = heap ? heap->getGraphicsAllocation() : nullptr;

    // Too little room left: give the current allocation back for reuse.
    if (heapMemory && heap->getAvailableSpace() < minRequiredSize) {
        memoryManager->storeAllocation(std::unique_ptr<GraphicsAllocation>(heapMemory), REUSABLE_ALLOCATION);
        heapMemory = nullptr;
    }

    if (heapMemory) {
        return *heap;
    }

    const bool isInstructionHeap = (heapType == IndirectHeap::INSTRUCTION);

    size_t reservedSize = 0;
    auto finalHeapSize = defaultHeapSize;
    if (isInstructionHeap) {
        finalHeapSize = optimalInstructionHeapSize;
        reservedSize = getInstructionHeapReservedBlockSize();
    }

    minRequiredSize += reservedSize;
    finalHeapSize = alignUp(std::max(finalHeapSize, minRequiredSize), MemoryConstants::pageSize);

    heapMemory = memoryManager->obtainReusableAllocation(finalHeapSize).release();
    if (heapMemory) {
        // A reused allocation may be larger than what was asked for.
        finalHeapSize = std::max(heapMemory->getUnderlyingBufferSize(), finalHeapSize);
    } else {
        // NOTE(review): the result is used below without a null check - confirm
        // allocateGraphicsMemory cannot fail here.
        heapMemory = memoryManager->allocateGraphicsMemory(finalHeapSize, MemoryConstants::pageSize);
    }

    // The surface state heap size is always set to maxSshSize.
    if (IndirectHeap::SURFACE_STATE == heapType) {
        DEBUG_BREAK_IF(minRequiredSize > maxSshSize);
        finalHeapSize = maxSshSize;
    }

    if (heap) {
        heap->replaceBuffer(heapMemory->getUnderlyingBuffer(), finalHeapSize);
        heap->replaceGraphicsAllocation(heapMemory);
    } else {
        heap = new IndirectHeap(heapMemory);
        heap->overrideMaxSize(finalHeapSize);
    }

    if (isInstructionHeap) {
        device->getCommandStreamReceiver().initializeInstructionHeapCmdStreamReceiverReservedBlock(*heap);
        heap->align(MemoryConstants::cacheLineSize);
    }

    return *heap;
}
|
|
|
|
// Detaches the backing allocation from the heap of the given type: the
// allocation (if any) is stored with the memory manager as reusable and the
// heap is left with a null buffer of size 0. The heap object itself is kept.
// Does nothing when the heap has not been created.
void CommandQueue::releaseIndirectHeap(IndirectHeap::Type heapType) {
    DEBUG_BREAK_IF(static_cast<uint32_t>(heapType) >= ARRAY_COUNT(indirectHeap));
    auto &heap = indirectHeap[heapType];

    DEBUG_BREAK_IF(nullptr == device);
    auto memoryManager = device->getMemoryManager();
    DEBUG_BREAK_IF(nullptr == memoryManager);

    if (!heap) {
        return;
    }

    if (auto heapMemory = heap->getGraphicsAllocation()) {
        memoryManager->storeAllocation(std::unique_ptr<GraphicsAllocation>(heapMemory), REUSABLE_ALLOCATION);
    }
    heap->replaceBuffer(nullptr, 0);
    heap->replaceGraphicsAllocation(nullptr);
}
|
|
|
|
// Returns the queue's command stream with room for at least minRequiredSize
// bytes plus CSRequirements::minCommandQueueCommandStreamSize.
// The stream object is created on first use. When the available space is too
// small, a page-aligned replacement block (plus csOverfetchSize) is taken from
// the reusable allocations or freshly allocated, and the previous block is
// stored with the memory manager as reusable.
LinearStream &CommandQueue::getCS(size_t minRequiredSize) {
    DEBUG_BREAK_IF(nullptr == device);
    auto &commandStreamReceiver = device->getCommandStreamReceiver();
    auto memoryManager = commandStreamReceiver.getMemoryManager();
    DEBUG_BREAK_IF(nullptr == memoryManager);

    if (commandStream == nullptr) {
        commandStream = new LinearStream(nullptr);
    }

    // Make sure we have enough room for any CSR additions
    minRequiredSize += CSRequirements::minCommandQueueCommandStreamSize;

    if (commandStream->getAvailableSpace() >= minRequiredSize) {
        return *commandStream;
    }

    // Not enough room: switch to a new block made of full pages.
    minRequiredSize = alignUp(minRequiredSize, MemoryConstants::pageSize);
    auto requiredSize = minRequiredSize + CSRequirements::csOverfetchSize;

    GraphicsAllocation *allocation = memoryManager->obtainReusableAllocation(requiredSize).release();
    if (allocation == nullptr) {
        // NOTE(review): the result is used below without a null check - confirm
        // allocateGraphicsMemory cannot fail here.
        allocation = memoryManager->allocateGraphicsMemory(requiredSize, MemoryConstants::pageSize);
    }

    // Hand the old block, if there was one, back for reuse.
    if (auto oldAllocation = commandStream->getGraphicsAllocation()) {
        memoryManager->storeAllocation(std::unique_ptr<GraphicsAllocation>(oldAllocation), REUSABLE_ALLOCATION);
    }

    // The size exposed to the stream excludes the part kept back for the CSR.
    commandStream->replaceBuffer(allocation->getUnderlyingBuffer(), minRequiredSize - CSRequirements::minCommandQueueCommandStreamSize);
    commandStream->replaceGraphicsAllocation(allocation);

    return *commandStream;
}
|
|
|
|
// Acquires every shared object in memObjects through its sharing handler and
// then enqueues a marker that waits on eventWaitList.
//   numObjects/memObjects - must be both zero/null or both non-zero/non-null,
//                           otherwise CL_INVALID_VALUE is returned
//   oclEvent              - optional output event; on success its command
//                           type is set to cmdType
// Returns CL_INVALID_MEM_OBJECT at the first object that is not a MemObj or
// has no sharing handler; otherwise the status of the marker enqueue.
// NOTE(review): objects acquired before an invalid entry is found stay
// acquired - confirm that callers validate the list up front.
cl_int CommandQueue::enqueueAcquireSharedObjects(cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *oclEvent, cl_uint cmdType) {
    if ((memObjects == nullptr && numObjects != 0) || (memObjects != nullptr && numObjects == 0)) {
        return CL_INVALID_VALUE;
    }

    for (unsigned int object = 0; object < numObjects; object++) {
        auto memObject = castToObject<MemObj>(memObjects[object]);
        if (memObject == nullptr || memObject->peekSharingHandler() == nullptr) {
            return CL_INVALID_MEM_OBJECT;
        }

        memObject->peekSharingHandler()->acquire(memObject);
        memObject->acquireCount++;
    }

    auto status = enqueueMarkerWithWaitList(
        numEventsInWaitList,
        eventWaitList,
        oclEvent);

    // Only touch the output event when the enqueue succeeded; after a failure
    // *oclEvent is not guaranteed to refer to a valid event.
    if (status == CL_SUCCESS && oclEvent) {
        castToObjectOrAbort<Event>(*oclEvent)->setCmdType(cmdType);
    }

    return status;
}
|
|
|
|
// Releases every shared object in memObjects through its sharing handler and
// then enqueues a marker that waits on eventWaitList.
//   numObjects/memObjects - must be both zero/null or both non-zero/non-null,
//                           otherwise CL_INVALID_VALUE is returned
//   oclEvent              - optional output event; on success its command
//                           type is set to cmdType
// Returns CL_INVALID_MEM_OBJECT at the first object that is not a MemObj or
// has no sharing handler; otherwise the status of the marker enqueue.
// NOTE(review): objects released before an invalid entry is found stay
// released - confirm that callers validate the list up front.
cl_int CommandQueue::enqueueReleaseSharedObjects(cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *oclEvent, cl_uint cmdType) {
    if ((memObjects == nullptr && numObjects != 0) || (memObjects != nullptr && numObjects == 0)) {
        return CL_INVALID_VALUE;
    }

    for (unsigned int object = 0; object < numObjects; object++) {
        auto memObject = castToObject<MemObj>(memObjects[object]);
        if (memObject == nullptr || memObject->peekSharingHandler() == nullptr) {
            return CL_INVALID_MEM_OBJECT;
        }

        memObject->peekSharingHandler()->release(memObject);
        // A release is expected to match an earlier acquire.
        DEBUG_BREAK_IF(memObject->acquireCount <= 0);
        memObject->acquireCount--;
    }

    auto status = enqueueMarkerWithWaitList(
        numEventsInWaitList,
        eventWaitList,
        oclEvent);

    // Only touch the output event when the enqueue succeeded; after a failure
    // *oclEvent is not guaranteed to refer to a valid event.
    if (status == CL_SUCCESS && oclEvent) {
        castToObjectOrAbort<Event>(*oclEvent)->setCmdType(cmdType);
    }

    return status;
}
|
|
|
|
void CommandQueue::updateFromCompletionStamp(const CompletionStamp &completionStamp) {
|
|
DEBUG_BREAK_IF(this->taskLevel > completionStamp.taskLevel);
|
|
DEBUG_BREAK_IF(this->taskCount > completionStamp.taskCount);
|
|
if (completionStamp.taskCount != Event::eventNotReady) {
|
|
taskCount = completionStamp.taskCount;
|
|
}
|
|
flushStamp->setStamp(completionStamp.flushStamp);
|
|
this->taskLevel = completionStamp.taskLevel;
|
|
}
|
|
|
|
// Stalls an in-order queue until isQueueBlocked() reports it is no longer
// blocked, then flushes the command stream receiver's batched submissions.
// Out-of-order queues skip the stall. The three parameters are not used by
// this implementation.
void CommandQueue::flushWaitList(
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    bool ndRangeKernel) {

    // As long as the queue is blocked we need to stall. The result of
    // isQueueBlocked() is only needed as the loop condition.
    if (!isOOQEnabled()) {
        while (isQueueBlocked()) {
        }
    }
    device->getCommandStreamReceiver().flushBatchedSubmissions();
}
|
|
|
|
bool CommandQueue::setPerfCountersEnabled(bool perfCountersEnabled, cl_uint configuration) {
|
|
DEBUG_BREAK_IF(device == nullptr);
|
|
if (perfCountersEnabled == this->perfCountersEnabled) {
|
|
return true;
|
|
}
|
|
auto perfCounters = device->getPerformanceCounters();
|
|
if (perfCountersEnabled) {
|
|
perfCounters->enable();
|
|
if (!perfCounters->isAvailable()) {
|
|
perfCounters->shutdown();
|
|
return false;
|
|
}
|
|
perfConfigurationData = perfCounters->getPmRegsCfg(configuration);
|
|
if (perfConfigurationData == nullptr) {
|
|
perfCounters->shutdown();
|
|
return false;
|
|
}
|
|
InstrReadRegsCfg *pUserCounters = &perfConfigurationData->ReadRegs;
|
|
for (uint32_t i = 0; i < pUserCounters->RegsCount; ++i) {
|
|
perfCountersUserRegistersNumber++;
|
|
if (pUserCounters->Reg[i].BitSize > 32) {
|
|
perfCountersUserRegistersNumber++;
|
|
}
|
|
}
|
|
} else {
|
|
if (perfCounters->isAvailable()) {
|
|
perfCounters->shutdown();
|
|
}
|
|
}
|
|
this->perfCountersConfig = configuration;
|
|
this->perfCountersEnabled = perfCountersEnabled;
|
|
|
|
return true;
|
|
}
|
|
|
|
// Returns the performance counters object of the queue's device.
PerformanceCounters *CommandQueue::getPerfCounters() {
    PerformanceCounters *deviceCounters = device->getPerformanceCounters();
    return deviceCounters;
}
|
|
|
|
bool CommandQueue::sendPerfCountersConfig() {
|
|
return getPerfCounters()->sendPmRegsCfgCommands(perfConfigurationData, &perfCountersRegsCfgHandle, &perfCountersRegsCfgPending);
|
|
}
|
|
|
|
// Writes the contents of a mapped region back to memObj as part of unmapping.
//   memObj        - image or buffer being unmapped
//   mappedPtr     - host pointer supplied by the caller of unmap
//   eventsRequest - wait list and optional output event
// Images are written with a non-blocking enqueueWriteImage followed by
// finish() (see below); buffers with a blocking enqueueWriteBuffer.
// Returns the enqueue status, or CL_INVALID_MEM_OBJECT when memObj is neither
// an image nor a buffer.
cl_int CommandQueue::enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest) {
    if (auto image = castToObject<Image>(memObj)) {
        auto retVal = enqueueWriteImage(image, CL_FALSE, image->getMappedOrigin(), image->getMappedRegion(), image->getHostPtrRowPitch(), image->getHostPtrSlicePitch(),
                                        mappedPtr, eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, eventsRequest.outEvent);

        // Without CL_MEM_USE_HOST_PTR the queue is always finished. With it,
        // the queue is finished unless the task level derived from the wait
        // list is Event::eventNotReady.
        const bool usesHostPtr = (image->getFlags() & CL_MEM_USE_HOST_PTR) != 0;
        const bool mustCallFinish =
            !usesHostPtr ||
            (CommandQueue::getTaskLevelFromWaitList(this->taskLevel, eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList) != Event::eventNotReady);
        if (mustCallFinish) {
            finish(true);
        }
        return retVal;
    }

    if (auto buffer = castToObject<Buffer>(memObj)) {
        // NOTE(review): mappedPtr is advanced by the mapped offset here, while
        // the map path returns a pointer that already includes the offset -
        // confirm what callers pass as mappedPtr.
        auto writePtr = ptrOffset(mappedPtr, buffer->getMappedOffset());

        return enqueueWriteBuffer(buffer, CL_TRUE, buffer->getMappedOffset(), buffer->getMappedSize(), writePtr,
                                  eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, eventsRequest.outEvent);
    }

    return CL_INVALID_MEM_OBJECT;
}
|
|
|
|
// Implements a map operation by reading the requested region of the memory
// object into host memory.
//   transferProperties - memObj, offset/origin, size/region, blocking flag and
//                        (for images) optional row/slice pitch outputs
//   eventsRequest      - wait list and optional output event
//   errcodeRet         - receives the status of the read enqueue, or
//                        CL_INVALID_MEM_OBJECT when memObj is neither a buffer
//                        nor an image
// The host memory is the object's host pointer for CL_MEM_USE_HOST_PTR
// objects; otherwise a system memory block sized to the whole object is
// allocated on first map and kept on the object.
// Returns the mapped pointer on success (the map count is incremented and the
// pointer recorded on the object), nullptr otherwise.
void *CommandQueue::enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet) {
    auto memObj = transferProperties.memObj;
    auto offset = transferProperties.offset;
    auto size = transferProperties.size;

    // Reject anything that is neither a buffer nor an image before touching
    // any state, matching enqueueWriteMemObjForUnmap's CL_INVALID_MEM_OBJECT.
    auto buffer = castToObject<Buffer>(memObj);
    Image *image = buffer ? nullptr : castToObject<Image>(memObj);
    if (!buffer && !image) {
        errcodeRet = CL_INVALID_MEM_OBJECT;
        return nullptr;
    }

    auto memoryManager = device->getMemoryManager();
    void *returnPtr = nullptr;
    void *baseMapPtr = nullptr;

    if (memObj->getFlags() & CL_MEM_USE_HOST_PTR) {
        baseMapPtr = memObj->getHostPtr();
    } else {
        // Ownership of the object is held while the map storage is checked
        // and, if missing, allocated.
        TakeOwnershipWrapper<MemObj> memObjOwnership(*transferProperties.memObj);
        if (!memObj->getAllocatedMappedPtr()) {
            auto memory = memoryManager->allocateSystemMemory(memObj->getSize(), MemoryConstants::pageSize);
            memObj->setAllocatedMappedPtr(memory);
        }
        baseMapPtr = memObj->getAllocatedMappedPtr();
    }

    if (buffer) {
        returnPtr = ptrOffset(baseMapPtr, *offset);
        errcodeRet = enqueueReadBuffer(buffer, transferProperties.blocking, *offset, *size, returnPtr,
                                       eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, eventsRequest.outEvent);

        // NOTE(review): the mapped size/offset are recorded even when the
        // enqueue failed - confirm this is intended.
        buffer->setMappedSize(*size);
        buffer->setMappedOffset(*offset);
    } else {
        size_t slicePitch = image->getHostPtrSlicePitch();
        size_t rowPitch = image->getHostPtrRowPitch();

        GetInfoHelper::set(transferProperties.retSlicePitch, slicePitch);
        GetInfoHelper::set(transferProperties.retRowPitch, rowPitch);

        // Byte offset of the origin element inside the host image.
        size_t mapOffset = image->getSurfaceFormatInfo().ImageElementSizeInBytes * offset[0] +
                           rowPitch * offset[1] +
                           slicePitch * offset[2];
        returnPtr = ptrOffset(baseMapPtr, mapOffset);

        // A zero extent in any dimension is treated as 1.
        size_t mappedRegion[3] = {size[0] ? size[0] : 1,
                                  size[1] ? size[1] : 1,
                                  size[2] ? size[2] : 1};

        errcodeRet = enqueueReadImage(image, transferProperties.blocking, offset, mappedRegion, rowPitch, slicePitch, returnPtr,
                                      eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, eventsRequest.outEvent);

        image->setMappedOrigin((size_t *)offset);
        image->setMappedRegion((size_t *)mappedRegion);
    }

    if (errcodeRet == CL_SUCCESS) {
        memObj->incMapCount();
        memObj->setMappedPtr(returnPtr);
    } else {
        returnPtr = nullptr;
    }
    return returnPtr;
}
|
|
|
|
// Maps a memory object: through cpuDataTransferHandler when the object allows
// mapping on the CPU, otherwise through enqueueReadMemObjForMap.
// Returns the pointer produced by the chosen path; errcodeRet is filled in by
// that path.
void *CommandQueue::enqueueMapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet) {
    const bool mapOnCpu = transferProperties.memObj->mappingOnCpuAllowed();
    if (mapOnCpu) {
        return cpuDataTransferHandler(transferProperties, eventsRequest, errcodeRet);
    }
    return enqueueReadMemObjForMap(transferProperties, eventsRequest, errcodeRet);
}
|
|
|
|
// Unmaps a memory object: through cpuDataTransferHandler when the object
// allows mapping on the CPU, otherwise through enqueueWriteMemObjForUnmap.
// Returns the status reported by the chosen path.
cl_int CommandQueue::enqueueUnmapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest) {
    // Start from a defined value so an indeterminate status is never returned
    // if the CPU handler leaves its output argument untouched.
    cl_int retVal = CL_SUCCESS;
    if (transferProperties.memObj->mappingOnCpuAllowed()) {
        cpuDataTransferHandler(transferProperties, eventsRequest, retVal);
    } else {
        retVal = enqueueWriteMemObjForUnmap(transferProperties.memObj, transferProperties.ptr, eventsRequest);
    }
    return retVal;
}
|
|
|
|
// Maps `size` bytes of `buffer` starting at `offset` by packaging the
// arguments into TransferProperties/EventsRequest and delegating to
// enqueueMapMemObject. mapFlags is not used by this implementation.
// Returns the pointer from enqueueMapMemObject; errcodeRet carries its status.
void *CommandQueue::enqueueMapBuffer(Buffer *buffer, cl_bool blockingMap,
                                     cl_map_flags mapFlags, size_t offset,
                                     size_t size, cl_uint numEventsInWaitList,
                                     const cl_event *eventWaitList, cl_event *event,
                                     cl_int &errcodeRet) {
    // offset and size are passed by address; the locals outlive the call below.
    TransferProperties mapProperties(buffer, CL_COMMAND_MAP_BUFFER, blockingMap != CL_FALSE,
                                     &offset, &size, nullptr, nullptr, nullptr);
    EventsRequest mapEvents(numEventsInWaitList, eventWaitList, event);

    return enqueueMapMemObject(mapProperties, mapEvents, errcodeRet);
}
|
|
|
|
// Maps the region of `image` described by origin/region by packaging the
// arguments into TransferProperties/EventsRequest and delegating to
// enqueueMapMemObject. imageRowPitch/imageSlicePitch are handed over as the
// pitch outputs. mapFlags is not used by this implementation.
// Returns the pointer from enqueueMapMemObject; errcodeRet carries its status.
void *CommandQueue::enqueueMapImage(Image *image, cl_bool blockingMap,
                                    cl_map_flags mapFlags, const size_t *origin,
                                    const size_t *region, size_t *imageRowPitch,
                                    size_t *imageSlicePitch,
                                    cl_uint numEventsInWaitList,
                                    const cl_event *eventWaitList, cl_event *event,
                                    cl_int &errcodeRet) {
    // TransferProperties takes non-const pointers, hence the const_casts.
    TransferProperties mapProperties(image, CL_COMMAND_MAP_IMAGE, blockingMap != CL_FALSE,
                                     const_cast<size_t *>(origin), const_cast<size_t *>(region), nullptr,
                                     imageRowPitch, imageSlicePitch);
    EventsRequest mapEvents(numEventsInWaitList, eventWaitList, event);

    return enqueueMapMemObject(mapProperties, mapEvents, errcodeRet);
}
|
|
|
|
// Convenience overload: packages memObj, mappedPtr and the event arguments
// into TransferProperties/EventsRequest (non-blocking,
// CL_COMMAND_UNMAP_MEM_OBJECT) and forwards to the TransferProperties overload.
cl_int CommandQueue::enqueueUnmapMemObject(MemObj *memObj, void *mappedPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) {
    TransferProperties unmapProperties(memObj, CL_COMMAND_UNMAP_MEM_OBJECT, false,
                                       nullptr, nullptr, mappedPtr, nullptr, nullptr);
    EventsRequest unmapEvents(numEventsInWaitList, eventWaitList, event);

    return enqueueUnmapMemObject(unmapProperties, unmapEvents);
}
|
|
|
|
void CommandQueue::enqueueBlockedMapUnmapOperation(const cl_event *eventWaitList,
|
|
size_t numEventsInWaitlist,
|
|
MapOperationType opType,
|
|
MemObj *memObj,
|
|
EventBuilder &externalEventBuilder) {
|
|
auto &commandStreamReceiver = device->getCommandStreamReceiver();
|
|
|
|
EventBuilder internalEventBuilder;
|
|
EventBuilder *eventBuilder;
|
|
// check if event will be exposed externally
|
|
if (externalEventBuilder.getEvent()) {
|
|
externalEventBuilder.getEvent()->incRefInternal();
|
|
eventBuilder = &externalEventBuilder;
|
|
} else {
|
|
// it will be an internal event
|
|
internalEventBuilder.create<VirtualEvent>(this, context);
|
|
eventBuilder = &internalEventBuilder;
|
|
}
|
|
|
|
//store task data in event
|
|
auto cmd = std::unique_ptr<Command>(new CommandMapUnmap(opType, *memObj, commandStreamReceiver, *this));
|
|
eventBuilder->getEvent()->setCommand(std::move(cmd));
|
|
|
|
//bind output event with input events
|
|
eventBuilder->addParentEvents(ArrayRef<const cl_event>(eventWaitList, numEventsInWaitlist));
|
|
eventBuilder->addParentEvent(this->virtualEvent);
|
|
eventBuilder->finalize();
|
|
|
|
if (this->virtualEvent) {
|
|
this->virtualEvent->setCurrentCmdQVirtualEvent(false);
|
|
this->virtualEvent->decRefInternal();
|
|
}
|
|
this->virtualEvent = eventBuilder->getEvent();
|
|
}
|
|
|
|
} // namespace OCLRT
|