Move blit enqueue out of enqueueCommon [1/n]

Change-Id: I130a1015ec744c189ddd42af06202bf11b782a6d
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2020-08-25 14:00:20 +02:00
committed by sys_ocldev
parent 26e700f649
commit 90d67f3df2
3 changed files with 129 additions and 7 deletions

View File

@ -347,6 +347,9 @@ class CommandQueueHw : public CommandQueue {
const cl_event *eventWaitList,
cl_event *event);
// Enqueue path for a blit command of type cmdType described by multiDispatchInfo.
// The wait list and optional output event are forwarded to an EventsRequest; when
// `blocking` is true the call waits for completion before returning.
template <uint32_t cmdType>
void enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking);
template <uint32_t commandType>
CompletionStamp enqueueNonBlocked(Surface **surfacesForResidency,
size_t surfaceCount,

View File

@ -1053,4 +1053,118 @@ size_t CommandQueueHw<GfxFamily>::calculateHostPtrSizeForImage(const size_t *reg
return Image::calculateHostPtrSize(region, dstRowPitch, dstSlicePitch, bytesPerPixel, image->getImageDesc().image_type);
}
// Enqueues the blit operation described by multiDispatchInfo as a command of type cmdType.
//
// numEventsInWaitList / eventWaitList - wait list, wrapped in an EventsRequest and used to
//                                       derive the task level, the blocked state and the
//                                       CSR dependencies.
// event    - optional output; when non-null an Event is created here and returned through it.
// blocking - when true (or when the MakeEachEnqueueBlocking debug flag is set) the call
//            waits for completion before returning.
//
// Both the GPGPU command stream receiver ownership and the queue ownership are held from
// the point they are taken until the explicit unlock() calls just before the optional wait.
template <typename GfxFamily>
template <uint32_t cmdType>
void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking) {
// Taken first, before the queue ownership below; released explicitly near the end.
auto commandStreamRecieverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership();
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
TimeStampData queueTimeStamp;
EventBuilder eventBuilder;
// The queue timestamp is sampled only when profiling is on and the caller asked for an event.
if (isProfilingEnabled() && eventsRequest.outEvent) {
this->getDevice().getOSTime()->getCpuGpuTime(&queueTimeStamp);
}
// Create the output event in the not-ready state and hand it back to the caller.
if (eventsRequest.outEvent) {
eventBuilder.create<Event>(this, cmdType, CompletionStamp::notReady, 0);
*eventsRequest.outEvent = eventBuilder.getEvent();
if (eventBuilder.getEvent()->isProfilingEnabled()) {
eventBuilder.getEvent()->setQueueTimeStamp(&queueTimeStamp);
}
// NOTE(review): the log text still says "enqueueHandler" although this is enqueueBlit.
DBG_LOG(EventsDebugEnable, "enqueueHandler commandType", cmdType, "output Event", eventBuilder.getEvent());
}
// Stays empty here; handed to obtainCommandStream and enqueueBlocked below.
std::unique_ptr<KernelOperation> blockedCommandsData;
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
// blockQueue and taskLevel are passed to obtainTaskLevelAndBlockedStatus to be filled in
// from the wait list (presumably by reference - confirm against its declaration).
auto blockQueue = false;
auto taskLevel = 0u;
obtainTaskLevelAndBlockedStatus(taskLevel, eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, blockQueue, cmdType);
auto clearAllDependencies = queueDependenciesClearRequired();
DBG_LOG(EventsDebugEnable, "blockQueue", blockQueue, "virtualEvent", virtualEvent, "taskLevel", taskLevel);
// Hooks receive the caller's `blocking` value, before the debug-flag override below.
enqueueHandlerHook(cmdType, multiDispatchInfo);
aubCaptureHook(blocking, clearAllDependencies, multiDispatchInfo);
// Debug flag that forces every enqueue to be treated as blocking.
if (DebugManager.flags.MakeEachEnqueueBlocking.get()) {
blocking = true;
}
TimestampPacketDependencies timestampPacketDependencies;
BlitPropertiesContainer blitPropertiesContainer;
CsrDependencies csrDeps;
// Dependencies from the wait list are collected against the BCS command stream receiver.
eventsRequest.fillCsrDependencies(csrDeps, *getBcsCommandStreamReceiver(), CsrDependencies::DependenciesType::All);
auto allocator = getBcsCommandStreamReceiver()->getTimestampPacketAllocator();
// Extra timestamp tag for a cache flush when one is required for BCS and a GPGPU
// submission is needed for this blit.
if (isCacheFlushForBcsRequired() && isGpgpuSubmissionForBcsRequired(blockQueue)) {
timestampPacketDependencies.cacheFlushNodes.add(allocator->getTag());
}
// Extra timestamp tag for a barrier when the queue is not blocked and the GPGPU CSR
// reports that a stalling pipe control is required on the next flush.
if (!blockQueue && getGpgpuCommandStreamReceiver().isStallingPipeControlOnNextFlushRequired()) {
timestampPacketDependencies.barrierNodes.add(allocator->getTag());
}
// Request one new timestamp packet node for this enqueue; previousEnqueueNodes is then
// registered as a dependency (presumably it receives the queue's prior nodes - TODO confirm).
obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, clearAllDependencies, true);
csrDeps.push_back(&timestampPacketDependencies.previousEnqueueNodes);
auto &commandStream = *obtainCommandStream<cmdType>(csrDeps, true, blockQueue, multiDispatchInfo, eventsRequest, blockedCommandsData, nullptr, 0);
// Offset in the stream before the blit dispatch is processed; passed on to
// enqueueCommandWithoutKernel together with the stream itself.
auto commandStreamStart = commandStream.getUsed();
if (eventBuilder.getEvent()) {
eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer);
}
blitPropertiesContainer.push_back(processDispatchForBlitEnqueue(multiDispatchInfo, timestampPacketDependencies,
eventsRequest, commandStream, cmdType, blockQueue));
// Default stamp used when the queue is blocked; overwritten on the non-blocked path.
CompletionStamp completionStamp = {CompletionStamp::notReady, taskLevel, 0};
// NOTE(review): meaning of the four boolean arguments is not visible here - see the
// EnqueueProperties constructor; the last argument is the blit properties built above.
const EnqueueProperties enqueueProperties(true, false, false, false, &blitPropertiesContainer);
// Non-blocked path: make dependencies resident on the GPGPU CSR and submit right away.
if (!blockQueue) {
csrDeps.makeResident(getGpgpuCommandStreamReceiver());
completionStamp = enqueueCommandWithoutKernel(
nullptr,
0,
commandStream,
commandStreamStart,
blocking,
enqueueProperties,
timestampPacketDependencies,
eventsRequest,
eventBuilder,
taskLevel);
if (eventBuilder.getEvent()) {
eventBuilder.getEvent()->flushStamp->replaceStampObject(this->flushStamp->getStampReference());
}
this->latestSentEnqueueType = enqueueProperties.operation;
}
updateFromCompletionStamp(completionStamp);
// Propagate the resulting stamp (and the queue's bcsTaskCount) to the output event.
if (eventBuilder.getEvent()) {
eventBuilder.getEvent()->updateCompletionStamp(completionStamp.taskCount, bcsTaskCount, completionStamp.taskLevel, completionStamp.flushStamp);
FileLoggerInstance().log(DebugManager.flags.EventsDebugEnable.get(), "updateCompletionStamp Event", eventBuilder.getEvent(), "taskLevel", eventBuilder.getEvent()->taskLevel.load());
}
// Blocked path: hand the command over to enqueueBlocked instead of submitting it now.
if (blockQueue) {
enqueueBlocked(cmdType, nullptr, 0, multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueueProperties, eventsRequest, eventBuilder, nullptr);
}
// Release both locks before the optional wait below.
queueOwnership.unlock();
commandStreamRecieverOwnership.unlock();
if (blocking) {
if (blockQueue) {
// Spin with an empty body until the queue is no longer reported as blocked.
while (isQueueBlocked()) {
}
}
// Wait on the queue's current taskCount / bcsTaskCount / flush stamp.
waitUntilComplete(taskCount, bcsTaskCount, flushStamp->peekStamp(), false);
}
}
} // namespace NEO

View File

@ -128,13 +128,18 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, ptr, size, MemoryConstants::pageSize, MemoryConstants::pageSize);
}
}
enqueueHandler<CL_COMMAND_READ_BUFFER>(
surfaces,
blockingRead == CL_TRUE,
dispatchInfo,
numEventsInWaitList,
eventWaitList,
event);
if (blitEnqueueAllowed(cmdType)) {
enqueueBlit<CL_COMMAND_READ_BUFFER>(dispatchInfo, numEventsInWaitList, eventWaitList, event, blockingRead);
} else {
enqueueHandler<CL_COMMAND_READ_BUFFER>(
surfaces,
blockingRead == CL_TRUE,
dispatchInfo,
numEventsInWaitList,
eventWaitList,
event);
}
return CL_SUCCESS;
}