Extract some code blocks to dedicated methods.

Change-Id: I9e47631367b95ce4ff5479c463a3cb5085b66315
This commit is contained in:
Mrozek, Michal 2019-02-21 17:44:17 +01:00 committed by sys_ocldev
parent d5e16d81b0
commit 1ae92e995a
2 changed files with 147 additions and 105 deletions

View File

@ -8,6 +8,7 @@
#pragma once
#include "runtime/command_stream/command_stream_receiver.h"
#include "runtime/command_queue/command_queue.h"
#include "runtime/device_queue/device_queue_hw.h"
#include "runtime/mem_obj/mem_obj.h"
#include "runtime/memory_manager/graphics_allocation.h"
#include "runtime/program/printf_handler.h"
@ -352,5 +353,24 @@ class CommandQueueHw : public CommandQueue {
size_t bufferSlicePitch,
size_t hostRowPitch,
size_t hostSlicePitch);
// Device-enqueue (parent kernel) path of an enqueue: sets up the execution-model
// dispatch on devQueueHw for parentKernel, binds the built-in scheduler kernel's
// arguments, dispatches the scheduler, and makes the scheduler and the parent
// program's block-kernel internal allocations resident.
// parentKernel and devQueueHw are dereferenced unconditionally by the definition.
// blocking is an in/out flag: it is set to true when
// parentKernel->isAuxTranslationRequired() holds and left unchanged otherwise.
void processDeviceEnqueue(Kernel *parentKernel,
DeviceQueueHw<GfxFamily> *devQueueHw,
const MultiDispatchInfo &multiDispatchInfo,
TagNode<HwTimeStamps> *hwTimeStamps,
PreemptionMode preemption,
bool &blocking);
// Prepares and dispatches the kernels described by multiDispatchInfo:
// dumps kernel args, creates/prepares the printf handler, sets up the debug
// surface (CL_COMMAND_NDRANGE_KERNEL with kernel debug enabled only), wires
// timestamp-packet / profiling nodes into event, patches parentKernel for
// device enqueue, and finally calls HardwareInterface<GfxFamily>::dispatchWalker.
//
// event and parentKernel may be null; each is checked before use.
// devQueueHw is only dereferenced when parentKernel is non-null and
// blockQueue is false.
// In/out parameters:
//   printfHandler       - reset to the result of PrintfHandler::create().
//   hwTimeStamps        - overwritten with the event's timestamp node when
//                         event is non-null and profiling is enabled.
//   blockedCommandsData - passed by address to dispatchWalker.
template <uint32_t commandType>
void processDispatchForKernels(const MultiDispatchInfo &multiDispatchInfo,
std::unique_ptr<PrintfHandler> &printfHandler,
Event *event,
TagNode<OCLRT::HwTimeStamps> *&hwTimeStamps,
Kernel *parentKernel,
bool blockQueue,
DeviceQueueHw<GfxFamily> *devQueueHw,
CsrDependencies &csrDeps,
KernelOperation *&blockedCommandsData,
TimestampPacketContainer &previousTimestampPacketNodes,
PreemptionMode preemption);
};
} // namespace OCLRT

View File

@ -170,7 +170,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
perfCountersRequired = (this->isPerfCountersEnabled() && event != nullptr);
KernelOperation *blockedCommandsData = nullptr;
std::unique_ptr<PrintfHandler> printfHandler;
bool slmUsed = false;
bool slmUsed = multiDispatchInfo.usesSlm() || parentKernel;
auto preemption = PreemptionHelper::taskPreemptionMode(*device, multiDispatchInfo);
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
@ -212,70 +212,9 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
auto commandStreamStart = commandStream.getUsed();
if (multiDispatchInfo.empty() == false) {
HwPerfCounter *hwPerfCounter = nullptr;
DebugManager.dumpKernelArgs(&multiDispatchInfo);
printfHandler.reset(PrintfHandler::create(multiDispatchInfo, *device));
if (printfHandler) {
printfHandler.get()->prepareDispatch(multiDispatchInfo);
}
if (commandType == CL_COMMAND_NDRANGE_KERNEL) {
if (multiDispatchInfo.peekMainKernel()->getProgram()->isKernelDebugEnabled()) {
setupDebugSurface(multiDispatchInfo.peekMainKernel());
}
}
if (eventBuilder.getEvent()) {
if (getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer);
}
if (this->isProfilingEnabled()) {
// Get allocation for timestamps
hwTimeStamps = eventBuilder.getEvent()->getHwTimeStampNode();
if (this->isPerfCountersEnabled()) {
hwPerfCounter = eventBuilder.getEvent()->getHwPerfCounterNode()->tagForCpuAccess;
// PERF COUNTER: copy current configuration from queue to event
eventBuilder.getEvent()->copyPerfCounters(this->getPerfCountersConfigData());
}
}
}
if (parentKernel) {
parentKernel->createReflectionSurface();
parentKernel->patchDefaultDeviceQueue(context->getDefaultDeviceQueue());
parentKernel->patchEventPool(context->getDefaultDeviceQueue());
parentKernel->patchReflectionSurface(context->getDefaultDeviceQueue(), printfHandler.get());
if (!blockQueue) {
devQueueHw->resetDeviceQueue();
devQueueHw->acquireEMCriticalSection();
}
}
HardwareInterface<GfxFamily>::dispatchWalker(
*this,
multiDispatchInfo,
csrDeps,
&blockedCommandsData,
hwTimeStamps,
hwPerfCounter,
&previousTimestampPacketNodes,
timestampPacketContainer.get(),
preemption,
blockQueue,
commandType);
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
for (auto &dispatchInfo : multiDispatchInfo) {
for (auto &patchInfoData : dispatchInfo.getKernel()->getPatchInfoDataList()) {
getCommandStreamReceiver().getFlatBatchBufferHelper().setPatchInfoData(patchInfoData);
}
}
}
getCommandStreamReceiver().setRequiredScratchSize(multiDispatchInfo.getRequiredScratchSize());
slmUsed = multiDispatchInfo.usesSlm();
processDispatchForKernels<commandType>(multiDispatchInfo, printfHandler, eventBuilder.getEvent(),
hwTimeStamps, parentKernel, blockQueue, devQueueHw, csrDeps, blockedCommandsData,
previousTimestampPacketNodes, preemption);
} else if (getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
if (CL_COMMAND_BARRIER == commandType) {
getCommandStreamReceiver().requestStallingPipeControlOnNextFlush();
@ -295,46 +234,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
CompletionStamp completionStamp;
if (!blockQueue) {
if (parentKernel) {
size_t minSizeSSHForEM = KernelCommandsHelper<GfxFamily>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
uint32_t taskCount = getCommandStreamReceiver().peekTaskCount() + 1;
devQueueHw->setupExecutionModelDispatch(getIndirectHeap(IndirectHeap::SURFACE_STATE, minSizeSSHForEM),
*devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
parentKernel,
(uint32_t)multiDispatchInfo.size(),
taskCount,
hwTimeStamps);
BuiltIns &builtIns = *getDevice().getExecutionEnvironment()->getBuiltIns();
SchedulerKernel &scheduler = builtIns.getSchedulerKernel(this->getContext());
scheduler.setArgs(devQueueHw->getQueueBuffer(),
devQueueHw->getStackBuffer(),
devQueueHw->getEventPoolBuffer(),
devQueueHw->getSlbBuffer(),
devQueueHw->getDshBuffer(),
parentKernel->getKernelReflectionSurface(),
devQueueHw->getQueueStorageBuffer(),
this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getGraphicsAllocation(),
devQueueHw->getDebugQueue());
GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
*this,
*devQueueHw,
preemption,
scheduler,
&getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
scheduler.makeResident(getCommandStreamReceiver());
// Update SLM usage
slmUsed |= scheduler.slmTotalSize > 0;
parentKernel->getProgram()->getBlockKernelManager()->makeInternalAllocationsResident(getCommandStreamReceiver());
if (parentKernel->isAuxTranslationRequired()) {
blocking = true;
}
processDeviceEnqueue(parentKernel, devQueueHw, multiDispatchInfo, hwTimeStamps, preemption, blocking);
}
auto submissionRequired = !isCommandWithoutKernel(commandType);
@ -446,6 +346,128 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
}
}
// Prepares per-dispatch state for the kernels in multiDispatchInfo and emits
// the walker commands via HardwareInterface<GfxFamily>::dispatchWalker.
//
// event and parentKernel may be null. devQueueHw is only dereferenced when
// parentKernel is non-null and blockQueue is false.
// printfHandler is reset to the handler created for this dispatch;
// hwTimeStamps is overwritten when event is non-null and profiling is enabled;
// blockedCommandsData is handed to dispatchWalker by address.
template <typename GfxFamily>
template <uint32_t commandType>
void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInfo &multiDispatchInfo,
                                                          std::unique_ptr<PrintfHandler> &printfHandler,
                                                          Event *event,
                                                          TagNode<HwTimeStamps> *&hwTimeStamps,
                                                          Kernel *parentKernel,
                                                          bool blockQueue,
                                                          DeviceQueueHw<GfxFamily> *devQueueHw,
                                                          CsrDependencies &csrDeps,
                                                          KernelOperation *&blockedCommandsData,
                                                          TimestampPacketContainer &previousTimestampPacketNodes,
                                                          PreemptionMode preemption) {
    DebugManager.dumpKernelArgs(&multiDispatchInfo);

    // The printf handler may legitimately be empty; only prepare it when one was created.
    printfHandler.reset(PrintfHandler::create(multiDispatchInfo, *device));
    if (printfHandler) {
        printfHandler->prepareDispatch(multiDispatchInfo);
    }

    // Debug surface is only relevant for plain NDRange kernel enqueues.
    if (commandType == CL_COMMAND_NDRANGE_KERNEL &&
        multiDispatchInfo.peekMainKernel()->getProgram()->isKernelDebugEnabled()) {
        setupDebugSurface(multiDispatchInfo.peekMainKernel());
    }

    // Stays null unless profiling and perf counters are both enabled on an event.
    HwPerfCounter *perfCounterTag = nullptr;
    if (event != nullptr) {
        if (getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
            event->addTimestampPacketNodes(*timestampPacketContainer);
        }
        if (this->isProfilingEnabled()) {
            // Get allocation for timestamps
            hwTimeStamps = event->getHwTimeStampNode();
            if (this->isPerfCountersEnabled()) {
                perfCounterTag = event->getHwPerfCounterNode()->tagForCpuAccess;
                // PERF COUNTER: copy current configuration from queue to event
                event->copyPerfCounters(this->getPerfCountersConfigData());
            }
        }
    }

    // Device-enqueue setup: patch the parent kernel against the default device queue.
    if (parentKernel != nullptr) {
        parentKernel->createReflectionSurface();
        parentKernel->patchDefaultDeviceQueue(context->getDefaultDeviceQueue());
        parentKernel->patchEventPool(context->getDefaultDeviceQueue());
        parentKernel->patchReflectionSurface(context->getDefaultDeviceQueue(), printfHandler.get());

        // The device queue is only reset/locked here when the queue is not blocked.
        if (!blockQueue) {
            devQueueHw->resetDeviceQueue();
            devQueueHw->acquireEMCriticalSection();
        }
    }

    HardwareInterface<GfxFamily>::dispatchWalker(
        *this,
        multiDispatchInfo,
        csrDeps,
        &blockedCommandsData,
        hwTimeStamps,
        perfCounterTag,
        &previousTimestampPacketNodes,
        timestampPacketContainer.get(),
        preemption,
        blockQueue,
        commandType);

    // Optionally forward every kernel's patch info to the flat batch buffer helper.
    if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
        for (auto &singleDispatch : multiDispatchInfo) {
            for (auto &patchEntry : singleDispatch.getKernel()->getPatchInfoDataList()) {
                getCommandStreamReceiver().getFlatBatchBufferHelper().setPatchInfoData(patchEntry);
            }
        }
    }

    getCommandStreamReceiver().setRequiredScratchSize(multiDispatchInfo.getRequiredScratchSize());
}
// Handles the device-enqueue (parent kernel) part of an enqueue:
//   1. sets up the execution-model dispatch on devQueueHw,
//   2. binds the built-in scheduler kernel's arguments and dispatches it,
//   3. makes the scheduler and the parent program's block-kernel internal
//      allocations resident on the command stream receiver.
//
// parentKernel and devQueueHw are dereferenced unconditionally and must be
// non-null. blocking is an in/out flag: it is forced to true when the parent
// kernel requires aux translation and is otherwise left untouched.
template <typename GfxFamily>
void CommandQueueHw<GfxFamily>::processDeviceEnqueue(Kernel *parentKernel,
                                                     DeviceQueueHw<GfxFamily> *devQueueHw,
                                                     const MultiDispatchInfo &multiDispatchInfo,
                                                     TagNode<HwTimeStamps> *hwTimeStamps,
                                                     PreemptionMode preemption,
                                                     bool &blocking) {
    const size_t minSizeSSHForEM = KernelCommandsHelper<GfxFamily>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
    // Current CSR task count plus one (presumably the count the upcoming submission gets - confirm against the CSR).
    const uint32_t taskCount = getCommandStreamReceiver().peekTaskCount() + 1;

    // The surface-state heap is requested with the execution-model minimum size here;
    // the later lookups below pass 0u as the required size.
    devQueueHw->setupExecutionModelDispatch(getIndirectHeap(IndirectHeap::SURFACE_STATE, minSizeSSHForEM),
                                            *devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
                                            parentKernel,
                                            static_cast<uint32_t>(multiDispatchInfo.size()),
                                            taskCount,
                                            hwTimeStamps);

    BuiltIns &builtIns = *getDevice().getExecutionEnvironment()->getBuiltIns();
    SchedulerKernel &scheduler = builtIns.getSchedulerKernel(this->getContext());

    scheduler.setArgs(devQueueHw->getQueueBuffer(),
                      devQueueHw->getStackBuffer(),
                      devQueueHw->getEventPoolBuffer(),
                      devQueueHw->getSlbBuffer(),
                      devQueueHw->getDshBuffer(),
                      parentKernel->getKernelReflectionSurface(),
                      devQueueHw->getQueueStorageBuffer(),
                      this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getGraphicsAllocation(),
                      devQueueHw->getDebugQueue());

    GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
        *this,
        *devQueueHw,
        preemption,
        scheduler,
        &getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
        devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE));

    scheduler.makeResident(getCommandStreamReceiver());

    parentKernel->getProgram()->getBlockKernelManager()->makeInternalAllocationsResident(getCommandStreamReceiver());

    // Aux translation on the parent kernel forces the enqueue to be blocking.
    if (parentKernel->isAuxTranslationRequired()) {
        blocking = true;
    }
}
template <typename GfxFamily>
void CommandQueueHw<GfxFamily>::obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType) {
auto isQueueBlockedStatus = isQueueBlocked();