Extract some code blocks to dedicated methods.
Change-Id: I9e47631367b95ce4ff5479c463a3cb5085b66315
This commit is contained in:
parent
d5e16d81b0
commit
1ae92e995a
|
@ -8,6 +8,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
#include "runtime/command_stream/command_stream_receiver.h"
|
#include "runtime/command_stream/command_stream_receiver.h"
|
||||||
#include "runtime/command_queue/command_queue.h"
|
#include "runtime/command_queue/command_queue.h"
|
||||||
|
#include "runtime/device_queue/device_queue_hw.h"
|
||||||
#include "runtime/mem_obj/mem_obj.h"
|
#include "runtime/mem_obj/mem_obj.h"
|
||||||
#include "runtime/memory_manager/graphics_allocation.h"
|
#include "runtime/memory_manager/graphics_allocation.h"
|
||||||
#include "runtime/program/printf_handler.h"
|
#include "runtime/program/printf_handler.h"
|
||||||
|
@ -352,5 +353,24 @@ class CommandQueueHw : public CommandQueue {
|
||||||
size_t bufferSlicePitch,
|
size_t bufferSlicePitch,
|
||||||
size_t hostRowPitch,
|
size_t hostRowPitch,
|
||||||
size_t hostSlicePitch);
|
size_t hostSlicePitch);
|
||||||
|
void processDeviceEnqueue(Kernel *parentKernel,
|
||||||
|
DeviceQueueHw<GfxFamily> *devQueueHw,
|
||||||
|
const MultiDispatchInfo &multiDispatchInfo,
|
||||||
|
TagNode<HwTimeStamps> *hwTimeStamps,
|
||||||
|
PreemptionMode preemption,
|
||||||
|
bool &blocking);
|
||||||
|
|
||||||
|
template <uint32_t commandType>
|
||||||
|
void processDispatchForKernels(const MultiDispatchInfo &multiDispatchInfo,
|
||||||
|
std::unique_ptr<PrintfHandler> &printfHandler,
|
||||||
|
Event *event,
|
||||||
|
TagNode<OCLRT::HwTimeStamps> *&hwTimeStamps,
|
||||||
|
Kernel *parentKernel,
|
||||||
|
bool blockQueue,
|
||||||
|
DeviceQueueHw<GfxFamily> *devQueueHw,
|
||||||
|
CsrDependencies &csrDeps,
|
||||||
|
KernelOperation *&blockedCommandsData,
|
||||||
|
TimestampPacketContainer &previousTimestampPacketNodes,
|
||||||
|
PreemptionMode preemption);
|
||||||
};
|
};
|
||||||
} // namespace OCLRT
|
} // namespace OCLRT
|
||||||
|
|
|
@ -170,7 +170,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||||
perfCountersRequired = (this->isPerfCountersEnabled() && event != nullptr);
|
perfCountersRequired = (this->isPerfCountersEnabled() && event != nullptr);
|
||||||
KernelOperation *blockedCommandsData = nullptr;
|
KernelOperation *blockedCommandsData = nullptr;
|
||||||
std::unique_ptr<PrintfHandler> printfHandler;
|
std::unique_ptr<PrintfHandler> printfHandler;
|
||||||
bool slmUsed = false;
|
bool slmUsed = multiDispatchInfo.usesSlm() || parentKernel;
|
||||||
auto preemption = PreemptionHelper::taskPreemptionMode(*device, multiDispatchInfo);
|
auto preemption = PreemptionHelper::taskPreemptionMode(*device, multiDispatchInfo);
|
||||||
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
|
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
|
||||||
|
|
||||||
|
@ -212,70 +212,9 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||||
auto commandStreamStart = commandStream.getUsed();
|
auto commandStreamStart = commandStream.getUsed();
|
||||||
|
|
||||||
if (multiDispatchInfo.empty() == false) {
|
if (multiDispatchInfo.empty() == false) {
|
||||||
HwPerfCounter *hwPerfCounter = nullptr;
|
processDispatchForKernels<commandType>(multiDispatchInfo, printfHandler, eventBuilder.getEvent(),
|
||||||
DebugManager.dumpKernelArgs(&multiDispatchInfo);
|
hwTimeStamps, parentKernel, blockQueue, devQueueHw, csrDeps, blockedCommandsData,
|
||||||
|
previousTimestampPacketNodes, preemption);
|
||||||
printfHandler.reset(PrintfHandler::create(multiDispatchInfo, *device));
|
|
||||||
if (printfHandler) {
|
|
||||||
printfHandler.get()->prepareDispatch(multiDispatchInfo);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (commandType == CL_COMMAND_NDRANGE_KERNEL) {
|
|
||||||
if (multiDispatchInfo.peekMainKernel()->getProgram()->isKernelDebugEnabled()) {
|
|
||||||
setupDebugSurface(multiDispatchInfo.peekMainKernel());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (eventBuilder.getEvent()) {
|
|
||||||
if (getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
|
||||||
eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer);
|
|
||||||
}
|
|
||||||
if (this->isProfilingEnabled()) {
|
|
||||||
// Get allocation for timestamps
|
|
||||||
hwTimeStamps = eventBuilder.getEvent()->getHwTimeStampNode();
|
|
||||||
if (this->isPerfCountersEnabled()) {
|
|
||||||
hwPerfCounter = eventBuilder.getEvent()->getHwPerfCounterNode()->tagForCpuAccess;
|
|
||||||
// PERF COUNTER: copy current configuration from queue to event
|
|
||||||
eventBuilder.getEvent()->copyPerfCounters(this->getPerfCountersConfigData());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (parentKernel) {
|
|
||||||
parentKernel->createReflectionSurface();
|
|
||||||
parentKernel->patchDefaultDeviceQueue(context->getDefaultDeviceQueue());
|
|
||||||
parentKernel->patchEventPool(context->getDefaultDeviceQueue());
|
|
||||||
parentKernel->patchReflectionSurface(context->getDefaultDeviceQueue(), printfHandler.get());
|
|
||||||
if (!blockQueue) {
|
|
||||||
devQueueHw->resetDeviceQueue();
|
|
||||||
devQueueHw->acquireEMCriticalSection();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
HardwareInterface<GfxFamily>::dispatchWalker(
|
|
||||||
*this,
|
|
||||||
multiDispatchInfo,
|
|
||||||
csrDeps,
|
|
||||||
&blockedCommandsData,
|
|
||||||
hwTimeStamps,
|
|
||||||
hwPerfCounter,
|
|
||||||
&previousTimestampPacketNodes,
|
|
||||||
timestampPacketContainer.get(),
|
|
||||||
preemption,
|
|
||||||
blockQueue,
|
|
||||||
commandType);
|
|
||||||
|
|
||||||
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
|
|
||||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
|
||||||
for (auto &patchInfoData : dispatchInfo.getKernel()->getPatchInfoDataList()) {
|
|
||||||
getCommandStreamReceiver().getFlatBatchBufferHelper().setPatchInfoData(patchInfoData);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
getCommandStreamReceiver().setRequiredScratchSize(multiDispatchInfo.getRequiredScratchSize());
|
|
||||||
|
|
||||||
slmUsed = multiDispatchInfo.usesSlm();
|
|
||||||
} else if (getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
} else if (getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||||
if (CL_COMMAND_BARRIER == commandType) {
|
if (CL_COMMAND_BARRIER == commandType) {
|
||||||
getCommandStreamReceiver().requestStallingPipeControlOnNextFlush();
|
getCommandStreamReceiver().requestStallingPipeControlOnNextFlush();
|
||||||
|
@ -295,46 +234,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||||
CompletionStamp completionStamp;
|
CompletionStamp completionStamp;
|
||||||
if (!blockQueue) {
|
if (!blockQueue) {
|
||||||
if (parentKernel) {
|
if (parentKernel) {
|
||||||
size_t minSizeSSHForEM = KernelCommandsHelper<GfxFamily>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
|
processDeviceEnqueue(parentKernel, devQueueHw, multiDispatchInfo, hwTimeStamps, preemption, blocking);
|
||||||
|
|
||||||
uint32_t taskCount = getCommandStreamReceiver().peekTaskCount() + 1;
|
|
||||||
devQueueHw->setupExecutionModelDispatch(getIndirectHeap(IndirectHeap::SURFACE_STATE, minSizeSSHForEM),
|
|
||||||
*devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
|
|
||||||
parentKernel,
|
|
||||||
(uint32_t)multiDispatchInfo.size(),
|
|
||||||
taskCount,
|
|
||||||
hwTimeStamps);
|
|
||||||
|
|
||||||
BuiltIns &builtIns = *getDevice().getExecutionEnvironment()->getBuiltIns();
|
|
||||||
SchedulerKernel &scheduler = builtIns.getSchedulerKernel(this->getContext());
|
|
||||||
|
|
||||||
scheduler.setArgs(devQueueHw->getQueueBuffer(),
|
|
||||||
devQueueHw->getStackBuffer(),
|
|
||||||
devQueueHw->getEventPoolBuffer(),
|
|
||||||
devQueueHw->getSlbBuffer(),
|
|
||||||
devQueueHw->getDshBuffer(),
|
|
||||||
parentKernel->getKernelReflectionSurface(),
|
|
||||||
devQueueHw->getQueueStorageBuffer(),
|
|
||||||
this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getGraphicsAllocation(),
|
|
||||||
devQueueHw->getDebugQueue());
|
|
||||||
|
|
||||||
GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
|
|
||||||
*this,
|
|
||||||
*devQueueHw,
|
|
||||||
preemption,
|
|
||||||
scheduler,
|
|
||||||
&getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
|
|
||||||
devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
|
|
||||||
|
|
||||||
scheduler.makeResident(getCommandStreamReceiver());
|
|
||||||
|
|
||||||
// Update SLM usage
|
|
||||||
slmUsed |= scheduler.slmTotalSize > 0;
|
|
||||||
|
|
||||||
parentKernel->getProgram()->getBlockKernelManager()->makeInternalAllocationsResident(getCommandStreamReceiver());
|
|
||||||
if (parentKernel->isAuxTranslationRequired()) {
|
|
||||||
blocking = true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
auto submissionRequired = !isCommandWithoutKernel(commandType);
|
auto submissionRequired = !isCommandWithoutKernel(commandType);
|
||||||
|
@ -446,6 +346,128 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename GfxFamily>
|
||||||
|
template <uint32_t commandType>
|
||||||
|
void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInfo &multiDispatchInfo,
|
||||||
|
std::unique_ptr<PrintfHandler> &printfHandler,
|
||||||
|
Event *event,
|
||||||
|
TagNode<HwTimeStamps> *&hwTimeStamps,
|
||||||
|
Kernel *parentKernel,
|
||||||
|
bool blockQueue,
|
||||||
|
DeviceQueueHw<GfxFamily> *devQueueHw,
|
||||||
|
CsrDependencies &csrDeps,
|
||||||
|
KernelOperation *&blockedCommandsData,
|
||||||
|
TimestampPacketContainer &previousTimestampPacketNodes,
|
||||||
|
PreemptionMode preemption) {
|
||||||
|
HwPerfCounter *hwPerfCounter = nullptr;
|
||||||
|
DebugManager.dumpKernelArgs(&multiDispatchInfo);
|
||||||
|
|
||||||
|
printfHandler.reset(PrintfHandler::create(multiDispatchInfo, *device));
|
||||||
|
if (printfHandler) {
|
||||||
|
printfHandler.get()->prepareDispatch(multiDispatchInfo);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (commandType == CL_COMMAND_NDRANGE_KERNEL) {
|
||||||
|
if (multiDispatchInfo.peekMainKernel()->getProgram()->isKernelDebugEnabled()) {
|
||||||
|
setupDebugSurface(multiDispatchInfo.peekMainKernel());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (event) {
|
||||||
|
if (getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||||
|
event->addTimestampPacketNodes(*timestampPacketContainer);
|
||||||
|
}
|
||||||
|
if (this->isProfilingEnabled()) {
|
||||||
|
// Get allocation for timestamps
|
||||||
|
hwTimeStamps = event->getHwTimeStampNode();
|
||||||
|
if (this->isPerfCountersEnabled()) {
|
||||||
|
hwPerfCounter = event->getHwPerfCounterNode()->tagForCpuAccess;
|
||||||
|
// PERF COUNTER: copy current configuration from queue to event
|
||||||
|
event->copyPerfCounters(this->getPerfCountersConfigData());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (parentKernel) {
|
||||||
|
parentKernel->createReflectionSurface();
|
||||||
|
parentKernel->patchDefaultDeviceQueue(context->getDefaultDeviceQueue());
|
||||||
|
parentKernel->patchEventPool(context->getDefaultDeviceQueue());
|
||||||
|
parentKernel->patchReflectionSurface(context->getDefaultDeviceQueue(), printfHandler.get());
|
||||||
|
if (!blockQueue) {
|
||||||
|
devQueueHw->resetDeviceQueue();
|
||||||
|
devQueueHw->acquireEMCriticalSection();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
HardwareInterface<GfxFamily>::dispatchWalker(
|
||||||
|
*this,
|
||||||
|
multiDispatchInfo,
|
||||||
|
csrDeps,
|
||||||
|
&blockedCommandsData,
|
||||||
|
hwTimeStamps,
|
||||||
|
hwPerfCounter,
|
||||||
|
&previousTimestampPacketNodes,
|
||||||
|
timestampPacketContainer.get(),
|
||||||
|
preemption,
|
||||||
|
blockQueue,
|
||||||
|
commandType);
|
||||||
|
|
||||||
|
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
|
||||||
|
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||||
|
for (auto &patchInfoData : dispatchInfo.getKernel()->getPatchInfoDataList()) {
|
||||||
|
getCommandStreamReceiver().getFlatBatchBufferHelper().setPatchInfoData(patchInfoData);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
getCommandStreamReceiver().setRequiredScratchSize(multiDispatchInfo.getRequiredScratchSize());
|
||||||
|
}
|
||||||
|
template <typename GfxFamily>
|
||||||
|
void CommandQueueHw<GfxFamily>::processDeviceEnqueue(Kernel *parentKernel,
|
||||||
|
DeviceQueueHw<GfxFamily> *devQueueHw,
|
||||||
|
const MultiDispatchInfo &multiDispatchInfo,
|
||||||
|
TagNode<HwTimeStamps> *hwTimeStamps,
|
||||||
|
PreemptionMode preemption,
|
||||||
|
bool &blocking) {
|
||||||
|
size_t minSizeSSHForEM = KernelCommandsHelper<GfxFamily>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel);
|
||||||
|
|
||||||
|
uint32_t taskCount = getCommandStreamReceiver().peekTaskCount() + 1;
|
||||||
|
devQueueHw->setupExecutionModelDispatch(getIndirectHeap(IndirectHeap::SURFACE_STATE, minSizeSSHForEM),
|
||||||
|
*devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
|
||||||
|
parentKernel,
|
||||||
|
(uint32_t)multiDispatchInfo.size(),
|
||||||
|
taskCount,
|
||||||
|
hwTimeStamps);
|
||||||
|
|
||||||
|
BuiltIns &builtIns = *getDevice().getExecutionEnvironment()->getBuiltIns();
|
||||||
|
SchedulerKernel &scheduler = builtIns.getSchedulerKernel(this->getContext());
|
||||||
|
|
||||||
|
scheduler.setArgs(devQueueHw->getQueueBuffer(),
|
||||||
|
devQueueHw->getStackBuffer(),
|
||||||
|
devQueueHw->getEventPoolBuffer(),
|
||||||
|
devQueueHw->getSlbBuffer(),
|
||||||
|
devQueueHw->getDshBuffer(),
|
||||||
|
parentKernel->getKernelReflectionSurface(),
|
||||||
|
devQueueHw->getQueueStorageBuffer(),
|
||||||
|
this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getGraphicsAllocation(),
|
||||||
|
devQueueHw->getDebugQueue());
|
||||||
|
|
||||||
|
GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
|
||||||
|
*this,
|
||||||
|
*devQueueHw,
|
||||||
|
preemption,
|
||||||
|
scheduler,
|
||||||
|
&getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
|
||||||
|
devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
|
||||||
|
|
||||||
|
scheduler.makeResident(getCommandStreamReceiver());
|
||||||
|
|
||||||
|
parentKernel->getProgram()->getBlockKernelManager()->makeInternalAllocationsResident(getCommandStreamReceiver());
|
||||||
|
if (parentKernel->isAuxTranslationRequired()) {
|
||||||
|
blocking = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
void CommandQueueHw<GfxFamily>::obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType) {
|
void CommandQueueHw<GfxFamily>::obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType) {
|
||||||
auto isQueueBlockedStatus = isQueueBlocked();
|
auto isQueueBlockedStatus = isQueueBlocked();
|
||||||
|
|
Loading…
Reference in New Issue