Preamble Helper Refactor

Change-Id: Iacd05dcb6d9047fc2814895fa87d1cd9be6df446
Signed-off-by: Daria Hinz <daria.hinz@intel.com>
This commit is contained in:
Daria Hinz
2019-09-10 16:13:11 +02:00
committed by sys_ocldev
parent 8daf59c246
commit ae4425351f
16 changed files with 61 additions and 36 deletions

View File

@@ -676,7 +676,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
dispatchFlags.useSLM = multiDispatchInfo.usesSlm() || multiDispatchInfo.peekParentKernel();
dispatchFlags.guardCommandBufferWithPipeControl = true;
dispatchFlags.GSBA32BitRequired = commandType == CL_COMMAND_NDRANGE_KERNEL;
dispatchFlags.mediaSamplerRequired = mediaSamplerRequired;
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired;
dispatchFlags.requiresCoherency = requiresCoherency;
dispatchFlags.lowPriority = (QueuePriority::LOW == priority);
dispatchFlags.throttle = getThrottle();
@@ -689,7 +689,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
dispatchFlags.csrDependencies.makeResident(getGpgpuCommandStreamReceiver());
}
dispatchFlags.numGrfRequired = numGrfRequired;
dispatchFlags.specialPipelineSelectMode = specialPipelineSelectMode;
dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode = specialPipelineSelectMode;
dispatchFlags.multiEngineQueue = this->multiEngineQueue;
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);

View File

@@ -78,7 +78,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
void programPreemption(LinearStream &csr, DispatchFlags &dispatchFlags);
void programL3(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t &newL3Config);
void programPreamble(LinearStream &csr, Device &device, DispatchFlags &dispatchFlags, uint32_t &newL3Config);
void programPipelineSelect(LinearStream &csr, DispatchFlags &dispatchFlags);
void programPipelineSelect(LinearStream &csr, PipelineSelectArgs &pipelineSelectArgs);
void programEpilogue(LinearStream &csr, void **batchBufferEndLocation, DispatchFlags &dispatchFlags);
void programEpliogueCommands(LinearStream &csr, const DispatchFlags &dispatchFlags);
void programMediaSampler(LinearStream &csr, DispatchFlags &dispatchFlags);

View File

@@ -215,9 +215,9 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
csrSizeRequestFlags.l3ConfigChanged = this->lastSentL3Config != newL3Config;
csrSizeRequestFlags.coherencyRequestChanged = this->lastSentCoherencyRequest != static_cast<int8_t>(dispatchFlags.requiresCoherency);
csrSizeRequestFlags.preemptionRequestChanged = this->lastPreemptionMode != dispatchFlags.preemptionMode;
csrSizeRequestFlags.mediaSamplerConfigChanged = this->lastMediaSamplerConfig != static_cast<int8_t>(dispatchFlags.mediaSamplerRequired);
csrSizeRequestFlags.mediaSamplerConfigChanged = this->lastMediaSamplerConfig != static_cast<int8_t>(dispatchFlags.pipelineSelectArgs.mediaSamplerRequired);
csrSizeRequestFlags.numGrfRequiredChanged = this->lastSentNumGrfRequired != dispatchFlags.numGrfRequired;
csrSizeRequestFlags.specialPipelineSelectModeChanged = this->lastSpecialPipelineSelectMode != dispatchFlags.specialPipelineSelectMode;
csrSizeRequestFlags.specialPipelineSelectModeChanged = this->lastSpecialPipelineSelectMode != dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode;
auto force32BitAllocations = getMemoryManager()->peekForce32BitAllocations();
bool stateBaseAddressDirty = false;
@@ -264,7 +264,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
programPreemption(commandStreamCSR, dispatchFlags);
programComputeMode(commandStreamCSR, dispatchFlags);
programL3(commandStreamCSR, dispatchFlags, newL3Config);
programPipelineSelect(commandStreamCSR, dispatchFlags);
programPipelineSelect(commandStreamCSR, dispatchFlags.pipelineSelectArgs);
programPreamble(commandStreamCSR, device, dispatchFlags, newL3Config);
programMediaSampler(commandStreamCSR, dispatchFlags);
@@ -607,7 +607,7 @@ size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const Dispat
size += getCmdSizeForL3Config();
size += getCmdSizeForComputeMode();
size += getCmdSizeForMediaSampler(dispatchFlags.mediaSamplerRequired);
size += getCmdSizeForMediaSampler(dispatchFlags.pipelineSelectArgs.mediaSamplerRequired);
size += getCmdSizeForPipelineSelect();
size += getCmdSizeForPreemption(dispatchFlags);
size += getCmdSizeForEpilogue(dispatchFlags);
@@ -635,18 +635,12 @@ size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const Dispat
// Reports how many command-stream bytes have to be reserved for pipeline-select
// programming. The result stays 0 unless the media sampler configuration
// changed, the special pipeline select mode changed, or the preamble has not
// been sent yet.
// NOTE(review): this listing appears to carry statements from both before and
// after the refactor (e.g. the repeated `size` declaration) - confirm against
// the checked-in file which of them survive.
template <typename GfxFamily>
inline size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForPipelineSelect() const {
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
using PIPELINE_SELECT = typename GfxFamily::PIPELINE_SELECT;
size_t size = 0;
size_t size = 0;
if (csrSizeRequestFlags.mediaSamplerConfigChanged ||
csrSizeRequestFlags.specialPipelineSelectModeChanged ||
!isPreambleSent) {
size += sizeof(PIPELINE_SELECT);
if (HardwareCommandsHelper<GfxFamily>::isPipeControlPriorToPipelineSelectWArequired(peekHwInfo())) {
size += sizeof(PIPE_CONTROL);
}
// Size of the command sequence is obtained from PreambleHelper for the current hardware info.
size += PreambleHelper<GfxFamily>::getCmdSizeForPipelineSelect(peekHwInfo());
}
return size;
}

View File

@@ -46,10 +46,10 @@ inline size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForL3Config() const
}
// Delegates pipeline-select programming to PreambleHelper when the media
// sampler configuration changed or the preamble has not been sent yet, and
// then stores the requested media sampler state in lastMediaSamplerConfig.
// NOTE(review): both the DispatchFlags-based and the PipelineSelectArgs-based
// variants of the signature and body lines are listed here - presumably only
// the PipelineSelectArgs ones remain after the refactor; confirm in the file.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::programPipelineSelect(LinearStream &commandStream, DispatchFlags &dispatchFlags) {
void CommandStreamReceiverHw<GfxFamily>::programPipelineSelect(LinearStream &commandStream, PipelineSelectArgs &pipelineSelectArgs) {
if (csrSizeRequestFlags.mediaSamplerConfigChanged || !isPreambleSent) {
PreambleHelper<GfxFamily>::programPipelineSelect(&commandStream, dispatchFlags, peekHwInfo());
this->lastMediaSamplerConfig = dispatchFlags.mediaSamplerRequired;
PreambleHelper<GfxFamily>::programPipelineSelect(&commandStream, pipelineSelectArgs, peekHwInfo());
// Remember what was requested so a later flush can detect a change.
this->lastMediaSamplerConfig = pipelineSelectArgs.mediaSamplerRequired;
}
}

View File

@@ -6,6 +6,7 @@
*/
#pragma once
#include "core/helpers/pipeline_select_args.h"
#include "core/memory_manager/memory_constants.h"
#include "runtime/helpers/csr_deps.h"
#include "runtime/helpers/hw_info.h"
@@ -38,6 +39,7 @@ constexpr uint32_t l3AndL1On = 2u;
struct DispatchFlags {
CsrDependencies csrDependencies;
PipelineSelectArgs pipelineSelectArgs;
FlushStampTrackingObj *flushStampReference = nullptr;
QueueThrottle throttle = QueueThrottle::MEDIUM;
PreemptionMode preemptionMode = PreemptionMode::Disabled;
@@ -48,12 +50,10 @@ struct DispatchFlags {
bool useSLM = false;
bool guardCommandBufferWithPipeControl = false;
bool GSBA32BitRequired = false;
bool mediaSamplerRequired = false;
bool requiresCoherency = false;
bool lowPriority = false;
bool implicitFlush = false;
bool outOfOrderExecutionAllowed = false;
bool specialPipelineSelectMode = false;
bool multiEngineQueue = false;
bool epilogueRequired = false;
};

View File

@@ -37,7 +37,7 @@ void CommandStreamReceiverHw<Family>::programMediaSampler(LinearStream &stream,
using PWR_CLK_STATE_REGISTER = Family::PWR_CLK_STATE_REGISTER;
if (peekHwInfo().platform.eProductFamily == IGFX_ICELAKE_LP) {
if (dispatchFlags.mediaSamplerRequired) {
if (dispatchFlags.pipelineSelectArgs.mediaSamplerRequired) {
if (!lastVmeSubslicesConfig) {
auto pc = addPipeControlCmd(stream);
pc->setDcFlushEnable(true);
@@ -108,7 +108,7 @@ template <>
bool CommandStreamReceiverHw<Family>::detectInitProgrammingFlagsRequired(const DispatchFlags &dispatchFlags) const {
bool flag = DebugManager.flags.ForceCsrReprogramming.get();
if (peekHwInfo().platform.eProductFamily == IGFX_ICELAKE_LP) {
if (!dispatchFlags.mediaSamplerRequired) {
if (!dispatchFlags.pipelineSelectArgs.mediaSamplerRequired) {
if (lastVmeSubslicesConfig) {
flag = true;
}

View File

@@ -29,7 +29,7 @@ uint32_t PreambleHelper<ICLFamily>::getL3Config(const HardwareInfo &hwInfo, bool
template <>
void PreambleHelper<ICLFamily>::programPipelineSelect(LinearStream *pCommandStream,
const DispatchFlags &dispatchFlags,
const PipelineSelectArgs &pipelineSelectArgs,
const HardwareInfo &hwInfo) {
typedef typename ICLFamily::PIPELINE_SELECT PIPELINE_SELECT;
@@ -43,8 +43,8 @@ void PreambleHelper<ICLFamily>::programPipelineSelect(LinearStream *pCommandStre
pCmd->setMaskBits(mask);
pCmd->setPipelineSelection(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU);
pCmd->setMediaSamplerDopClockGateEnable(!dispatchFlags.mediaSamplerRequired);
pCmd->setMediaSamplerPowerClockGateDisable(dispatchFlags.mediaSamplerRequired);
pCmd->setMediaSamplerDopClockGateEnable(!pipelineSelectArgs.mediaSamplerRequired);
pCmd->setMediaSamplerPowerClockGateDisable(pipelineSelectArgs.mediaSamplerRequired);
}
template <>

View File

@@ -38,7 +38,7 @@ bool PreambleHelper<BDWFamily>::isL3Configurable(const HardwareInfo &hwInfo) {
template <>
void PreambleHelper<BDWFamily>::programPipelineSelect(LinearStream *pCommandStream,
const DispatchFlags &dispatchFlags,
const PipelineSelectArgs &pipelineSelectArgs,
const HardwareInfo &hwInfo) {
typedef typename BDWFamily::PIPELINE_SELECT PIPELINE_SELECT;

View File

@@ -34,7 +34,7 @@ bool PreambleHelper<SKLFamily>::isL3Configurable(const HardwareInfo &hwInfo) {
template <>
void PreambleHelper<SKLFamily>::programPipelineSelect(LinearStream *pCommandStream,
const DispatchFlags &dispatchFlags,
const PipelineSelectArgs &pipelineSelectArgs,
const HardwareInfo &hwInfo) {
typedef typename SKLFamily::PIPELINE_SELECT PIPELINE_SELECT;
@@ -45,7 +45,7 @@ void PreambleHelper<SKLFamily>::programPipelineSelect(LinearStream *pCommandStre
auto mask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits;
pCmd->setMaskBits(mask);
pCmd->setPipelineSelection(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU);
pCmd->setMediaSamplerDopClockGateEnable(!dispatchFlags.mediaSamplerRequired);
pCmd->setMediaSamplerDopClockGateEnable(!pipelineSelectArgs.mediaSamplerRequired);
}
template <>

View File

@@ -20,6 +20,7 @@ class Device;
struct DispatchFlags;
class GraphicsAllocation;
class LinearStream;
struct PipelineSelectArgs;
template <typename GfxFamily>
struct PreambleHelper {
@@ -28,7 +29,7 @@ struct PreambleHelper {
static void programL3(LinearStream *pCommandStream, uint32_t l3Config);
static void programPipelineSelect(LinearStream *pCommandStream,
const DispatchFlags &dispatchFlags,
const PipelineSelectArgs &pipelineSelectArgs,
const HardwareInfo &hwInfo);
static uint32_t getDefaultThreadArbitrationPolicy();
static void programThreadArbitration(LinearStream *pCommandStream, uint32_t requiredThreadArbitrationPolicy);
@@ -51,7 +52,10 @@ struct PreambleHelper {
static size_t getKernelDebuggingCommandsSize(bool debuggingActive);
static void programGenSpecificPreambleWorkArounds(LinearStream *pCommandStream, const HardwareInfo &hwInfo);
static uint32_t getUrbEntryAllocationSize();
static size_t getPerDssBackedBufferCommandsSize(const HardwareInfo &hwInfo);
static size_t getCmdSizeForPipelineSelect(const HardwareInfo &hwInfo);
};
template <PRODUCT_FAMILY ProductFamily>

View File

@@ -9,6 +9,7 @@
#include "core/helpers/aligned_memory.h"
#include "runtime/command_stream/preemption.h"
#include "runtime/device/device.h"
#include "runtime/helpers/hardware_commands_helper.h"
#include "runtime/helpers/preamble.h"
#include "runtime/kernel/kernel.h"
@@ -53,6 +54,17 @@ size_t PreambleHelper<GfxFamily>::getAdditionalCommandsSize(const Device &device
return totalSize;
}
// Computes the number of bytes a pipeline-select sequence takes for the given
// hardware: one PIPELINE_SELECT, plus one PIPE_CONTROL when
// HardwareCommandsHelper reports that the "pipe control prior to pipeline
// select" workaround is required.
template <typename GfxFamily>
size_t PreambleHelper<GfxFamily>::getCmdSizeForPipelineSelect(const HardwareInfo &hwInfo) {
    using PIPELINE_SELECT = typename GfxFamily::PIPELINE_SELECT;

    const bool pipeControlWaRequired =
        HardwareCommandsHelper<GfxFamily>::isPipeControlPriorToPipelineSelectWArequired(hwInfo);
    // The workaround contributes nothing when it is not required.
    const size_t workaroundSize = pipeControlWaRequired ? sizeof(PIPE_CONTROL) : 0;

    return sizeof(PIPELINE_SELECT) + workaroundSize;
}
template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programPreamble(LinearStream *pCommandStream, Device &device, uint32_t l3Config,
uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr, GraphicsAllocation *perDssBackedBuffer) {

View File

@@ -184,14 +184,13 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
dispatchFlags.lowPriority = commandQueue.getPriority() == QueuePriority::LOW;
dispatchFlags.throttle = commandQueue.getThrottle();
dispatchFlags.preemptionMode = preemptionMode;
dispatchFlags.mediaSamplerRequired = kernel->isVmeKernel();
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = kernel->isVmeKernel();
dispatchFlags.multiEngineQueue = commandQueue.isMultiEngineQueue();
dispatchFlags.numGrfRequired = kernel->getKernelInfo().patchInfo.executionEnvironment->NumGRFRequired;
if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
dispatchFlags.csrDependencies.fillFromEventsRequest(eventsRequest, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr);
}
dispatchFlags.specialPipelineSelectMode = kernel->requiresSpecialPipelineSelectMode();
dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode = kernel->requiresSpecialPipelineSelectMode();
if (anyUncacheableArgs) {
dispatchFlags.l3CacheSettings = L3CachingSettings::l3CacheOff;
} else if (!kernel->areStatelessWritesUsed()) {