Refactoring of per-DSS backed buffer programming

Signed-off-by: Pawel Wilma <pawel.wilma@intel.com>
This commit is contained in:
Pawel Wilma
2020-11-26 19:02:18 +00:00
committed by Compute-Runtime-Automation
parent 7716988dbb
commit d94e2bf149
12 changed files with 26 additions and 50 deletions

View File

@@ -301,6 +301,7 @@ class CommandStreamReceiver {
bool isPreambleSent = false;
bool isStateSipSent = false;
bool isEnginePrologueSent = false;
bool isPerDssBackedBufferSent = false;
bool GSBAFor32BitProgrammed = false;
bool bindingTableBaseAddressRequired = false;
bool mediaVfeStateDirty = true;

View File

@@ -67,6 +67,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
size_t getCmdSizeForComputeMode();
size_t getCmdSizeForMediaSampler(bool mediaSamplerRequired) const;
size_t getCmdSizeForEngineMode(const DispatchFlags &dispatchFlags) const;
size_t getCmdSizeForPerDssBackedBuffer(const HardwareInfo &hwInfo);
bool isComputeModeNeeded() const;
bool isPipelineSelectAlreadyProgrammed() const;
@@ -118,6 +119,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
void programEpilogue(LinearStream &csr, void **batchBufferEndLocation, DispatchFlags &dispatchFlags);
void programEpliogueCommands(LinearStream &csr, const DispatchFlags &dispatchFlags);
void programMediaSampler(LinearStream &csr, DispatchFlags &dispatchFlags);
void programPerDssBackedBuffer(LinearStream &scr, Device &device, DispatchFlags &dispatchFlags);
void programStateSip(LinearStream &cmdStream, Device &device);
void programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t maxFrontEndThreads);
void programStallingPipeControlForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags);

View File

@@ -140,12 +140,6 @@ inline size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdSizeForPreamble(
if (!this->isPreambleSent || this->lastSentThreadArbitrationPolicy != this->requiredThreadArbitrationPolicy) {
size += PreambleHelper<GfxFamily>::getThreadArbitrationCommandsSize();
}
if (DebugManager.flags.ForcePerDssBackedBufferProgramming.get()) {
if (!this->isPreambleSent) {
size += PreambleHelper<GfxFamily>::getPerDssBackedBufferCommandsSize(device.getHardwareInfo());
}
}
if (!this->isPreambleSent) {
if (DebugManager.flags.ForceSemaphoreDelayBetweenWaits.get() > -1) {
size += PreambleHelper<GfxFamily>::getSemaphoreDelayCommandSize();
@@ -339,6 +333,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
programL3(commandStreamCSR, dispatchFlags, newL3Config);
programPreamble(commandStreamCSR, device, dispatchFlags, newL3Config);
programMediaSampler(commandStreamCSR, dispatchFlags);
programPerDssBackedBuffer(commandStreamCSR, device, dispatchFlags);
if (this->lastSentThreadArbitrationPolicy != this->requiredThreadArbitrationPolicy) {
PreambleHelper<GfxFamily>::programThreadArbitration(&commandStreamCSR, this->requiredThreadArbitrationPolicy);
@@ -761,6 +756,7 @@ size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const Dispat
size += getCmdSizeForMediaSampler(dispatchFlags.pipelineSelectArgs.mediaSamplerRequired);
size += getCmdSizeForPipelineSelect();
size += getCmdSizeForPreemption(dispatchFlags);
size += getCmdSizeForPerDssBackedBuffer(device.getHardwareInfo());
size += getCmdSizeForEpilogue(dispatchFlags);
size += getCmdsSizeForHardwareContext();
@@ -868,8 +864,7 @@ inline void CommandStreamReceiverHw<GfxFamily>::programStateSip(LinearStream &cm
template <typename GfxFamily>
inline void CommandStreamReceiverHw<GfxFamily>::programPreamble(LinearStream &csr, Device &device, DispatchFlags &dispatchFlags, uint32_t &newL3Config) {
if (!this->isPreambleSent) {
GraphicsAllocation *perDssBackedBufferToUse = dispatchFlags.usePerDssBackedBuffer ? this->perDssBackedBuffer : nullptr;
PreambleHelper<GfxFamily>::programPreamble(&csr, device, newL3Config, this->requiredThreadArbitrationPolicy, this->preemptionAllocation, perDssBackedBufferToUse);
PreambleHelper<GfxFamily>::programPreamble(&csr, device, newL3Config, this->requiredThreadArbitrationPolicy, this->preemptionAllocation);
this->isPreambleSent = true;
this->lastSentL3Config = newL3Config;
this->lastSentThreadArbitrationPolicy = this->requiredThreadArbitrationPolicy;
@@ -897,6 +892,10 @@ template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::programMediaSampler(LinearStream &commandStream, DispatchFlags &dispatchFlags) {
}
// Hook for emitting per-DSS backed buffer commands into `commandStream`.
// This generic definition has an empty body: it writes nothing to the stream and
// does not touch `device` or `dispatchFlags`.
// NOTE(review): presumably specialized for hardware families that need the buffer
// programmed - confirm against the family-specific sources.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::programPerDssBackedBuffer(LinearStream &commandStream, Device &device, DispatchFlags &dispatchFlags) {
}
template <typename GfxFamily>
size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForMediaSampler(bool mediaSamplerRequired) const {
return 0;
@@ -1188,4 +1187,9 @@ inline bool CommandStreamReceiverHw<GfxFamily>::checkDirectSubmissionSupportsEng
return supported;
}
// Reports how many bytes of command stream space the per-DSS backed buffer
// programming needs. The generic definition reserves nothing and ignores `hwInfo`.
// NOTE(review): presumably specialized for families that emit commands in
// programPerDssBackedBuffer() - confirm.
template <typename GfxFamily>
size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForPerDssBackedBuffer(const HardwareInfo &hwInfo) {
    constexpr size_t noCommandsRequired = 0u;
    return noCommandsRequired;
}
} // namespace NEO

View File

@@ -49,9 +49,8 @@ struct PreambleHelper {
KernelExecutionType kernelExecutionType);
static void programAdditionalFieldsInVfeState(VFE_STATE_TYPE *mediaVfeState, const HardwareInfo &hwInfo);
static void programPreamble(LinearStream *pCommandStream, Device &device, uint32_t l3Config,
uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr, GraphicsAllocation *perDssBackedBuffer);
uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr);
static void programKernelDebugging(LinearStream *pCommandStream);
static void programPerDssBackedBuffer(LinearStream *pCommandStream, const HardwareInfo &hwInfo, GraphicsAllocation *perDssBackBufferOffset);
static void programSemaphoreDelay(LinearStream *pCommandStream);
static uint32_t getL3Config(const HardwareInfo &hwInfo, bool useSLM);
static bool isL3Configurable(const HardwareInfo &hwInfo);
@@ -61,7 +60,6 @@ struct PreambleHelper {
static size_t getKernelDebuggingCommandsSize(bool debuggingActive);
static void programGenSpecificPreambleWorkArounds(LinearStream *pCommandStream, const HardwareInfo &hwInfo);
static uint32_t getUrbEntryAllocationSize();
static size_t getPerDssBackedBufferCommandsSize(const HardwareInfo &hwInfo);
static size_t getCmdSizeForPipelineSelect(const HardwareInfo &hwInfo);
static size_t getSemaphoreDelayCommandSize();
static uint32_t getScratchSizeValueToProgramMediaVfeState(uint32_t scratchSize);

View File

@@ -33,15 +33,6 @@ template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programGenSpecificPreambleWorkArounds(LinearStream *pCommandStream, const HardwareInfo &hwInfo) {
}
template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programPerDssBackedBuffer(LinearStream *pCommandStream, const HardwareInfo &hwInfo, GraphicsAllocation *perDssBackBufferOffset) {
}
template <typename GfxFamily>
size_t PreambleHelper<GfxFamily>::getPerDssBackedBufferCommandsSize(const HardwareInfo &hwInfo) {
return 0;
}
template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programSemaphoreDelay(LinearStream *pCommandStream) {
if (DebugManager.flags.ForceSemaphoreDelayBetweenWaits.get() > -1) {
@@ -79,7 +70,7 @@ size_t PreambleHelper<GfxFamily>::getCmdSizeForPipelineSelect(const HardwareInfo
template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programPreamble(LinearStream *pCommandStream, Device &device, uint32_t l3Config,
uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr, GraphicsAllocation *perDssBackedBuffer) {
uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr) {
programL3(pCommandStream, l3Config);
programThreadArbitration(pCommandStream, requiredThreadArbitrationPolicy);
programPreemption(pCommandStream, device, preemptionCsr);
@@ -87,9 +78,6 @@ void PreambleHelper<GfxFamily>::programPreamble(LinearStream *pCommandStream, De
programKernelDebugging(pCommandStream);
}
programGenSpecificPreambleWorkArounds(pCommandStream, device.getHardwareInfo());
if (perDssBackedBuffer != nullptr) {
programPerDssBackedBuffer(pCommandStream, device.getHardwareInfo(), perDssBackedBuffer);
}
programSemaphoreDelay(pCommandStream);
}