Refactoring of per-DSS backed buffer programming

Signed-off-by: Pawel Wilma <pawel.wilma@intel.com>
This commit is contained in:
Pawel Wilma 2020-11-26 19:02:18 +00:00 committed by Compute-Runtime-Automation
parent 7716988dbb
commit d94e2bf149
12 changed files with 26 additions and 50 deletions

View File

@ -140,25 +140,6 @@ HWTEST_F(UltCommandStreamReceiverTest, givenPreambleSentWhenEstimatingPreambleCm
EXPECT_EQ(expectedDifference, actualDifference);
}
// With ForcePerDssBackedBufferProgramming forced on, the preamble size estimated while the
// preamble is not yet sent must exceed the estimate taken after it is marked as sent by the sum of
// the thread-arbitration, additional and per-DSS backed buffer command sizes.
HWTEST_F(UltCommandStreamReceiverTest, givenPerDssBackBufferProgrammingEnabledWhenEstimatingPreambleCmdSizeThenResultIncludesPerDssBackBufferProgramingCommandsSize) {
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.ForcePerDssBackedBufferProgramming.set(true);

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    // Align the arbitration policy so only the isPreambleSent flag differs between the two estimates.
    csr.requiredThreadArbitrationPolicy = csr.lastSentThreadArbitrationPolicy;

    csr.isPreambleSent = false;
    auto sizeWithPreamblePending = csr.getRequiredCmdSizeForPreamble(*pDevice);

    csr.isPreambleSent = true;
    auto sizeWithPreambleDone = csr.getRequiredCmdSizeForPreamble(*pDevice);

    auto expectedDifference = PreambleHelper<FamilyType>::getThreadArbitrationCommandsSize() +
                              PreambleHelper<FamilyType>::getAdditionalCommandsSize(*pDevice) +
                              PreambleHelper<FamilyType>::getPerDssBackedBufferCommandsSize(pDevice->getHardwareInfo());

    EXPECT_EQ(expectedDifference, sizeWithPreamblePending - sizeWithPreambleDone);
}
HWCMDTEST_F(IGFX_GEN8_CORE, UltCommandStreamReceiverTest, givenMediaVfeStateDirtyEstimatingPreambleCmdSizeThenResultDependsVfeStateProgrammingCmdSize) {
typedef typename FamilyType::MEDIA_VFE_STATE MEDIA_VFE_STATE;
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;

View File

@ -1550,4 +1550,4 @@ TEST(BcsConstantsTests, givenBlitConstantsThenTheyHaveDesiredValues) {
EXPECT_EQ(BlitterConstants::maxBlitHeight, 0x4000u);
EXPECT_EQ(BlitterConstants::maxBlitSetWidth, 0x1FF80u);
EXPECT_EQ(BlitterConstants::maxBlitSetHeight, 0x1FFC0u);
}
}

View File

@ -41,8 +41,10 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::iohState;
using BaseClass::isBlitterDirectSubmissionEnabled;
using BaseClass::isDirectSubmissionEnabled;
using BaseClass::isPerDssBackedBufferSent;
using BaseClass::perDssBackedBuffer;
using BaseClass::programEnginePrologue;
using BaseClass::programPerDssBackedBuffer;
using BaseClass::programPreamble;
using BaseClass::programStateSip;
using BaseClass::requiresInstructionCacheFlush;

View File

@ -301,6 +301,7 @@ class CommandStreamReceiver {
bool isPreambleSent = false;
bool isStateSipSent = false;
bool isEnginePrologueSent = false;
bool isPerDssBackedBufferSent = false;
bool GSBAFor32BitProgrammed = false;
bool bindingTableBaseAddressRequired = false;
bool mediaVfeStateDirty = true;

View File

@ -67,6 +67,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
size_t getCmdSizeForComputeMode();
size_t getCmdSizeForMediaSampler(bool mediaSamplerRequired) const;
size_t getCmdSizeForEngineMode(const DispatchFlags &dispatchFlags) const;
size_t getCmdSizeForPerDssBackedBuffer(const HardwareInfo &hwInfo);
bool isComputeModeNeeded() const;
bool isPipelineSelectAlreadyProgrammed() const;
@ -118,6 +119,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
void programEpilogue(LinearStream &csr, void **batchBufferEndLocation, DispatchFlags &dispatchFlags);
void programEpliogueCommands(LinearStream &csr, const DispatchFlags &dispatchFlags);
void programMediaSampler(LinearStream &csr, DispatchFlags &dispatchFlags);
void programPerDssBackedBuffer(LinearStream &scr, Device &device, DispatchFlags &dispatchFlags);
void programStateSip(LinearStream &cmdStream, Device &device);
void programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t maxFrontEndThreads);
void programStallingPipeControlForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags);

View File

@ -140,12 +140,6 @@ inline size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdSizeForPreamble(
if (!this->isPreambleSent || this->lastSentThreadArbitrationPolicy != this->requiredThreadArbitrationPolicy) {
size += PreambleHelper<GfxFamily>::getThreadArbitrationCommandsSize();
}
if (DebugManager.flags.ForcePerDssBackedBufferProgramming.get()) {
if (!this->isPreambleSent) {
size += PreambleHelper<GfxFamily>::getPerDssBackedBufferCommandsSize(device.getHardwareInfo());
}
}
if (!this->isPreambleSent) {
if (DebugManager.flags.ForceSemaphoreDelayBetweenWaits.get() > -1) {
size += PreambleHelper<GfxFamily>::getSemaphoreDelayCommandSize();
@ -339,6 +333,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
programL3(commandStreamCSR, dispatchFlags, newL3Config);
programPreamble(commandStreamCSR, device, dispatchFlags, newL3Config);
programMediaSampler(commandStreamCSR, dispatchFlags);
programPerDssBackedBuffer(commandStreamCSR, device, dispatchFlags);
if (this->lastSentThreadArbitrationPolicy != this->requiredThreadArbitrationPolicy) {
PreambleHelper<GfxFamily>::programThreadArbitration(&commandStreamCSR, this->requiredThreadArbitrationPolicy);
@ -761,6 +756,7 @@ size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const Dispat
size += getCmdSizeForMediaSampler(dispatchFlags.pipelineSelectArgs.mediaSamplerRequired);
size += getCmdSizeForPipelineSelect();
size += getCmdSizeForPreemption(dispatchFlags);
size += getCmdSizeForPerDssBackedBuffer(device.getHardwareInfo());
size += getCmdSizeForEpilogue(dispatchFlags);
size += getCmdsSizeForHardwareContext();
@ -868,8 +864,7 @@ inline void CommandStreamReceiverHw<GfxFamily>::programStateSip(LinearStream &cm
template <typename GfxFamily>
inline void CommandStreamReceiverHw<GfxFamily>::programPreamble(LinearStream &csr, Device &device, DispatchFlags &dispatchFlags, uint32_t &newL3Config) {
if (!this->isPreambleSent) {
GraphicsAllocation *perDssBackedBufferToUse = dispatchFlags.usePerDssBackedBuffer ? this->perDssBackedBuffer : nullptr;
PreambleHelper<GfxFamily>::programPreamble(&csr, device, newL3Config, this->requiredThreadArbitrationPolicy, this->preemptionAllocation, perDssBackedBufferToUse);
PreambleHelper<GfxFamily>::programPreamble(&csr, device, newL3Config, this->requiredThreadArbitrationPolicy, this->preemptionAllocation);
this->isPreambleSent = true;
this->lastSentL3Config = newL3Config;
this->lastSentThreadArbitrationPolicy = this->requiredThreadArbitrationPolicy;
@ -897,6 +892,10 @@ template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::programMediaSampler(LinearStream &commandStream, DispatchFlags &dispatchFlags) {
    // Empty body: this definition writes nothing to commandStream and does not read dispatchFlags.
}
// Per-DSS backed buffer programming step, called from flushTask right after programMediaSampler.
// This definition has an empty body, so it adds no commands to commandStream.
// NOTE(review): presumably specialized for hardware families that need real programming - confirm.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::programPerDssBackedBuffer(LinearStream &commandStream, Device &device, DispatchFlags &dispatchFlags) {
}
template <typename GfxFamily>
size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForMediaSampler(bool mediaSamplerRequired) const {
return 0;
@ -1188,4 +1187,9 @@ inline bool CommandStreamReceiverHw<GfxFamily>::checkDirectSubmissionSupportsEng
return supported;
}
// Size contribution of the per-DSS backed buffer commands; added to the total computed by
// getRequiredCmdStreamSize. This definition always returns 0 and does not read hwInfo.
// NOTE(review): presumably specialized per hardware family alongside programPerDssBackedBuffer - confirm.
template <typename GfxFamily>
size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForPerDssBackedBuffer(const HardwareInfo &hwInfo) {
return 0;
}
} // namespace NEO

View File

@ -49,9 +49,8 @@ struct PreambleHelper {
KernelExecutionType kernelExecutionType);
static void programAdditionalFieldsInVfeState(VFE_STATE_TYPE *mediaVfeState, const HardwareInfo &hwInfo);
static void programPreamble(LinearStream *pCommandStream, Device &device, uint32_t l3Config,
uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr, GraphicsAllocation *perDssBackedBuffer);
uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr);
static void programKernelDebugging(LinearStream *pCommandStream);
static void programPerDssBackedBuffer(LinearStream *pCommandStream, const HardwareInfo &hwInfo, GraphicsAllocation *perDssBackBufferOffset);
static void programSemaphoreDelay(LinearStream *pCommandStream);
static uint32_t getL3Config(const HardwareInfo &hwInfo, bool useSLM);
static bool isL3Configurable(const HardwareInfo &hwInfo);
@ -61,7 +60,6 @@ struct PreambleHelper {
static size_t getKernelDebuggingCommandsSize(bool debuggingActive);
static void programGenSpecificPreambleWorkArounds(LinearStream *pCommandStream, const HardwareInfo &hwInfo);
static uint32_t getUrbEntryAllocationSize();
static size_t getPerDssBackedBufferCommandsSize(const HardwareInfo &hwInfo);
static size_t getCmdSizeForPipelineSelect(const HardwareInfo &hwInfo);
static size_t getSemaphoreDelayCommandSize();
static uint32_t getScratchSizeValueToProgramMediaVfeState(uint32_t scratchSize);

View File

@ -33,15 +33,6 @@ template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programGenSpecificPreambleWorkArounds(LinearStream *pCommandStream, const HardwareInfo &hwInfo) {
    // Empty body: this definition adds no workaround commands to pCommandStream and ignores hwInfo.
}
// PreambleHelper-level per-DSS backed buffer programming.
// This definition has an empty body: nothing is written to pCommandStream, and hwInfo and
// perDssBackBufferOffset are unused.
template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programPerDssBackedBuffer(LinearStream *pCommandStream, const HardwareInfo &hwInfo, GraphicsAllocation *perDssBackBufferOffset) {
}
// Size of the commands emitted by PreambleHelper::programPerDssBackedBuffer.
// This definition always returns 0 and does not read hwInfo.
template <typename GfxFamily>
size_t PreambleHelper<GfxFamily>::getPerDssBackedBufferCommandsSize(const HardwareInfo &hwInfo) {
return 0;
}
template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programSemaphoreDelay(LinearStream *pCommandStream) {
if (DebugManager.flags.ForceSemaphoreDelayBetweenWaits.get() > -1) {
@ -79,7 +70,7 @@ size_t PreambleHelper<GfxFamily>::getCmdSizeForPipelineSelect(const HardwareInfo
template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programPreamble(LinearStream *pCommandStream, Device &device, uint32_t l3Config,
uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr, GraphicsAllocation *perDssBackedBuffer) {
uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr) {
programL3(pCommandStream, l3Config);
programThreadArbitration(pCommandStream, requiredThreadArbitrationPolicy);
programPreemption(pCommandStream, device, preemptionCsr);
@ -87,9 +78,6 @@ void PreambleHelper<GfxFamily>::programPreamble(LinearStream *pCommandStream, De
programKernelDebugging(pCommandStream);
}
programGenSpecificPreambleWorkArounds(pCommandStream, device.getHardwareInfo());
if (perDssBackedBuffer != nullptr) {
programPerDssBackedBuffer(pCommandStream, device.getHardwareInfo(), perDssBackedBuffer);
}
programSemaphoreDelay(pCommandStream);
}

View File

@ -124,7 +124,7 @@ GEN11TEST_F(ThreadArbitrationGen11, givenPreambleWhenItIsProgrammedThenThreadArb
MockDevice mockDevice;
PreambleHelper<FamilyType>::programPreamble(&linearStream, mockDevice, l3Config,
ThreadArbitrationPolicy::RoundRobin,
nullptr, nullptr);
nullptr);
parseCommands<FamilyType>(cs);

View File

@ -24,7 +24,7 @@ HWTEST2_F(TglLpSlm, givenTglLpWhenPreambleIsBeingProgrammedThenThreadArbitration
MockDevice mockDevice;
PreambleHelper<TGLLPFamily>::programPreamble(&linearStream, mockDevice, l3Config,
ThreadArbitrationPolicy::RoundRobin,
nullptr, nullptr);
nullptr);
parseCommands<TGLLPFamily>(cs);

View File

@ -80,7 +80,7 @@ SKLTEST_F(ThreadArbitration, givenPreambleWhenItIsProgrammedThenThreadArbitratio
MockDevice mockDevice;
PreambleHelper<SKLFamily>::programPreamble(&linearStream, mockDevice, l3Config,
ThreadArbitrationPolicy::RoundRobin,
nullptr, nullptr);
nullptr);
parseCommands<SKLFamily>(cs);

View File

@ -74,7 +74,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, PreambleTest, givenMidThreadPreemptionWhenPreambleIs
MockGraphicsAllocation csrSurface(reinterpret_cast<void *>(minCsrAlignment), 1024);
PreambleHelper<FamilyType>::programPreamble(&preambleStream, *mockDevice, 0U,
ThreadArbitrationPolicy::RoundRobin, &csrSurface, nullptr);
ThreadArbitrationPolicy::RoundRobin, &csrSurface);
PreemptionHelper::programStateSip<FamilyType>(preemptionStream, *mockDevice);
@ -144,7 +144,7 @@ HWTEST_F(PreambleTest, givenKernelDebuggingActiveWhenPreambleIsProgrammedThenPro
LinearStream preambleStream(&*preambleBuffer.begin(), preambleBuffer.size());
PreambleHelper<FamilyType>::programPreamble(&preambleStream, *mockDevice, 0U,
ThreadArbitrationPolicy::RoundRobin, nullptr, nullptr);
ThreadArbitrationPolicy::RoundRobin, nullptr);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(preambleStream);
@ -158,7 +158,7 @@ HWTEST_F(PreambleTest, givenKernelDebuggingActiveWhenPreambleIsProgrammedThenPro
StackVec<char, 8192> preambleBuffer2(8192);
preambleStream.replaceBuffer(&*preambleBuffer2.begin(), preambleBuffer2.size());
PreambleHelper<FamilyType>::programPreamble(&preambleStream, *mockDevice, 0U,
ThreadArbitrationPolicy::RoundRobin, preemptionAllocation, nullptr);
ThreadArbitrationPolicy::RoundRobin, preemptionAllocation);
HardwareParse hwParser2;
hwParser2.parseCommands<FamilyType>(preambleStream);
cmdList = hwParser2.getCommandsList<MI_LOAD_REGISTER_IMM>();