Unify programming of ending commands in direct submission path

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2022-11-04 14:56:15 +00:00
committed by Compute-Runtime-Automation
parent 9f465426e8
commit 67af920281
12 changed files with 23 additions and 35 deletions

View File

@@ -53,7 +53,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
size_t getCmdsSizeForHardwareContext() const override;
static void addBatchBufferEnd(LinearStream &commandStream, void **patchLocation);
void programEndingCmd(LinearStream &commandStream, Device &device, void **patchLocation, bool directSubmissionEnabled);
void programEndingCmd(LinearStream &commandStream, void **patchLocation, bool directSubmissionEnabled, bool sipWaAllowed);
void addBatchBufferStart(MI_BATCH_BUFFER_START *commandBufferMemory, uint64_t startAddress, bool secondary);
size_t getRequiredStateBaseAddressSize(const Device &device) const;

View File

@@ -100,7 +100,7 @@ inline void CommandStreamReceiverHw<GfxFamily>::addBatchBufferEnd(LinearStream &
}
template <typename GfxFamily>
inline void CommandStreamReceiverHw<GfxFamily>::programEndingCmd(LinearStream &commandStream, Device &device, void **patchLocation, bool directSubmissionEnabled) {
inline void CommandStreamReceiverHw<GfxFamily>::programEndingCmd(LinearStream &commandStream, void **patchLocation, bool directSubmissionEnabled, bool sipWaAllowed) {
if (directSubmissionEnabled) {
uint64_t startAddress = commandStream.getGraphicsAllocation()->getGpuAddress() + commandStream.getUsed();
if (DebugManager.flags.BatchBufferStartPrepatchingWaEnabled.get() == 0) {
@@ -114,8 +114,8 @@ inline void CommandStreamReceiverHw<GfxFamily>::programEndingCmd(LinearStream &c
addBatchBufferStart(&cmd, startAddress, false);
*bbStart = cmd;
} else {
if (!EngineHelpers::isBcs(osContext->getEngineType())) {
PreemptionHelper::programStateSipEndWa<GfxFamily>(commandStream, device);
if (sipWaAllowed) {
PreemptionHelper::programStateSipEndWa<GfxFamily>(commandStream, peekHwInfo(), executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->debugger.get());
}
this->addBatchBufferEnd(commandStream, patchLocation);
}
@@ -560,7 +560,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
GraphicsAllocation *chainedBatchBuffer = nullptr;
bool directSubmissionEnabled = isDirectSubmissionEnabled();
if (submitTask) {
programEndingCmd(commandStreamTask, device, &bbEndLocation, directSubmissionEnabled);
programEndingCmd(commandStreamTask, &bbEndLocation, directSubmissionEnabled, true);
EncodeNoop<GfxFamily>::emitNoop(commandStreamTask, bbEndPaddingSize);
EncodeNoop<GfxFamily>::alignToCacheLine(commandStreamTask);
@@ -591,7 +591,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
this->programEpilogue(commandStreamCSR, device, &bbEndLocation, dispatchFlags);
} else if (submitCSR) {
programEndingCmd(commandStreamCSR, device, &bbEndLocation, directSubmissionEnabled);
programEndingCmd(commandStreamCSR, &bbEndLocation, directSubmissionEnabled, true);
EncodeNoop<GfxFamily>::emitNoop(commandStreamCSR, bbEndPaddingSize);
EncodeNoop<GfxFamily>::alignToCacheLine(commandStreamCSR);
DEBUG_BREAK_IF(commandStreamCSR.getUsed() > commandStreamCSR.getMaxAvailableSpace());
@@ -1149,7 +1149,7 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropertiesCo
}
void *endingCmdPtr = nullptr;
programEndingCmd(commandStream, device, &endingCmdPtr, blitterDirectSubmission);
programEndingCmd(commandStream, &endingCmdPtr, blitterDirectSubmission, false);
EncodeNoop<GfxFamily>::alignToCacheLine(commandStream);
@@ -1253,16 +1253,7 @@ void CommandStreamReceiverHw<GfxFamily>::flushSmallTask(LinearStream &commandStr
using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END;
void *endingCmdPtr = nullptr;
if (isAnyDirectSubmissionEnabled()) {
endingCmdPtr = commandStreamTask.getSpace(0);
EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&commandStreamTask,
0ull,
false);
} else {
auto batchBufferEnd = commandStreamTask.getSpaceForCmd<MI_BATCH_BUFFER_END>();
*batchBufferEnd = GfxFamily::cmdInitBatchBufferEnd;
}
programEndingCmd(commandStreamTask, &endingCmdPtr, isAnyDirectSubmissionEnabled(), false);
auto bytesToPad = EncodeBatchBufferStartOrEnd<GfxFamily>::getBatchBufferStartSize() -
EncodeBatchBufferStartOrEnd<GfxFamily>::getBatchBufferEndSize();
@@ -1338,7 +1329,7 @@ inline void CommandStreamReceiverHw<GfxFamily>::programEpilogue(LinearStream &cs
addBatchBufferStart(reinterpret_cast<typename GfxFamily::MI_BATCH_BUFFER_START *>(*batchBufferEndLocation), gpuAddress, false);
this->programEpliogueCommands(csr, dispatchFlags);
programEndingCmd(csr, device, batchBufferEndLocation, isDirectSubmissionEnabled());
programEndingCmd(csr, batchBufferEndLocation, isDirectSubmissionEnabled(), !EngineHelpers::isBcs(osContext->getEngineType()));
EncodeNoop<GfxFamily>::alignToCacheLine(csr);
}
}

View File

@@ -60,7 +60,7 @@ class PreemptionHelper {
static void programStateSip(LinearStream &preambleCmdStream, Device &device, LogicalStateHelper *logicalStateHelper);
template <typename GfxFamily>
static void programStateSipEndWa(LinearStream &cmdStream, Device &device);
static void programStateSipEndWa(LinearStream &cmdStream, const HardwareInfo &hwInfo, bool debuggerActive);
template <typename GfxFamily>
static size_t getRequiredCmdStreamSize(PreemptionMode newPreemptionMode, PreemptionMode oldPreemptionMode);

View File

@@ -58,7 +58,7 @@ void PreemptionHelper::programStateSipCmd(LinearStream &preambleCmdStream, Graph
}
template <typename GfxFamily>
void PreemptionHelper::programStateSipEndWa(LinearStream &cmdStream, Device &device) {}
void PreemptionHelper::programStateSipEndWa(LinearStream &cmdStream, const HardwareInfo &hwInfo, bool debuggerActive) {}
template <typename GfxFamily>
void PreemptionHelper::programCmdStream(LinearStream &cmdStream, PreemptionMode newPreemptionMode,

View File

@@ -43,13 +43,12 @@ void PreemptionHelper::programStateSip<GfxFamily>(LinearStream &preambleCmdStrea
}
template <>
void PreemptionHelper::programStateSipEndWa<GfxFamily>(LinearStream &cmdStream, Device &device) {
void PreemptionHelper::programStateSipEndWa<GfxFamily>(LinearStream &cmdStream, const HardwareInfo &hwInfo, bool debuggerActive) {
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
bool debuggingEnabled = device.getDebugger() != nullptr;
if (debuggingEnabled) {
HwHelper &hwHelper = HwHelper::get(device.getHardwareInfo().platform.eRenderCoreFamily);
if (hwHelper.isSipWANeeded(device.getHardwareInfo())) {
if (debuggerActive) {
HwHelper &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
if (hwHelper.isSipWANeeded(hwInfo)) {
NEO::PipeControlArgs args;
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(cmdStream, args);

View File

@@ -26,6 +26,6 @@ template size_t PreemptionHelper::getPreemptionWaCsSize<GfxFamily>(const Device
template void PreemptionHelper::applyPreemptionWaCmdsBegin<GfxFamily>(LinearStream *pCommandStream, const Device &device);
template void PreemptionHelper::applyPreemptionWaCmdsEnd<GfxFamily>(LinearStream *pCommandStream, const Device &device);
template void PreemptionHelper::programInterfaceDescriptorDataPreemption<GfxFamily>(INTERFACE_DESCRIPTOR_DATA<GfxFamily> *idd, PreemptionMode preemptionMode);
template void PreemptionHelper::programStateSipEndWa<GfxFamily>(LinearStream &cmdStream, Device &device);
template void PreemptionHelper::programStateSipEndWa<GfxFamily>(LinearStream &cmdStream, const HardwareInfo &hwInfo, bool debuggerActive);
} // namespace NEO

View File

@@ -26,5 +26,5 @@ template size_t PreemptionHelper::getPreemptionWaCsSize<GfxFamily>(const Device
template void PreemptionHelper::applyPreemptionWaCmdsBegin<GfxFamily>(LinearStream *pCommandStream, const Device &device);
template void PreemptionHelper::applyPreemptionWaCmdsEnd<GfxFamily>(LinearStream *pCommandStream, const Device &device);
template void PreemptionHelper::programInterfaceDescriptorDataPreemption<GfxFamily>(INTERFACE_DESCRIPTOR_DATA<GfxFamily> *idd, PreemptionMode preemptionMode);
template void PreemptionHelper::programStateSipEndWa<GfxFamily>(LinearStream &cmdStream, Device &device);
template void PreemptionHelper::programStateSipEndWa<GfxFamily>(LinearStream &cmdStream, const HardwareInfo &hwInfo, bool debuggerActive);
} // namespace NEO

View File

@@ -118,5 +118,5 @@ void PreemptionHelper::programInterfaceDescriptorDataPreemption<GfxFamily>(INTER
}
template size_t PreemptionHelper::getRequiredCmdStreamSize<GfxFamily>(PreemptionMode newPreemptionMode, PreemptionMode oldPreemptionMode);
template void PreemptionHelper::programStateSipEndWa<GfxFamily>(LinearStream &cmdStream, Device &device);
template void PreemptionHelper::programStateSipEndWa<GfxFamily>(LinearStream &cmdStream, const HardwareInfo &hwInfo, bool debuggerActive);
} // namespace NEO

View File

@@ -76,6 +76,6 @@ template void PreemptionHelper::programStateSip<GfxFamily>(LinearStream &preambl
template void PreemptionHelper::programStateSipCmd<GfxFamily>(LinearStream &preambleCmdStream, GraphicsAllocation *sipAllocation, LogicalStateHelper *logicalStateHelper);
template size_t PreemptionHelper::getRequiredStateSipCmdSize<GfxFamily>(Device &device, bool isRcs);
template size_t PreemptionHelper::getRequiredCmdStreamSize<GfxFamily>(PreemptionMode newPreemptionMode, PreemptionMode oldPreemptionMode);
template void PreemptionHelper::programStateSipEndWa<GfxFamily>(LinearStream &cmdStream, Device &device);
template void PreemptionHelper::programStateSipEndWa<GfxFamily>(LinearStream &cmdStream, const HardwareInfo &hwInfo, bool debuggerActive);
} // namespace NEO

View File

@@ -676,8 +676,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionAvailableWhenProgrammingEndi
uint8_t buffer[128];
mockCsr->commandStream.replaceBuffer(&buffer[0], 128u);
mockCsr->commandStream.replaceGraphicsAllocation(&mockAllocation);
auto &device = *pDevice;
mockCsr->programEndingCmd(mockCsr->commandStream, device, &location, ret);
mockCsr->programEndingCmd(mockCsr->commandStream, &location, ret, true);
EXPECT_EQ(sizeof(MI_BATCH_BUFFER_START), mockCsr->commandStream.getUsed());
DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
@@ -711,7 +710,6 @@ HWTEST_F(DirectSubmissionTest, givenDebugFlagSetWhenProgrammingEndingCommandThen
auto &cmdStream = mockCsr->commandStream;
cmdStream.replaceBuffer(&buffer[0], 256);
cmdStream.replaceGraphicsAllocation(&mockAllocation);
auto &device = *pDevice;
for (int32_t value : {-1, 0, 1}) {
DebugManager.flags.BatchBufferStartPrepatchingWaEnabled.set(value);
@@ -719,7 +717,7 @@ HWTEST_F(DirectSubmissionTest, givenDebugFlagSetWhenProgrammingEndingCommandThen
auto currectBbStartCmd = reinterpret_cast<MI_BATCH_BUFFER_START *>(cmdStream.getSpace(0));
uint64_t expectedGpuVa = cmdStream.getGraphicsAllocation()->getGpuAddress() + cmdStream.getUsed();
mockCsr->programEndingCmd(cmdStream, device, &location, ret);
mockCsr->programEndingCmd(cmdStream, &location, ret, true);
EncodeNoop<FamilyType>::alignToCacheLine(cmdStream);
if (value == 0) {

View File

@@ -82,7 +82,7 @@ XEHPTEST_F(PreemptionXeHPTest, givenRevisionA0toBWhenProgrammingSipEndWaThenGlob
StackVec<char, 1024> streamStorage(1024);
LinearStream cmdStream{streamStorage.begin(), streamStorage.size()};
PreemptionHelper::programStateSipEndWa<FamilyType>(cmdStream, *mockDevice);
PreemptionHelper::programStateSipEndWa<FamilyType>(cmdStream, hwInfo, true);
EXPECT_NE(0U, cmdStream.getUsed());
GenCmdList cmdList;