performance: in immediate flush, add a jump to the immediate batch buffer when preamble commands are present

Related-To: NEO-7808

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz 2023-06-22 13:17:49 +00:00 committed by Compute-Runtime-Automation
parent 0d34323932
commit bd15d067d5
4 changed files with 64 additions and 1 deletions

View File

@ -252,6 +252,12 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
inline void handleImmediateFlushOneTimeContextInitState(ImmediateDispatchFlags &dispatchFlags, ImmediateFlushData &flushData, Device &device);
inline void dispatchImmediateFlushOneTimeContextInitCommand(ImmediateFlushData &flushData, LinearStream &csrStream, Device &device);
inline void handleImmediateFlushJumpToImmediate(ImmediateFlushData &flushData);
// When flushData.estimatedSize is non-zero, programs a batch buffer start into csrStream
// that targets immediateCommandStream's GPU base plus immediateCommandStreamStart,
// then aligns csrStream to a cache line. Does nothing when the estimated size is zero.
inline void dispatchImmediateFlushJumpToImmediateCommand(LinearStream &immediateCommandStream,
size_t immediateCommandStreamStart,
ImmediateFlushData &flushData,
LinearStream &csrStream);
inline void handleImmediateFlushAllocationsResidency(Device &device);
HeapDirtyState dshState;

View File

@ -305,6 +305,8 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushImmediateTask(
handleImmediateFlushStateBaseAddressState(dispatchFlags, flushData, device);
handleImmediateFlushOneTimeContextInitState(dispatchFlags, flushData, device);
handleImmediateFlushJumpToImmediate(flushData);
auto &csrCommandStream = getCS(flushData.estimatedSize);
dispatchImmediateFlushPipelineSelectCommand(flushData, csrCommandStream);
@ -313,6 +315,8 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushImmediateTask(
dispatchImmediateFlushStateBaseAddressCommand(flushData, csrCommandStream, device);
dispatchImmediateFlushOneTimeContextInitCommand(flushData, csrCommandStream, device);
dispatchImmediateFlushJumpToImmediateCommand(immediateCommandStream, immediateCommandStreamStart, flushData, csrCommandStream);
handleImmediateFlushAllocationsResidency(device);
CompletionStamp completionStamp = {
@ -2050,4 +2054,25 @@ void CommandStreamReceiverHw<GfxFamily>::handleImmediateFlushAllocationsResidenc
}
}
// Extends the size estimate kept in flushData with the room needed for a batch buffer
// start command and rounds the total up to a cache line boundary.
// The estimate is left untouched when it is zero, i.e. when nothing has been
// accounted for so far.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::handleImmediateFlushJumpToImmediate(ImmediateFlushData &flushData) {
    const bool hasEstimatedCommands = flushData.estimatedSize > 0;
    if (!hasEstimatedCommands) {
        return;
    }

    // Account for the jump command first, then pad the whole estimate to a cache line.
    flushData.estimatedSize += EncodeBatchBufferStartOrEnd<GfxFamily>::getBatchBufferStartSize();
    flushData.estimatedSize = alignUp(flushData.estimatedSize, MemoryConstants::cacheLineSize);
}
// Emits into csrStream a batch buffer start command pointing at the beginning of the
// current immediate task, then aligns csrStream to a cache line.
//
// immediateCommandStream      - stream holding the immediate task; its GPU base is read.
// immediateCommandStreamStart - offset of the task start within that stream.
// flushData                   - only estimatedSize is read; a zero value means no jump is emitted.
// csrStream                   - stream that receives the jump and the alignment padding.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::dispatchImmediateFlushJumpToImmediateCommand(LinearStream &immediateCommandStream,
                                                                                      size_t immediateCommandStreamStart,
                                                                                      ImmediateFlushData &flushData,
                                                                                      LinearStream &csrStream) {
    const bool hasEstimatedCommands = flushData.estimatedSize > 0;
    if (!hasEstimatedCommands) {
        return;
    }

    // Target of the jump: GPU address of the first command of this immediate task.
    uint64_t jumpTargetAddress = immediateCommandStream.getGpuBase() + immediateCommandStreamStart;
    EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&csrStream, jumpTargetAddress, false, false, false);
    EncodeNoop<GfxFamily>::alignToCacheLine(csrStream);
}
} // namespace NEO

View File

@ -17,7 +17,7 @@ struct CommandStreamReceiverFixture : public NEO::DeviceFixture {
void setUp();
void tearDown();
static constexpr size_t bufferSize = 256;
static constexpr size_t bufferSize = 1024;
uint8_t cmdBuffer[bufferSize];
uint8_t dshBuffer[bufferSize];
uint8_t iohBuffer[bufferSize];

View File

@ -3781,3 +3781,35 @@ HWTEST2_F(CommandStreamReceiverHwTest,
EXPECT_TRUE(commandStreamReceiver.isMadeResident(pDevice->getRTMemoryBackedBuffer()));
}
// Flushes two immediate tasks back to back and inspects the CSR command stream:
//  - after the first flush an MI_BATCH_BUFFER_START must be present and must point at
//    the start of the first task in the immediate command stream,
//  - after the second flush no MI_BATCH_BUFFER_START may appear in the part of the CSR
//    stream written by that flush (presumably because no preamble is needed anymore).
HWTEST2_F(CommandStreamReceiverHwTest,
          givenImmediateFlushTaskWhenCsrHasPreambleCommandsThenDispatchIndirectJumpToImmediateBatchBuffer,
          IsAtLeastXeHpCore) {
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
    using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;

    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();

    // First flush: record where the task starts so the expected jump target can be computed.
    auto firstTaskOffset = commandStream.getUsed();
    *commandStream.getSpaceForCmd<COMPUTE_WALKER>() = FamilyType::cmdInitGpgpuWalker;
    uint64_t expectedJumpAddress = commandStream.getGpuBase() + firstTaskOffset;

    commandStreamReceiver.flushImmediateTask(commandStream, firstTaskOffset, immediateFlushTaskFlags, *pDevice);

    HardwareParse csrParser;
    csrParser.parseCommands<FamilyType>(commandStreamReceiver.commandStream, 0);
    auto jumpCmd = csrParser.getCommand<MI_BATCH_BUFFER_START>();
    ASSERT_NE(nullptr, jumpCmd);
    EXPECT_EQ(expectedJumpAddress, jumpCmd->getBatchBufferStartAddress());

    // Second flush: parse only what this flush appends to the CSR stream.
    auto secondTaskOffset = commandStream.getUsed();
    *commandStream.getSpaceForCmd<COMPUTE_WALKER>() = FamilyType::cmdInitGpgpuWalker;
    size_t csrUsedBeforeSecondFlush = commandStreamReceiver.commandStream.getUsed();

    commandStreamReceiver.flushImmediateTask(commandStream, secondTaskOffset, immediateFlushTaskFlags, *pDevice);

    csrParser.tearDown();
    csrParser.parseCommands<FamilyType>(commandStreamReceiver.commandStream, csrUsedBeforeSecondFlush);
    auto secondJumpCmd = csrParser.getCommand<MI_BATCH_BUFFER_START>();
    ASSERT_EQ(nullptr, secondJumpCmd);
}