performance: immediate flush adds a jump to the batch buffer when a preamble is present
Related-To: NEO-7808
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
parent
0d34323932
commit
bd15d067d5
|
@ -252,6 +252,12 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
|||
inline void handleImmediateFlushOneTimeContextInitState(ImmediateDispatchFlags &dispatchFlags, ImmediateFlushData &flushData, Device &device);
|
||||
inline void dispatchImmediateFlushOneTimeContextInitCommand(ImmediateFlushData &flushData, LinearStream &csrStream, Device &device);
|
||||
|
||||
inline void handleImmediateFlushJumpToImmediate(ImmediateFlushData &flushData);
|
||||
inline void dispatchImmediateFlushJumpToImmediateCommand(LinearStream &immediateCommandStream,
|
||||
size_t immediateCommandStreamStart,
|
||||
ImmediateFlushData &flushData,
|
||||
LinearStream &csrStream);
|
||||
|
||||
inline void handleImmediateFlushAllocationsResidency(Device &device);
|
||||
|
||||
HeapDirtyState dshState;
|
||||
|
|
|
@ -305,6 +305,8 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushImmediateTask(
|
|||
handleImmediateFlushStateBaseAddressState(dispatchFlags, flushData, device);
|
||||
handleImmediateFlushOneTimeContextInitState(dispatchFlags, flushData, device);
|
||||
|
||||
handleImmediateFlushJumpToImmediate(flushData);
|
||||
|
||||
auto &csrCommandStream = getCS(flushData.estimatedSize);
|
||||
|
||||
dispatchImmediateFlushPipelineSelectCommand(flushData, csrCommandStream);
|
||||
|
@ -313,6 +315,8 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushImmediateTask(
|
|||
dispatchImmediateFlushStateBaseAddressCommand(flushData, csrCommandStream, device);
|
||||
dispatchImmediateFlushOneTimeContextInitCommand(flushData, csrCommandStream, device);
|
||||
|
||||
dispatchImmediateFlushJumpToImmediateCommand(immediateCommandStream, immediateCommandStreamStart, flushData, csrCommandStream);
|
||||
|
||||
handleImmediateFlushAllocationsResidency(device);
|
||||
|
||||
CompletionStamp completionStamp = {
|
||||
|
@ -2050,4 +2054,25 @@ void CommandStreamReceiverHw<GfxFamily>::handleImmediateFlushAllocationsResidenc
|
|||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::handleImmediateFlushJumpToImmediate(ImmediateFlushData &flushData) {
|
||||
if (flushData.estimatedSize > 0) {
|
||||
flushData.estimatedSize += EncodeBatchBufferStartOrEnd<GfxFamily>::getBatchBufferStartSize();
|
||||
flushData.estimatedSize = alignUp(flushData.estimatedSize, MemoryConstants::cacheLineSize);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::dispatchImmediateFlushJumpToImmediateCommand(LinearStream &immediateCommandStream,
|
||||
size_t immediateCommandStreamStart,
|
||||
ImmediateFlushData &flushData,
|
||||
LinearStream &csrStream) {
|
||||
if (flushData.estimatedSize > 0) {
|
||||
uint64_t immediateStartAddress = immediateCommandStream.getGpuBase() + immediateCommandStreamStart;
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&csrStream, immediateStartAddress, false, false, false);
|
||||
EncodeNoop<GfxFamily>::alignToCacheLine(csrStream);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
|
|
@ -17,7 +17,7 @@ struct CommandStreamReceiverFixture : public NEO::DeviceFixture {
|
|||
void setUp();
|
||||
void tearDown();
|
||||
|
||||
static constexpr size_t bufferSize = 256;
|
||||
static constexpr size_t bufferSize = 1024;
|
||||
uint8_t cmdBuffer[bufferSize];
|
||||
uint8_t dshBuffer[bufferSize];
|
||||
uint8_t iohBuffer[bufferSize];
|
||||
|
|
|
@ -3781,3 +3781,35 @@ HWTEST2_F(CommandStreamReceiverHwTest,
|
|||
|
||||
EXPECT_TRUE(commandStreamReceiver.isMadeResident(pDevice->getRTMemoryBackedBuffer()));
|
||||
}
|
||||
|
||||
// Checks that flushImmediateTask places an MI_BATCH_BUFFER_START in the CSR
// stream pointing at the immediate commands on the first flush, and that the
// CSR stream region written by a second flush contains no such command.
HWTEST2_F(CommandStreamReceiverHwTest,
          givenImmediateFlushTaskWhenCsrHasPreambleCommandsThenDispatchIndirectJumpToImmediateBatchBuffer,
          IsAtLeastXeHpCore) {
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
    using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    // --- first flush: a jump to the walker's GPU address is expected ---
    const auto firstWalkerOffset = commandStream.getUsed();
    auto firstWalker = commandStream.getSpaceForCmd<COMPUTE_WALKER>();
    *firstWalker = FamilyType::cmdInitGpgpuWalker;
    const uint64_t expectedJumpAddress = commandStream.getGpuBase() + firstWalkerOffset;

    csr.flushImmediateTask(commandStream, firstWalkerOffset, immediateFlushTaskFlags, *pDevice);

    HardwareParse csrParser;
    csrParser.parseCommands<FamilyType>(csr.commandStream, 0);
    auto jumpCmd = csrParser.getCommand<MI_BATCH_BUFFER_START>();
    ASSERT_NE(nullptr, jumpCmd);
    EXPECT_EQ(expectedJumpAddress, jumpCmd->getBatchBufferStartAddress());

    // --- second flush: only the CSR bytes added by this flush are parsed ---
    // NOTE(review): presumably no jump appears because the one-time state was
    // already programmed by the first flush - confirm against the handlers.
    const auto secondWalkerOffset = commandStream.getUsed();
    auto secondWalker = commandStream.getSpaceForCmd<COMPUTE_WALKER>();
    *secondWalker = FamilyType::cmdInitGpgpuWalker;

    const size_t csrUsedBeforeSecondFlush = csr.commandStream.getUsed();
    csr.flushImmediateTask(commandStream, secondWalkerOffset, immediateFlushTaskFlags, *pDevice);

    csrParser.tearDown();
    csrParser.parseCommands<FamilyType>(csr.commandStream, csrUsedBeforeSecondFlush);
    jumpCmd = csrParser.getCommand<MI_BATCH_BUFFER_START>();
    ASSERT_EQ(nullptr, jumpCmd);
}
|
||||
|
|
Loading…
Reference in New Issue