performance: immediate flush adds a jump to the immediate batch buffer when a preamble is present
Related-To: NEO-7808
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
parent
0d34323932
commit
bd15d067d5
|
@ -252,6 +252,12 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||||
inline void handleImmediateFlushOneTimeContextInitState(ImmediateDispatchFlags &dispatchFlags, ImmediateFlushData &flushData, Device &device);
|
inline void handleImmediateFlushOneTimeContextInitState(ImmediateDispatchFlags &dispatchFlags, ImmediateFlushData &flushData, Device &device);
|
||||||
inline void dispatchImmediateFlushOneTimeContextInitCommand(ImmediateFlushData &flushData, LinearStream &csrStream, Device &device);
|
inline void dispatchImmediateFlushOneTimeContextInitCommand(ImmediateFlushData &flushData, LinearStream &csrStream, Device &device);
|
||||||
|
|
||||||
|
// When flushData.estimatedSize is already greater than zero, grows it by the
// size of a batch buffer start command and aligns the result up to the cache
// line size; otherwise leaves flushData untouched.
inline void handleImmediateFlushJumpToImmediate(ImmediateFlushData &flushData);
|
||||||
|
// When flushData.estimatedSize is greater than zero, programs a batch buffer
// start into csrStream that targets immediateCommandStream's GPU base plus
// immediateCommandStreamStart, then aligns csrStream to a cache line.
// Does nothing when estimatedSize is not greater than zero.
inline void dispatchImmediateFlushJumpToImmediateCommand(LinearStream &immediateCommandStream,
|
||||||
|
size_t immediateCommandStreamStart,
|
||||||
|
ImmediateFlushData &flushData,
|
||||||
|
LinearStream &csrStream);
|
||||||
|
|
||||||
inline void handleImmediateFlushAllocationsResidency(Device &device);
|
inline void handleImmediateFlushAllocationsResidency(Device &device);
|
||||||
|
|
||||||
HeapDirtyState dshState;
|
HeapDirtyState dshState;
|
||||||
|
|
|
@ -305,6 +305,8 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushImmediateTask(
|
||||||
handleImmediateFlushStateBaseAddressState(dispatchFlags, flushData, device);
|
handleImmediateFlushStateBaseAddressState(dispatchFlags, flushData, device);
|
||||||
handleImmediateFlushOneTimeContextInitState(dispatchFlags, flushData, device);
|
handleImmediateFlushOneTimeContextInitState(dispatchFlags, flushData, device);
|
||||||
|
|
||||||
|
handleImmediateFlushJumpToImmediate(flushData);
|
||||||
|
|
||||||
auto &csrCommandStream = getCS(flushData.estimatedSize);
|
auto &csrCommandStream = getCS(flushData.estimatedSize);
|
||||||
|
|
||||||
dispatchImmediateFlushPipelineSelectCommand(flushData, csrCommandStream);
|
dispatchImmediateFlushPipelineSelectCommand(flushData, csrCommandStream);
|
||||||
|
@ -313,6 +315,8 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushImmediateTask(
|
||||||
dispatchImmediateFlushStateBaseAddressCommand(flushData, csrCommandStream, device);
|
dispatchImmediateFlushStateBaseAddressCommand(flushData, csrCommandStream, device);
|
||||||
dispatchImmediateFlushOneTimeContextInitCommand(flushData, csrCommandStream, device);
|
dispatchImmediateFlushOneTimeContextInitCommand(flushData, csrCommandStream, device);
|
||||||
|
|
||||||
|
dispatchImmediateFlushJumpToImmediateCommand(immediateCommandStream, immediateCommandStreamStart, flushData, csrCommandStream);
|
||||||
|
|
||||||
handleImmediateFlushAllocationsResidency(device);
|
handleImmediateFlushAllocationsResidency(device);
|
||||||
|
|
||||||
CompletionStamp completionStamp = {
|
CompletionStamp completionStamp = {
|
||||||
|
@ -2050,4 +2054,25 @@ void CommandStreamReceiverHw<GfxFamily>::handleImmediateFlushAllocationsResidenc
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename GfxFamily>
|
||||||
|
void CommandStreamReceiverHw<GfxFamily>::handleImmediateFlushJumpToImmediate(ImmediateFlushData &flushData) {
|
||||||
|
if (flushData.estimatedSize > 0) {
|
||||||
|
flushData.estimatedSize += EncodeBatchBufferStartOrEnd<GfxFamily>::getBatchBufferStartSize();
|
||||||
|
flushData.estimatedSize = alignUp(flushData.estimatedSize, MemoryConstants::cacheLineSize);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename GfxFamily>
|
||||||
|
void CommandStreamReceiverHw<GfxFamily>::dispatchImmediateFlushJumpToImmediateCommand(LinearStream &immediateCommandStream,
|
||||||
|
size_t immediateCommandStreamStart,
|
||||||
|
ImmediateFlushData &flushData,
|
||||||
|
LinearStream &csrStream) {
|
||||||
|
if (flushData.estimatedSize > 0) {
|
||||||
|
uint64_t immediateStartAddress = immediateCommandStream.getGpuBase() + immediateCommandStreamStart;
|
||||||
|
|
||||||
|
EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&csrStream, immediateStartAddress, false, false, false);
|
||||||
|
EncodeNoop<GfxFamily>::alignToCacheLine(csrStream);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|
|
@ -17,7 +17,7 @@ struct CommandStreamReceiverFixture : public NEO::DeviceFixture {
|
||||||
void setUp();
|
void setUp();
|
||||||
void tearDown();
|
void tearDown();
|
||||||
|
|
||||||
static constexpr size_t bufferSize = 256;
|
static constexpr size_t bufferSize = 1024;
|
||||||
uint8_t cmdBuffer[bufferSize];
|
uint8_t cmdBuffer[bufferSize];
|
||||||
uint8_t dshBuffer[bufferSize];
|
uint8_t dshBuffer[bufferSize];
|
||||||
uint8_t iohBuffer[bufferSize];
|
uint8_t iohBuffer[bufferSize];
|
||||||
|
|
|
@ -3781,3 +3781,35 @@ HWTEST2_F(CommandStreamReceiverHwTest,
|
||||||
|
|
||||||
EXPECT_TRUE(commandStreamReceiver.isMadeResident(pDevice->getRTMemoryBackedBuffer()));
|
EXPECT_TRUE(commandStreamReceiver.isMadeResident(pDevice->getRTMemoryBackedBuffer()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWTEST2_F(CommandStreamReceiverHwTest,
          givenImmediateFlushTaskWhenCsrHasPreambleCommandsThenDispatchIndirectJumpToImmediateBatchBuffer,
          IsAtLeastXeHpCore) {
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
    using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;

    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();

    // First flush: the CSR stream is expected to contain a batch buffer start
    // that points at the walker just written into the immediate stream.
    auto walkerOffset = commandStream.getUsed();
    auto *firstWalker = commandStream.getSpaceForCmd<COMPUTE_WALKER>();
    *firstWalker = FamilyType::cmdInitGpgpuWalker;
    uint64_t expectedJumpAddress = commandStream.getGpuBase() + walkerOffset;

    commandStreamReceiver.flushImmediateTask(commandStream, walkerOffset, immediateFlushTaskFlags, *pDevice);

    HardwareParse csrParser;
    csrParser.parseCommands<FamilyType>(commandStreamReceiver.commandStream, 0);
    auto jumpCmd = csrParser.getCommand<MI_BATCH_BUFFER_START>();
    ASSERT_NE(nullptr, jumpCmd);
    EXPECT_EQ(expectedJumpAddress, jumpCmd->getBatchBufferStartAddress());

    // Second flush: only the part of the CSR stream written after the first
    // flush is parsed, and no batch buffer start is expected there
    // (presumably because the preamble was already dispatched - confirm).
    walkerOffset = commandStream.getUsed();
    auto *secondWalker = commandStream.getSpaceForCmd<COMPUTE_WALKER>();
    *secondWalker = FamilyType::cmdInitGpgpuWalker;

    size_t csrUsedBeforeSecondFlush = commandStreamReceiver.commandStream.getUsed();
    commandStreamReceiver.flushImmediateTask(commandStream, walkerOffset, immediateFlushTaskFlags, *pDevice);

    csrParser.tearDown();
    csrParser.parseCommands<FamilyType>(commandStreamReceiver.commandStream, csrUsedBeforeSecondFlush);
    jumpCmd = csrParser.getCommand<MI_BATCH_BUFFER_START>();
    ASSERT_EQ(nullptr, jumpCmd);
}
|
||||||
|
|
Loading…
Reference in New Issue