Add a "jump to immediate" step to CommandStreamReceiverHw::flushImmediateTask.

When the size estimated for the CSR stream is non-zero, the estimate is
extended by one batch-buffer-start command and rounded up to a cache line, and
a batch-buffer start targeting the immediate command stream is programmed into
the CSR stream. A unit test covers a flush that emits the jump and a
following flush that does not.

diff --git a/shared/source/command_stream/command_stream_receiver_hw.h b/shared/source/command_stream/command_stream_receiver_hw.h
index 6da20b5559..4fb15571af 100644
--- a/shared/source/command_stream/command_stream_receiver_hw.h
+++ b/shared/source/command_stream/command_stream_receiver_hw.h
@@ -252,6 +252,12 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
     inline void handleImmediateFlushOneTimeContextInitState(ImmediateDispatchFlags &dispatchFlags, ImmediateFlushData &flushData, Device &device);
     inline void dispatchImmediateFlushOneTimeContextInitCommand(ImmediateFlushData &flushData, LinearStream &csrStream, Device &device);
 
+    inline void handleImmediateFlushJumpToImmediate(ImmediateFlushData &flushData); // size-estimation half: updates flushData.estimatedSize
+    inline void dispatchImmediateFlushJumpToImmediateCommand(LinearStream &immediateCommandStream,
+                                                             size_t immediateCommandStreamStart,
+                                                             ImmediateFlushData &flushData,
+                                                             LinearStream &csrStream); // programming half: writes the jump into csrStream
+
     inline void handleImmediateFlushAllocationsResidency(Device &device);
 
     HeapDirtyState dshState;
diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl
index dcc7fdc2ba..fa49699d1b 100644
--- a/shared/source/command_stream/command_stream_receiver_hw_base.inl
+++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl
@@ -305,6 +305,8 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushImmediateTask(
     handleImmediateFlushStateBaseAddressState(dispatchFlags, flushData, device);
     handleImmediateFlushOneTimeContextInitState(dispatchFlags, flushData, device);
 
+    handleImmediateFlushJumpToImmediate(flushData); // runs before getCS() so the requested size already includes the jump
+
     auto &csrCommandStream = getCS(flushData.estimatedSize);
 
     dispatchImmediateFlushPipelineSelectCommand(flushData, csrCommandStream);
@@ -313,6 +315,8 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushImmediateTask(
     dispatchImmediateFlushStateBaseAddressCommand(flushData, csrCommandStream, device);
     dispatchImmediateFlushOneTimeContextInitCommand(flushData, csrCommandStream, device);
 
+    dispatchImmediateFlushJumpToImmediateCommand(immediateCommandStream, immediateCommandStreamStart, flushData, csrCommandStream); // programmed after the state commands above
+
     handleImmediateFlushAllocationsResidency(device);
 
     CompletionStamp completionStamp = {
@@ -2050,4 +2054,25 @@ void CommandStreamReceiverHw<GfxFamily>::handleImmediateFlushAllocationsResidenc
     }
 }
 
+template <typename GfxFamily>
+void CommandStreamReceiverHw<GfxFamily>::handleImmediateFlushJumpToImmediate(ImmediateFlushData &flushData) {
+    if (flushData.estimatedSize > 0) { // a zero estimate is left untouched: no jump is accounted for
+        flushData.estimatedSize += EncodeBatchBufferStartOrEnd<GfxFamily>::getBatchBufferStartSize();
+        flushData.estimatedSize = alignUp(flushData.estimatedSize, MemoryConstants::cacheLineSize); // pairs with alignToCacheLine() in the dispatch step
+    }
+}
+
+template <typename GfxFamily>
+void CommandStreamReceiverHw<GfxFamily>::dispatchImmediateFlushJumpToImmediateCommand(LinearStream &immediateCommandStream,
+                                                                                      size_t immediateCommandStreamStart,
+                                                                                      ImmediateFlushData &flushData,
+                                                                                      LinearStream &csrStream) {
+    if (flushData.estimatedSize > 0) { // same condition as the size-estimation step
+        uint64_t immediateStartAddress = immediateCommandStream.getGpuBase() + immediateCommandStreamStart; // GPU base of the immediate stream plus the caller-supplied start offset
+
+        EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&csrStream, immediateStartAddress, false, false, false); // batch-buffer start in csrStream targeting the immediate stream
+        EncodeNoop<GfxFamily>::alignToCacheLine(csrStream);
+    }
+}
+
 } // namespace NEO
diff --git a/shared/test/common/fixtures/command_stream_receiver_fixture.h b/shared/test/common/fixtures/command_stream_receiver_fixture.h
index 1dd0d331f2..7f6859720d 100644
--- a/shared/test/common/fixtures/command_stream_receiver_fixture.h
+++ b/shared/test/common/fixtures/command_stream_receiver_fixture.h
@@ -17,7 +17,7 @@ struct CommandStreamReceiverFixture : public NEO::DeviceFixture {
     void setUp();
     void tearDown();
 
-    static constexpr size_t bufferSize = 256;
+    static constexpr size_t bufferSize = 1024; // NOTE(review): presumably enlarged so the test streams have room for the extra commands - confirm
     uint8_t cmdBuffer[bufferSize];
     uint8_t dshBuffer[bufferSize];
     uint8_t iohBuffer[bufferSize];
diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp
index 74467f961f..0585de09c0 100644
--- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp
+++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp
@@ -3781,3 +3781,35 @@ HWTEST2_F(CommandStreamReceiverHwTest,
 
     EXPECT_TRUE(commandStreamReceiver.isMadeResident(pDevice->getRTMemoryBackedBuffer()));
 }
+
+HWTEST2_F(CommandStreamReceiverHwTest,
+          givenImmediateFlushTaskWhenCsrHasPreambleCommandsThenDispatchIndirectJumpToImmediateBatchBuffer,
+          IsAtLeastXeHpCore) {
+    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
+    using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
+
+    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
+
+    auto startOffset = commandStream.getUsed(); // offset at which the walker below is placed
+    *commandStream.getSpaceForCmd<COMPUTE_WALKER>() = FamilyType::cmdInitGpgpuWalker;
+    uint64_t immediateStartAddress = commandStream.getGpuBase() + startOffset; // address the jump is expected to target
+
+    commandStreamReceiver.flushImmediateTask(commandStream, startOffset, immediateFlushTaskFlags, *pDevice); // first flush: a jump is expected in the CSR stream
+
+    HardwareParse hwParserCsr;
+    hwParserCsr.parseCommands<FamilyType>(commandStreamReceiver.commandStream, 0);
+    auto bbStartCmd = hwParserCsr.getCommand<MI_BATCH_BUFFER_START>();
+    ASSERT_NE(nullptr, bbStartCmd);
+    EXPECT_EQ(immediateStartAddress, bbStartCmd->getBatchBufferStartAddress());
+
+    startOffset = commandStream.getUsed();
+    *commandStream.getSpaceForCmd<COMPUTE_WALKER>() = FamilyType::cmdInitGpgpuWalker;
+
+    size_t usedSize = commandStreamReceiver.commandStream.getUsed(); // parse only what the second flush appends
+    commandStreamReceiver.flushImmediateTask(commandStream, startOffset, immediateFlushTaskFlags, *pDevice); // second flush: no jump expected
+
+    hwParserCsr.tearDown();
+    hwParserCsr.parseCommands<FamilyType>(commandStreamReceiver.commandStream, usedSize);
+    bbStartCmd = hwParserCsr.getCommand<MI_BATCH_BUFFER_START>();
+    ASSERT_EQ(nullptr, bbStartCmd);
+}