diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index 1f2d9ad2b4..eb5f67783e 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -791,7 +791,7 @@ void CommandQueueHw::programStateSipEndWA(bool isStateSipRequired return; } NEO::Device *neoDevice = this->device->getNEODevice(); - NEO::PreemptionHelper::programStateSipEndWa(cmdStream, *neoDevice); + NEO::PreemptionHelper::programStateSipEndWa(cmdStream, neoDevice->getHardwareInfo(), neoDevice->getDebugger()); } template diff --git a/shared/source/command_stream/command_stream_receiver_hw.h b/shared/source/command_stream/command_stream_receiver_hw.h index ab005113e9..5f83675fa9 100644 --- a/shared/source/command_stream/command_stream_receiver_hw.h +++ b/shared/source/command_stream/command_stream_receiver_hw.h @@ -53,7 +53,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { size_t getCmdsSizeForHardwareContext() const override; static void addBatchBufferEnd(LinearStream &commandStream, void **patchLocation); - void programEndingCmd(LinearStream &commandStream, Device &device, void **patchLocation, bool directSubmissionEnabled); + void programEndingCmd(LinearStream &commandStream, void **patchLocation, bool directSubmissionEnabled, bool sipWaAllowed); void addBatchBufferStart(MI_BATCH_BUFFER_START *commandBufferMemory, uint64_t startAddress, bool secondary); size_t getRequiredStateBaseAddressSize(const Device &device) const; diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index 72cd1eb7ed..7e84d94985 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -100,7 +100,7 @@ inline void CommandStreamReceiverHw::addBatchBufferEnd(LinearStream & } template -inline void CommandStreamReceiverHw::programEndingCmd(LinearStream &commandStream, Device &device, void **patchLocation, bool directSubmissionEnabled) { +inline void CommandStreamReceiverHw::programEndingCmd(LinearStream &commandStream, void **patchLocation, bool directSubmissionEnabled, bool sipWaAllowed) { if (directSubmissionEnabled) { uint64_t startAddress = commandStream.getGraphicsAllocation()->getGpuAddress() + commandStream.getUsed(); if (DebugManager.flags.BatchBufferStartPrepatchingWaEnabled.get() == 0) { @@ -114,8 +114,8 @@ inline void CommandStreamReceiverHw::programEndingCmd(LinearStream &c addBatchBufferStart(&cmd, startAddress, false); *bbStart = cmd; } else { - if (!EngineHelpers::isBcs(osContext->getEngineType())) { - PreemptionHelper::programStateSipEndWa(commandStream, device); + if (sipWaAllowed) { + PreemptionHelper::programStateSipEndWa(commandStream, peekHwInfo(), executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->debugger.get()); } this->addBatchBufferEnd(commandStream, patchLocation); } @@ -560,7 +560,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( GraphicsAllocation *chainedBatchBuffer = nullptr; bool directSubmissionEnabled = isDirectSubmissionEnabled(); if (submitTask) { - programEndingCmd(commandStreamTask, device, &bbEndLocation, directSubmissionEnabled); + programEndingCmd(commandStreamTask, &bbEndLocation, directSubmissionEnabled, true); EncodeNoop::emitNoop(commandStreamTask, bbEndPaddingSize); EncodeNoop::alignToCacheLine(commandStreamTask); @@ -591,7 +591,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( this->programEpilogue(commandStreamCSR, device, &bbEndLocation, dispatchFlags); } else if (submitCSR) { - programEndingCmd(commandStreamCSR, device, &bbEndLocation, directSubmissionEnabled); + programEndingCmd(commandStreamCSR, &bbEndLocation, directSubmissionEnabled, true); EncodeNoop::emitNoop(commandStreamCSR, bbEndPaddingSize); EncodeNoop::alignToCacheLine(commandStreamCSR); DEBUG_BREAK_IF(commandStreamCSR.getUsed() > commandStreamCSR.getMaxAvailableSpace()); @@ -1149,7 +1149,7 @@ uint32_t CommandStreamReceiverHw::flushBcsTask(const BlitPropertiesCo } void *endingCmdPtr = nullptr; - programEndingCmd(commandStream, device, &endingCmdPtr, blitterDirectSubmission); + programEndingCmd(commandStream, &endingCmdPtr, blitterDirectSubmission, false); EncodeNoop::alignToCacheLine(commandStream); @@ -1253,16 +1253,7 @@ void CommandStreamReceiverHw::flushSmallTask(LinearStream &commandStr using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END; void *endingCmdPtr = nullptr; - - if (isAnyDirectSubmissionEnabled()) { - endingCmdPtr = commandStreamTask.getSpace(0); - EncodeBatchBufferStartOrEnd::programBatchBufferStart(&commandStreamTask, - 0ull, - false); - } else { - auto batchBufferEnd = commandStreamTask.getSpaceForCmd(); - *batchBufferEnd = GfxFamily::cmdInitBatchBufferEnd; - } + programEndingCmd(commandStreamTask, &endingCmdPtr, isAnyDirectSubmissionEnabled(), false); auto bytesToPad = EncodeBatchBufferStartOrEnd::getBatchBufferStartSize() - EncodeBatchBufferStartOrEnd::getBatchBufferEndSize(); @@ -1338,7 +1329,7 @@ inline void CommandStreamReceiverHw::programEpilogue(LinearStream &cs addBatchBufferStart(reinterpret_cast(*batchBufferEndLocation), gpuAddress, false); this->programEpliogueCommands(csr, dispatchFlags); - programEndingCmd(csr, device, batchBufferEndLocation, isDirectSubmissionEnabled()); + programEndingCmd(csr, batchBufferEndLocation, isDirectSubmissionEnabled(), !EngineHelpers::isBcs(osContext->getEngineType())); EncodeNoop::alignToCacheLine(csr); } } diff --git a/shared/source/command_stream/preemption.h b/shared/source/command_stream/preemption.h index 81dd2d1634..ef2806185b 100644 --- a/shared/source/command_stream/preemption.h +++ b/shared/source/command_stream/preemption.h @@ -60,7 +60,7 @@ class PreemptionHelper { static void programStateSip(LinearStream &preambleCmdStream, Device &device, LogicalStateHelper *logicalStateHelper); template - static void programStateSipEndWa(LinearStream &cmdStream, Device &device); + static void programStateSipEndWa(LinearStream &cmdStream, const HardwareInfo &hwInfo, bool debuggerActive); template static size_t getRequiredCmdStreamSize(PreemptionMode newPreemptionMode, PreemptionMode oldPreemptionMode); diff --git a/shared/source/command_stream/preemption.inl b/shared/source/command_stream/preemption.inl index a3b1bb6a01..fdaff95661 100644 --- a/shared/source/command_stream/preemption.inl +++ b/shared/source/command_stream/preemption.inl @@ -58,7 +58,7 @@ void PreemptionHelper::programStateSipCmd(LinearStream &preambleCmdStream, Graph } template -void PreemptionHelper::programStateSipEndWa(LinearStream &cmdStream, Device &device) {} +void PreemptionHelper::programStateSipEndWa(LinearStream &cmdStream, const HardwareInfo &hwInfo, bool debuggerActive) {} template void PreemptionHelper::programCmdStream(LinearStream &cmdStream, PreemptionMode newPreemptionMode, diff --git a/shared/source/command_stream/preemption_xehp_and_later.inl b/shared/source/command_stream/preemption_xehp_and_later.inl index acc0943f15..91259e1e63 100644 --- a/shared/source/command_stream/preemption_xehp_and_later.inl +++ b/shared/source/command_stream/preemption_xehp_and_later.inl @@ -43,13 +43,12 @@ void PreemptionHelper::programStateSip(LinearStream &preambleCmdStrea } template <> -void PreemptionHelper::programStateSipEndWa(LinearStream &cmdStream, Device &device) { +void PreemptionHelper::programStateSipEndWa(LinearStream &cmdStream, const HardwareInfo &hwInfo, bool debuggerActive) { using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM; - bool debuggingEnabled = device.getDebugger() != nullptr; - if (debuggingEnabled) { - HwHelper &hwHelper = HwHelper::get(device.getHardwareInfo().platform.eRenderCoreFamily); - if (hwHelper.isSipWANeeded(device.getHardwareInfo())) { + if (debuggerActive) { + HwHelper &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); + if (hwHelper.isSipWANeeded(hwInfo)) { NEO::PipeControlArgs args; NEO::MemorySynchronizationCommands::addSingleBarrier(cmdStream, args); diff --git a/shared/source/gen11/preemption_gen11.cpp b/shared/source/gen11/preemption_gen11.cpp index 7b38182108..f6d36e77de 100644 --- a/shared/source/gen11/preemption_gen11.cpp +++ b/shared/source/gen11/preemption_gen11.cpp @@ -26,6 +26,6 @@ template size_t PreemptionHelper::getPreemptionWaCsSize(const Device template void PreemptionHelper::applyPreemptionWaCmdsBegin(LinearStream *pCommandStream, const Device &device); template void PreemptionHelper::applyPreemptionWaCmdsEnd(LinearStream *pCommandStream, const Device &device); template void PreemptionHelper::programInterfaceDescriptorDataPreemption(INTERFACE_DESCRIPTOR_DATA *idd, PreemptionMode preemptionMode); -template void PreemptionHelper::programStateSipEndWa(LinearStream &cmdStream, Device &device); +template void PreemptionHelper::programStateSipEndWa(LinearStream &cmdStream, const HardwareInfo &hwInfo, bool debuggerActive); } // namespace NEO diff --git a/shared/source/gen12lp/preemption_gen12lp.cpp b/shared/source/gen12lp/preemption_gen12lp.cpp index 21af88eefe..7b1baf2f3f 100644 --- a/shared/source/gen12lp/preemption_gen12lp.cpp +++ b/shared/source/gen12lp/preemption_gen12lp.cpp @@ -26,5 +26,5 @@ template size_t PreemptionHelper::getPreemptionWaCsSize(const Device template void PreemptionHelper::applyPreemptionWaCmdsBegin(LinearStream *pCommandStream, const Device &device); template void PreemptionHelper::applyPreemptionWaCmdsEnd(LinearStream *pCommandStream, const Device &device); template void PreemptionHelper::programInterfaceDescriptorDataPreemption(INTERFACE_DESCRIPTOR_DATA *idd, PreemptionMode preemptionMode); -template void PreemptionHelper::programStateSipEndWa(LinearStream &cmdStream, Device &device); +template void PreemptionHelper::programStateSipEndWa(LinearStream &cmdStream, const HardwareInfo &hwInfo, bool debuggerActive); } // namespace NEO diff --git a/shared/source/gen8/preemption_gen8.cpp b/shared/source/gen8/preemption_gen8.cpp index d897daf01c..5d9f180d96 100644 --- a/shared/source/gen8/preemption_gen8.cpp +++ b/shared/source/gen8/preemption_gen8.cpp @@ -118,5 +118,5 @@ void PreemptionHelper::programInterfaceDescriptorDataPreemption(INTER } template size_t PreemptionHelper::getRequiredCmdStreamSize(PreemptionMode newPreemptionMode, PreemptionMode oldPreemptionMode); -template void PreemptionHelper::programStateSipEndWa(LinearStream &cmdStream, Device &device); +template void PreemptionHelper::programStateSipEndWa(LinearStream &cmdStream, const HardwareInfo &hwInfo, bool debuggerActive); } // namespace NEO diff --git a/shared/source/gen9/preemption_gen9.cpp b/shared/source/gen9/preemption_gen9.cpp index fe3703186e..2f93535c0a 100644 --- a/shared/source/gen9/preemption_gen9.cpp +++ b/shared/source/gen9/preemption_gen9.cpp @@ -76,6 +76,6 @@ template void PreemptionHelper::programStateSip(LinearStream &preambl template void PreemptionHelper::programStateSipCmd(LinearStream &preambleCmdStream, GraphicsAllocation *sipAllocation, LogicalStateHelper *logicalStateHelper); template size_t PreemptionHelper::getRequiredStateSipCmdSize(Device &device, bool isRcs); template size_t PreemptionHelper::getRequiredCmdStreamSize(PreemptionMode newPreemptionMode, PreemptionMode oldPreemptionMode); -template void PreemptionHelper::programStateSipEndWa(LinearStream &cmdStream, Device &device); +template void PreemptionHelper::programStateSipEndWa(LinearStream &cmdStream, const HardwareInfo &hwInfo, bool debuggerActive); } // namespace NEO diff --git a/shared/test/unit_test/direct_submission/direct_submission_tests_1.cpp b/shared/test/unit_test/direct_submission/direct_submission_tests_1.cpp index 393ebac23d..c3e0717de0 100644 --- a/shared/test/unit_test/direct_submission/direct_submission_tests_1.cpp +++ b/shared/test/unit_test/direct_submission/direct_submission_tests_1.cpp @@ -676,8 +676,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionAvailableWhenProgrammingEndi uint8_t buffer[128]; mockCsr->commandStream.replaceBuffer(&buffer[0], 128u); mockCsr->commandStream.replaceGraphicsAllocation(&mockAllocation); - auto &device = *pDevice; - mockCsr->programEndingCmd(mockCsr->commandStream, device, &location, ret); + mockCsr->programEndingCmd(mockCsr->commandStream, &location, ret, true); EXPECT_EQ(sizeof(MI_BATCH_BUFFER_START), mockCsr->commandStream.getUsed()); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); @@ -711,7 +710,6 @@ HWTEST_F(DirectSubmissionTest, givenDebugFlagSetWhenProgrammingEndingCommandThen auto &cmdStream = mockCsr->commandStream; cmdStream.replaceBuffer(&buffer[0], 256); cmdStream.replaceGraphicsAllocation(&mockAllocation); - auto &device = *pDevice; for (int32_t value : {-1, 0, 1}) { DebugManager.flags.BatchBufferStartPrepatchingWaEnabled.set(value); @@ -719,7 +717,7 @@ HWTEST_F(DirectSubmissionTest, givenDebugFlagSetWhenProgrammingEndingCommandThen auto currectBbStartCmd = reinterpret_cast(cmdStream.getSpace(0)); uint64_t expectedGpuVa = cmdStream.getGraphicsAllocation()->getGpuAddress() + cmdStream.getUsed(); - mockCsr->programEndingCmd(cmdStream, device, &location, ret); + mockCsr->programEndingCmd(cmdStream, &location, ret, true); EncodeNoop::alignToCacheLine(cmdStream); if (value == 0) { diff --git a/shared/test/unit_test/xe_hp_core/xe_hp_sdv/test_preemption_xe_hp_sdv.cpp b/shared/test/unit_test/xe_hp_core/xe_hp_sdv/test_preemption_xe_hp_sdv.cpp index 9e07f814e8..908434fb8d 100644 --- a/shared/test/unit_test/xe_hp_core/xe_hp_sdv/test_preemption_xe_hp_sdv.cpp +++ b/shared/test/unit_test/xe_hp_core/xe_hp_sdv/test_preemption_xe_hp_sdv.cpp @@ -82,7 +82,7 @@ XEHPTEST_F(PreemptionXeHPTest, givenRevisionA0toBWhenProgrammingSipEndWaThenGlob StackVec streamStorage(1024); LinearStream cmdStream{streamStorage.begin(), streamStorage.size()}; - PreemptionHelper::programStateSipEndWa(cmdStream, *mockDevice); + PreemptionHelper::programStateSipEndWa(cmdStream, hwInfo, true); EXPECT_NE(0U, cmdStream.getUsed()); GenCmdList cmdList;