diff --git a/shared/source/direct_submission/direct_submission_controller.cpp b/shared/source/direct_submission/direct_submission_controller.cpp index 6509b03446..9088c0b4dd 100644 --- a/shared/source/direct_submission/direct_submission_controller.cpp +++ b/shared/source/direct_submission/direct_submission_controller.cpp @@ -14,6 +14,7 @@ #include "shared/source/os_interface/os_context.h" #include "shared/source/os_interface/os_thread.h" #include "shared/source/os_interface/os_time.h" +#include "shared/source/os_interface/product_helper.h" #include #include @@ -111,21 +112,28 @@ void DirectSubmissionController::checkNewSubmissions() { std::lock_guard lock(this->directSubmissionsMutex); bool shouldRecalculateTimeout = false; + std::optional bcsTaskCount{}; for (auto &directSubmission : this->directSubmissions) { auto csr = directSubmission.first; auto &state = directSubmission.second; - - if (timeoutMode == TimeoutElapsedMode::bcsOnly && !EngineHelpers::isBcs(csr->getOsContext().getEngineType())) { + auto isBcs = EngineHelpers::isBcs(csr->getOsContext().getEngineType()); + if (timeoutMode == TimeoutElapsedMode::bcsOnly && !isBcs) { continue; } - + if (isBcs) { + bcsTaskCount = state.taskCount; + } auto taskCount = csr->peekTaskCount(); if (taskCount == state.taskCount) { if (state.isStopped) { continue; } + bool isCopyEngineIdle = true; + if (!isBcs && csr->getProductHelper().checkBcsForDirectSubmissionStop()) { + isCopyEngineIdle = isCopyEngineOnDeviceIdle(csr->getRootDeviceIndex(), bcsTaskCount); + } auto lock = csr->obtainUniqueOwnership(); - if (!isCsrIdleDetectionEnabled || isDirectSubmissionIdle(csr, lock)) { + if (!isCsrIdleDetectionEnabled || (isCopyEngineIdle && isDirectSubmissionIdle(csr, lock))) { csr->stopDirectSubmission(false, false); state.isStopped = true; shouldRecalculateTimeout = true; @@ -169,6 +177,27 @@ bool DirectSubmissionController::isDirectSubmissionIdle(CommandStreamReceiver *c return !csr->isBusyWithoutHang(lastHangCheckTime); } +bool DirectSubmissionController::isCopyEngineOnDeviceIdle(uint32_t rootDeviceIndex, std::optional &bcsTaskCount) { + CommandStreamReceiver *bcsCsr = nullptr; + TaskCountType registeredTaskCount = 0; + for (auto &directSubmission : this->directSubmissions) { + auto csr = directSubmission.first; + if (csr->getRootDeviceIndex() == rootDeviceIndex && EngineHelpers::isBcs(csr->getOsContext().getEngineType())) { + if (!directSubmission.second.isStopped) { + registeredTaskCount = bcsTaskCount.value_or(directSubmission.second.taskCount); + bcsCsr = csr; + } + break; + } + } + if (bcsCsr == nullptr) { + return true; + } + + auto lock = bcsCsr->obtainUniqueOwnership(); + return (bcsCsr->peekTaskCount() == registeredTaskCount) && isDirectSubmissionIdle(bcsCsr, lock); +} + SteadyClock::time_point DirectSubmissionController::getCpuTimestamp() { return SteadyClock::now(); } diff --git a/shared/source/direct_submission/direct_submission_controller.h b/shared/source/direct_submission/direct_submission_controller.h index 6fb88f9e54..309883e0f5 100644 --- a/shared/source/direct_submission/direct_submission_controller.h +++ b/shared/source/direct_submission/direct_submission_controller.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -87,6 +88,7 @@ class DirectSubmissionController { static void *controlDirectSubmissionsState(void *self); void checkNewSubmissions(); bool isDirectSubmissionIdle(CommandStreamReceiver *csr, std::unique_lock &csrLock); + bool isCopyEngineOnDeviceIdle(uint32_t rootDeviceIndex, std::optional &bcsTaskCount); MOCKABLE_VIRTUAL bool sleep(std::unique_lock &lock); MOCKABLE_VIRTUAL SteadyClock::time_point getCpuTimestamp(); MOCKABLE_VIRTUAL void overrideDirectSubmissionTimeouts(const ProductHelper &productHelper); diff --git a/shared/source/os_interface/product_helper.h b/shared/source/os_interface/product_helper.h index 225419674e..44376bc628 100644 --- a/shared/source/os_interface/product_helper.h +++ b/shared/source/os_interface/product_helper.h @@ -274,6 +274,7 @@ class ProductHelper { virtual void adjustRTDispatchGlobals(RTDispatchGlobals &rtDispatchGlobals, const HardwareInfo &hwInfo) const = 0; virtual uint32_t getSyncNumRTStacksPerDss(const HardwareInfo &hwInfo) const = 0; virtual uint32_t getNumRtStacksPerDSSForAllocation(const HardwareInfo &hwInfo) const = 0; + virtual bool checkBcsForDirectSubmissionStop() const = 0; virtual bool shouldRegisterEnqueuedWalkerWithProfiling() const = 0; virtual bool getStorageInfoLocalOnlyFlag(LocalMemAllocationMode usmDeviceAllocationMode, bool defaultValue) const = 0; diff --git a/shared/source/os_interface/product_helper.inl b/shared/source/os_interface/product_helper.inl index 00474648ef..5ace6f18bd 100644 --- a/shared/source/os_interface/product_helper.inl +++ b/shared/source/os_interface/product_helper.inl @@ -1098,6 +1098,11 @@ uint32_t ProductHelperHw::getNumRtStacksPerDSSForAllocation(const Ha return RayTracingHelper::getAsyncNumRTStacksPerDss(); } +template +bool ProductHelperHw::checkBcsForDirectSubmissionStop() const { + return false; +} + template bool ProductHelperHw::shouldRegisterEnqueuedWalkerWithProfiling() const { return false; diff --git a/shared/source/os_interface/product_helper_hw.h b/shared/source/os_interface/product_helper_hw.h index e61a2f59b0..2ccbadf4d7 100644 --- a/shared/source/os_interface/product_helper_hw.h +++ b/shared/source/os_interface/product_helper_hw.h @@ -211,6 +211,7 @@ class ProductHelperHw : public ProductHelper { void adjustRTDispatchGlobals(RTDispatchGlobals &rtDispatchGlobals, const HardwareInfo &hwInfo) const override; uint32_t getSyncNumRTStacksPerDss(const HardwareInfo &hwInfo) const override; uint32_t getNumRtStacksPerDSSForAllocation(const HardwareInfo &hwInfo) const override; + bool checkBcsForDirectSubmissionStop() const override; bool shouldRegisterEnqueuedWalkerWithProfiling() const override; ~ProductHelperHw() override = default; diff --git a/shared/source/xe2_hpg_core/bmg/os_agnostic_product_helper_bmg.inl b/shared/source/xe2_hpg_core/bmg/os_agnostic_product_helper_bmg.inl index 94b98a54e8..8a404e9268 100644 --- a/shared/source/xe2_hpg_core/bmg/os_agnostic_product_helper_bmg.inl +++ b/shared/source/xe2_hpg_core/bmg/os_agnostic_product_helper_bmg.inl @@ -51,4 +51,9 @@ void ProductHelperHw::adjustScratchSize(size_t &requiredScratchSize) requiredScratchSize *= 2; } +template <> +bool ProductHelperHw::checkBcsForDirectSubmissionStop() const { + return true; +} + } // namespace NEO diff --git a/shared/test/unit_test/direct_submission/direct_submission_controller_tests.cpp b/shared/test/unit_test/direct_submission/direct_submission_controller_tests.cpp index d953318fc0..059ea29ef5 100644 --- a/shared/test/unit_test/direct_submission/direct_submission_controller_tests.cpp +++ b/shared/test/unit_test/direct_submission/direct_submission_controller_tests.cpp @@ -10,6 +10,7 @@ #include "shared/source/os_interface/os_time.h" #include "shared/source/os_interface/product_helper.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" +#include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/mocks/mock_command_stream_receiver.h" #include "shared/test/common/mocks/mock_execution_environment.h" @@ -458,4 +459,118 @@ TEST_F(DirectSubmissionIdleDetectionTests, givenDebugFlagSetWhenTaskCountNotUpda EXPECT_EQ(0u, csr->flushTagUpdateCalledTimes); } +struct DirectSubmissionCheckForCopyEngineIdleTests : public ::testing::Test { + void SetUp() override { + controller = std::make_unique(); + executionEnvironment.prepareRootDeviceEnvironments(2); + executionEnvironment.initializeMemoryManager(); + executionEnvironment.rootDeviceEnvironments[0]->initOsTime(); + + DeviceBitfield deviceBitfield(1); + ccsCsr = std::make_unique(executionEnvironment, 0, deviceBitfield); + bcsCsr = std::make_unique(executionEnvironment, 0, deviceBitfield); + ccsOsContext.reset(OsContext::create(nullptr, 0, 0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_CCS, EngineUsage::regular}, PreemptionMode::ThreadGroup, deviceBitfield))); + bcsOsContext.reset(OsContext::create(nullptr, 0, 0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::regular}, PreemptionMode::ThreadGroup, deviceBitfield))); + ccsCsr->setupContext(*ccsOsContext); + bcsCsr->setupContext(*bcsOsContext); + + controller->timeoutElapsedReturnValue.store(TimeoutElapsedMode::fullyElapsed); + controller->registerDirectSubmission(ccsCsr.get()); + controller->registerDirectSubmission(bcsCsr.get()); + bcsCsr->taskCount.store(10u); + ccsCsr->taskCount.store(10u); + controller->checkNewSubmissions(); + } + + void TearDown() override { + controller->unregisterDirectSubmission(ccsCsr.get()); + controller->unregisterDirectSubmission(bcsCsr.get()); + } + + MockExecutionEnvironment executionEnvironment{defaultHwInfo.get(), true, 2u}; + + std::unique_ptr osContext; + std::unique_ptr ccsCsr; + std::unique_ptr ccsOsContext; + + std::unique_ptr bcsCsr; + std::unique_ptr bcsOsContext; + std::unique_ptr controller; +}; + +TEST_F(DirectSubmissionCheckForCopyEngineIdleTests, givenCheckBcsForDirectSubmissionStopWhenCCSIdleAndCopyEngineBusyThenDontTerminateDirectSubmission) { + ccsCsr->setLatestFlushedTaskCount(10u); + bcsCsr->setLatestFlushedTaskCount(10u); + + ccsCsr->isBusyReturnValue = false; + bcsCsr->isBusyReturnValue = true; + controller->directSubmissions[bcsCsr.get()].isStopped = false; + controller->checkNewSubmissions(); + EXPECT_EQ(controller->directSubmissions[ccsCsr.get()].taskCount, 10u); + + if (ccsCsr->getProductHelper().checkBcsForDirectSubmissionStop()) { + EXPECT_FALSE(controller->directSubmissions[ccsCsr.get()].isStopped); + EXPECT_EQ(0u, ccsCsr->stopDirectSubmissionCalledTimes); + } else { + EXPECT_TRUE(controller->directSubmissions[ccsCsr.get()].isStopped); + EXPECT_EQ(1u, ccsCsr->stopDirectSubmissionCalledTimes); + } +} + +TEST_F(DirectSubmissionCheckForCopyEngineIdleTests, givenCheckBcsForDirectSubmissionStopWhenCCSIdleAndCopyEngineUpdatedTaskCountThenDontTerminateDirectSubmission) { + ccsCsr->setLatestFlushedTaskCount(10u); + bcsCsr->setLatestFlushedTaskCount(10u); + + ccsCsr->isBusyReturnValue = false; + bcsCsr->isBusyReturnValue = false; + controller->directSubmissions[bcsCsr.get()].isStopped = false; + bcsCsr->taskCount.store(20u); + + controller->checkNewSubmissions(); + EXPECT_EQ(controller->directSubmissions[ccsCsr.get()].taskCount, 10u); + + if (ccsCsr->getProductHelper().checkBcsForDirectSubmissionStop()) { + EXPECT_FALSE(controller->directSubmissions[ccsCsr.get()].isStopped); + EXPECT_EQ(0u, ccsCsr->stopDirectSubmissionCalledTimes); + } else { + EXPECT_TRUE(controller->directSubmissions[ccsCsr.get()].isStopped); + EXPECT_EQ(1u, ccsCsr->stopDirectSubmissionCalledTimes); + } +} + +TEST_F(DirectSubmissionCheckForCopyEngineIdleTests, givenCheckBcsForDirectSubmissionStopWhenCCSIdleAndCopyEngineBusyAndDifferentDeviceThenTerminateDirectSubmission) { + DeviceBitfield deviceBitfield(1); + TagUpdateMockCommandStreamReceiver secondDeviceCsr(executionEnvironment, 1, deviceBitfield); + std::unique_ptr osContext(OsContext::create(nullptr, 1, 0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_CCS, EngineUsage::regular}, PreemptionMode::ThreadGroup, deviceBitfield))); + secondDeviceCsr.setupContext(*osContext); + controller->registerDirectSubmission(&secondDeviceCsr); + secondDeviceCsr.taskCount.store(10u); + controller->checkNewSubmissions(); + + secondDeviceCsr.setLatestFlushedTaskCount(10u); + bcsCsr->setLatestFlushedTaskCount(10u); + + secondDeviceCsr.isBusyReturnValue = false; + bcsCsr->isBusyReturnValue = true; + controller->directSubmissions[bcsCsr.get()].isStopped = false; + controller->checkNewSubmissions(); + EXPECT_EQ(controller->directSubmissions[&secondDeviceCsr].taskCount, 10u); + EXPECT_TRUE(controller->directSubmissions[&secondDeviceCsr].isStopped); + EXPECT_EQ(1u, secondDeviceCsr.stopDirectSubmissionCalledTimes); +} + +TEST_F(DirectSubmissionCheckForCopyEngineIdleTests, givenCheckBcsForDirectSubmissionStopWhenCopyEngineNotStartedThenTerminateDirectSubmission) { + ccsCsr->setLatestFlushedTaskCount(10u); + bcsCsr->setLatestFlushedTaskCount(10u); + + ccsCsr->isBusyReturnValue = false; + bcsCsr->isBusyReturnValue = true; + controller->directSubmissions[bcsCsr.get()].isStopped = true; + + controller->checkNewSubmissions(); + EXPECT_EQ(controller->directSubmissions[ccsCsr.get()].taskCount, 10u); + EXPECT_TRUE(controller->directSubmissions[ccsCsr.get()].isStopped); + EXPECT_EQ(1u, ccsCsr->stopDirectSubmissionCalledTimes); +} + } // namespace NEO \ No newline at end of file