mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-24 20:39:56 +08:00
performance: don't terminate ULLS if BCS is busy
Related-To: NEO-15452. If CCS is idle but BCS is busy, keep the CCS ULLS context running. BMG only. Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
2ee3070a1e
commit
1d842c58bf
@@ -14,6 +14,7 @@
|
||||
#include "shared/source/os_interface/os_context.h"
|
||||
#include "shared/source/os_interface/os_thread.h"
|
||||
#include "shared/source/os_interface/os_time.h"
|
||||
#include "shared/source/os_interface/product_helper.h"
|
||||
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
@@ -111,21 +112,28 @@ void DirectSubmissionController::checkNewSubmissions() {
|
||||
|
||||
std::lock_guard<std::mutex> lock(this->directSubmissionsMutex);
|
||||
bool shouldRecalculateTimeout = false;
|
||||
std::optional<TaskCountType> bcsTaskCount{};
|
||||
for (auto &directSubmission : this->directSubmissions) {
|
||||
auto csr = directSubmission.first;
|
||||
auto &state = directSubmission.second;
|
||||
|
||||
if (timeoutMode == TimeoutElapsedMode::bcsOnly && !EngineHelpers::isBcs(csr->getOsContext().getEngineType())) {
|
||||
auto isBcs = EngineHelpers::isBcs(csr->getOsContext().getEngineType());
|
||||
if (timeoutMode == TimeoutElapsedMode::bcsOnly && !isBcs) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (isBcs) {
|
||||
bcsTaskCount = state.taskCount;
|
||||
}
|
||||
auto taskCount = csr->peekTaskCount();
|
||||
if (taskCount == state.taskCount) {
|
||||
if (state.isStopped) {
|
||||
continue;
|
||||
}
|
||||
bool isCopyEngineIdle = true;
|
||||
if (!isBcs && csr->getProductHelper().checkBcsForDirectSubmissionStop()) {
|
||||
isCopyEngineIdle = isCopyEngineOnDeviceIdle(csr->getRootDeviceIndex(), bcsTaskCount);
|
||||
}
|
||||
auto lock = csr->obtainUniqueOwnership();
|
||||
if (!isCsrIdleDetectionEnabled || isDirectSubmissionIdle(csr, lock)) {
|
||||
if (!isCsrIdleDetectionEnabled || (isCopyEngineIdle && isDirectSubmissionIdle(csr, lock))) {
|
||||
csr->stopDirectSubmission(false, false);
|
||||
state.isStopped = true;
|
||||
shouldRecalculateTimeout = true;
|
||||
@@ -169,6 +177,27 @@ bool DirectSubmissionController::isDirectSubmissionIdle(CommandStreamReceiver *c
|
||||
return !csr->isBusyWithoutHang(lastHangCheckTime);
|
||||
}
|
||||
|
||||
bool DirectSubmissionController::isCopyEngineOnDeviceIdle(uint32_t rootDeviceIndex, std::optional<TaskCountType> &bcsTaskCount) {
|
||||
CommandStreamReceiver *bcsCsr = nullptr;
|
||||
TaskCountType registeredTaskCount = 0;
|
||||
for (auto &directSubmission : this->directSubmissions) {
|
||||
auto csr = directSubmission.first;
|
||||
if (csr->getRootDeviceIndex() == rootDeviceIndex && EngineHelpers::isBcs(csr->getOsContext().getEngineType())) {
|
||||
if (!directSubmission.second.isStopped) {
|
||||
registeredTaskCount = bcsTaskCount.value_or(directSubmission.second.taskCount);
|
||||
bcsCsr = csr;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (bcsCsr == nullptr) {
|
||||
return true;
|
||||
}
|
||||
|
||||
auto lock = bcsCsr->obtainUniqueOwnership();
|
||||
return (bcsCsr->peekTaskCount() == registeredTaskCount) && isDirectSubmissionIdle(bcsCsr, lock);
|
||||
}
|
||||
|
||||
// Returns the current time point as reported by SteadyClock::now().
SteadyClock::time_point DirectSubmissionController::getCpuTimestamp() {
    const auto currentTimePoint = SteadyClock::now();
    return currentTimePoint;
}
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include <condition_variable>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
#include <queue>
|
||||
#include <unordered_map>
|
||||
|
||||
@@ -87,6 +88,7 @@ class DirectSubmissionController {
|
||||
static void *controlDirectSubmissionsState(void *self);
|
||||
void checkNewSubmissions();
|
||||
bool isDirectSubmissionIdle(CommandStreamReceiver *csr, std::unique_lock<std::recursive_mutex> &csrLock);
|
||||
bool isCopyEngineOnDeviceIdle(uint32_t rootDeviceIndex, std::optional<TaskCountType> &bcsTaskCount);
|
||||
MOCKABLE_VIRTUAL bool sleep(std::unique_lock<std::mutex> &lock);
|
||||
MOCKABLE_VIRTUAL SteadyClock::time_point getCpuTimestamp();
|
||||
MOCKABLE_VIRTUAL void overrideDirectSubmissionTimeouts(const ProductHelper &productHelper);
|
||||
|
||||
@@ -274,6 +274,7 @@ class ProductHelper {
|
||||
virtual void adjustRTDispatchGlobals(RTDispatchGlobals &rtDispatchGlobals, const HardwareInfo &hwInfo) const = 0;
|
||||
virtual uint32_t getSyncNumRTStacksPerDss(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual uint32_t getNumRtStacksPerDSSForAllocation(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual bool checkBcsForDirectSubmissionStop() const = 0;
|
||||
virtual bool shouldRegisterEnqueuedWalkerWithProfiling() const = 0;
|
||||
|
||||
virtual bool getStorageInfoLocalOnlyFlag(LocalMemAllocationMode usmDeviceAllocationMode, bool defaultValue) const = 0;
|
||||
|
||||
@@ -1098,6 +1098,11 @@ uint32_t ProductHelperHw<gfxProduct>::getNumRtStacksPerDSSForAllocation(const Ha
|
||||
return RayTracingHelper::getAsyncNumRTStacksPerDss();
|
||||
}
|
||||
|
||||
template <PRODUCT_FAMILY gfxProduct>
|
||||
bool ProductHelperHw<gfxProduct>::checkBcsForDirectSubmissionStop() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <PRODUCT_FAMILY gfxProduct>
|
||||
bool ProductHelperHw<gfxProduct>::shouldRegisterEnqueuedWalkerWithProfiling() const {
|
||||
return false;
|
||||
|
||||
@@ -211,6 +211,7 @@ class ProductHelperHw : public ProductHelper {
|
||||
void adjustRTDispatchGlobals(RTDispatchGlobals &rtDispatchGlobals, const HardwareInfo &hwInfo) const override;
|
||||
uint32_t getSyncNumRTStacksPerDss(const HardwareInfo &hwInfo) const override;
|
||||
uint32_t getNumRtStacksPerDSSForAllocation(const HardwareInfo &hwInfo) const override;
|
||||
bool checkBcsForDirectSubmissionStop() const override;
|
||||
bool shouldRegisterEnqueuedWalkerWithProfiling() const override;
|
||||
|
||||
~ProductHelperHw() override = default;
|
||||
|
||||
@@ -51,4 +51,9 @@ void ProductHelperHw<gfxProduct>::adjustScratchSize(size_t &requiredScratchSize)
|
||||
requiredScratchSize *= 2;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool ProductHelperHw<gfxProduct>::checkBcsForDirectSubmissionStop() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include "shared/source/os_interface/os_time.h"
|
||||
#include "shared/source/os_interface/product_helper.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/default_hw_info.h"
|
||||
#include "shared/test/common/helpers/engine_descriptor_helper.h"
|
||||
#include "shared/test/common/mocks/mock_command_stream_receiver.h"
|
||||
#include "shared/test/common/mocks/mock_execution_environment.h"
|
||||
@@ -458,4 +459,118 @@ TEST_F(DirectSubmissionIdleDetectionTests, givenDebugFlagSetWhenTaskCountNotUpda
|
||||
EXPECT_EQ(0u, csr->flushTagUpdateCalledTimes);
|
||||
}
|
||||
|
||||
struct DirectSubmissionCheckForCopyEngineIdleTests : public ::testing::Test {
|
||||
void SetUp() override {
|
||||
controller = std::make_unique<DirectSubmissionControllerMock>();
|
||||
executionEnvironment.prepareRootDeviceEnvironments(2);
|
||||
executionEnvironment.initializeMemoryManager();
|
||||
executionEnvironment.rootDeviceEnvironments[0]->initOsTime();
|
||||
|
||||
DeviceBitfield deviceBitfield(1);
|
||||
ccsCsr = std::make_unique<TagUpdateMockCommandStreamReceiver>(executionEnvironment, 0, deviceBitfield);
|
||||
bcsCsr = std::make_unique<TagUpdateMockCommandStreamReceiver>(executionEnvironment, 0, deviceBitfield);
|
||||
ccsOsContext.reset(OsContext::create(nullptr, 0, 0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_CCS, EngineUsage::regular}, PreemptionMode::ThreadGroup, deviceBitfield)));
|
||||
bcsOsContext.reset(OsContext::create(nullptr, 0, 0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::regular}, PreemptionMode::ThreadGroup, deviceBitfield)));
|
||||
ccsCsr->setupContext(*ccsOsContext);
|
||||
bcsCsr->setupContext(*bcsOsContext);
|
||||
|
||||
controller->timeoutElapsedReturnValue.store(TimeoutElapsedMode::fullyElapsed);
|
||||
controller->registerDirectSubmission(ccsCsr.get());
|
||||
controller->registerDirectSubmission(bcsCsr.get());
|
||||
bcsCsr->taskCount.store(10u);
|
||||
ccsCsr->taskCount.store(10u);
|
||||
controller->checkNewSubmissions();
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
controller->unregisterDirectSubmission(ccsCsr.get());
|
||||
controller->unregisterDirectSubmission(bcsCsr.get());
|
||||
}
|
||||
|
||||
MockExecutionEnvironment executionEnvironment{defaultHwInfo.get(), true, 2u};
|
||||
|
||||
std::unique_ptr<OsContext> osContext;
|
||||
std::unique_ptr<TagUpdateMockCommandStreamReceiver> ccsCsr;
|
||||
std::unique_ptr<OsContext> ccsOsContext;
|
||||
|
||||
std::unique_ptr<TagUpdateMockCommandStreamReceiver> bcsCsr;
|
||||
std::unique_ptr<OsContext> bcsOsContext;
|
||||
std::unique_ptr<DirectSubmissionControllerMock> controller;
|
||||
};
|
||||
|
||||
// CCS reports idle while the (not stopped) BCS reports busy: CCS direct submission is kept
// running when the product helper requests the BCS check, and is stopped otherwise.
TEST_F(DirectSubmissionCheckForCopyEngineIdleTests, givenCheckBcsForDirectSubmissionStopWhenCCSIdleAndCopyEngineBusyThenDontTerminateDirectSubmission) {
    ccsCsr->setLatestFlushedTaskCount(10u);
    bcsCsr->setLatestFlushedTaskCount(10u);

    ccsCsr->isBusyReturnValue = false;
    bcsCsr->isBusyReturnValue = true;
    controller->directSubmissions[bcsCsr.get()].isStopped = false;

    controller->checkNewSubmissions();

    auto &ccsState = controller->directSubmissions[ccsCsr.get()];
    EXPECT_EQ(ccsState.taskCount, 10u);

    const bool bcsCheckRequired = ccsCsr->getProductHelper().checkBcsForDirectSubmissionStop();
    if (!bcsCheckRequired) {
        EXPECT_TRUE(ccsState.isStopped);
        EXPECT_EQ(1u, ccsCsr->stopDirectSubmissionCalledTimes);
        return;
    }
    EXPECT_FALSE(ccsState.isStopped);
    EXPECT_EQ(0u, ccsCsr->stopDirectSubmissionCalledTimes);
}
|
||||
|
||||
// Neither engine reports busy, but the BCS task count moved from 10 to 20 since the last
// check: CCS direct submission is kept running when the product helper requests the BCS
// check, and is stopped otherwise.
TEST_F(DirectSubmissionCheckForCopyEngineIdleTests, givenCheckBcsForDirectSubmissionStopWhenCCSIdleAndCopyEngineUpdatedTaskCountThenDontTerminateDirectSubmission) {
    ccsCsr->setLatestFlushedTaskCount(10u);
    bcsCsr->setLatestFlushedTaskCount(10u);

    ccsCsr->isBusyReturnValue = false;
    bcsCsr->isBusyReturnValue = false;
    controller->directSubmissions[bcsCsr.get()].isStopped = false;
    bcsCsr->taskCount.store(20u);

    controller->checkNewSubmissions();

    auto &ccsState = controller->directSubmissions[ccsCsr.get()];
    EXPECT_EQ(ccsState.taskCount, 10u);

    const bool bcsCheckRequired = ccsCsr->getProductHelper().checkBcsForDirectSubmissionStop();
    if (!bcsCheckRequired) {
        EXPECT_TRUE(ccsState.isStopped);
        EXPECT_EQ(1u, ccsCsr->stopDirectSubmissionCalledTimes);
        return;
    }
    EXPECT_FALSE(ccsState.isStopped);
    EXPECT_EQ(0u, ccsCsr->stopDirectSubmissionCalledTimes);
}
|
||||
|
||||
// A busy BCS on root device 0 must not keep a CCS on root device 1 running: the
// second-device CCS is expected to be stopped regardless of the product helper setting.
TEST_F(DirectSubmissionCheckForCopyEngineIdleTests, givenCheckBcsForDirectSubmissionStopWhenCCSIdleAndCopyEngineBusyAndDifferentDeviceThenTerminateDirectSubmission) {
    DeviceBitfield deviceBitfield(1);
    TagUpdateMockCommandStreamReceiver secondDeviceCsr(executionEnvironment, 1, deviceBitfield);
    std::unique_ptr<OsContext> secondDeviceOsContext(OsContext::create(nullptr, 1, 0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_CCS, EngineUsage::regular}, PreemptionMode::ThreadGroup, deviceBitfield)));
    secondDeviceCsr.setupContext(*secondDeviceOsContext);
    controller->registerDirectSubmission(&secondDeviceCsr);
    secondDeviceCsr.taskCount.store(10u);
    // Run one pass after storing task count 10 on the newly registered receiver.
    controller->checkNewSubmissions();

    secondDeviceCsr.setLatestFlushedTaskCount(10u);
    bcsCsr->setLatestFlushedTaskCount(10u);

    secondDeviceCsr.isBusyReturnValue = false;
    bcsCsr->isBusyReturnValue = true;
    controller->directSubmissions[bcsCsr.get()].isStopped = false;
    controller->checkNewSubmissions();

    EXPECT_EQ(controller->directSubmissions[&secondDeviceCsr].taskCount, 10u);
    EXPECT_TRUE(controller->directSubmissions[&secondDeviceCsr].isStopped);
    EXPECT_EQ(1u, secondDeviceCsr.stopDirectSubmissionCalledTimes);

    // secondDeviceCsr lives on this test's stack while the controller is owned by the
    // fixture; remove the entry so the controller never holds a dangling pointer.
    controller->unregisterDirectSubmission(&secondDeviceCsr);
}
|
||||
|
||||
// When the BCS entry is marked as stopped, its busy state is not taken into account and
// the idle CCS direct submission is expected to be stopped.
TEST_F(DirectSubmissionCheckForCopyEngineIdleTests, givenCheckBcsForDirectSubmissionStopWhenCopyEngineNotStartedThenTerminateDirectSubmission) {
    ccsCsr->setLatestFlushedTaskCount(10u);
    bcsCsr->setLatestFlushedTaskCount(10u);

    ccsCsr->isBusyReturnValue = false;
    bcsCsr->isBusyReturnValue = true;
    controller->directSubmissions[bcsCsr.get()].isStopped = true;

    controller->checkNewSubmissions();

    auto &ccsState = controller->directSubmissions[ccsCsr.get()];
    EXPECT_EQ(ccsState.taskCount, 10u);
    EXPECT_TRUE(ccsState.isStopped);
    EXPECT_EQ(1u, ccsCsr->stopDirectSubmissionCalledTimes);
}
|
||||
|
||||
} // namespace NEO
|
||||
Reference in New Issue
Block a user