feat: direct submission variable timeout

Add a mechanism to increase the direct submission timeout, up to a maximum
value, when no new submissions were made since the last sleep.

This should help workloads whose delays between iterations are longer
than the current direct submission controller timeout.

Related-To: NEO-7878

Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
Dominik Dabek
2023-04-06 12:14:02 +00:00
committed by Compute-Runtime-Automation
parent 51b8dc66a3
commit 411ed1c643
6 changed files with 150 additions and 29 deletions

View File

@@ -19,11 +19,14 @@ namespace NEO {
// Builds the controller: applies optional debug-flag overrides and then
// starts the controlling thread.
//
// Each debug flag is read once; a value of -1 leaves the corresponding
// member at its in-class default. Timeout values taken from the flags are
// interpreted as microseconds.
DirectSubmissionController::DirectSubmissionController() {
    const auto timeoutOverride = DebugManager.flags.DirectSubmissionControllerTimeout.get();
    if (timeoutOverride != -1) {
        timeout = std::chrono::microseconds{timeoutOverride};
    }

    const auto divisorOverride = DebugManager.flags.DirectSubmissionControllerDivisor.get();
    if (divisorOverride != -1) {
        timeoutDivisor = divisorOverride;
    }

    const auto maxTimeoutOverride = DebugManager.flags.DirectSubmissionControllerMaxTimeout.get();
    if (maxTimeoutOverride != -1) {
        maxTimeout = std::chrono::microseconds{maxTimeoutOverride};
    }

    // The thread entry point receives this controller as its opaque argument.
    directSubmissionControllingThread = Thread::create(controlDirectSubmissionsState, reinterpret_cast<void *>(this));
}
@@ -74,7 +77,7 @@ void *DirectSubmissionController::controlDirectSubmissionsState(void *self) {
// Walks every registered direct submission while holding
// directSubmissionsMutex. If at least one submission is stopped during the
// walk, the controller timeout is recalculated once after the loop.
// NOTE(review): this excerpt is split by a diff hunk header; the statements
// between the two hunks (including the conditions guarding the stop branch
// and the definition of taskCount) are not visible here.
void DirectSubmissionController::checkNewSubmissions() {
std::lock_guard<std::mutex> lock(this->directSubmissionsMutex);
// Becomes true as soon as any direct submission is stopped in this pass.
bool shouldRecalculateTimeout = false;
for (auto &directSubmission : this->directSubmissions) {
// Map entry: key is the CSR, value is its tracked state.
auto csr = directSubmission.first;
auto &state = directSubmission.second;
@@ -87,18 +90,26 @@ void DirectSubmissionController::checkNewSubmissions() {
// Obtain unique ownership of the CSR before stopping its direct submission.
auto lock = csr->obtainUniqueOwnership();
csr->stopDirectSubmission();
state.isStopped = true;
shouldRecalculateTimeout = true;
}
} else {
// Otherwise mark the submission as running and remember the task count.
state.isStopped = false;
state.taskCount = taskCount;
}
}
// Recalculate once per pass, after the loop, rather than once per stopped CSR.
if (shouldRecalculateTimeout) {
this->recalculateTimeout();
}
}
// Suspends the calling thread for the currently configured timeout,
// expressed in microseconds.
void DirectSubmissionController::sleep() {
    const auto sleepDuration = std::chrono::microseconds(this->timeout);
    std::this_thread::sleep_for(sleepDuration);
}
// Returns the current reading of SteadyClock.
SteadyClock::time_point DirectSubmissionController::getCpuTimestamp() {
    SteadyClock::time_point currentTime = SteadyClock::now();
    return currentTime;
}
// For a CSR whose engine type is CCS, iterates over every sub-device index
// of the OS context device bitfield; other engine types are left untouched.
// NOTE(review): the loop body sits between two diff hunks and is not visible
// in this excerpt, so the actual adjustment cannot be described from here.
void DirectSubmissionController::adjustTimeout(CommandStreamReceiver *csr) {
if (EngineHelpers::isCcs(csr->getOsContext().getEngineType())) {
for (size_t subDeviceIndex = 0u; subDeviceIndex < csr->getOsContext().getDeviceBitfield().size(); ++subDeviceIndex) {
@@ -114,4 +125,16 @@ void DirectSubmissionController::adjustTimeout(CommandStreamReceiver *csr) {
}
}
// Re-evaluates the controller timeout from the time elapsed since the
// previous call.
//
// When the elapsed time is longer than the current timeout but does not
// exceed maxTimeout, the timeout becomes 1.5x the elapsed time, clamped to
// maxTimeout. In every case the current timestamp is stored for the next call.
void DirectSubmissionController::recalculateTimeout() {
    const auto currentTimestamp = this->getCpuTimestamp();
    const auto elapsed = std::chrono::duration_cast<std::chrono::microseconds>(currentTimestamp - this->lastTerminateCpuTimestamp);
    DEBUG_BREAK_IF(elapsed.count() < 0);

    const bool longerThanCurrent = elapsed > this->timeout;
    const bool withinUpperBound = elapsed <= this->maxTimeout;
    if (longerThanCurrent && withinUpperBound) {
        // Scaling by a double yields a floating-point duration; cast back to whole microseconds.
        auto candidate = std::chrono::duration_cast<std::chrono::microseconds>(elapsed * 1.5);
        if (!(candidate < this->maxTimeout)) {
            candidate = this->maxTimeout;
        }
        this->timeout = candidate;
    }

    this->lastTerminateCpuTimestamp = currentTimestamp;
}
} // namespace NEO

View File

@@ -12,6 +12,7 @@
#include <array>
#include <atomic>
#include <chrono>
#include <memory>
#include <mutex>
#include <unordered_map>
@@ -21,8 +22,11 @@ class MemoryManager;
class CommandStreamReceiver;
class Thread;
using SteadyClock = std::chrono::steady_clock;
// Controller for direct submissions: declares the controlling-thread entry
// point, the periodic check and sleep steps, and the timeout state they use.
// NOTE(review): this excerpt is a diff view interrupted by hunk headers, so
// some members of the class are not visible here.
class DirectSubmissionController {
public:
// Default value used to initialise both timeout and maxTimeout (microseconds).
static constexpr size_t defaultTimeout = 5'000;
DirectSubmissionController();
virtual ~DirectSubmissionController();
@@ -42,8 +46,10 @@ class DirectSubmissionController {
// Thread entry point; self is the controller passed as an opaque pointer.
static void *controlDirectSubmissionsState(void *self);
// Inspects registered direct submissions and may trigger recalculateTimeout().
void checkNewSubmissions();
// Sleeps the calling thread for the current timeout.
MOCKABLE_VIRTUAL void sleep();
// Returns the current SteadyClock time point.
MOCKABLE_VIRTUAL SteadyClock::time_point getCpuTimestamp();
void adjustTimeout(CommandStreamReceiver *csr);
// Updates timeout from the time elapsed since the previous recalculation,
// never exceeding maxTimeout.
void recalculateTimeout();
uint32_t maxCcsCount = 1u;
std::array<uint32_t, DeviceBitfield().size()> ccsCount = {};
@@ -54,7 +60,9 @@ class DirectSubmissionController {
std::atomic_bool keepControlling = true;
std::atomic_bool runControlling = false;
// NOTE(review): both an int and a std::chrono::microseconds declaration of
// timeout appear in this excerpt; they look like the removed and added lines
// of the diff. Confirm which one the current header keeps.
int timeout = 5000;
// Timestamp stored by the most recent recalculateTimeout() call.
SteadyClock::time_point lastTerminateCpuTimestamp{};
// Upper bound applied when recalculateTimeout() grows the timeout.
std::chrono::microseconds maxTimeout{defaultTimeout};
// Current sleep interval used by sleep().
std::chrono::microseconds timeout{defaultTimeout};
int timeoutDivisor = 1;
};
} // namespace NEO