mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-04 23:56:39 +08:00
performance: debug key for adjust ULLS on battery
The ULLS controller timeout settings are adjusted based on the AC line status and the lowest queue throttle seen in submissions. The lowest queue throttle is reset when the controller stops ULLS.

Related-To: NEO-10800
Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
ec19ce536a
commit
2b964254d6
@@ -634,6 +634,7 @@ void CommandStreamReceiver::downloadAllocation(GraphicsAllocation &gfxAllocation
|
||||
void CommandStreamReceiver::startControllingDirectSubmissions() {
|
||||
auto controller = this->executionEnvironment.directSubmissionController.get();
|
||||
if (controller) {
|
||||
controller->setTimeoutParamsForPlatform(this->getProductHelper());
|
||||
controller->startControlling();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
#include "shared/source/helpers/cache_policy.h"
|
||||
#include "shared/source/helpers/common_types.h"
|
||||
#include "shared/source/helpers/completion_stamp.h"
|
||||
#include "shared/source/helpers/kmd_notify_properties.h"
|
||||
#include "shared/source/helpers/options.h"
|
||||
#include "shared/source/utilities/spinlock.h"
|
||||
|
||||
@@ -323,6 +324,8 @@ class CommandStreamReceiver {
|
||||
|
||||
virtual void stopDirectSubmission(bool blocking) {}
|
||||
|
||||
virtual QueueThrottle getLastDirectSubmissionThrottle() = 0;
|
||||
|
||||
bool isStaticWorkPartitioningEnabled() const {
|
||||
return staticWorkPartitioningEnabled;
|
||||
}
|
||||
@@ -460,6 +463,13 @@ class CommandStreamReceiver {
|
||||
return this->resourcesInitialized;
|
||||
}
|
||||
|
||||
// Returns the AC-line-connected flag held by the KMD notify helper.
// When updateStatus is true, the helper is first asked to update its AC line
// status, so the returned value is the one read after that update.
MOCKABLE_VIRTUAL bool getAcLineConnected(bool updateStatus) const {
    auto &notifyHelper = *this->kmdNotifyHelper;
    if (updateStatus) {
        notifyHelper.updateAcLineStatus();
    }
    return notifyHelper.getAcLineConnected();
}
|
||||
|
||||
uint32_t getRequiredScratchSlot0Size() { return requiredScratchSlot0Size; }
|
||||
uint32_t getRequiredScratchSlot1Size() { return requiredScratchSlot1Size; }
|
||||
|
||||
|
||||
@@ -156,6 +156,8 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
|
||||
void stopDirectSubmission(bool blocking) override;
|
||||
|
||||
QueueThrottle getLastDirectSubmissionThrottle() override;
|
||||
|
||||
virtual bool isKmdWaitModeActive() { return true; }
|
||||
|
||||
bool initDirectSubmission() override;
|
||||
|
||||
@@ -1362,6 +1362,18 @@ inline void CommandStreamReceiverHw<GfxFamily>::stopDirectSubmission(bool blocki
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline QueueThrottle CommandStreamReceiverHw<GfxFamily>::getLastDirectSubmissionThrottle() {
|
||||
if (this->isAnyDirectSubmissionEnabled()) {
|
||||
if (EngineHelpers::isBcs(this->osContext->getEngineType())) {
|
||||
return this->blitterDirectSubmission->getLastSubmittedThrottle();
|
||||
} else {
|
||||
return this->directSubmission->getLastSubmittedThrottle();
|
||||
}
|
||||
}
|
||||
return QueueThrottle::MEDIUM;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline bool CommandStreamReceiverHw<GfxFamily>::initDirectSubmission() {
|
||||
bool ret = true;
|
||||
|
||||
@@ -402,6 +402,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, EnableDirectSubmissionController, -1, "Enable di
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionControllerTimeout, -1, "Set direct submission controller timeout, -1: default 5000 us, >=0: timeout in us")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionControllerMaxTimeout, -1, "Set direct submission controller max timeout - timeout will increase up to given value, -1: default 5000 us, >=0: max timeout in us")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionControllerDivisor, -1, "Set direct submission controller timeout divider, -1: default 1, >0: divider value")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionControllerAdjustOnThrottleAndAcLineStatus, -1, "Adjust controller timeout settings based on queue throttle and ac line status, -1: default, 0: disabled, 1: enabled")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionForceLocalMemoryStorageMode, -1, "Force local memory storage for command/ring/semaphore buffer, -1: default - for all engines, 0: disabled, 1: for multiOsContextCapable engine, 2: for all engines")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableRingSwitchTagUpdateWa, -1, "-1: default, 0 - disable, 1 - enable. If enabled, completionFences wont be updated if ring is not running.")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionPCIBarrier, -1, "Use PCI barrier for data synchronization before semaphore unblock -1: default, 0 - disable, 1 - enable.")
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2023 Intel Corporation
|
||||
* Copyright (C) 2019-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -11,6 +11,7 @@
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/os_interface/os_context.h"
|
||||
#include "shared/source/os_interface/os_thread.h"
|
||||
#include "shared/source/os_interface/product_helper.h"
|
||||
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
@@ -28,6 +29,10 @@ DirectSubmissionController::DirectSubmissionController() {
|
||||
maxTimeout = std::chrono::microseconds{debugManager.flags.DirectSubmissionControllerMaxTimeout.get()};
|
||||
}
|
||||
|
||||
if (debugManager.flags.DirectSubmissionControllerAdjustOnThrottleAndAcLineStatus.get() != -1) {
|
||||
adjustTimeoutOnThrottleAndAcLineStatus = debugManager.flags.DirectSubmissionControllerAdjustOnThrottleAndAcLineStatus.get();
|
||||
}
|
||||
|
||||
directSubmissionControllingThread = Thread::create(controlDirectSubmissionsState, reinterpret_cast<void *>(this));
|
||||
};
|
||||
|
||||
@@ -45,6 +50,33 @@ void DirectSubmissionController::registerDirectSubmission(CommandStreamReceiver
|
||||
this->adjustTimeout(csr);
|
||||
}
|
||||
|
||||
void DirectSubmissionController::setTimeoutParamsForPlatform(const ProductHelper &helper) {
|
||||
for (auto throttle : {QueueThrottle::LOW, QueueThrottle::MEDIUM, QueueThrottle::HIGH}) {
|
||||
for (auto acLineStatus : {false, true}) {
|
||||
auto key = this->getTimeoutParamsMapKey(throttle, acLineStatus);
|
||||
auto timeoutParam = std::make_pair(key, helper.getDirectSubmissionControllerTimeoutParams(acLineStatus, throttle));
|
||||
this->timeoutParamsMap.insert(timeoutParam);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Applies the timeout settings stored for the combination of the lowest
// submitted throttle and the given AC line status to this controller
// (timeout, maxTimeout and timeoutDivisor).
//
// When the map holds no entry for that combination (for example because
// setTimeoutParamsForPlatform has not populated it), the current settings are
// kept. Looking the key up with operator[] would instead insert a
// zero-initialized entry and overwrite all three settings with zero.
void DirectSubmissionController::applyTimeoutForAcLineStatusAndThrottle(bool acLineConnected) {
    const auto mapKey = this->getTimeoutParamsMapKey(this->lowestThrottleSubmitted, acLineConnected);
    const auto entry = this->timeoutParamsMap.find(mapKey);
    if (entry == this->timeoutParamsMap.end()) {
        return;
    }

    const auto &timeoutParams = entry->second;
    this->timeout = timeoutParams.timeout;
    this->maxTimeout = timeoutParams.maxTimeout;
    this->timeoutDivisor = timeoutParams.timeoutDivisor;
}
|
||||
|
||||
// Records the given throttle as the lowest one submitted, but only when it is
// lower than the value currently stored in lowestThrottleSubmitted.
void DirectSubmissionController::updateLastSubmittedThrottle(QueueThrottle throttle) {
    const bool isNewLowest = throttle < this->lowestThrottleSubmitted;
    if (!isNewLowest) {
        return;
    }
    this->lowestThrottleSubmitted = throttle;
}
|
||||
|
||||
// Builds the timeoutParamsMap key for a throttle / AC line status pair: the
// throttle value shifted left by one bit, plus 1 when acLineStatus is true.
size_t DirectSubmissionController::getTimeoutParamsMapKey(QueueThrottle throttle, bool acLineStatus) {
    const size_t throttlePart = static_cast<size_t>(throttle) << 1;
    const size_t acLinePart = acLineStatus ? 1u : 0u;
    return throttlePart + acLinePart;
}
|
||||
|
||||
void DirectSubmissionController::unregisterDirectSubmission(CommandStreamReceiver *csr) {
|
||||
std::lock_guard<std::mutex> lock(directSubmissionsMutex);
|
||||
directSubmissions.erase(csr);
|
||||
@@ -78,8 +110,9 @@ void *DirectSubmissionController::controlDirectSubmissionsState(void *self) {
|
||||
void DirectSubmissionController::checkNewSubmissions() {
|
||||
std::lock_guard<std::mutex> lock(this->directSubmissionsMutex);
|
||||
bool shouldRecalculateTimeout = false;
|
||||
CommandStreamReceiver *csr = nullptr;
|
||||
for (auto &directSubmission : this->directSubmissions) {
|
||||
auto csr = directSubmission.first;
|
||||
csr = directSubmission.first;
|
||||
auto &state = directSubmission.second;
|
||||
|
||||
auto taskCount = csr->peekTaskCount();
|
||||
@@ -91,10 +124,15 @@ void DirectSubmissionController::checkNewSubmissions() {
|
||||
csr->stopDirectSubmission(false);
|
||||
state.isStopped = true;
|
||||
shouldRecalculateTimeout = true;
|
||||
this->lowestThrottleSubmitted = QueueThrottle::HIGH;
|
||||
}
|
||||
} else {
|
||||
state.isStopped = false;
|
||||
state.taskCount = taskCount;
|
||||
if (this->adjustTimeoutOnThrottleAndAcLineStatus) {
|
||||
this->updateLastSubmittedThrottle(csr->getLastDirectSubmissionThrottle());
|
||||
this->applyTimeoutForAcLineStatusAndThrottle(csr->getAcLineConnected(true));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (shouldRecalculateTimeout) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2023 Intel Corporation
|
||||
* Copyright (C) 2019-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/command_stream/queue_throttle.h"
|
||||
#include "shared/source/command_stream/task_count_helper.h"
|
||||
#include "shared/source/helpers/device_bitfield.h"
|
||||
|
||||
@@ -21,15 +22,24 @@ namespace NEO {
|
||||
class MemoryManager;
|
||||
class CommandStreamReceiver;
|
||||
class Thread;
|
||||
class ProductHelper;
|
||||
|
||||
using SteadyClock = std::chrono::steady_clock;
|
||||
|
||||
// Timeout settings the direct submission controller stores per combination of
// queue throttle and AC line status.
//
// Every member has a default initializer equal to its value-initialized state,
// so `TimeoutParams p;` yields the same object as `TimeoutParams{}` instead of
// leaving the members indeterminate. The struct remains an aggregate.
struct TimeoutParams {
    // Copied into the controller's maxTimeout when the entry is applied.
    std::chrono::microseconds maxTimeout{0};
    // Copied into the controller's timeout when the entry is applied.
    std::chrono::microseconds timeout{0};
    // Copied into the controller's timeoutDivisor when the entry is applied.
    int timeoutDivisor = 0;
    // NOTE(review): presumably tells whether direct submission should be used
    // for this combination; no consumer is visible here - confirm.
    bool directSubmissionEnabled = false;
};
|
||||
|
||||
class DirectSubmissionController {
|
||||
public:
|
||||
static constexpr size_t defaultTimeout = 5'000;
|
||||
DirectSubmissionController();
|
||||
virtual ~DirectSubmissionController();
|
||||
|
||||
void setTimeoutParamsForPlatform(const ProductHelper &helper);
|
||||
void registerDirectSubmission(CommandStreamReceiver *csr);
|
||||
void unregisterDirectSubmission(CommandStreamReceiver *csr);
|
||||
|
||||
@@ -50,6 +60,9 @@ class DirectSubmissionController {
|
||||
|
||||
void adjustTimeout(CommandStreamReceiver *csr);
|
||||
void recalculateTimeout();
|
||||
void applyTimeoutForAcLineStatusAndThrottle(bool acLineConnected);
|
||||
void updateLastSubmittedThrottle(QueueThrottle throttle);
|
||||
size_t getTimeoutParamsMapKey(QueueThrottle throttle, bool acLineStatus);
|
||||
|
||||
uint32_t maxCcsCount = 1u;
|
||||
std::array<uint32_t, DeviceBitfield().size()> ccsCount = {};
|
||||
@@ -64,5 +77,8 @@ class DirectSubmissionController {
|
||||
std::chrono::microseconds maxTimeout{defaultTimeout};
|
||||
std::chrono::microseconds timeout{defaultTimeout};
|
||||
int timeoutDivisor = 1;
|
||||
std::unordered_map<size_t, TimeoutParams> timeoutParamsMap;
|
||||
QueueThrottle lowestThrottleSubmitted = QueueThrottle::HIGH;
|
||||
bool adjustTimeoutOnThrottleAndAcLineStatus = true;
|
||||
};
|
||||
} // namespace NEO
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -95,6 +95,10 @@ class DirectSubmissionHw {
|
||||
|
||||
virtual void flushMonitorFence(){};
|
||||
|
||||
// Returns the stored lastSubmittedThrottle value.
// Const-qualified because it only reads the member, so it can also be called
// through a const reference or pointer.
QueueThrottle getLastSubmittedThrottle() const {
    return this->lastSubmittedThrottle;
}
|
||||
|
||||
protected:
|
||||
static constexpr size_t prefetchSize = 8 * MemoryConstants::cacheLineSize;
|
||||
static constexpr size_t prefetchNoops = prefetchSize / sizeof(uint32_t);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -53,6 +53,7 @@ class KmdNotifyHelper {
|
||||
bool quickKmdSleepForSporadicWaitsEnabled() const { return properties->enableQuickKmdSleepForSporadicWaits; }
|
||||
MOCKABLE_VIRTUAL void updateLastWaitForCompletionTimestamp();
|
||||
MOCKABLE_VIRTUAL void updateAcLineStatus();
|
||||
bool getAcLineConnected() const { return acLineConnected.load(); }
|
||||
|
||||
static void overrideFromDebugVariable(int32_t debugVariableValue, int64_t &destination);
|
||||
static void overrideFromDebugVariable(int32_t debugVariableValue, bool &destination);
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/command_stream/queue_throttle.h"
|
||||
#include "shared/source/command_stream/task_count_helper.h"
|
||||
|
||||
#include "aubstream/engine_node.h"
|
||||
@@ -39,6 +40,7 @@ class ReleaseHelper;
|
||||
class GraphicsAllocation;
|
||||
class MemoryManager;
|
||||
struct RootDeviceEnvironment;
|
||||
struct TimeoutParams;
|
||||
class OSInterface;
|
||||
class DriverModel;
|
||||
enum class DriverModelType;
|
||||
@@ -112,6 +114,7 @@ class ProductHelper {
|
||||
virtual bool isNewResidencyModelSupported() const = 0;
|
||||
virtual bool isDirectSubmissionSupported(ReleaseHelper *releaseHelper) const = 0;
|
||||
virtual bool isDirectSubmissionConstantCacheInvalidationNeeded(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual TimeoutParams getDirectSubmissionControllerTimeoutParams(bool acLineConnected, QueueThrottle queueThrottle) const = 0;
|
||||
virtual std::pair<bool, bool> isPipeControlPriorToNonPipelinedStateCommandsWARequired(const HardwareInfo &hwInfo, bool isRcs, const ReleaseHelper *releaseHelper) const = 0;
|
||||
virtual bool heapInLocalMem(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual void setCapabilityCoherencyFlag(const HardwareInfo &hwInfo, bool &coherencyFlag) = 0;
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#include "shared/source/aub_mem_dump/aub_mem_dump.h"
|
||||
#include "shared/source/command_stream/stream_properties.h"
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/direct_submission/direct_submission_controller.h"
|
||||
#include "shared/source/execution_environment/root_device_environment.h"
|
||||
#include "shared/source/helpers/cache_policy.h"
|
||||
#include "shared/source/helpers/constants.h"
|
||||
@@ -356,6 +357,16 @@ bool ProductHelperHw<gfxProduct>::isDirectSubmissionConstantCacheInvalidationNee
|
||||
return false;
|
||||
}
|
||||
|
||||
template <PRODUCT_FAMILY gfxProduct>
|
||||
TimeoutParams ProductHelperHw<gfxProduct>::getDirectSubmissionControllerTimeoutParams(bool acLineConnected, QueueThrottle queueThrottle) const {
|
||||
TimeoutParams params{};
|
||||
params.maxTimeout = std::chrono::microseconds{DirectSubmissionController::defaultTimeout};
|
||||
params.timeout = std::chrono::microseconds{DirectSubmissionController::defaultTimeout};
|
||||
params.timeoutDivisor = 1;
|
||||
params.directSubmissionEnabled = true;
|
||||
return params;
|
||||
}
|
||||
|
||||
template <PRODUCT_FAMILY gfxProduct>
|
||||
bool ProductHelperHw<gfxProduct>::isForceEmuInt32DivRemSPWARequired(const HardwareInfo &hwInfo) const {
|
||||
return false;
|
||||
|
||||
@@ -59,6 +59,7 @@ class ProductHelperHw : public ProductHelper {
|
||||
bool isNewResidencyModelSupported() const override;
|
||||
bool isDirectSubmissionSupported(ReleaseHelper *releaseHelper) const override;
|
||||
bool isDirectSubmissionConstantCacheInvalidationNeeded(const HardwareInfo &hwInfo) const override;
|
||||
TimeoutParams getDirectSubmissionControllerTimeoutParams(bool acLineConnected, QueueThrottle queueThrottle) const override;
|
||||
std::pair<bool, bool> isPipeControlPriorToNonPipelinedStateCommandsWARequired(const HardwareInfo &hwInfo, bool isRcs, const ReleaseHelper *releaseHelper) const override;
|
||||
bool heapInLocalMem(const HardwareInfo &hwInfo) const override;
|
||||
void setCapabilityCoherencyFlag(const HardwareInfo &hwInfo, bool &coherencyFlag) override;
|
||||
|
||||
Reference in New Issue
Block a user