mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 06:24:51 +08:00
performance: improve ULLS controller timeout detection
Related-To: NEO-12991 Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
7a440f1143
commit
01a0b8e7f7
@@ -20,6 +20,7 @@ TEST(DirectSubmissionControllerTestsMt, givenDirectSubmissionControllerWhenTimeo
|
||||
MockExecutionEnvironment executionEnvironment;
|
||||
executionEnvironment.prepareRootDeviceEnvironments(1);
|
||||
executionEnvironment.initializeMemoryManager();
|
||||
executionEnvironment.rootDeviceEnvironments[0]->initOsTime();
|
||||
|
||||
DeviceBitfield deviceBitfield(1);
|
||||
MockCommandStreamReceiver csr(executionEnvironment, 0, deviceBitfield);
|
||||
@@ -82,6 +83,7 @@ TEST(DirectSubmissionControllerTestsMt, givenDirectSubmissionControllerWhenEnque
|
||||
MockExecutionEnvironment executionEnvironment;
|
||||
executionEnvironment.prepareRootDeviceEnvironments(1);
|
||||
executionEnvironment.initializeMemoryManager();
|
||||
executionEnvironment.rootDeviceEnvironments[0]->initOsTime();
|
||||
|
||||
DeviceBitfield deviceBitfield(1);
|
||||
MockCommandStreamReceiver csr(executionEnvironment, 0, deviceBitfield);
|
||||
|
||||
@@ -544,7 +544,7 @@ class CommandStreamReceiver {
|
||||
uint32_t getRequiredScratchSlot1Size() { return requiredScratchSlot1Size; }
|
||||
virtual bool submitDependencyUpdate(TagNodeBase *tag) = 0;
|
||||
|
||||
bool isBusy() {
|
||||
// Reports whether the receiver still has outstanding work: it is busy as long as
// testTaskCountReady() says the tag has not reached this receiver's task count.
MOCKABLE_VIRTUAL bool isBusy() {
    const bool taskCountReady = testTaskCountReady(getTagAddress(), this->taskCount);
    return !taskCountReady;
}
|
||||
|
||||
|
||||
@@ -444,7 +444,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionPrintSemaphoreUsage, -1, "-1: de
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionSwitchSemaphoreMode, -1, "-1: default, 1: enable switch on unsuccessful, 0: disable switch on unsuccessful")
|
||||
DECLARE_DEBUG_VARIABLE(bool, DirectSubmissionPrintBuffers, false, "Print address of submitted command buffers")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, WaitForPagingFenceInController, -1, "Instead of waiting for paging fence on user thread, program additional semaphore which will be signaled by direct submission controller when paging fence reaches required value -1: default, 0 - disable, 1 - enable.")
|
||||
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionControllerIdleDetection, -1, "Terminate direct submission only if CSR is idle. -1: default, 0 - disable, 1 - enable.")
|
||||
/*FEATURE FLAGS*/
|
||||
DECLARE_DEBUG_VARIABLE(bool, USMEvictAfterMigration, false, "Evict USM allocation after implicit migration to GPU")
|
||||
DECLARE_DEBUG_VARIABLE(bool, RegisterPageFaultHandlerOnMigration, true, "Register handler on migration to GPU when current is not from pagefault manager")
|
||||
|
||||
@@ -9,9 +9,11 @@
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/execution_environment/root_device_environment.h"
|
||||
#include "shared/source/helpers/sleep.h"
|
||||
#include "shared/source/os_interface/os_context.h"
|
||||
#include "shared/source/os_interface/os_thread.h"
|
||||
#include "shared/source/os_interface/os_time.h"
|
||||
#include "shared/source/os_interface/product_helper.h"
|
||||
|
||||
#include <chrono>
|
||||
@@ -29,6 +31,10 @@ DirectSubmissionController::DirectSubmissionController() {
|
||||
if (debugManager.flags.DirectSubmissionControllerMaxTimeout.get() != -1) {
|
||||
maxTimeout = std::chrono::microseconds{debugManager.flags.DirectSubmissionControllerMaxTimeout.get()};
|
||||
}
|
||||
isCsrIdleDetectionEnabled = false;
|
||||
if (debugManager.flags.DirectSubmissionControllerIdleDetection.get() != -1) {
|
||||
isCsrIdleDetectionEnabled = debugManager.flags.DirectSubmissionControllerIdleDetection.get();
|
||||
}
|
||||
};
|
||||
|
||||
DirectSubmissionController::~DirectSubmissionController() {
|
||||
@@ -145,13 +151,15 @@ void DirectSubmissionController::checkNewSubmissions() {
|
||||
if (taskCount == state.taskCount) {
|
||||
if (state.isStopped) {
|
||||
continue;
|
||||
} else {
|
||||
auto lock = csr->obtainUniqueOwnership();
|
||||
}
|
||||
auto lock = csr->obtainUniqueOwnership();
|
||||
if (!isCsrIdleDetectionEnabled || isDirectSubmissionIdle(csr, lock)) {
|
||||
csr->stopDirectSubmission(false);
|
||||
state.isStopped = true;
|
||||
shouldRecalculateTimeout = true;
|
||||
this->lowestThrottleSubmitted = QueueThrottle::HIGH;
|
||||
}
|
||||
state.taskCount = csr->peekTaskCount();
|
||||
} else {
|
||||
state.isStopped = false;
|
||||
state.taskCount = taskCount;
|
||||
@@ -171,6 +179,30 @@ bool DirectSubmissionController::sleep(std::unique_lock<std::mutex> &lock) {
|
||||
return NEO::waitOnConditionWithPredicate(condVar, lock, std::chrono::microseconds(this->timeout), [&] { return !pagingFenceRequests.empty(); });
|
||||
}
|
||||
|
||||
// Decides whether the direct submission driven by `csr` can be considered idle.
//
// csr     - command stream receiver to inspect.
// csrLock - the caller's lock on the receiver (checkNewSubmissions passes the one obtained
//           from csr->obtainUniqueOwnership()). It must be held on entry; it is released
//           while polling and held again when the function returns.
//
// Returns true when csr->isBusy() reports false, i.e. the tag has reached the task count.
bool DirectSubmissionController::isDirectSubmissionIdle(CommandStreamReceiver *csr, std::unique_lock<std::recursive_mutex> &csrLock) {
    // Latest flushed task count already matches the task count: no extra flush is needed,
    // the current busy state is the answer.
    if (csr->peekLatestFlushedTaskCount() == csr->peekTaskCount()) {
        return !csr->isBusy();
    }

    // Task count is ahead of what was flushed; flush a tag update before polling.
    // NOTE(review): the returned status is ignored here - confirm that is intended.
    csr->flushTagUpdate();

    auto osTime = csr->peekRootDeviceEnvironment().osTime.get();
    // Zero-initialized so the deadline below is never computed from an indeterminate value
    // should getCpuTime() leave the output untouched.
    uint64_t currCpuTimeInNS = 0u;
    osTime->getCpuTime(&currCpuTimeInNS);
    const auto timeToWait = currCpuTimeInNS + timeToPollTagUpdateNS;

    // unblock csr during polling
    csrLock.unlock();
    // Poll until the receiver stops being busy or the timeToPollTagUpdateNS budget is used up.
    while (currCpuTimeInNS < timeToWait) {
        if (!csr->isBusy()) {
            break;
        }
        osTime->getCpuTime(&currCpuTimeInNS);
    }
    csrLock.lock();

    // Re-evaluate under the lock; the state may have changed while it was released.
    return !csr->isBusy();
}
|
||||
|
||||
// Returns the current SteadyClock time point.
SteadyClock::time_point DirectSubmissionController::getCpuTimestamp() {
    const auto now = SteadyClock::now();
    return now;
}
|
||||
|
||||
@@ -43,6 +43,7 @@ struct WaitForPagingFenceRequest {
|
||||
class DirectSubmissionController {
|
||||
public:
|
||||
static constexpr size_t defaultTimeout = 5'000;
|
||||
static constexpr size_t timeToPollTagUpdateNS = 20'000;
|
||||
DirectSubmissionController();
|
||||
virtual ~DirectSubmissionController();
|
||||
|
||||
@@ -86,6 +87,7 @@ class DirectSubmissionController {
|
||||
|
||||
static void *controlDirectSubmissionsState(void *self);
|
||||
void checkNewSubmissions();
|
||||
bool isDirectSubmissionIdle(CommandStreamReceiver *csr, std::unique_lock<std::recursive_mutex> &csrLock);
|
||||
MOCKABLE_VIRTUAL bool sleep(std::unique_lock<std::mutex> &lock);
|
||||
MOCKABLE_VIRTUAL SteadyClock::time_point getCpuTimestamp();
|
||||
|
||||
@@ -115,6 +117,7 @@ class DirectSubmissionController {
|
||||
std::unordered_map<size_t, TimeoutParams> timeoutParamsMap;
|
||||
QueueThrottle lowestThrottleSubmitted = QueueThrottle::HIGH;
|
||||
bool adjustTimeoutOnThrottleAndAcLineStatus = false;
|
||||
bool isCsrIdleDetectionEnabled = false;
|
||||
|
||||
std::condition_variable condVar;
|
||||
std::mutex condVarMutex;
|
||||
|
||||
@@ -204,6 +204,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
|
||||
|
||||
void stopDirectSubmission(bool blocking) override {
|
||||
this->blockingStopDirectSubmissionCalled = blocking;
|
||||
stopDirectSubmissionCalledTimes++;
|
||||
}
|
||||
|
||||
bool createPreemptionAllocation() override {
|
||||
@@ -264,6 +265,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
|
||||
uint32_t makeResidentCalledTimes = 0;
|
||||
uint32_t downloadAllocationsCalledCount = 0;
|
||||
uint32_t submitDependencyUpdateCalledTimes = 0;
|
||||
uint32_t stopDirectSubmissionCalledTimes = 0;
|
||||
int hostPtrSurfaceCreationMutexLockCount = 0;
|
||||
bool multiOsContextCapable = false;
|
||||
bool memoryCompressionEnabled = false;
|
||||
|
||||
@@ -630,4 +630,5 @@ IgnoreZebinUnknownAttributes = 0
|
||||
FifoPollInterval = -1
|
||||
MaxSubSlicesSupportedOverride = -1
|
||||
ForceWddmHugeChunkSizeMB = -1
|
||||
DirectSubmissionControllerIdleDetection = -1
|
||||
# Please don't edit below this line
|
||||
|
||||
@@ -7,10 +7,12 @@
|
||||
|
||||
#include "shared/source/os_interface/os_context.h"
|
||||
#include "shared/source/os_interface/os_thread.h"
|
||||
#include "shared/source/os_interface/os_time.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/engine_descriptor_helper.h"
|
||||
#include "shared/test/common/mocks/mock_command_stream_receiver.h"
|
||||
#include "shared/test/common/mocks/mock_execution_environment.h"
|
||||
#include "shared/test/common/mocks/mock_ostime.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
#include "shared/test/unit_test/direct_submission/direct_submission_controller_mock.h"
|
||||
|
||||
@@ -38,6 +40,7 @@ TEST(DirectSubmissionControllerTests, givenDirectSubmissionControllerWhenRegiste
|
||||
MockExecutionEnvironment executionEnvironment;
|
||||
executionEnvironment.prepareRootDeviceEnvironments(1);
|
||||
executionEnvironment.initializeMemoryManager();
|
||||
executionEnvironment.rootDeviceEnvironments[0]->initOsTime();
|
||||
|
||||
DeviceBitfield deviceBitfield(1);
|
||||
MockCommandStreamReceiver csr(executionEnvironment, 0, deviceBitfield);
|
||||
@@ -83,6 +86,7 @@ TEST(DirectSubmissionControllerTests, givenDirectSubmissionControllerAndDivisorD
|
||||
MockExecutionEnvironment executionEnvironment;
|
||||
executionEnvironment.prepareRootDeviceEnvironments(1);
|
||||
executionEnvironment.initializeMemoryManager();
|
||||
executionEnvironment.rootDeviceEnvironments[0]->initOsTime();
|
||||
|
||||
DeviceBitfield deviceBitfield(1);
|
||||
MockCommandStreamReceiver csr(executionEnvironment, 0, deviceBitfield);
|
||||
@@ -203,6 +207,7 @@ TEST(DirectSubmissionControllerTests, givenDirectSubmissionControllerAndAdjustOn
|
||||
MockExecutionEnvironment executionEnvironment;
|
||||
executionEnvironment.prepareRootDeviceEnvironments(1);
|
||||
executionEnvironment.initializeMemoryManager();
|
||||
executionEnvironment.rootDeviceEnvironments[0]->initOsTime();
|
||||
|
||||
DeviceBitfield deviceBitfield(1);
|
||||
MockCommandStreamReceiver csr(executionEnvironment, 0, deviceBitfield);
|
||||
@@ -210,7 +215,6 @@ TEST(DirectSubmissionControllerTests, givenDirectSubmissionControllerAndAdjustOn
|
||||
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_CCS, EngineUsage::regular},
|
||||
PreemptionMode::ThreadGroup, deviceBitfield)));
|
||||
csr.setupContext(*osContext.get());
|
||||
|
||||
DirectSubmissionControllerMock controller;
|
||||
controller.timeoutElapsedReturnValue.store(true);
|
||||
controller.setTimeoutParamsForPlatform(csr.getProductHelper());
|
||||
@@ -652,4 +656,97 @@ TEST(DirectSubmissionControllerTests, givenDirectSubmissionControllerWhenCheckTi
|
||||
EXPECT_FALSE(controller.timeoutElapsed());
|
||||
}
|
||||
|
||||
struct TagUpdateMockCommandStreamReceiver : public MockCommandStreamReceiver {
|
||||
|
||||
TagUpdateMockCommandStreamReceiver(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield)
|
||||
: MockCommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield) {}
|
||||
|
||||
SubmissionStatus flushTagUpdate() override {
|
||||
flushTagUpdateCalledTimes++;
|
||||
return SubmissionStatus::success;
|
||||
}
|
||||
|
||||
bool isBusy() override {
|
||||
return isBusyReturnValue;
|
||||
}
|
||||
|
||||
uint32_t flushTagUpdateCalledTimes = 0;
|
||||
bool isBusyReturnValue = false;
|
||||
};
|
||||
|
||||
struct DirectSubmissionIdleDetectionTests : public ::testing::Test {
|
||||
void SetUp() override {
|
||||
debugManager.flags.DirectSubmissionControllerIdleDetection.set(true);
|
||||
controller = std::make_unique<DirectSubmissionControllerMock>();
|
||||
executionEnvironment.prepareRootDeviceEnvironments(1);
|
||||
executionEnvironment.initializeMemoryManager();
|
||||
executionEnvironment.rootDeviceEnvironments[0]->osTime.reset(new MockOSTime{});
|
||||
|
||||
DeviceBitfield deviceBitfield(1);
|
||||
csr = std::make_unique<TagUpdateMockCommandStreamReceiver>(executionEnvironment, 0, deviceBitfield);
|
||||
osContext.reset(OsContext::create(nullptr, 0, 0,
|
||||
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_CCS, EngineUsage::regular},
|
||||
PreemptionMode::ThreadGroup, deviceBitfield)));
|
||||
csr->setupContext(*osContext);
|
||||
|
||||
controller->timeoutElapsedReturnValue.store(true);
|
||||
controller->registerDirectSubmission(csr.get());
|
||||
csr->taskCount.store(10u);
|
||||
controller->checkNewSubmissions();
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
controller->unregisterDirectSubmission(csr.get());
|
||||
}
|
||||
|
||||
DebugManagerStateRestore restorer;
|
||||
|
||||
MockExecutionEnvironment executionEnvironment;
|
||||
std::unique_ptr<OsContext> osContext;
|
||||
std::unique_ptr<TagUpdateMockCommandStreamReceiver> csr;
|
||||
std::unique_ptr<DirectSubmissionControllerMock> controller;
|
||||
};
|
||||
|
||||
// Latest flushed task count equals the task count, but the receiver reports busy:
// direct submission is kept running and no tag update is flushed.
TEST_F(DirectSubmissionIdleDetectionTests, givenLatestFlushedTaskSameAsTaskCountAndGpuBusyThenDontTerminateDirectSubmission) {
    csr->isBusyReturnValue = true;
    csr->setLatestFlushedTaskCount(10u);

    controller->checkNewSubmissions();

    auto &submissionState = controller->directSubmissions[csr.get()];
    EXPECT_FALSE(submissionState.isStopped);
    EXPECT_EQ(10u, submissionState.taskCount);
    EXPECT_EQ(0u, csr->flushTagUpdateCalledTimes);
    EXPECT_EQ(0u, csr->stopDirectSubmissionCalledTimes);
}
|
||||
|
||||
// Latest flushed task count equals the task count and the receiver is not busy:
// direct submission is stopped once, without flushing a tag update.
TEST_F(DirectSubmissionIdleDetectionTests, givenLatestFlushedTaskSameAsTaskCountAndGpuIdleThenTerminateDirectSubmission) {
    csr->isBusyReturnValue = false;
    csr->setLatestFlushedTaskCount(10u);

    controller->checkNewSubmissions();

    auto &submissionState = controller->directSubmissions[csr.get()];
    EXPECT_TRUE(submissionState.isStopped);
    EXPECT_EQ(10u, submissionState.taskCount);
    EXPECT_EQ(0u, csr->flushTagUpdateCalledTimes);
    EXPECT_EQ(1u, csr->stopDirectSubmissionCalledTimes);
}
|
||||
|
||||
// Latest flushed task count is left untouched (the fixture sets only the task count)
// and the receiver stays busy: one tag update is flushed and direct submission keeps running.
TEST_F(DirectSubmissionIdleDetectionTests, givenLatestFlushedTaskLowerThanTaskCountAndGpuBusyThenFlushTagAndDontTerminateDirectSubmission) {
    csr->isBusyReturnValue = true;

    controller->checkNewSubmissions();

    auto &submissionState = controller->directSubmissions[csr.get()];
    EXPECT_FALSE(submissionState.isStopped);
    EXPECT_EQ(10u, submissionState.taskCount);
    EXPECT_EQ(1u, csr->flushTagUpdateCalledTimes);
    EXPECT_EQ(0u, csr->stopDirectSubmissionCalledTimes);
}
|
||||
|
||||
// Latest flushed task count is left untouched (the fixture sets only the task count)
// and the receiver is not busy: one tag update is flushed and direct submission is stopped once.
TEST_F(DirectSubmissionIdleDetectionTests, givenLatestFlushedTaskLowerThanTaskCountAndGpuIdleThenFlushTagAndTerminateDirectSubmission) {
    csr->isBusyReturnValue = false;

    controller->checkNewSubmissions();

    auto &submissionState = controller->directSubmissions[csr.get()];
    EXPECT_TRUE(submissionState.isStopped);
    EXPECT_EQ(10u, submissionState.taskCount);
    EXPECT_EQ(1u, csr->flushTagUpdateCalledTimes);
    EXPECT_EQ(1u, csr->stopDirectSubmissionCalledTimes);
}
|
||||
|
||||
} // namespace NEO
|
||||
Reference in New Issue
Block a user