feature: Add 3-level wait scheme with tpause intrinsic

Related-To: NEO-14336

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2025-03-21 08:27:46 +00:00
committed by Compute-Runtime-Automation
parent 4e9afb32e6
commit 8a85a96ed2
22 changed files with 252 additions and 103 deletions

View File

@@ -49,6 +49,14 @@ void pause() {
_mm_pause();
}
// Thin wrapper over the _tpause intrinsic.
// `control` and `counter` are forwarded unchanged to _tpause when the build
// defines SUPPORTS_WAITPKG. Builds without it never touch the intrinsic and
// always report 0.
uint8_t tpause(uint32_t control, uint64_t counter) {
    uint8_t status = 0;
#if defined(SUPPORTS_WAITPKG)
    status = _tpause(control, counter);
#endif
    return status;
}
unsigned char umwait(unsigned int ctrl, uint64_t counter) {
#ifdef SUPPORTS_WAITPKG
return _umwait(ctrl, counter);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (C) 2020-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -20,6 +20,8 @@ void clFlushOpt(void *ptr);
// Spin-wait hint; the implementation calls _mm_pause().
void pause();
// Forwards to the _tpause intrinsic when SUPPORTS_WAITPKG is defined; otherwise returns 0.
uint8_t tpause(uint32_t control, uint64_t counter);
// Forwards to the _umwait intrinsic when SUPPORTS_WAITPKG is defined.
unsigned char umwait(unsigned int ctrl, uint64_t counter);
// NOTE(review): presumably arms address monitoring on `a` for a following umwait() - confirm against the implementation.
void umonitor(void *a);

View File

@@ -14,9 +14,11 @@ namespace NEO {
namespace WaitUtils {
// Wait mechanism chosen by init(); stays `uninitialized` until init() has run.
WaitpkgUse waitpkgUse = WaitpkgUse::uninitialized;
// Elapsed-time threshold compared against timeElapsedSinceWaitStarted before tpause() is used instead of pause() spinning.
int64_t waitPkgThresholdInMicroSeconds = defaultWaitPkgThresholdInMicroSeconds;
// Offset added to CpuIntrinsics::rdtsc() to build the counter operand for tpause/umwait.
uint64_t waitpkgCounterValue = defaultCounterValue;
// Control operand handed to CpuIntrinsics::tpause / CpuIntrinsics::umwait.
uint32_t waitpkgControlValue = defaultControlValue;
// Number of CpuIntrinsics::pause() iterations executed per wait call; init() sets it to 0 for umonitorAndUmwait.
uint32_t waitCount = defaultWaitCount;
#ifdef SUPPORTS_WAITPKG
@@ -24,37 +26,41 @@ bool waitpkgSupport = SUPPORTS_WAITPKG;
#else
bool waitpkgSupport = false;
#endif
bool waitpkgUse = false;
void init(bool enable) {
if (waitpkgUse) {
void init(WaitpkgUse inputWaitpkgUse) {
if (debugManager.flags.WaitLoopCount.get() != -1) {
waitCount = debugManager.flags.WaitLoopCount.get();
}
if (waitpkgUse > WaitpkgUse::noUse) {
return;
}
if (!(waitpkgSupport && CpuInfo::getInstance().isFeatureSupported(CpuInfo::featureWaitPkg))) {
waitpkgUse = WaitpkgUse::noUse;
return;
}
if (debugManager.flags.EnableWaitpkg.get() != -1) {
enable = debugManager.flags.EnableWaitpkg.get();
inputWaitpkgUse = static_cast<WaitpkgUse>(debugManager.flags.EnableWaitpkg.get());
}
if (enable && waitpkgSupport) {
if (CpuInfo::getInstance().isFeatureSupported(CpuInfo::featureWaitPkg)) {
waitpkgUse = true;
waitCount = 0;
}
waitpkgUse = inputWaitpkgUse;
if (waitpkgUse == WaitpkgUse::umonitorAndUmwait) {
waitCount = 0u;
}
int64_t overrideWaitPkgCounter = debugManager.flags.WaitpkgCounterValue.get();
if (overrideWaitPkgCounter != -1) {
waitpkgCounterValue = static_cast<uint64_t>(overrideWaitPkgCounter);
if (debugManager.flags.WaitpkgCounterValue.get() != -1) {
waitpkgCounterValue = debugManager.flags.WaitpkgCounterValue.get();
}
int32_t overrideWaitPkgControl = debugManager.flags.WaitpkgControlValue.get();
if (overrideWaitPkgControl != -1) {
waitpkgControlValue = static_cast<uint32_t>(overrideWaitPkgControl);
if (debugManager.flags.WaitpkgControlValue.get() != -1) {
waitpkgControlValue = debugManager.flags.WaitpkgControlValue.get();
}
int32_t overrideWaitCount = debugManager.flags.WaitLoopCount.get();
if (overrideWaitCount != -1) {
waitCount = static_cast<uint32_t>(overrideWaitCount);
if (debugManager.flags.WaitpkgThreshold.get() != -1) {
waitPkgThresholdInMicroSeconds = debugManager.flags.WaitpkgThreshold.get();
}
}

View File

@@ -17,36 +17,52 @@ namespace NEO {
namespace WaitUtils {
// Wait mechanism used by the WaitUtils polling helpers.
// The numeric values are part of the contract: init() casts the EnableWaitpkg
// debug flag straight into this enum and treats anything greater than `noUse`
// as an already-selected waitpkg mode.
enum class WaitpkgUse : int32_t {
    uninitialized = -1,    // init() has not selected a mode yet
    noUse = 0,             // plain pause() spinning only
    umonitorAndUmwait = 1, // monitor the poll address with umonitor + umwait
    tpause = 2,            // timed pause once the wait exceeds the threshold
};
// Default elapsed-wait threshold (microseconds, per the name) that must be exceeded before tpause() replaces pause() spinning.
constexpr int64_t defaultWaitPkgThresholdInMicroSeconds = 1;
// Default offset added to CpuIntrinsics::rdtsc() to form the counter operand of tpause/umwait.
constexpr uint64_t defaultCounterValue = 16000;
// Default control operand passed to tpause/umwait.
constexpr uint32_t defaultControlValue = 0;
// Default number of CpuIntrinsics::pause() iterations per wait call.
constexpr uint32_t defaultWaitCount = 1u;
extern WaitpkgUse waitpkgUse;
extern int64_t waitPkgThresholdInMicroSeconds;
extern uint64_t waitpkgCounterValue;
extern uint32_t waitpkgControlValue;
extern uint32_t waitCount;
extern bool waitpkgSupport;
extern bool waitpkgUse;
inline bool monitorWait(volatile void const *monitorAddress, uint64_t counterModifier) {
uint64_t currentCounter = CpuIntrinsics::rdtsc();
currentCounter += (waitpkgCounterValue + counterModifier);
// Issues one CpuIntrinsics::tpause call using the globally configured operands.
// The counter operand is the current CpuIntrinsics::rdtsc() reading advanced by
// waitpkgCounterValue; the control operand is waitpkgControlValue. The status
// returned by the intrinsic is intentionally discarded.
inline void tpause() {
    const uint64_t wakeCounter = CpuIntrinsics::rdtsc() + waitpkgCounterValue;
    CpuIntrinsics::tpause(waitpkgControlValue, wakeCounter);
}
inline bool monitorWait(volatile void const *monitorAddress) {
uint64_t currentCounter = CpuIntrinsics::rdtsc() + (waitpkgCounterValue);
CpuIntrinsics::umonitor(const_cast<void *>(monitorAddress));
bool result = CpuIntrinsics::umwait(waitpkgControlValue, currentCounter) == 0;
return result;
return CpuIntrinsics::umwait(waitpkgControlValue, currentCounter) == 0;
}
template <typename T>
inline bool waitFunctionWithPredicate(volatile T const *pollAddress, T expectedValue, std::function<bool(T, T)> predicate) {
for (uint32_t i = 0; i < waitCount; i++) {
CpuIntrinsics::pause();
inline bool waitFunctionWithPredicate(volatile T const *pollAddress, T expectedValue, std::function<bool(T, T)> predicate, int64_t timeElapsedSinceWaitStarted) {
if (waitpkgUse == WaitpkgUse::tpause && timeElapsedSinceWaitStarted > waitPkgThresholdInMicroSeconds) {
tpause();
} else {
for (uint32_t i = 0; i < waitCount; i++) {
CpuIntrinsics::pause();
}
}
if (pollAddress != nullptr) {
if (predicate(*pollAddress, expectedValue)) {
return true;
}
if (waitpkgUse) {
if (monitorWait(pollAddress, 0)) {
if (waitpkgUse == WaitpkgUse::umonitorAndUmwait) {
if (monitorWait(pollAddress)) {
if (predicate(*pollAddress, expectedValue)) {
return true;
}
@@ -57,11 +73,11 @@ inline bool waitFunctionWithPredicate(volatile T const *pollAddress, T expectedV
return false;
}
inline bool waitFunction(volatile TagAddressType *pollAddress, TaskCountType expectedValue) {
return waitFunctionWithPredicate<TaskCountType>(pollAddress, expectedValue, std::greater_equal<TaskCountType>());
inline bool waitFunction(volatile TagAddressType *pollAddress, TaskCountType expectedValue, int64_t timeElapsedSinceWaitStarted) {
return waitFunctionWithPredicate<TaskCountType>(pollAddress, expectedValue, std::greater_equal<TaskCountType>(), timeElapsedSinceWaitStarted);
}
void init(bool enable);
void init(WaitpkgUse inputWaitpkgUse);
} // namespace WaitUtils
} // namespace NEO