From 8a85a96ed202e3886cbdc94c21cbb5d5c9a8d492 Mon Sep 17 00:00:00 2001 From: Lukasz Jobczyk Date: Fri, 21 Mar 2025 08:27:46 +0000 Subject: [PATCH] feature: Add 3-level wait scheme with tpause intrinsic Related-To: NEO-14336 Signed-off-by: Lukasz Jobczyk --- .../source/cmdlist/cmdlist_hw_immediate.inl | 2 +- level_zero/core/source/event/event_impl.inl | 8 +- .../unit_tests/sources/fence/test_fence.cpp | 4 +- .../command_queue/command_queue_hw_base.inl | 10 +- .../api/cl_enqueue_unmap_mem_object_tests.inl | 4 +- ...and_stream_receiver_flush_task_4_tests.cpp | 6 +- .../helpers/timestamp_packet_1_tests.cpp | 2 +- .../command_stream_receiver.cpp | 10 +- .../command_stream_receiver_hw_base.inl | 2 +- .../debug_settings/debug_variables_base.inl | 5 +- .../linux/drm_direct_submission.inl | 3 +- .../execution_environment.cpp | 2 +- shared/source/utilities/cpuintrinsics.cpp | 8 + shared/source/utilities/cpuintrinsics.h | 4 +- shared/source/utilities/wait_util.cpp | 44 ++--- shared/source/utilities/wait_util.h | 44 +++-- .../test/common/base_ult_config_listener.cpp | 3 +- shared/test/common/test_files/igdrcl.config | 1 + .../test/common/utilities/cpuintrinsics.cpp | 9 +- .../linux/drm_direct_submission_tests.cpp | 4 +- .../unit_test/utilities/wait_util_tests.cpp | 20 +-- .../x86_64/wait_util_tests_x86_64.cpp | 160 ++++++++++++++---- 22 files changed, 252 insertions(+), 103 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl index f8b10f0bce..2adeaa2667 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl @@ -1655,7 +1655,7 @@ ze_result_t CommandListCoreFamilyImmediate::synchronizeInOrderExe const uint64_t *hostAddress = ptrOffset(inOrderExecInfo->getBaseHostAddress(), inOrderExecInfo->getAllocationOffset()); for (uint32_t i = 0; i < inOrderExecInfo->getNumHostPartitionsToWait(); i++) { - if 
(!NEO::WaitUtils::waitFunctionWithPredicate(hostAddress, waitValue, std::greater_equal())) { + if (!NEO::WaitUtils::waitFunctionWithPredicate(hostAddress, waitValue, std::greater_equal(), timeDiff / 1000)) { signaled = false; break; } diff --git a/level_zero/core/source/event/event_impl.inl b/level_zero/core/source/event/event_impl.inl index 53c83381ca..6435f208ba 100644 --- a/level_zero/core/source/event/event_impl.inl +++ b/level_zero/core/source/event/event_impl.inl @@ -268,7 +268,7 @@ ze_result_t EventImp::queryCounterBasedEventStatus() { bool signaled = true; const uint64_t *hostAddress = ptrOffset(inOrderExecInfo->getBaseHostAddress(), this->inOrderAllocationOffset); for (uint32_t i = 0; i < inOrderExecInfo->getNumHostPartitionsToWait(); i++) { - if (!NEO::WaitUtils::waitFunctionWithPredicate(hostAddress, waitValue, std::greater_equal())) { + if (!NEO::WaitUtils::waitFunctionWithPredicate(hostAddress, waitValue, std::greater_equal(), 0)) { signaled = false; break; } @@ -362,7 +362,8 @@ ze_result_t EventImp::queryStatusEventPackets() { bool ready = NEO::WaitUtils::waitFunctionWithPredicate( static_cast(queryAddress), queryVal, - std::not_equal_to()); + std::not_equal_to(), + 0); if (!ready) { return ZE_RESULT_NOT_READY; } @@ -378,7 +379,8 @@ ze_result_t EventImp::queryStatusEventPackets() { bool ready = NEO::WaitUtils::waitFunctionWithPredicate( static_cast(queryAddress), queryVal, - std::not_equal_to()); + std::not_equal_to(), + 0); if (!ready) { return ZE_RESULT_NOT_READY; } diff --git a/level_zero/core/test/unit_tests/sources/fence/test_fence.cpp b/level_zero/core/test/unit_tests/sources/fence/test_fence.cpp index 825c4ca327..eb36883363 100644 --- a/level_zero/core/test/unit_tests/sources/fence/test_fence.cpp +++ b/level_zero/core/test/unit_tests/sources/fence/test_fence.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -241,7 +241,7 @@ 
TEST_F(FenceSynchronizeTest, givenInfiniteTimeoutWhenWaitingForFenceCompletionTh constexpr uint32_t activePartitions = 2; constexpr uint32_t postSyncOffset = 16; - VariableBackup backupWaitpkgUse(&NEO::WaitUtils::waitpkgUse, false); + VariableBackup backupWaitpkgUse(&WaitUtils::waitpkgUse, WaitUtils::WaitpkgUse::noUse); VariableBackup backupWaitCount(&NEO::WaitUtils::waitCount, 1); const auto csr = std::make_unique(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); diff --git a/opencl/source/command_queue/command_queue_hw_base.inl b/opencl/source/command_queue/command_queue_hw_base.inl index 8474945967..e48a4625af 100644 --- a/opencl/source/command_queue/command_queue_hw_base.inl +++ b/opencl/source/command_queue/command_queue_hw_base.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2024 Intel Corporation + * Copyright (C) 2019-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -148,6 +148,7 @@ inline bool waitForTimestampsWithinContainer(TimestampPacketContainer *container if (container) { auto lastHangCheckTime = std::chrono::high_resolution_clock::now(); + auto waitStartTime = lastHangCheckTime; for (const auto ×tamp : container->peekNodes()) { for (uint32_t i = 0; i < timestamp->getPacketsUsed(); i++) { if (printWaitForCompletion) { @@ -155,8 +156,11 @@ inline bool waitForTimestampsWithinContainer(TimestampPacketContainer *container } while (timestamp->getContextEndValue(i) == 1) { csr.downloadAllocation(*timestamp->getBaseGraphicsAllocation()->getGraphicsAllocation(csr.getRootDeviceIndex())); - WaitUtils::waitFunctionWithPredicate(static_cast(timestamp->getContextEndAddress(i)), 1u, std::not_equal_to()); - if (csr.checkGpuHangDetected(std::chrono::high_resolution_clock::now(), lastHangCheckTime)) { + + auto currentTime = std::chrono::high_resolution_clock::now(); + WaitUtils::waitFunctionWithPredicate(static_cast(timestamp->getContextEndAddress(i)), 1u, std::not_equal_to(), std::chrono::duration_cast(currentTime - 
waitStartTime).count()); + + if (csr.checkGpuHangDetected(currentTime, lastHangCheckTime)) { status = WaitStatus::gpuHang; if (printWaitForCompletion) { printf("\nWaiting for TS failed"); diff --git a/opencl/test/unit_test/api/cl_enqueue_unmap_mem_object_tests.inl b/opencl/test/unit_test/api/cl_enqueue_unmap_mem_object_tests.inl index 9ae4043803..680f56b3cd 100644 --- a/opencl/test/unit_test/api/cl_enqueue_unmap_mem_object_tests.inl +++ b/opencl/test/unit_test/api/cl_enqueue_unmap_mem_object_tests.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -82,7 +82,7 @@ TEST_F(ClEnqueueUnmapMemObjTests, givenInvalidAddressWhenUnmappingOnCpuThenRetur TEST_F(ClEnqueueUnmapMemObjTests, givenZeroCopyWithoutCoherencyAllowedWhenMapAndUnmapThenFlushCachelines) { DebugManagerStateRestore restorer; debugManager.flags.AllowZeroCopyWithoutCoherency.set(1); - VariableBackup backupWaitpkgUse(&WaitUtils::waitpkgUse, false); + VariableBackup backupWaitpkgUse(&WaitUtils::waitpkgUse, WaitUtils::WaitpkgUse::noUse); VariableBackup backupWaitCount(&WaitUtils::waitCount, 1); auto buffer = std::unique_ptr(BufferHelper>::create(pContext)); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp index db6e3950f2..9df2b2c542 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -805,7 +805,7 @@ extern TaskCountType pauseValue; HWTEST_F(CommandStreamReceiverFlushTaskTests, givenTagValueNotMeetingTaskCountToWaitWhenTagValueSwitchesThenWaitFunctionReturnsTrue) { VariableBackup 
backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue); - VariableBackup backupWaitpkgUse(&WaitUtils::waitpkgUse, false); + VariableBackup backupWaitpkgUse(&WaitUtils::waitpkgUse, WaitUtils::WaitpkgUse::noUse); VariableBackup backupWaitCount(&WaitUtils::waitCount, 1); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); @@ -825,7 +825,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenTagValueNotMeetingTaskCountTo HWTEST_F(CommandStreamReceiverFlushTaskTests, givenTagValueNotMeetingTaskCountToWaitAndIndefinitelyPollWhenWaitForCompletionThenDoNotCallWaitUtils) { VariableBackup backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue); - VariableBackup backupWaitpkgUse(&WaitUtils::waitpkgUse, false); + VariableBackup backupWaitpkgUse(&WaitUtils::waitpkgUse, WaitUtils::WaitpkgUse::noUse); VariableBackup backupWaitCount(&WaitUtils::waitCount, 1); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); diff --git a/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp b/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp index 664d366702..a691ccf35d 100644 --- a/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp +++ b/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp @@ -1113,7 +1113,7 @@ extern std::function setupPauseAddress; } // namespace CpuIntrinsicsTests HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitForQueuesWhenFinishThenCallWaitUtils) { - VariableBackup backupWaitpkgUse(&WaitUtils::waitpkgUse, false); + VariableBackup backupWaitpkgUse(&WaitUtils::waitpkgUse, WaitUtils::WaitpkgUse::noUse); VariableBackup backupWaitCount(&WaitUtils::waitCount, 1); DebugManagerStateRestore restorer; diff --git a/shared/source/command_stream/command_stream_receiver.cpp 
b/shared/source/command_stream/command_stream_receiver.cpp index 018877d7aa..6649e3f826 100644 --- a/shared/source/command_stream/command_stream_receiver.cpp +++ b/shared/source/command_stream/command_stream_receiver.cpp @@ -504,10 +504,10 @@ WaitStatus CommandStreamReceiver::baseWaitFunction(volatile TagAddressType *poll waitStartTime = std::chrono::high_resolution_clock::now(); lastHangCheckTime = waitStartTime; for (uint32_t i = 0; i < activePartitions; i++) { - while (*partitionAddress < taskCountToWait && timeDiff <= params.waitTimeout) { + while (*partitionAddress < taskCountToWait && (!params.enableTimeout || timeDiff <= params.waitTimeout)) { this->downloadTagAllocation(taskCountToWait); - if (!params.indefinitelyPoll && WaitUtils::waitFunction(partitionAddress, taskCountToWait)) { + if (!params.indefinitelyPoll && WaitUtils::waitFunction(partitionAddress, taskCountToWait, timeDiff)) { break; } @@ -516,9 +516,7 @@ WaitStatus CommandStreamReceiver::baseWaitFunction(volatile TagAddressType *poll return WaitStatus::gpuHang; } - if (params.enableTimeout) { - timeDiff = std::chrono::duration_cast(currentTime - waitStartTime).count(); - } + timeDiff = std::chrono::duration_cast(currentTime - waitStartTime).count(); } partitionAddress = ptrOffset(partitionAddress, this->immWritePostSyncWriteOffset); @@ -1047,7 +1045,7 @@ void CommandStreamReceiver::downloadTagAllocation(TaskCountType taskCountToWait) bool CommandStreamReceiver::testTaskCountReady(volatile TagAddressType *pollAddress, TaskCountType taskCountToWait) { this->downloadTagAllocation(taskCountToWait); for (uint32_t i = 0; i < activePartitions; i++) { - if (!WaitUtils::waitFunction(pollAddress, taskCountToWait)) { + if (!WaitUtils::waitFunction(pollAddress, taskCountToWait, 0)) { return false; } diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index 23a1902bc8..777bc66351 100644 --- 
a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -1434,7 +1434,7 @@ inline bool CommandStreamReceiverHw::initDirectSubmission() { this->osContext->setDirectSubmissionActive(); if (this->osContext->isDirectSubmissionLightActive()) { this->pushAllocationsForMakeResident = false; - WaitUtils::init(true); + WaitUtils::init(WaitUtils::WaitpkgUse::umonitorAndUmwait); } } } diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 3d2649b2bc..b65550553a 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -108,8 +108,9 @@ DECLARE_DEBUG_VARIABLE(int64_t, OverrideMultiStoragePlacement, -1, "Place memory DECLARE_DEBUG_VARIABLE(int64_t, ForceCompressionDisabledForCompressedBlitCopies, -1, "If compression is required, set AUX_CCS_E, but force CompressionEnable filed; 0 should result in uncompressed read/write; values = -1: default, 0: disabled, 1: enabled") DECLARE_DEBUG_VARIABLE(int64_t, WddmPagingFenceCpuWaitDelayTime, 0, "Amount of microseconds after waiting for paging fence on CPU") DECLARE_DEBUG_VARIABLE(int64_t, OverrideEventSynchronizeTimeout, -1, "-1: default - user provided timeout value, >0: timeout in nanoseconds") -DECLARE_DEBUG_VARIABLE(int64_t, WaitpkgCounterValue, -1, "-1: use default, >=0: use constant value added for umwait counter") +DECLARE_DEBUG_VARIABLE(int64_t, WaitpkgCounterValue, -1, "-1: use default, >=0: use constant value added for umwait or tpause counter") DECLARE_DEBUG_VARIABLE(int32_t, WaitpkgControlValue, -1, "-1: use default, 0: slower wakeup - larger power savings, 1: faster wakeup - smaller power savings") +DECLARE_DEBUG_VARIABLE(int32_t, WaitpkgThreshold, -1, "-1: use default, >=0: When waitpkg in tpause mode, apply tpause waits after given threshold in us") DECLARE_DEBUG_VARIABLE(int32_t, ForceL1Caching, 
-1, "Program L1 cache policy for surface state and stateless accesses; values = -1: default, 0: disable, 1: enable") DECLARE_DEBUG_VARIABLE(int32_t, ForceAuxTranslationEnabled, -1, "Require AUX translation for kernels; values = -1: default, 0: disabled, 1: enabled") DECLARE_DEBUG_VARIABLE(int32_t, OverrideStatelessMocsIndex, -1, "Program provided MOCS index for stateless accesses in state base address for regular buffers; ignore when -1") @@ -534,7 +535,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, UseExternalAllocatorForSshAndDsh, -1, "Use 32 bi DECLARE_DEBUG_VARIABLE(int32_t, OverrideSlmSize, -1, "Force per subslice slm size in KB; ignore when -1") DECLARE_DEBUG_VARIABLE(int32_t, UseCyclesPerSecondTimer, 0, "0: default behavior, 0: disabled: Report L0 timer in nanosecond units, 1: enabled: Report L0 timer in cycles per second") DECLARE_DEBUG_VARIABLE(int32_t, WaitLoopCount, -1, "-1: use default, >=0: number of iterations in wait loop") -DECLARE_DEBUG_VARIABLE(int32_t, EnableWaitpkg, -1, "-1: use default, 0: disable, 1: enable") +DECLARE_DEBUG_VARIABLE(int32_t, EnableWaitpkg, -1, "-1: use default, 0: disable, 1: UMONITOR/UMWAIT 2: TPAUSE") DECLARE_DEBUG_VARIABLE(int32_t, GTPinAllocateBufferInSharedMemory, -1, "Force GTPin to allocate buffer in shared memory") DECLARE_DEBUG_VARIABLE(int32_t, AlignLocalMemoryVaTo2MB, -1, "Allow 2MB pages for allocations with size>=2MB. On Linux it means aligned VA, on Windows it means aligned size. 
-1: default, 0: disabled, 1: enabled") DECLARE_DEBUG_VARIABLE(int32_t, EnableUserFenceForCompletionWait, -1, "-1: default (disabled), 0: disable, 1: enable : Use Wait User Fence instead Gem Wait") diff --git a/shared/source/direct_submission/linux/drm_direct_submission.inl b/shared/source/direct_submission/linux/drm_direct_submission.inl index eb11b28de6..2de373671d 100644 --- a/shared/source/direct_submission/linux/drm_direct_submission.inl +++ b/shared/source/direct_submission/linux/drm_direct_submission.inl @@ -288,9 +288,10 @@ bool DrmDirectSubmission::isCompletionFenceSupported() { template void DrmDirectSubmission::wait(TaskCountType taskCountToWait) { auto lastHangCheckTime = std::chrono::high_resolution_clock::now(); + auto waitStartTime = lastHangCheckTime; auto pollAddress = this->tagAddress; for (uint32_t i = 0; i < this->activeTiles; i++) { - while (!WaitUtils::waitFunction(pollAddress, taskCountToWait) && + while (!WaitUtils::waitFunction(pollAddress, taskCountToWait, std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - waitStartTime).count()) && !isGpuHangDetected(lastHangCheckTime)) { } pollAddress = ptrOffset(pollAddress, this->immWritePostSyncOffset); diff --git a/shared/source/execution_environment/execution_environment.cpp b/shared/source/execution_environment/execution_environment.cpp index cf6fe13f5e..9cd3e35acf 100644 --- a/shared/source/execution_environment/execution_environment.cpp +++ b/shared/source/execution_environment/execution_environment.cpp @@ -30,7 +30,7 @@ namespace NEO { ExecutionEnvironment::ExecutionEnvironment() { - WaitUtils::init(false); + WaitUtils::init(WaitUtils::WaitpkgUse::noUse); this->configureNeoEnvironment(); } diff --git a/shared/source/utilities/cpuintrinsics.cpp b/shared/source/utilities/cpuintrinsics.cpp index 781f831a7d..6f6ec7bc50 100644 --- a/shared/source/utilities/cpuintrinsics.cpp +++ b/shared/source/utilities/cpuintrinsics.cpp @@ -49,6 +49,14 @@ void pause() { _mm_pause(); } +uint8_t 
tpause(uint32_t control, uint64_t counter) { +#ifdef SUPPORTS_WAITPKG + return _tpause(control, counter); +#else + return 0; +#endif +} + unsigned char umwait(unsigned int ctrl, uint64_t counter) { #ifdef SUPPORTS_WAITPKG return _umwait(ctrl, counter); diff --git a/shared/source/utilities/cpuintrinsics.h b/shared/source/utilities/cpuintrinsics.h index 934a43314c..3b2a6a6abb 100644 --- a/shared/source/utilities/cpuintrinsics.h +++ b/shared/source/utilities/cpuintrinsics.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -20,6 +20,8 @@ void clFlushOpt(void *ptr); void pause(); +uint8_t tpause(uint32_t control, uint64_t counter); + unsigned char umwait(unsigned int ctrl, uint64_t counter); void umonitor(void *a); diff --git a/shared/source/utilities/wait_util.cpp b/shared/source/utilities/wait_util.cpp index c2d4352f49..65331f4b59 100644 --- a/shared/source/utilities/wait_util.cpp +++ b/shared/source/utilities/wait_util.cpp @@ -14,9 +14,11 @@ namespace NEO { namespace WaitUtils { +WaitpkgUse waitpkgUse = WaitpkgUse::uninitialized; + +int64_t waitPkgThresholdInMicroSeconds = defaultWaitPkgThresholdInMicroSeconds; uint64_t waitpkgCounterValue = defaultCounterValue; uint32_t waitpkgControlValue = defaultControlValue; - uint32_t waitCount = defaultWaitCount; #ifdef SUPPORTS_WAITPKG @@ -24,37 +26,41 @@ bool waitpkgSupport = SUPPORTS_WAITPKG; #else bool waitpkgSupport = false; #endif -bool waitpkgUse = false; -void init(bool enable) { - if (waitpkgUse) { +void init(WaitpkgUse inputWaitpkgUse) { + if (debugManager.flags.WaitLoopCount.get() != -1) { + waitCount = debugManager.flags.WaitLoopCount.get(); + } + + if (waitpkgUse > WaitpkgUse::noUse) { + return; + } + + if (!(waitpkgSupport && CpuInfo::getInstance().isFeatureSupported(CpuInfo::featureWaitPkg))) { + waitpkgUse = WaitpkgUse::noUse; return; } if (debugManager.flags.EnableWaitpkg.get() != -1) { - enable = 
debugManager.flags.EnableWaitpkg.get(); + inputWaitpkgUse = static_cast(debugManager.flags.EnableWaitpkg.get()); } - if (enable && waitpkgSupport) { - if (CpuInfo::getInstance().isFeatureSupported(CpuInfo::featureWaitPkg)) { - waitpkgUse = true; - waitCount = 0; - } + waitpkgUse = inputWaitpkgUse; + + if (waitpkgUse == WaitpkgUse::umonitorAndUmwait) { + waitCount = 0u; } - int64_t overrideWaitPkgCounter = debugManager.flags.WaitpkgCounterValue.get(); - if (overrideWaitPkgCounter != -1) { - waitpkgCounterValue = static_cast(overrideWaitPkgCounter); + if (debugManager.flags.WaitpkgCounterValue.get() != -1) { + waitpkgCounterValue = debugManager.flags.WaitpkgCounterValue.get(); } - int32_t overrideWaitPkgControl = debugManager.flags.WaitpkgControlValue.get(); - if (overrideWaitPkgControl != -1) { - waitpkgControlValue = static_cast(overrideWaitPkgControl); + if (debugManager.flags.WaitpkgControlValue.get() != -1) { + waitpkgControlValue = debugManager.flags.WaitpkgControlValue.get(); } - int32_t overrideWaitCount = debugManager.flags.WaitLoopCount.get(); - if (overrideWaitCount != -1) { - waitCount = static_cast(overrideWaitCount); + if (debugManager.flags.WaitpkgThreshold.get() != -1) { + waitPkgThresholdInMicroSeconds = debugManager.flags.WaitpkgThreshold.get(); } } diff --git a/shared/source/utilities/wait_util.h b/shared/source/utilities/wait_util.h index ea7c5656b2..bd1403160e 100644 --- a/shared/source/utilities/wait_util.h +++ b/shared/source/utilities/wait_util.h @@ -17,36 +17,52 @@ namespace NEO { namespace WaitUtils { +enum class WaitpkgUse : int32_t { + uninitialized = -1, + noUse = 0, + umonitorAndUmwait, + tpause +}; + +constexpr int64_t defaultWaitPkgThresholdInMicroSeconds = 1; constexpr uint64_t defaultCounterValue = 16000; constexpr uint32_t defaultControlValue = 0; constexpr uint32_t defaultWaitCount = 1u; +extern WaitpkgUse waitpkgUse; +extern int64_t waitPkgThresholdInMicroSeconds; extern uint64_t waitpkgCounterValue; extern uint32_t 
waitpkgControlValue; extern uint32_t waitCount; extern bool waitpkgSupport; -extern bool waitpkgUse; -inline bool monitorWait(volatile void const *monitorAddress, uint64_t counterModifier) { - uint64_t currentCounter = CpuIntrinsics::rdtsc(); - currentCounter += (waitpkgCounterValue + counterModifier); +inline void tpause() { + uint64_t currentCounter = CpuIntrinsics::rdtsc() + waitpkgCounterValue; + CpuIntrinsics::tpause(waitpkgControlValue, currentCounter); +} +inline bool monitorWait(volatile void const *monitorAddress) { + uint64_t currentCounter = CpuIntrinsics::rdtsc() + (waitpkgCounterValue); CpuIntrinsics::umonitor(const_cast(monitorAddress)); - bool result = CpuIntrinsics::umwait(waitpkgControlValue, currentCounter) == 0; - return result; + return CpuIntrinsics::umwait(waitpkgControlValue, currentCounter) == 0; } template -inline bool waitFunctionWithPredicate(volatile T const *pollAddress, T expectedValue, std::function predicate) { - for (uint32_t i = 0; i < waitCount; i++) { - CpuIntrinsics::pause(); +inline bool waitFunctionWithPredicate(volatile T const *pollAddress, T expectedValue, std::function predicate, int64_t timeElapsedSinceWaitStarted) { + if (waitpkgUse == WaitpkgUse::tpause && timeElapsedSinceWaitStarted > waitPkgThresholdInMicroSeconds) { + tpause(); + } else { + for (uint32_t i = 0; i < waitCount; i++) { + CpuIntrinsics::pause(); + } } + if (pollAddress != nullptr) { if (predicate(*pollAddress, expectedValue)) { return true; } - if (waitpkgUse) { - if (monitorWait(pollAddress, 0)) { + if (waitpkgUse == WaitpkgUse::umonitorAndUmwait) { + if (monitorWait(pollAddress)) { if (predicate(*pollAddress, expectedValue)) { return true; } @@ -57,11 +73,11 @@ inline bool waitFunctionWithPredicate(volatile T const *pollAddress, T expectedV return false; } -inline bool waitFunction(volatile TagAddressType *pollAddress, TaskCountType expectedValue) { - return waitFunctionWithPredicate(pollAddress, expectedValue, std::greater_equal()); +inline bool 
waitFunction(volatile TagAddressType *pollAddress, TaskCountType expectedValue, int64_t timeElapsedSinceWaitStarted) { + return waitFunctionWithPredicate(pollAddress, expectedValue, std::greater_equal(), timeElapsedSinceWaitStarted); } -void init(bool enable); +void init(WaitpkgUse inputWaitpkgUse); } // namespace WaitUtils } // namespace NEO diff --git a/shared/test/common/base_ult_config_listener.cpp b/shared/test/common/base_ult_config_listener.cpp index e7783442b5..634709d2c6 100644 --- a/shared/test/common/base_ult_config_listener.cpp +++ b/shared/test/common/base_ult_config_listener.cpp @@ -21,7 +21,8 @@ namespace NEO { extern unsigned int testCaseMaxTimeInMs; void BaseUltConfigListener::OnTestStart(const ::testing::TestInfo &) { - WaitUtils::waitpkgUse = false; + WaitUtils::waitpkgUse = WaitUtils::WaitpkgUse::uninitialized; + WaitUtils::waitPkgThresholdInMicroSeconds = WaitUtils::defaultWaitPkgThresholdInMicroSeconds; WaitUtils::waitpkgCounterValue = WaitUtils::defaultCounterValue; WaitUtils::waitpkgControlValue = WaitUtils::defaultControlValue; WaitUtils::waitCount = WaitUtils::defaultWaitCount; diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index 20fc405ff4..cb8aa31e6f 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -377,6 +377,7 @@ SkipFlushingEventsOnGetStatusCalls = 0 EnableWaitpkg = -1 WaitpkgControlValue = -1 WaitpkgCounterValue = -1 +WaitpkgThreshold = -1 AllowUnrestrictedSize = 0 ForceDefaultThreadArbitrationPolicyIfNotSpecified = 0 DoNotFreeResources = 0 diff --git a/shared/test/common/utilities/cpuintrinsics.cpp b/shared/test/common/utilities/cpuintrinsics.cpp index 4cb9544cca..42cf9c000b 100644 --- a/shared/test/common/utilities/cpuintrinsics.cpp +++ b/shared/test/common/utilities/cpuintrinsics.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation * * 
SPDX-License-Identifier: MIT * @@ -30,6 +30,8 @@ std::atomic umonitorCounter(0u); std::atomic rdtscCounter(0u); +std::atomic_uint32_t tpauseCounter{}; + volatile TagAddressType *pauseAddress = nullptr; TaskCountType pauseValue = 0u; uint32_t pauseOffset = 0u; @@ -68,6 +70,11 @@ void pause() { } } +uint8_t tpause(uint32_t control, uint64_t counter) { + CpuIntrinsicsTests::tpauseCounter++; + return 0; +} + unsigned char umwait(unsigned int ctrl, uint64_t counter) { CpuIntrinsicsTests::lastUmwaitControl = ctrl; CpuIntrinsicsTests::lastUmwaitCounter = counter; diff --git a/shared/test/unit_test/direct_submission/linux/drm_direct_submission_tests.cpp b/shared/test/unit_test/direct_submission/linux/drm_direct_submission_tests.cpp index fad52eb4c1..7ccdbd1cd5 100644 --- a/shared/test/unit_test/direct_submission/linux/drm_direct_submission_tests.cpp +++ b/shared/test/unit_test/direct_submission/linux/drm_direct_submission_tests.cpp @@ -1007,7 +1007,7 @@ HWTEST_F(DrmDirectSubmissionTest, givenDirectSubmissionNewResourceTlbFlushZeroAn HWCMDTEST_F(IGFX_XE_HP_CORE, DrmDirectSubmissionTest, givenMultipleActiveTilesWhenWaitingForTagUpdateThenQueryAllActiveTiles) { using Dispatcher = RenderDispatcher; - VariableBackup backupWaitpkgUse(&WaitUtils::waitpkgUse, false); + VariableBackup backupWaitpkgUse(&WaitUtils::waitpkgUse, WaitUtils::WaitpkgUse::noUse); VariableBackup backupWaitCount(&WaitUtils::waitCount, 1); MockDrmDirectSubmission directSubmission(*device->getDefaultEngine().commandStreamReceiver); @@ -1283,7 +1283,7 @@ HWTEST_F(DrmDirectSubmissionTest, HWTEST_F(DrmDirectSubmissionTest, givenGpuHangWhenWaitCalledThenGpuHangDetected) { using Dispatcher = RenderDispatcher; - VariableBackup backupWaitpkgUse(&WaitUtils::waitpkgUse, false); + VariableBackup backupWaitpkgUse(&WaitUtils::waitpkgUse, WaitUtils::WaitpkgUse::noUse); VariableBackup backupWaitCount(&WaitUtils::waitCount, 1); MockDrmDirectSubmission directSubmission(*device->getDefaultEngine().commandStreamReceiver); diff 
--git a/shared/test/unit_test/utilities/wait_util_tests.cpp b/shared/test/unit_test/utilities/wait_util_tests.cpp index 14c2e20e53..111ce0d006 100644 --- a/shared/test/unit_test/utilities/wait_util_tests.cpp +++ b/shared/test/unit_test/utilities/wait_util_tests.cpp @@ -35,11 +35,11 @@ using WaitPredicateOnlyTest = Test; TEST_F(WaitPredicateOnlyTest, givenDefaultSettingsWhenNoPollAddressProvidedThenPauseDefaultTimeAndReturnFalse) { EXPECT_EQ(1u, WaitUtils::defaultWaitCount); - WaitUtils::init(false); + WaitUtils::init(WaitUtils::WaitpkgUse::noUse); EXPECT_EQ(WaitUtils::defaultWaitCount, WaitUtils::waitCount); uint32_t oldCount = CpuIntrinsicsTests::pauseCounter.load(); - bool ret = WaitUtils::waitFunction(nullptr, 0u); + bool ret = WaitUtils::waitFunction(nullptr, 0u, 0); EXPECT_FALSE(ret); EXPECT_EQ(oldCount + WaitUtils::waitCount, CpuIntrinsicsTests::pauseCounter); } @@ -48,37 +48,37 @@ TEST_F(WaitPredicateOnlyTest, givenDebugFlagOverridesWhenNoPollAddressProvidedTh uint32_t count = 10u; debugManager.flags.WaitLoopCount.set(count); - WaitUtils::init(false); + WaitUtils::init(WaitUtils::WaitpkgUse::noUse); EXPECT_EQ(count, WaitUtils::waitCount); uint32_t oldCount = CpuIntrinsicsTests::pauseCounter.load(); - bool ret = WaitUtils::waitFunction(nullptr, 0u); + bool ret = WaitUtils::waitFunction(nullptr, 0u, 0); EXPECT_FALSE(ret); EXPECT_EQ(oldCount + count, CpuIntrinsicsTests::pauseCounter); } TEST_F(WaitPredicateOnlyTest, givenDefaultSettingsWhenPollAddressProvidedDoesNotMeetCriteriaThenPauseDefaultTimeAndReturnFalse) { - WaitUtils::init(false); + WaitUtils::init(WaitUtils::WaitpkgUse::noUse); EXPECT_EQ(WaitUtils::defaultWaitCount, WaitUtils::waitCount); volatile TagAddressType pollValue = 1u; TaskCountType expectedValue = 3; uint32_t oldCount = CpuIntrinsicsTests::pauseCounter.load(); - bool ret = WaitUtils::waitFunction(&pollValue, expectedValue); + bool ret = WaitUtils::waitFunction(&pollValue, expectedValue, 0); EXPECT_FALSE(ret); EXPECT_EQ(oldCount + 
WaitUtils::waitCount, CpuIntrinsicsTests::pauseCounter); } TEST_F(WaitPredicateOnlyTest, givenDefaultSettingsWhenPollAddressProvidedMeetsCriteriaThenPauseDefaultTimeAndReturnTrue) { - WaitUtils::init(false); + WaitUtils::init(WaitUtils::WaitpkgUse::noUse); EXPECT_EQ(WaitUtils::defaultWaitCount, WaitUtils::waitCount); volatile TagAddressType pollValue = 3u; TaskCountType expectedValue = 1; uint32_t oldCount = CpuIntrinsicsTests::pauseCounter.load(); - bool ret = WaitUtils::waitFunction(&pollValue, expectedValue); + bool ret = WaitUtils::waitFunction(&pollValue, expectedValue, 0); EXPECT_TRUE(ret); EXPECT_EQ(oldCount + WaitUtils::waitCount, CpuIntrinsicsTests::pauseCounter); } @@ -87,14 +87,14 @@ TEST_F(WaitPredicateOnlyTest, givenDebugFlagSetZeroWhenPollAddressProvidedMeetsC uint32_t count = 0u; debugManager.flags.WaitLoopCount.set(count); - WaitUtils::init(false); + WaitUtils::init(WaitUtils::WaitpkgUse::noUse); EXPECT_EQ(count, WaitUtils::waitCount); volatile TagAddressType pollValue = 3u; TaskCountType expectedValue = 1; uint32_t oldCount = CpuIntrinsicsTests::pauseCounter.load(); - bool ret = WaitUtils::waitFunction(&pollValue, expectedValue); + bool ret = WaitUtils::waitFunction(&pollValue, expectedValue, 0); EXPECT_TRUE(ret); EXPECT_EQ(oldCount + WaitUtils::waitCount, CpuIntrinsicsTests::pauseCounter); } diff --git a/shared/test/unit_test/utilities/x86_64/wait_util_tests_x86_64.cpp b/shared/test/unit_test/utilities/x86_64/wait_util_tests_x86_64.cpp index dc392eb374..dc1948a4c3 100644 --- a/shared/test/unit_test/utilities/x86_64/wait_util_tests_x86_64.cpp +++ b/shared/test/unit_test/utilities/x86_64/wait_util_tests_x86_64.cpp @@ -34,7 +34,7 @@ struct WaitPkgFixture { backupCpuInfo = std::make_unique>(mockCpuInfo); backupWaitpkgSupport = std::make_unique>(&WaitUtils::waitpkgSupport); - backupWaitpkgUse = std::make_unique>(&WaitUtils::waitpkgUse); + backupWaitpkgUse = std::make_unique>(&WaitUtils::waitpkgUse); backupWaitpkgCounter = 
std::make_unique>(&WaitUtils::waitpkgCounterValue); backupWaitpkgControl = std::make_unique>(&WaitUtils::waitpkgControlValue); backupWaitCount = std::make_unique>(&WaitUtils::waitCount); @@ -55,7 +55,7 @@ struct WaitPkgFixture { std::unique_ptr> backupCpuInfo; std::unique_ptr> backupCpuIdFunc; std::unique_ptr> backupWaitpkgSupport; - std::unique_ptr> backupWaitpkgUse; + std::unique_ptr> backupWaitpkgUse; std::unique_ptr> backupWaitpkgCounter; std::unique_ptr> backupWaitpkgControl; std::unique_ptr> backupWaitCount; @@ -74,22 +74,25 @@ extern std::atomic umonitorCounter; extern std::atomic rdtscCounter; +extern std::atomic_uint32_t tpauseCounter; + extern uint64_t rdtscRetValue; extern unsigned char umwaitRetValue; extern std::function controlUmwait; } // namespace CpuIntrinsicsTests +template struct WaitPkgEnabledFixture : public WaitPkgFixture { void setUp() { WaitPkgFixture::setUp(); - debugManager.flags.EnableWaitpkg.set(1); + debugManager.flags.EnableWaitpkg.set(waitpkgUse); CpuInfo::cpuidFunc = mockCpuidEnableAll; WaitUtils::waitpkgSupport = true; - WaitUtils::init(false); + WaitUtils::init(WaitUtils::WaitpkgUse::noUse); CpuIntrinsicsTests::lastUmwaitCounter = 0; CpuIntrinsicsTests::lastUmwaitControl = 0; @@ -97,6 +100,7 @@ struct WaitPkgEnabledFixture : public WaitPkgFixture { CpuIntrinsicsTests::lastUmonitorPtr = 0; CpuIntrinsicsTests::umonitorCounter = 0; CpuIntrinsicsTests::rdtscCounter = 0; + CpuIntrinsicsTests::tpauseCounter = 0; backupCpuIntrinsicsRdtscRetValue = std::make_unique>(&CpuIntrinsicsTests::rdtscRetValue); backupCpuIntrinsicsUmwaitRetValue = std::make_unique>(&CpuIntrinsicsTests::umwaitRetValue); @@ -109,7 +113,8 @@ struct WaitPkgEnabledFixture : public WaitPkgFixture { }; using WaitPkgTest = Test; -using WaitPkgEnabledTest = Test; +using WaitPkgEnabledTest = Test>; +using WaitPkgTpauseEnabledTest = Test>; TEST_F(WaitPkgTest, givenDefaultSettingsAndWaitpkgSupportTrueWhenWaitInitializedThenWaitPkgNotEnabled) { CpuInfo::cpuidFunc = 
mockCpuidEnableAll; @@ -117,17 +122,18 @@ TEST_F(WaitPkgTest, givenDefaultSettingsAndWaitpkgSupportTrueWhenWaitInitialized EXPECT_EQ(WaitUtils::defaultWaitCount, WaitUtils::waitCount); EXPECT_EQ(16000u, WaitUtils::waitpkgCounterValue); EXPECT_EQ(0u, WaitUtils::waitpkgControlValue); - EXPECT_FALSE(WaitUtils::waitpkgUse); + EXPECT_EQ(WaitUtils::waitpkgUse, WaitUtils::WaitpkgUse::uninitialized); + EXPECT_EQ(1, WaitUtils::waitPkgThresholdInMicroSeconds); EXPECT_EQ(expectedWaitpkgSupport, WaitUtils::waitpkgSupport); WaitUtils::waitpkgSupport = true; - WaitUtils::init(false); + WaitUtils::init(WaitUtils::WaitpkgUse::noUse); EXPECT_EQ(WaitUtils::defaultWaitCount, WaitUtils::waitCount); EXPECT_EQ(16000u, WaitUtils::waitpkgCounterValue); EXPECT_EQ(0u, WaitUtils::waitpkgControlValue); - EXPECT_FALSE(WaitUtils::waitpkgUse); + EXPECT_EQ(WaitUtils::waitpkgUse, WaitUtils::WaitpkgUse::noUse); } TEST_F(WaitPkgTest, givenEnabledWaitPkgSettingsAndWaitpkgSupportFalseWhenWaitInitializedThenWaitPkgNotEnabled) { @@ -135,11 +141,12 @@ TEST_F(WaitPkgTest, givenEnabledWaitPkgSettingsAndWaitpkgSupportFalseWhenWaitIni debugManager.flags.EnableWaitpkg.set(1); - WaitUtils::init(false); + WaitUtils::init(WaitUtils::WaitpkgUse::noUse); EXPECT_EQ(WaitUtils::defaultWaitCount, WaitUtils::waitCount); EXPECT_EQ(16000u, WaitUtils::waitpkgCounterValue); EXPECT_EQ(0u, WaitUtils::waitpkgControlValue); - EXPECT_FALSE(WaitUtils::waitpkgUse); + EXPECT_EQ(WaitUtils::waitpkgUse, WaitUtils::WaitpkgUse::noUse); + EXPECT_EQ(1, WaitUtils::waitPkgThresholdInMicroSeconds); } TEST_F(WaitPkgTest, givenDisabledWaitPkgSettingsAndWaitpkgSupportTrueWhenWaitInitializedThenWaitPkgNotEnabled) { @@ -147,11 +154,12 @@ TEST_F(WaitPkgTest, givenDisabledWaitPkgSettingsAndWaitpkgSupportTrueWhenWaitIni debugManager.flags.EnableWaitpkg.set(0); - WaitUtils::init(false); + WaitUtils::init(WaitUtils::WaitpkgUse::noUse); EXPECT_EQ(WaitUtils::defaultWaitCount, WaitUtils::waitCount); EXPECT_EQ(16000u, WaitUtils::waitpkgCounterValue); 
EXPECT_EQ(0u, WaitUtils::waitpkgControlValue); - EXPECT_FALSE(WaitUtils::waitpkgUse); + EXPECT_EQ(WaitUtils::waitpkgUse, WaitUtils::WaitpkgUse::noUse); + EXPECT_EQ(1, WaitUtils::waitPkgThresholdInMicroSeconds); } TEST_F(WaitPkgTest, givenEnabledWaitPkgSettingsAndWaitpkgSupportTrueWhenWaitInitializedAndCpuDoesNotSupportOperandThenWaitPkgNotEnabled) { @@ -161,11 +169,12 @@ TEST_F(WaitPkgTest, givenEnabledWaitPkgSettingsAndWaitpkgSupportTrueWhenWaitInit debugManager.flags.EnableWaitpkg.set(1); - WaitUtils::init(false); + WaitUtils::init(WaitUtils::WaitpkgUse::noUse); EXPECT_EQ(WaitUtils::defaultWaitCount, WaitUtils::waitCount); EXPECT_EQ(16000u, WaitUtils::waitpkgCounterValue); EXPECT_EQ(0u, WaitUtils::waitpkgControlValue); - EXPECT_FALSE(WaitUtils::waitpkgUse); + EXPECT_EQ(WaitUtils::waitpkgUse, WaitUtils::WaitpkgUse::noUse); + EXPECT_EQ(1, WaitUtils::waitPkgThresholdInMicroSeconds); } TEST_F(WaitPkgTest, givenEnabledWaitPkgSettingsAndWaitpkgSupportTrueWhenWaitInitializedAndCpuSupportsOperandThenWaitPkgEnabled) { @@ -175,12 +184,46 @@ TEST_F(WaitPkgTest, givenEnabledWaitPkgSettingsAndWaitpkgSupportTrueWhenWaitInit debugManager.flags.EnableWaitpkg.set(1); - WaitUtils::init(false); + WaitUtils::init(WaitUtils::WaitpkgUse::noUse); EXPECT_EQ(0u, WaitUtils::waitCount); EXPECT_EQ(16000u, WaitUtils::waitpkgCounterValue); EXPECT_EQ(0u, WaitUtils::waitpkgControlValue); - EXPECT_TRUE(WaitUtils::waitpkgUse); + EXPECT_EQ(WaitUtils::waitpkgUse, WaitUtils::WaitpkgUse::umonitorAndUmwait); + EXPECT_EQ(1, WaitUtils::waitPkgThresholdInMicroSeconds); +} + +TEST_F(WaitPkgTest, givenEnabledWaitPkgSetToTpauseAndWaitpkgSupportTrueWhenWaitInitializedAndCpuSupportsOperandThenWaitPkgEnabled) { + CpuInfo::cpuidFunc = mockCpuidEnableAll; + + WaitUtils::waitpkgSupport = true; + + debugManager.flags.EnableWaitpkg.set(2); + + WaitUtils::init(WaitUtils::WaitpkgUse::noUse); + + EXPECT_EQ(1u, WaitUtils::waitCount); + EXPECT_EQ(16000u, WaitUtils::waitpkgCounterValue); + EXPECT_EQ(0u, 
WaitUtils::waitpkgControlValue); + EXPECT_EQ(WaitUtils::waitpkgUse, WaitUtils::WaitpkgUse::tpause); + EXPECT_EQ(1, WaitUtils::waitPkgThresholdInMicroSeconds); +} + +TEST_F(WaitPkgTest, givenEnabledWaitPkgSetToTpauseAndWaitpkgThresholdAndWaitpkgSupportTrueWhenWaitInitializedAndCpuSupportsOperandThenWaitPkgEnabled) { + CpuInfo::cpuidFunc = mockCpuidEnableAll; + + WaitUtils::waitpkgSupport = true; + + debugManager.flags.EnableWaitpkg.set(2); + debugManager.flags.WaitpkgThreshold.set(56789); + + WaitUtils::init(WaitUtils::WaitpkgUse::noUse); + + EXPECT_EQ(1u, WaitUtils::waitCount); + EXPECT_EQ(16000u, WaitUtils::waitpkgCounterValue); + EXPECT_EQ(0u, WaitUtils::waitpkgControlValue); + EXPECT_EQ(WaitUtils::waitpkgUse, WaitUtils::WaitpkgUse::tpause); + EXPECT_EQ(56789, WaitUtils::waitPkgThresholdInMicroSeconds); } TEST_F(WaitPkgTest, givenEnabledSetToTrueAndWaitpkgSupportTrueWhenWaitInitializedAndCpuSupportsOperandThenWaitPkgEnabled) { @@ -188,12 +231,27 @@ TEST_F(WaitPkgTest, givenEnabledSetToTrueAndWaitpkgSupportTrueWhenWaitInitialize WaitUtils::waitpkgSupport = true; - WaitUtils::init(true); + WaitUtils::init(WaitUtils::WaitpkgUse::umonitorAndUmwait); EXPECT_EQ(0u, WaitUtils::waitCount); EXPECT_EQ(16000u, WaitUtils::waitpkgCounterValue); EXPECT_EQ(0u, WaitUtils::waitpkgControlValue); - EXPECT_TRUE(WaitUtils::waitpkgUse); + EXPECT_EQ(WaitUtils::waitpkgUse, WaitUtils::WaitpkgUse::umonitorAndUmwait); + EXPECT_EQ(1, WaitUtils::waitPkgThresholdInMicroSeconds); +} + +TEST_F(WaitPkgTest, givenEnabledSetToTpauseAndWaitpkgSupportTrueWhenWaitInitializedAndCpuSupportsOperandThenWaitPkgEnabled) { + CpuInfo::cpuidFunc = mockCpuidEnableAll; + + WaitUtils::waitpkgSupport = true; + + WaitUtils::init(WaitUtils::WaitpkgUse::tpause); + + EXPECT_EQ(1u, WaitUtils::waitCount); + EXPECT_EQ(16000u, WaitUtils::waitpkgCounterValue); + EXPECT_EQ(0u, WaitUtils::waitpkgControlValue); + EXPECT_EQ(WaitUtils::waitpkgUse, WaitUtils::WaitpkgUse::tpause); + EXPECT_EQ(1, 
WaitUtils::waitPkgThresholdInMicroSeconds); } TEST_F(WaitPkgTest, givenFullyEnabledWaitPkgAndOverrideCounterValueWhenWaitInitializedThenNewCounterValueSet) { @@ -204,11 +262,12 @@ TEST_F(WaitPkgTest, givenFullyEnabledWaitPkgAndOverrideCounterValueWhenWaitIniti debugManager.flags.EnableWaitpkg.set(1); debugManager.flags.WaitpkgCounterValue.set(1234); - WaitUtils::init(false); + WaitUtils::init(WaitUtils::WaitpkgUse::noUse); EXPECT_EQ(0u, WaitUtils::waitCount); EXPECT_EQ(1234u, WaitUtils::waitpkgCounterValue); EXPECT_EQ(0u, WaitUtils::waitpkgControlValue); - EXPECT_TRUE(WaitUtils::waitpkgUse); + EXPECT_EQ(WaitUtils::waitpkgUse, WaitUtils::WaitpkgUse::umonitorAndUmwait); + EXPECT_EQ(1, WaitUtils::waitPkgThresholdInMicroSeconds); } TEST_F(WaitPkgTest, givenFullyEnabledWaitPkgAndOverrideControlValueWhenWaitInitializedThenNewControlValueSet) { @@ -219,11 +278,12 @@ TEST_F(WaitPkgTest, givenFullyEnabledWaitPkgAndOverrideControlValueWhenWaitIniti debugManager.flags.EnableWaitpkg.set(1); debugManager.flags.WaitpkgControlValue.set(1); - WaitUtils::init(false); + WaitUtils::init(WaitUtils::WaitpkgUse::noUse); EXPECT_EQ(0u, WaitUtils::waitCount); EXPECT_EQ(16000u, WaitUtils::waitpkgCounterValue); EXPECT_EQ(1u, WaitUtils::waitpkgControlValue); - EXPECT_TRUE(WaitUtils::waitpkgUse); + EXPECT_EQ(WaitUtils::waitpkgUse, WaitUtils::WaitpkgUse::umonitorAndUmwait); + EXPECT_EQ(1, WaitUtils::waitPkgThresholdInMicroSeconds); } TEST_F(WaitPkgTest, givenEnabledWaitPkgSettingsAndWaitpkgSupportTrueWhenWaitInitializedTwiceThenInitOnce) { @@ -231,21 +291,23 @@ TEST_F(WaitPkgTest, givenEnabledWaitPkgSettingsAndWaitpkgSupportTrueWhenWaitInit WaitUtils::waitpkgSupport = true; - WaitUtils::init(true); + WaitUtils::init(WaitUtils::WaitpkgUse::umonitorAndUmwait); EXPECT_EQ(0u, WaitUtils::waitCount); EXPECT_EQ(16000u, WaitUtils::waitpkgCounterValue); EXPECT_EQ(0u, WaitUtils::waitpkgControlValue); - EXPECT_TRUE(WaitUtils::waitpkgUse); + EXPECT_EQ(WaitUtils::waitpkgUse, 
WaitUtils::WaitpkgUse::umonitorAndUmwait); + EXPECT_EQ(1, WaitUtils::waitPkgThresholdInMicroSeconds); debugManager.flags.WaitpkgControlValue.set(1); - WaitUtils::init(true); + WaitUtils::init(WaitUtils::WaitpkgUse::umonitorAndUmwait); EXPECT_EQ(0u, WaitUtils::waitCount); EXPECT_EQ(16000u, WaitUtils::waitpkgCounterValue); EXPECT_EQ(0u, WaitUtils::waitpkgControlValue); - EXPECT_TRUE(WaitUtils::waitpkgUse); + EXPECT_EQ(WaitUtils::waitpkgUse, WaitUtils::WaitpkgUse::umonitorAndUmwait); + EXPECT_EQ(1, WaitUtils::waitPkgThresholdInMicroSeconds); } TEST_F(WaitPkgEnabledTest, givenMonitoredAddressChangedWhenAddressMatchesPredicateValueThenWaitReturnsTrue) { @@ -259,7 +321,7 @@ TEST_F(WaitPkgEnabledTest, givenMonitoredAddressChangedWhenAddressMatchesPredica pollValue = 1; }; - bool ret = WaitUtils::waitFunction(&pollValue, expectedValue); + bool ret = WaitUtils::waitFunction(&pollValue, expectedValue, 0); EXPECT_TRUE(ret); EXPECT_EQ(1u, CpuIntrinsicsTests::rdtscCounter); @@ -279,7 +341,7 @@ TEST_F(WaitPkgEnabledTest, givenMonitoredAddressNotChangesWhenMonitorTimeoutsThe CpuIntrinsicsTests::rdtscRetValue = 2500; CpuIntrinsicsTests::umwaitRetValue = 1; - bool ret = WaitUtils::waitFunction(&pollValue, expectedValue); + bool ret = WaitUtils::waitFunction(&pollValue, expectedValue, 0); EXPECT_FALSE(ret); EXPECT_EQ(1u, CpuIntrinsicsTests::rdtscCounter); @@ -299,7 +361,7 @@ TEST_F(WaitPkgEnabledTest, givenMonitoredAddressChangedWhenAddressNotMatchesPred CpuIntrinsicsTests::rdtscRetValue = 3700; CpuIntrinsicsTests::umwaitRetValue = 0; - bool ret = WaitUtils::waitFunction(&pollValue, expectedValue); + bool ret = WaitUtils::waitFunction(&pollValue, expectedValue, 0); EXPECT_FALSE(ret); EXPECT_EQ(1u, CpuIntrinsicsTests::rdtscCounter); @@ -311,3 +373,43 @@ TEST_F(WaitPkgEnabledTest, givenMonitoredAddressChangedWhenAddressNotMatchesPred EXPECT_EQ(WaitUtils::waitpkgControlValue, CpuIntrinsicsTests::lastUmwaitControl); EXPECT_EQ(1u, CpuIntrinsicsTests::umwaitCounter); } + 
+TEST_F(WaitPkgEnabledTest, givenTimeElapsedSinceWaitStartedBelowThresholdWhenWaitThenDoNoTpause) {
+    volatile TagAddressType pollValue = 0u; // nothing in this test body writes it, so it stays below expectedValue
+    TaskCountType expectedValue = 1;
+    int64_t timeElapsedSinceWaitStarted = 0u; // elapsed-wait argument for waitFunction; unit not visible here (presumably microseconds - TODO confirm)
+    EXPECT_EQ(CpuIntrinsicsTests::tpauseCounter, 0u); // precondition: no tpause recorded before the call
+    // Purpose: with the WaitPkgEnabledTest fixture and zero elapsed time, one waitFunction call must leave tpauseCounter at 0.
+    WaitUtils::waitFunction(&pollValue, expectedValue, timeElapsedSinceWaitStarted); // return value is not checked
+    EXPECT_EQ(CpuIntrinsicsTests::tpauseCounter, 0u);
+}
+
+TEST_F(WaitPkgEnabledTest, givenTimeElapsedSinceWaitStartedAboveThresholdWhenWaitThenDoNoTpause) {
+    volatile TagAddressType pollValue = 0u; // nothing in this test body writes it, so it stays below expectedValue
+    TaskCountType expectedValue = 1;
+    int64_t timeElapsedSinceWaitStarted = 5000u; // "above threshold" case; presumably compared with WaitUtils::waitPkgThresholdInMicroSeconds - TODO confirm
+    EXPECT_EQ(CpuIntrinsicsTests::tpauseCounter, 0u); // precondition: no tpause recorded before the call
+    // Purpose: same fixture as above (NOTE(review): presumably the umonitor/umwait mode); a large elapsed time must still not raise tpauseCounter.
+    WaitUtils::waitFunction(&pollValue, expectedValue, timeElapsedSinceWaitStarted); // return value is not checked
+    EXPECT_EQ(CpuIntrinsicsTests::tpauseCounter, 0u);
+}
+
+TEST_F(WaitPkgTpauseEnabledTest, givenTimeElapsedSinceWaitStartedBelowThresholdWhenWaitThenDoNoTpause) {
+    volatile TagAddressType pollValue = 0u; // nothing in this test body writes it, so it stays below expectedValue
+    TaskCountType expectedValue = 1;
+    int64_t timeElapsedSinceWaitStarted = 0u; // "below threshold" case
+    EXPECT_EQ(CpuIntrinsicsTests::tpauseCounter, 0u); // precondition: no tpause recorded before the call
+    // Purpose: with the WaitPkgTpauseEnabledTest fixture (NOTE(review): presumably tpause mode) and zero elapsed time, tpauseCounter must stay 0.
+    WaitUtils::waitFunction(&pollValue, expectedValue, timeElapsedSinceWaitStarted); // return value is not checked
+    EXPECT_EQ(CpuIntrinsicsTests::tpauseCounter, 0u);
+}
+
+TEST_F(WaitPkgTpauseEnabledTest, givenTimeElapsedSinceWaitStartedAboveThresholdWhenWaitThenDoTpause) {
+    volatile TagAddressType pollValue = 0u; // nothing in this test body writes it, so it stays below expectedValue
+    TaskCountType expectedValue = 1;
+    int64_t timeElapsedSinceWaitStarted = 5000u; // "above threshold" case; presumably compared with WaitUtils::waitPkgThresholdInMicroSeconds - TODO confirm
+    EXPECT_EQ(CpuIntrinsicsTests::tpauseCounter, 0u); // precondition: no tpause recorded before the call
+    // Purpose: the only case of the four that expects a tpause: a single waitFunction call must raise tpauseCounter from 0 to exactly 1.
+    WaitUtils::waitFunction(&pollValue, expectedValue, timeElapsedSinceWaitStarted); // return value is not checked
+    EXPECT_EQ(CpuIntrinsicsTests::tpauseCounter, 1u);
+}
\ No newline at end of file