From b9ed7de40ad48c50d8b194299aff6df0971113c1 Mon Sep 17 00:00:00 2001 From: Zbigniew Zdanowicz Date: Tue, 30 Mar 2021 18:11:00 +0000 Subject: [PATCH] Parametrize wait operation Related-To: NEO-4759 Signed-off-by: Zbigniew Zdanowicz --- level_zero/core/source/event/event.cpp | 4 +- level_zero/core/source/fence/fence.cpp | 7 +- ...and_stream_receiver_flush_task_3_tests.cpp | 24 +++++ .../test/unit_test/test_files/igdrcl.config | 1 + .../command_stream_receiver.cpp | 6 +- .../debug_settings/debug_variables_base.inl | 1 + .../execution_environment.cpp | 5 +- shared/source/utilities/CMakeLists.txt | 2 + shared/source/utilities/wait_util.cpp | 27 ++++++ shared/source/utilities/wait_util.h | 37 ++++++++ .../fixtures/direct_submission_fixture.h | 2 + .../mocks/mock_command_stream_receiver.h | 1 + .../direct_submission_tests.cpp | 8 +- .../test/unit_test/utilities/CMakeLists.txt | 1 + .../unit_test/utilities/cpuintrinsics.cpp | 14 ++- .../utilities/cpuintrinsics_tests.cpp | 10 +- .../unit_test/utilities/wait_util_tests.cpp | 93 +++++++++++++++++++ 17 files changed, 223 insertions(+), 20 deletions(-) create mode 100644 shared/source/utilities/wait_util.cpp create mode 100644 shared/source/utilities/wait_util.h create mode 100644 shared/test/unit_test/utilities/wait_util_tests.cpp diff --git a/level_zero/core/source/event/event.cpp b/level_zero/core/source/event/event.cpp index 137164d81b..193c5e3149 100644 --- a/level_zero/core/source/event/event.cpp +++ b/level_zero/core/source/event/event.cpp @@ -18,6 +18,7 @@ #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/memory_operations_handler.h" #include "shared/source/utilities/cpuintrinsics.h" +#include "shared/source/utilities/wait_util.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/device/device_imp.h" @@ -261,8 +262,7 @@ ze_result_t EventImp::hostSynchronize(uint64_t timeout) { return ZE_RESULT_SUCCESS; } - std::this_thread::yield(); - NEO::CpuIntrinsics::pause(); + NEO::WaitUtils::waitFunction(nullptr, 0u); if (timeout == std::numeric_limits::max()) { continue; diff --git a/level_zero/core/source/fence/fence.cpp b/level_zero/core/source/fence/fence.cpp index bd7bf8cf2b..65af73e46e 100644 --- a/level_zero/core/source/fence/fence.cpp +++ b/level_zero/core/source/fence/fence.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2020 Intel Corporation + * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -10,7 +10,7 @@ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/constants.h" #include "shared/source/memory_manager/memory_manager.h" -#include "shared/source/utilities/cpuintrinsics.h" +#include "shared/source/utilities/wait_util.h" #include "hw_helpers.h" @@ -79,8 +79,7 @@ ze_result_t FenceImp::hostSynchronize(uint64_t timeout) { return ZE_RESULT_SUCCESS; } - std::this_thread::yield(); - NEO::CpuIntrinsics::pause(); + NEO::WaitUtils::waitFunction(nullptr, 0u); if (timeout == std::numeric_limits::max()) { continue; diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp index db8baf5655..d5ad1dd8db 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp @@ -9,6 +9,7 @@ #include "shared/source/memory_manager/memory_manager.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/dispatch_flags_helper.h" +#include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/ult_device_factory.h" @@ -2060,3 +2061,26 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenStaticPartitioningEnabledWhen } EXPECT_TRUE(found); } + +namespace CpuIntrinsicsTests { +extern volatile uint32_t *pauseAddress; +extern uint32_t pauseValue; +} // namespace CpuIntrinsicsTests + +HWTEST_F(CommandStreamReceiverFlushTaskTests, givenTagValueNotMeetingTaskCountToWaitWhenTagValueSwitchesThenWaitFunctionReturnsTrue) { + VariableBackup backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); + VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue); + + auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + pDevice->resetCommandStreamReceiver(mockCsr); + + uint32_t taskCountToWait = 2u; + + *mockCsr->tagAddress = 1u; + + CpuIntrinsicsTests::pauseAddress = mockCsr->tagAddress; + CpuIntrinsicsTests::pauseValue = taskCountToWait; + + bool ret = mockCsr->waitForCompletionWithTimeout(false, 1, taskCountToWait); + EXPECT_TRUE(ret); +} diff --git a/opencl/test/unit_test/test_files/igdrcl.config b/opencl/test/unit_test/test_files/igdrcl.config index eedb89d74f..24988cfbf7 100644 --- a/opencl/test/unit_test/test_files/igdrcl.config +++ b/opencl/test/unit_test/test_files/igdrcl.config @@ -226,3 +226,4 @@ UseBindlessDebugSip = 0 OverrideSlmAllocationSize = -1 OverrideSlmSize = -1 UseCyclesPerSecondTimer = 0 +WaitLoopCount = -1 diff --git a/shared/source/command_stream/command_stream_receiver.cpp b/shared/source/command_stream/command_stream_receiver.cpp index c1adb8d0f0..926e31ace1 100644 --- a/shared/source/command_stream/command_stream_receiver.cpp +++ b/shared/source/command_stream/command_stream_receiver.cpp @@ -27,6 +27,7 @@ #include "shared/source/os_interface/os_interface.h" #include "shared/source/utilities/cpuintrinsics.h" #include "shared/source/utilities/tag_allocator.h" +#include "shared/source/utilities/wait_util.h" namespace NEO { @@ -258,8 +259,9 @@ bool CommandStreamReceiver::waitForCompletionWithTimeout(bool enableTimeout, int time1 = std::chrono::high_resolution_clock::now(); while (*getTagAddress() < taskCountToWait && timeDiff <= timeoutMicroseconds) { - std::this_thread::yield(); - CpuIntrinsics::pause(); + if (WaitUtils::waitFunction(getTagAddress(), taskCountToWait)) { + break; + } if (enableTimeout) { time2 = std::chrono::high_resolution_clock::now(); diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 565c6c27ac..5359de396f 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -213,6 +213,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, UseAsyncDrmExec, -1, "-1: default, 0: Disabled 1 DECLARE_DEBUG_VARIABLE(int32_t, UseBindlessMode, -1, "Use precompiled builtins in bindless mode, -1: api dependent, 0: disabled, 1: enabled") DECLARE_DEBUG_VARIABLE(int32_t, OverrideSlmSize, -1, "Force different slm size than default in kB") DECLARE_DEBUG_VARIABLE(int32_t, UseCyclesPerSecondTimer, 0, "0: default behavior, 0: disabled: Report L0 timer in nanosecond units, 1: enabled: Report L0 timer in cycles per second") +DECLARE_DEBUG_VARIABLE(int32_t, WaitLoopCount, -1, "-1: use default, >=0: number of iterations in wait loop") /*DRIVER TOGGLES*/ DECLARE_DEBUG_VARIABLE(int32_t, ForceOCLVersion, 0, "Force specific OpenCL API version") diff --git a/shared/source/execution_environment/execution_environment.cpp b/shared/source/execution_environment/execution_environment.cpp index cff53dd442..f1d31b0c35 100644 --- a/shared/source/execution_environment/execution_environment.cpp +++ b/shared/source/execution_environment/execution_environment.cpp @@ -13,9 +13,12 @@ #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/source/os_interface/os_environment.h" +#include "shared/source/utilities/wait_util.h" namespace NEO { -ExecutionEnvironment::ExecutionEnvironment() = default; +ExecutionEnvironment::ExecutionEnvironment() { + WaitUtils::init(); +} ExecutionEnvironment::~ExecutionEnvironment() { if (memoryManager) { diff --git a/shared/source/utilities/CMakeLists.txt b/shared/source/utilities/CMakeLists.txt index 16a2b5a2d5..76146664a3 100644 --- a/shared/source/utilities/CMakeLists.txt +++ b/shared/source/utilities/CMakeLists.txt @@ -39,6 +39,8 @@ set(NEO_CORE_UTILITIES ${CMAKE_CURRENT_SOURCE_DIR}/tag_allocator.inl ${CMAKE_CURRENT_SOURCE_DIR}/time_measure_wrapper.h ${CMAKE_CURRENT_SOURCE_DIR}/timer_util.h + ${CMAKE_CURRENT_SOURCE_DIR}/wait_util.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/wait_util.h ) set(NEO_CORE_UTILITIES_WINDOWS diff --git a/shared/source/utilities/wait_util.cpp b/shared/source/utilities/wait_util.cpp new file mode 100644 index 0000000000..96d1ac094d --- /dev/null +++ b/shared/source/utilities/wait_util.cpp @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/utilities/wait_util.h" + +#include "shared/source/debug_settings/debug_settings_manager.h" + +namespace NEO { + +namespace WaitUtils { + +uint32_t waitCount = defaultWaitCount; + +void init() { + int32_t overrideWaitCount = DebugManager.flags.WaitLoopCount.get(); + if (overrideWaitCount != -1) { + waitCount = static_cast(overrideWaitCount); + } +} + +} // namespace WaitUtils + +} // namespace NEO diff --git a/shared/source/utilities/wait_util.h b/shared/source/utilities/wait_util.h new file mode 100644 index 0000000000..a9941469f7 --- /dev/null +++ b/shared/source/utilities/wait_util.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include "shared/source/utilities/cpuintrinsics.h" + +#include +#include + +namespace NEO { + +namespace WaitUtils { + +constexpr uint32_t defaultWaitCount = 64u; +extern uint32_t waitCount; + +inline bool waitFunction(volatile uint32_t *pollAddress, uint32_t expectedValue) { + for (uint32_t i = 0; i < waitCount; i++) { + CpuIntrinsics::pause(); + } + if (pollAddress != nullptr) { + if (*pollAddress >= expectedValue) { + return true; + } + } + std::this_thread::yield(); + return false; +} + +void init(); +} // namespace WaitUtils + +} // namespace NEO diff --git a/shared/test/common/fixtures/direct_submission_fixture.h b/shared/test/common/fixtures/direct_submission_fixture.h index 6c17b5cb60..d50ad5858f 100644 --- a/shared/test/common/fixtures/direct_submission_fixture.h +++ b/shared/test/common/fixtures/direct_submission_fixture.h @@ -14,7 +14,9 @@ using namespace NEO; +namespace CpuIntrinsicsTests { extern std::atomic lastClFlushedPtr; +} struct DirectSubmissionFixture : public DeviceFixture { void SetUp() { diff --git a/shared/test/common/mocks/mock_command_stream_receiver.h b/shared/test/common/mocks/mock_command_stream_receiver.h index 4c806e8de8..7aafd706f8 100644 --- a/shared/test/common/mocks/mock_command_stream_receiver.h +++ b/shared/test/common/mocks/mock_command_stream_receiver.h @@ -133,6 +133,7 @@ class MockCsrHw2 : public CommandStreamReceiverHw { using CommandStreamReceiver::pageTableManagerInitialized; using CommandStreamReceiver::requiredScratchSize; using CommandStreamReceiver::requiredThreadArbitrationPolicy; + using CommandStreamReceiver::tagAddress; using CommandStreamReceiver::taskCount; using CommandStreamReceiver::taskLevel; using CommandStreamReceiver::timestampPacketWriteEnabled; diff --git a/shared/test/unit_test/direct_submission/direct_submission_tests.cpp b/shared/test/unit_test/direct_submission/direct_submission_tests.cpp index dd6ad267bb..5a3a5c8894 100644 --- a/shared/test/unit_test/direct_submission/direct_submission_tests.cpp +++ b/shared/test/unit_test/direct_submission/direct_submission_tests.cpp @@ -35,11 +35,11 @@ HWTEST_F(DirectSubmissionTest, whenDebugCacheFlushDisabledSetThenExpectNoCpuCach EXPECT_TRUE(directSubmission.disableCpuCacheFlush); uintptr_t expectedPtrVal = 0; - lastClFlushedPtr = 0; + CpuIntrinsicsTests::lastClFlushedPtr = 0; void *ptr = reinterpret_cast(0xABCD00u); size_t size = 64; directSubmission.cpuCachelineFlush(ptr, size); - EXPECT_EQ(expectedPtrVal, lastClFlushedPtr); + EXPECT_EQ(expectedPtrVal, CpuIntrinsicsTests::lastClFlushedPtr); } HWTEST_F(DirectSubmissionTest, whenDebugCacheFlushDisabledNotSetThenExpectCpuCacheFlush) { @@ -51,11 +51,11 @@ HWTEST_F(DirectSubmissionTest, whenDebugCacheFlushDisabledNotSetThenExpectCpuCac EXPECT_FALSE(directSubmission.disableCpuCacheFlush); uintptr_t expectedPtrVal = 0xABCD00u; - lastClFlushedPtr = 0; + CpuIntrinsicsTests::lastClFlushedPtr = 0; void *ptr = reinterpret_cast(expectedPtrVal); size_t size = 64; directSubmission.cpuCachelineFlush(ptr, size); - EXPECT_EQ(expectedPtrVal, lastClFlushedPtr); + EXPECT_EQ(expectedPtrVal, CpuIntrinsicsTests::lastClFlushedPtr); } HWTEST_F(DirectSubmissionTest, givenDirectSubmissionInitializedWhenRingIsStartedThenExpectAllocationsCreatedAndCommandsDispatched) { diff --git a/shared/test/unit_test/utilities/CMakeLists.txt b/shared/test/unit_test/utilities/CMakeLists.txt index 0d6ad350b5..24566934a5 100644 --- a/shared/test/unit_test/utilities/CMakeLists.txt +++ b/shared/test/unit_test/utilities/CMakeLists.txt @@ -22,6 +22,7 @@ target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/spinlock_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/timer_util_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/vec_tests.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/wait_util_tests.cpp ) add_subdirectories() diff --git a/shared/test/unit_test/utilities/cpuintrinsics.cpp b/shared/test/unit_test/utilities/cpuintrinsics.cpp index cdf20b0372..d772c80933 100644 --- a/shared/test/unit_test/utilities/cpuintrinsics.cpp +++ b/shared/test/unit_test/utilities/cpuintrinsics.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2020 Intel Corporation + * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -10,19 +10,27 @@ #include #include +namespace CpuIntrinsicsTests { //std::atomic is used for sake of sanitation in MT tests std::atomic lastClFlushedPtr(0u); std::atomic pauseCounter(0u); +volatile uint32_t *pauseAddress = nullptr; +uint32_t pauseValue = 0u; +} // namespace CpuIntrinsicsTests + namespace NEO { namespace CpuIntrinsics { void clFlush(void const *ptr) { - lastClFlushedPtr = reinterpret_cast(ptr); + CpuIntrinsicsTests::lastClFlushedPtr = reinterpret_cast(ptr); } void pause() { - pauseCounter++; + CpuIntrinsicsTests::pauseCounter++; + if (CpuIntrinsicsTests::pauseAddress != nullptr) { + *CpuIntrinsicsTests::pauseAddress = CpuIntrinsicsTests::pauseValue; + } } } // namespace CpuIntrinsics diff --git a/shared/test/unit_test/utilities/cpuintrinsics_tests.cpp b/shared/test/unit_test/utilities/cpuintrinsics_tests.cpp index d9d1d9e42d..ff821dd5a1 100644 --- a/shared/test/unit_test/utilities/cpuintrinsics_tests.cpp +++ b/shared/test/unit_test/utilities/cpuintrinsics_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2020 Intel Corporation + * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -12,18 +12,20 @@ #include #include +namespace CpuIntrinsicsTests { extern std::atomic lastClFlushedPtr; extern std::atomic pauseCounter; +} // namespace CpuIntrinsicsTests TEST(CpuIntrinsicsTest, whenClFlushIsCalledThenExpectToPassPtrToSystemCall) { uintptr_t flushAddr = 0x1234; void const *ptr = reinterpret_cast(flushAddr); NEO::CpuIntrinsics::clFlush(ptr); - EXPECT_EQ(flushAddr, lastClFlushedPtr); + EXPECT_EQ(flushAddr, CpuIntrinsicsTests::lastClFlushedPtr); } TEST(CpuIntrinsicsTest, whenPauseCalledThenExpectToIncreaseCounter) { - uint32_t oldCount = pauseCounter.load(); + uint32_t oldCount = CpuIntrinsicsTests::pauseCounter.load(); NEO::CpuIntrinsics::pause(); - EXPECT_EQ(oldCount + 1, pauseCounter); + EXPECT_EQ(oldCount + 1, CpuIntrinsicsTests::pauseCounter); } diff --git a/shared/test/unit_test/utilities/wait_util_tests.cpp b/shared/test/unit_test/utilities/wait_util_tests.cpp new file mode 100644 index 0000000000..eaeb7163ea --- /dev/null +++ b/shared/test/unit_test/utilities/wait_util_tests.cpp @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/utilities/wait_util.h" +#include "shared/test/common/helpers/debug_manager_state_restore.h" +#include "shared/test/common/helpers/variable_backup.h" + +#include "test.h" + +#include "gtest/gtest.h" + +using namespace NEO; + +namespace CpuIntrinsicsTests { +extern std::atomic pauseCounter; +} // namespace CpuIntrinsicsTests + +TEST(WaitTest, givenDefaultSettingsWhenNoPollAddressProvidedThenPauseDefaultTimeAndReturnFalse) { + EXPECT_EQ(64u, WaitUtils::defaultWaitCount); + + WaitUtils::init(); + EXPECT_EQ(WaitUtils::defaultWaitCount, WaitUtils::waitCount); + + uint32_t oldCount = CpuIntrinsicsTests::pauseCounter.load(); + bool ret = WaitUtils::waitFunction(nullptr, 0u); + EXPECT_FALSE(ret); + EXPECT_EQ(oldCount + WaitUtils::waitCount, CpuIntrinsicsTests::pauseCounter); +} + +TEST(WaitTest, givenDebugFlagOverridesWhenNoPollAddressProvidedThenPauseDefaultTimeAndReturnFalse) { + DebugManagerStateRestore restore; + VariableBackup backupWaitCount(&WaitUtils::waitCount); + + uint32_t count = 10u; + DebugManager.flags.WaitLoopCount.set(count); + + WaitUtils::init(); + EXPECT_EQ(count, WaitUtils::waitCount); + + uint32_t oldCount = CpuIntrinsicsTests::pauseCounter.load(); + bool ret = WaitUtils::waitFunction(nullptr, 0u); + EXPECT_FALSE(ret); + EXPECT_EQ(oldCount + count, CpuIntrinsicsTests::pauseCounter); +} + +TEST(WaitTest, givenDefaultSettingsWhenPollAddressProvidedDoesNotMeetCriteriaThenPauseDefaultTimeAndReturnFalse) { + WaitUtils::init(); + EXPECT_EQ(WaitUtils::defaultWaitCount, WaitUtils::waitCount); + + volatile uint32_t pollValue = 1u; + uint32_t expectedValue = 3; + + uint32_t oldCount = CpuIntrinsicsTests::pauseCounter.load(); + bool ret = WaitUtils::waitFunction(&pollValue, expectedValue); + EXPECT_FALSE(ret); + EXPECT_EQ(oldCount + WaitUtils::waitCount, CpuIntrinsicsTests::pauseCounter); +} + +TEST(WaitTest, givenDefaultSettingsWhenPollAddressProvidedMeetsCriteriaThenPauseDefaultTimeAndReturnTrue) { + WaitUtils::init(); + EXPECT_EQ(WaitUtils::defaultWaitCount, WaitUtils::waitCount); + + volatile uint32_t pollValue = 3u; + uint32_t expectedValue = 1; + + uint32_t oldCount = CpuIntrinsicsTests::pauseCounter.load(); + bool ret = WaitUtils::waitFunction(&pollValue, expectedValue); + EXPECT_TRUE(ret); + EXPECT_EQ(oldCount + WaitUtils::waitCount, CpuIntrinsicsTests::pauseCounter); +} + +TEST(WaitTest, givenDebugFlagSetZeroWhenPollAddressProvidedMeetsCriteriaThenPauseZeroTimesAndReturnTrue) { + DebugManagerStateRestore restore; + VariableBackup backupWaitCount(&WaitUtils::waitCount); + + uint32_t count = 0u; + DebugManager.flags.WaitLoopCount.set(count); + + WaitUtils::init(); + EXPECT_EQ(count, WaitUtils::waitCount); + + volatile uint32_t pollValue = 3u; + uint32_t expectedValue = 1; + + uint32_t oldCount = CpuIntrinsicsTests::pauseCounter.load(); + bool ret = WaitUtils::waitFunction(&pollValue, expectedValue); + EXPECT_TRUE(ret); + EXPECT_EQ(oldCount + WaitUtils::waitCount, CpuIntrinsicsTests::pauseCounter); +}