Parametrize wait operation

Related-To: NEO-4759


Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2021-03-30 18:11:00 +00:00
committed by Compute-Runtime-Automation
parent 2f59fafb96
commit b9ed7de40a
17 changed files with 223 additions and 20 deletions

View File

@@ -27,6 +27,7 @@
#include "shared/source/os_interface/os_interface.h"
#include "shared/source/utilities/cpuintrinsics.h"
#include "shared/source/utilities/tag_allocator.h"
#include "shared/source/utilities/wait_util.h"
namespace NEO {
@@ -258,8 +259,9 @@ bool CommandStreamReceiver::waitForCompletionWithTimeout(bool enableTimeout, int
time1 = std::chrono::high_resolution_clock::now();
while (*getTagAddress() < taskCountToWait && timeDiff <= timeoutMicroseconds) {
std::this_thread::yield();
CpuIntrinsics::pause();
if (WaitUtils::waitFunction(getTagAddress(), taskCountToWait)) {
break;
}
if (enableTimeout) {
time2 = std::chrono::high_resolution_clock::now();

View File

@@ -213,6 +213,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, UseAsyncDrmExec, -1, "-1: default, 0: Disabled 1
DECLARE_DEBUG_VARIABLE(int32_t, UseBindlessMode, -1, "Use precompiled builtins in bindless mode, -1: api dependent, 0: disabled, 1: enabled")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideSlmSize, -1, "Force different slm size than default in kB")
DECLARE_DEBUG_VARIABLE(int32_t, UseCyclesPerSecondTimer, 0, "0: default behavior, 0: disabled: Report L0 timer in nanosecond units, 1: enabled: Report L0 timer in cycles per second")
DECLARE_DEBUG_VARIABLE(int32_t, WaitLoopCount, -1, "-1: use default, >=0: number of iterations in wait loop")
/*DRIVER TOGGLES*/
DECLARE_DEBUG_VARIABLE(int32_t, ForceOCLVersion, 0, "Force specific OpenCL API version")

View File

@@ -13,9 +13,12 @@
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/os_agnostic_memory_manager.h"
#include "shared/source/os_interface/os_environment.h"
#include "shared/source/utilities/wait_util.h"
namespace NEO {
ExecutionEnvironment::ExecutionEnvironment() = default;
// Constructing an execution environment runs WaitUtils::init() as its only explicit step.
ExecutionEnvironment::ExecutionEnvironment() {
WaitUtils::init();
}
ExecutionEnvironment::~ExecutionEnvironment() {
if (memoryManager) {

View File

@@ -39,6 +39,8 @@ set(NEO_CORE_UTILITIES
${CMAKE_CURRENT_SOURCE_DIR}/tag_allocator.inl
${CMAKE_CURRENT_SOURCE_DIR}/time_measure_wrapper.h
${CMAKE_CURRENT_SOURCE_DIR}/timer_util.h
${CMAKE_CURRENT_SOURCE_DIR}/wait_util.cpp
${CMAKE_CURRENT_SOURCE_DIR}/wait_util.h
)
set(NEO_CORE_UTILITIES_WINDOWS

View File

@@ -0,0 +1,27 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/utilities/wait_util.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
namespace NEO {
namespace WaitUtils {

// Active number of pause iterations; starts at the compiled-in default and may be
// replaced by init().
uint32_t waitCount = defaultWaitCount;

// Applies the WaitLoopCount debug flag to waitCount.
//
// Only non-negative flag values are treated as an override. Every negative value
// (including the -1 "use default" marker) leaves waitCount untouched, so a stray
// negative setting cannot wrap around to a huge unsigned iteration count when it
// is converted to uint32_t.
void init() {
    const int32_t overrideWaitCount = DebugManager.flags.WaitLoopCount.get();
    if (overrideWaitCount >= 0) {
        waitCount = static_cast<uint32_t>(overrideWaitCount);
    }
}

} // namespace WaitUtils
} // namespace NEO

View File

@@ -0,0 +1,37 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/utilities/cpuintrinsics.h"
#include <cstdint>
#include <thread>
namespace NEO {
namespace WaitUtils {

// Number of pause iterations used when no override has been applied.
constexpr uint32_t defaultWaitCount = 64u;

// Number of pause iterations executed by every waitFunction() call.
extern uint32_t waitCount;

// Performs one wait step.
//
// Calls CpuIntrinsics::pause() waitCount times, then, if pollAddress is not null,
// reads it once and returns true when the value read is greater than or equal to
// expectedValue. Otherwise the calling thread yields and false is returned.
// The pause iterations always run, even if the condition already holds on entry.
inline bool waitFunction(volatile uint32_t *pollAddress, uint32_t expectedValue) {
    uint32_t spins = 0u;
    while (spins < waitCount) {
        CpuIntrinsics::pause();
        ++spins;
    }

    // Short-circuit keeps the volatile read from happening for a null address.
    const bool conditionMet = (pollAddress != nullptr) && (*pollAddress >= expectedValue);
    if (conditionMet) {
        return true;
    }

    std::this_thread::yield();
    return false;
}

// Declared here, defined in the implementation file.
void init();

} // namespace WaitUtils
} // namespace NEO

View File

@@ -14,7 +14,9 @@
using namespace NEO;
// Test-side state of the CpuIntrinsics implementation used by the tests; only declared here.
namespace CpuIntrinsicsTests {
// Tests reset this to 0 and compare it against the pointer handed to cpuCachelineFlush().
extern std::atomic<uintptr_t> lastClFlushedPtr;
}
struct DirectSubmissionFixture : public DeviceFixture {
void SetUp() {

View File

@@ -133,6 +133,7 @@ class MockCsrHw2 : public CommandStreamReceiverHw<GfxFamily> {
using CommandStreamReceiver::pageTableManagerInitialized;
using CommandStreamReceiver::requiredScratchSize;
using CommandStreamReceiver::requiredThreadArbitrationPolicy;
using CommandStreamReceiver::tagAddress;
using CommandStreamReceiver::taskCount;
using CommandStreamReceiver::taskLevel;
using CommandStreamReceiver::timestampPacketWriteEnabled;

View File

@@ -35,11 +35,11 @@ HWTEST_F(DirectSubmissionTest, whenDebugCacheFlushDisabledSetThenExpectNoCpuCach
EXPECT_TRUE(directSubmission.disableCpuCacheFlush);
uintptr_t expectedPtrVal = 0;
lastClFlushedPtr = 0;
CpuIntrinsicsTests::lastClFlushedPtr = 0;
void *ptr = reinterpret_cast<void *>(0xABCD00u);
size_t size = 64;
directSubmission.cpuCachelineFlush(ptr, size);
EXPECT_EQ(expectedPtrVal, lastClFlushedPtr);
EXPECT_EQ(expectedPtrVal, CpuIntrinsicsTests::lastClFlushedPtr);
}
HWTEST_F(DirectSubmissionTest, whenDebugCacheFlushDisabledNotSetThenExpectCpuCacheFlush) {
@@ -51,11 +51,11 @@ HWTEST_F(DirectSubmissionTest, whenDebugCacheFlushDisabledNotSetThenExpectCpuCac
EXPECT_FALSE(directSubmission.disableCpuCacheFlush);
uintptr_t expectedPtrVal = 0xABCD00u;
lastClFlushedPtr = 0;
CpuIntrinsicsTests::lastClFlushedPtr = 0;
void *ptr = reinterpret_cast<void *>(expectedPtrVal);
size_t size = 64;
directSubmission.cpuCachelineFlush(ptr, size);
EXPECT_EQ(expectedPtrVal, lastClFlushedPtr);
EXPECT_EQ(expectedPtrVal, CpuIntrinsicsTests::lastClFlushedPtr);
}
HWTEST_F(DirectSubmissionTest, givenDirectSubmissionInitializedWhenRingIsStartedThenExpectAllocationsCreatedAndCommandsDispatched) {

View File

@@ -22,6 +22,7 @@ target_sources(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/spinlock_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/timer_util_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/vec_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/wait_util_tests.cpp
)
add_subdirectories()

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
* Copyright (C) 2019-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -10,19 +10,27 @@
#include <atomic>
#include <cstdint>
// State recorded by the clFlush()/pause() implementations defined further down in this file.
namespace CpuIntrinsicsTests {
//std::atomic is used for the sake of sanitation in MT tests
// Pointer value passed to the most recent clFlush() call.
std::atomic<uintptr_t> lastClFlushedPtr(0u);
// Incremented on every pause() call.
std::atomic<uint32_t> pauseCounter(0u);
// When non-null, each pause() call stores pauseValue at this address.
volatile uint32_t *pauseAddress = nullptr;
// Value written through pauseAddress by pause().
uint32_t pauseValue = 0u;
} // namespace CpuIntrinsicsTests
namespace NEO {
namespace CpuIntrinsics {
// Records the pointer it receives in lastClFlushedPtr; no other work is done here.
void clFlush(void const *ptr) {
lastClFlushedPtr = reinterpret_cast<uintptr_t>(ptr);
CpuIntrinsicsTests::lastClFlushedPtr = reinterpret_cast<uintptr_t>(ptr);
}
// Counts the call in pauseCounter and, when pauseAddress has been set, writes
// pauseValue to that address.
// NOTE(review): the pauseAddress hook presumably lets a test change a polled value
// from inside a wait loop - confirm against the tests that set it.
void pause() {
pauseCounter++;
CpuIntrinsicsTests::pauseCounter++;
if (CpuIntrinsicsTests::pauseAddress != nullptr) {
*CpuIntrinsicsTests::pauseAddress = CpuIntrinsicsTests::pauseValue;
}
}
} // namespace CpuIntrinsics

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
* Copyright (C) 2019-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -12,18 +12,20 @@
#include <atomic>
#include <cstdint>
// Counters checked by the tests below; only declared in this file.
namespace CpuIntrinsicsTests {
// Compared against the pointer passed to clFlush().
extern std::atomic<uintptr_t> lastClFlushedPtr;
// Compared before and after a pause() call.
extern std::atomic<uint32_t> pauseCounter;
} // namespace CpuIntrinsicsTests
// Calls clFlush() with a known address and expects lastClFlushedPtr to equal it afterwards.
TEST(CpuIntrinsicsTest, whenClFlushIsCalledThenExpectToPassPtrToSystemCall) {
uintptr_t flushAddr = 0x1234;
void const *ptr = reinterpret_cast<void const *>(flushAddr);
NEO::CpuIntrinsics::clFlush(ptr);
EXPECT_EQ(flushAddr, lastClFlushedPtr);
EXPECT_EQ(flushAddr, CpuIntrinsicsTests::lastClFlushedPtr);
}
// Calls pause() once and expects pauseCounter to be exactly one higher than before.
TEST(CpuIntrinsicsTest, whenPauseCalledThenExpectToIncreaseCounter) {
uint32_t oldCount = pauseCounter.load();
uint32_t oldCount = CpuIntrinsicsTests::pauseCounter.load();
NEO::CpuIntrinsics::pause();
EXPECT_EQ(oldCount + 1, pauseCounter);
EXPECT_EQ(oldCount + 1, CpuIntrinsicsTests::pauseCounter);
}

View File

@@ -0,0 +1,93 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/utilities/wait_util.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/variable_backup.h"
#include "test.h"
#include "gtest/gtest.h"
using namespace NEO;
namespace CpuIntrinsicsTests {
// Sampled before and after waitFunction() to count how many pause() calls it made.
extern std::atomic<uint32_t> pauseCounter;
} // namespace CpuIntrinsicsTests
// With a null poll address, waitFunction() is expected to perform waitCount pause
// calls and return false.
TEST(WaitTest, givenDefaultSettingsWhenNoPollAddressProvidedThenPauseDefaultTimeAndReturnFalse) {
// Pins the compiled-in default so a change to it is noticed.
EXPECT_EQ(64u, WaitUtils::defaultWaitCount);
WaitUtils::init();
EXPECT_EQ(WaitUtils::defaultWaitCount, WaitUtils::waitCount);
// pauseCounter is shared global state, so only the delta across the call is checked.
uint32_t oldCount = CpuIntrinsicsTests::pauseCounter.load();
bool ret = WaitUtils::waitFunction(nullptr, 0u);
EXPECT_FALSE(ret);
EXPECT_EQ(oldCount + WaitUtils::waitCount, CpuIntrinsicsTests::pauseCounter);
}
// With WaitLoopCount set to 10, init() is expected to store 10 in waitCount and
// waitFunction() to perform exactly 10 pause calls and return false for a null address.
// NOTE(review): the test name says "PauseDefaultTime", but the overridden count is
// what is verified here.
TEST(WaitTest, givenDebugFlagOverridesWhenNoPollAddressProvidedThenPauseDefaultTimeAndReturnFalse) {
// Presumably these two helpers restore the debug flags and waitCount at scope exit - confirm.
DebugManagerStateRestore restore;
VariableBackup<uint32_t> backupWaitCount(&WaitUtils::waitCount);
uint32_t count = 10u;
DebugManager.flags.WaitLoopCount.set(count);
WaitUtils::init();
EXPECT_EQ(count, WaitUtils::waitCount);
uint32_t oldCount = CpuIntrinsicsTests::pauseCounter.load();
bool ret = WaitUtils::waitFunction(nullptr, 0u);
EXPECT_FALSE(ret);
EXPECT_EQ(oldCount + count, CpuIntrinsicsTests::pauseCounter);
}
// A polled value below the expected value (1 < 3) must yield false after waitCount
// pause calls.
TEST(WaitTest, givenDefaultSettingsWhenPollAddressProvidedDoesNotMeetCriteriaThenPauseDefaultTimeAndReturnFalse) {
WaitUtils::init();
EXPECT_EQ(WaitUtils::defaultWaitCount, WaitUtils::waitCount);
volatile uint32_t pollValue = 1u;
uint32_t expectedValue = 3;
uint32_t oldCount = CpuIntrinsicsTests::pauseCounter.load();
bool ret = WaitUtils::waitFunction(&pollValue, expectedValue);
EXPECT_FALSE(ret);
EXPECT_EQ(oldCount + WaitUtils::waitCount, CpuIntrinsicsTests::pauseCounter);
}
// A polled value at or above the expected value (3 >= 1) must yield true; the
// waitCount pause calls are still expected to have been made.
TEST(WaitTest, givenDefaultSettingsWhenPollAddressProvidedMeetsCriteriaThenPauseDefaultTimeAndReturnTrue) {
WaitUtils::init();
EXPECT_EQ(WaitUtils::defaultWaitCount, WaitUtils::waitCount);
volatile uint32_t pollValue = 3u;
uint32_t expectedValue = 1;
uint32_t oldCount = CpuIntrinsicsTests::pauseCounter.load();
bool ret = WaitUtils::waitFunction(&pollValue, expectedValue);
EXPECT_TRUE(ret);
EXPECT_EQ(oldCount + WaitUtils::waitCount, CpuIntrinsicsTests::pauseCounter);
}
// With WaitLoopCount set to 0, waitFunction() must make no pause calls and still
// return true when the polled value satisfies the comparison (3 >= 1).
TEST(WaitTest, givenDebugFlagSetZeroWhenPollAddressProvidedMeetsCriteriaThenPauseZeroTimesAndReturnTrue) {
// Presumably these two helpers restore the debug flags and waitCount at scope exit - confirm.
DebugManagerStateRestore restore;
VariableBackup<uint32_t> backupWaitCount(&WaitUtils::waitCount);
uint32_t count = 0u;
DebugManager.flags.WaitLoopCount.set(count);
WaitUtils::init();
EXPECT_EQ(count, WaitUtils::waitCount);
volatile uint32_t pollValue = 3u;
uint32_t expectedValue = 1;
uint32_t oldCount = CpuIntrinsicsTests::pauseCounter.load();
bool ret = WaitUtils::waitFunction(&pollValue, expectedValue);
EXPECT_TRUE(ret);
// waitCount is 0 here, so the counter must be unchanged.
EXPECT_EQ(oldCount + WaitUtils::waitCount, CpuIntrinsicsTests::pauseCounter);
}