Revert "performance: Reuse GPU timestamp instead of KMD escape"

This reverts commit 9ca2091725.

Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
commit da9df9f0e7 (parent e0a580fce7)
Author: Compute-Runtime-Validation
Date: 2024-04-18 09:46:41 +02:00
Committed by: Compute-Runtime-Automation
10 changed files with 8 additions and 184 deletions
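For context, the change being reverted cached the last (CPU, GPU) timestamp pair returned by the KMD and, for calls arriving within a refresh window (GpuTimestampRefreshTimeout), extrapolated the GPU timestamp from the CPU clock and the device timer resolution instead of issuing another KMD escape. A minimal, self-contained C++ sketch of that idea follows; the class, callback, and field names are illustrative only and are not the actual NEO interfaces (DeviceTime::getGpuCpuTimestamps in the diff below is the real implementation being removed).

#include <chrono>
#include <cstdint>
#include <functional>
#include <optional>

// Illustrative stand-in for NEO's TimeStampData (cpuTimeinNS / gpuTimeStamp).
struct TimeStampPair {
    uint64_t cpuTimeInNs = 0;
    uint64_t gpuTimeStamp = 0;
};

// Hypothetical helper mirroring the reverted DeviceTime caching: the expensive
// KMD query is modelled as a callback, and calls inside the refresh window
// extrapolate GPU ticks from the CPU delta instead of calling it again.
class CachedGpuTimestamp {
  public:
    CachedGpuTimestamp(std::function<bool(TimeStampPair &)> fetchFromKmd,
                       double gpuTickPeriodNs, uint64_t refreshTimeoutNs)
        : fetchFromKmd(std::move(fetchFromKmd)),
          gpuTickPeriodNs(gpuTickPeriodNs),
          refreshTimeoutNs(refreshTimeoutNs) {}

    bool get(TimeStampPair &out) {
        const uint64_t nowNs = cpuNowNs();
        const bool refresh = !cached.has_value() ||
                             (nowNs - cached->cpuTimeInNs) >= refreshTimeoutNs;
        if (refresh) {
            if (!fetchFromKmd(out)) {
                return false; // KMD failure is propagated, as in the original code
            }
            cached = out;
            return true;
        }
        // Inside the window: no KMD call, derive GPU ticks from the CPU delta.
        const uint64_t cpuDeltaNs = nowNs - cached->cpuTimeInNs;
        out.cpuTimeInNs = nowNs;
        out.gpuTimeStamp = cached->gpuTimeStamp +
                           static_cast<uint64_t>(cpuDeltaNs / gpuTickPeriodNs);
        return true;
    }

  private:
    static uint64_t cpuNowNs() {
        using namespace std::chrono;
        return static_cast<uint64_t>(
            duration_cast<nanoseconds>(steady_clock::now().time_since_epoch()).count());
    }

    std::function<bool(TimeStampPair &)> fetchFromKmd;
    double gpuTickPeriodNs;    // e.g. ~52.083 ns for a 19.2 MHz timestamp counter
    uint64_t refreshTimeoutNs; // cached pair is trusted for this long
    std::optional<TimeStampPair> cached;
};

Reverting removes exactly this caching path, so every getGpuCpuTime call goes back to querying the KMD directly.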

View File

@@ -25,9 +25,7 @@ TEST(MockOSTime, WhenSleepingThenDeviceAndHostTimerAreIncreased) {
cl_ulong hostTimestamp[2] = {0, 0};
auto mDev = MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr);
auto osTime = new MockOSTime();
osTime->setDeviceTimerResolution(mDev->getHardwareInfo());
mDev->setOSTime(osTime);
mDev->setOSTime(new MockOSTime());
mDev->getDeviceAndHostTimer(
&deviceTimestamp[0],

View File

@@ -513,7 +513,6 @@ DECLARE_DEBUG_VARIABLE(int32_t, EnableBcsSwControlWa, -1, "Enable BCS WA via BCS
DECLARE_DEBUG_VARIABLE(bool, EnableHostAllocationMemPolicy, false, "Enables Memory Policy for host allocation")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideHostAllocationMemPolicyMode, -1, "Override Memory Policy mode for host allocation -1: default (use the system configuration), 0: MPOL_DEFAULT, 1: MPOL_PREFERRED, 2: MPOL_BIND, 3: MPOL_INTERLEAVED, 4: MPOL_LOCAL, 5: MPOL_PREFERRED_MANY")
DECLARE_DEBUG_VARIABLE(int32_t, EnableFtrTile64Optimization, 0, "Control feature Tile64 Optimization flag passed to gmmlib. -1: pass as-is, 0: disable flag(default due to NEO-10623), 1: enable flag");
DECLARE_DEBUG_VARIABLE(int32_t, GpuTimestampRefreshTimeout, -1, "Set timeout to refresh cached GPU timestamp, -1: default 5 ms, >=0: timeout in ms")
/* IMPLICIT SCALING */
DECLARE_DEBUG_VARIABLE(int32_t, EnableWalkerPartition, -1, "-1: default, 0: disable, 1: enable, Enables Walker Partitioning via WPARID.")

View File

@@ -124,7 +124,6 @@ void RootDeviceEnvironment::initGmm() {
void RootDeviceEnvironment::initOsTime() {
if (!osTime) {
osTime = OSTime::create(osInterface.get());
osTime->setDeviceTimerResolution(*hwInfo);
}
}

View File

@@ -7,8 +7,6 @@
#include "shared/source/os_interface/os_time.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/helpers/debug_helpers.h"
#include "shared/source/helpers/hw_info.h"
#include <mutex>
@@ -19,12 +17,6 @@ double OSTime::getDeviceTimerResolution(HardwareInfo const &hwInfo) {
return hwInfo.capabilityTable.defaultProfilingTimerResolution;
};
DeviceTime::DeviceTime() {
if (debugManager.flags.GpuTimestampRefreshTimeout.get() != -1) {
timestampRefreshTimeoutMS = debugManager.flags.GpuTimestampRefreshTimeout.get();
}
}
bool DeviceTime::getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *osTime) {
pGpuCpuTime->cpuTimeinNS = 0;
pGpuCpuTime->gpuTimeStamp = 0;
@@ -39,50 +31,8 @@ uint64_t DeviceTime::getDynamicDeviceTimerClock(HardwareInfo const &hwInfo) cons
return static_cast<uint64_t>(1000000000.0 / OSTime::getDeviceTimerResolution(hwInfo));
}
void DeviceTime::setDeviceTimerResolution(HardwareInfo const &hwInfo) {
deviceTimerResolution = getDynamicDeviceTimerResolution(hwInfo);
if (debugManager.flags.OverrideProfilingTimerResolution.get() != -1) {
deviceTimerResolution = static_cast<double>(debugManager.flags.OverrideProfilingTimerResolution.get());
}
}
/**
* @brief If this method is called within 100ms interval, GPU timestamp
* will be calculated based on CPU timestamp and previous GPU ticks
* to reduce amount of internal KMD calls.
*
* @return returns false if internal call to KMD failed. True otherwise.
*/
bool DeviceTime::getGpuCpuTimestamps(TimeStampData *timeStamp, OSTime *osTime) {
bool refreshTimestamps = false;
uint64_t cpuTimeinNS;
osTime->getCpuTime(&cpuTimeinNS);
auto cpuTimeDiffInNS = cpuTimeinNS - fetchedTimestamps.cpuTimeinNS;
if (cpuTimeDiffInNS >= (NSEC_PER_MSEC * timestampRefreshTimeoutMS)) {
refreshTimestamps = true;
}
// Refresh on first call
if (!initialGpuTimeStamp) {
refreshTimestamps = true;
}
if (refreshTimestamps) {
if (!getGpuCpuTimeImpl(timeStamp, osTime)) {
return false;
}
fetchedTimestamps = *timeStamp;
} else {
timeStamp->cpuTimeinNS = cpuTimeinNS;
UNRECOVERABLE_IF(deviceTimerResolution == 0);
timeStamp->gpuTimeStamp = fetchedTimestamps.gpuTimeStamp + static_cast<uint64_t>(cpuTimeDiffInNS / deviceTimerResolution);
}
return true;
}
bool DeviceTime::getGpuCpuTime(TimeStampData *pGpuCpuTime, OSTime *osTime) {
if (!getGpuCpuTimestamps(pGpuCpuTime, osTime)) {
if (!getGpuCpuTimeImpl(pGpuCpuTime, osTime)) {
return false;
}
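As a worked example of the interpolation removed above (assuming the 19.2 MHz GPU timestamp counter used by the DRM time test later in this diff, i.e. a tick period of 1e9 / 19,200,000 ≈ 52.083 ns): a call arriving 1 ms after the cached pair would report fetchedTimestamps.gpuTimeStamp + 1,000,000 / 52.083 ≈ 19,200 ticks without touching the KMD, and only once the CPU delta exceeded timestampRefreshTimeoutMS would getGpuCpuTimeImpl be invoked again.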

View File

@@ -10,7 +10,7 @@
#include <optional>
#define NSEC_PER_SEC (1000000000ULL)
#define NSEC_PER_MSEC (NSEC_PER_SEC / 1000)
namespace NEO {
class OSInterface;
@@ -25,21 +25,15 @@ class OSTime;
class DeviceTime {
public:
DeviceTime();
virtual ~DeviceTime() = default;
bool getGpuCpuTime(TimeStampData *pGpuCpuTime, OSTime *osTime);
virtual bool getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *osTime);
virtual double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const;
virtual uint64_t getDynamicDeviceTimerClock(HardwareInfo const &hwInfo) const;
bool getGpuCpuTimestamps(TimeStampData *timeStamp, OSTime *osTime);
void setDeviceTimerResolution(HardwareInfo const &hwInfo);
std::optional<uint64_t> initialGpuTimeStamp{};
bool waitingForGpuTimeStampOverflow = false;
uint64_t gpuTimeStampOverflowCounter = 0;
double deviceTimerResolution = 0;
uint32_t timestampRefreshTimeoutMS = 100u;
TimeStampData fetchedTimestamps{};
};
class OSTime {
@@ -67,10 +61,6 @@ class OSTime {
uint64_t getMaxGpuTimeStamp() const { return maxGpuTimeStamp; }
void setDeviceTimerResolution(HardwareInfo const &hwInfo) const {
deviceTime->setDeviceTimerResolution(hwInfo);
}
protected:
OSTime() = default;
OSInterface *osInterface = nullptr;

View File

@@ -18,28 +18,15 @@ class MockDeviceTimeDrm : public DeviceTimeDrm {
using DeviceTimeDrm::pDrm;
bool getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *osTime) override {
getGpuCpuTimeImplCalled++;
if (callBaseGetGpuCpuTimeImpl) {
return DeviceTimeDrm::getGpuCpuTimeImpl(pGpuCpuTime, osTime);
}
*pGpuCpuTime = gpuCpuTimeValue;
return getGpuCpuTimeImplResult;
}
double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override {
if (callGetDynamicDeviceTimerResolution) {
return DeviceTimeDrm::getDynamicDeviceTimerResolution(hwInfo);
}
return dynamicDeviceTimerResolutionValue;
}
bool callBaseGetGpuCpuTimeImpl = true;
bool getGpuCpuTimeImplResult = true;
TimeStampData gpuCpuTimeValue{};
uint32_t getGpuCpuTimeImplCalled = 0;
bool callGetDynamicDeviceTimerResolution = false;
double dynamicDeviceTimerResolutionValue = 1.0;
};
class MockOSTimeLinux : public OSTimeLinux {

View File

@@ -50,11 +50,11 @@ const char *MockDevice::getProductAbbrev() const {
MockDevice::MockDevice(ExecutionEnvironment *executionEnvironment, uint32_t rootDeviceIndex)
: RootDevice(executionEnvironment, rootDeviceIndex) {
UltDeviceFactory::initializeMemoryManager(*executionEnvironment);
auto &hwInfo = getHardwareInfo();
if (!getOSTime()) {
getRootDeviceEnvironmentRef().osTime = MockOSTime::create();
getRootDeviceEnvironmentRef().osTime->setDeviceTimerResolution(hwInfo);
}
auto &hwInfo = getHardwareInfo();
executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->setHwInfoAndInitHelpers(&hwInfo);
executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->initGmm();
if (!executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->memoryOperationsInterface) {

View File

@@ -599,5 +599,4 @@ ForceTlbFlushWithTaskCountAfterCopy = -1
ForceSynchronizedDispatchMode = -1
DirectSubmissionControllerAdjustOnThrottleAndAcLineStatus = -1
ReadOnlyAllocationsTypeMask = 0
GpuTimestampRefreshTimeout = -1
# Please don't edit below this line

View File

@@ -9,7 +9,6 @@
#include "shared/source/os_interface/linux/ioctl_helper.h"
#include "shared/source/os_interface/linux/os_time_linux.h"
#include "shared/source/os_interface/os_interface.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/mocks/linux/mock_os_time_linux.h"
#include "shared/test/common/mocks/mock_execution_environment.h"
#include "shared/test/common/os_interface/linux/device_command_stream_fixture.h"
@@ -49,8 +48,6 @@ struct DrmTimeTest : public ::testing::Test {
osTime = MockOSTimeLinux::create(*rootDeviceEnvironment.osInterface);
osTime->setResolutionFunc(resolutionFuncTrue);
osTime->setGetTimeFunc(getTimeFuncTrue);
auto hwInfo = rootDeviceEnvironment.getMutableHardwareInfo();
osTime->setDeviceTimerResolution(*hwInfo);
deviceTime = osTime->getDeviceTime();
}
@@ -205,7 +202,7 @@ TEST_F(DrmTimeTest, givenGpuTimestampResolutionQueryWhenIoctlFailsThenDefaultRes
drm->getParamRetValue = 0;
drm->ioctlRes = -1;
deviceTime->callGetDynamicDeviceTimerResolution = true;
auto result = osTime->getDynamicDeviceTimerResolution(*defaultHwInfo);
EXPECT_DOUBLE_EQ(result, defaultResolution);
}
@@ -242,7 +239,7 @@ TEST_F(DrmTimeTest, givenGpuTimestampResolutionQueryWhenIoctlSuccedsThenCorrectR
// 19200000 is frequency yielding 52.083ns resolution
drm->getParamRetValue = 19200000;
drm->ioctlRes = 0;
deviceTime->callGetDynamicDeviceTimerResolution = true;
auto result = osTime->getDynamicDeviceTimerResolution(*defaultHwInfo);
EXPECT_DOUBLE_EQ(result, 52.08333333333333);
}
@@ -285,46 +282,3 @@ TEST_F(DrmTimeTest, whenGettingMaxGpuTimeStampValueThenHwInfoBasedValueIsReturne
EXPECT_EQ(0ull, osTime->getMaxGpuTimeStamp());
}
}
TEST_F(DrmTimeTest, whenGettingMaxGpuTimeStampValueWithinIntervalThenReuseFromPreviousCall) {
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 0u);
TimeStampData gpuCpuTime;
osTime->getGpuCpuTime(&gpuCpuTime);
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u);
auto gpuTimestampBefore = gpuCpuTime.gpuTimeStamp;
auto cpuTimeBefore = actualTime;
osTime->getGpuCpuTime(&gpuCpuTime);
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u);
auto gpuTimestampAfter = gpuCpuTime.gpuTimeStamp;
auto cpuTimeAfter = actualTime;
auto cpuTimeDiff = cpuTimeAfter - cpuTimeBefore;
auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
auto hwInfo = rootDeviceEnvironment.getHardwareInfo();
auto deviceTimerResolution = deviceTime->getDynamicDeviceTimerResolution(*hwInfo);
auto gpuTimestampDiff = static_cast<uint64_t>(cpuTimeDiff / deviceTimerResolution);
EXPECT_EQ(gpuTimestampAfter, gpuTimestampBefore + gpuTimestampDiff);
}
TEST_F(DrmTimeTest, whenGettingMaxGpuTimeStampValueAfterIntervalThenCallToKmd) {
DebugManagerStateRestore restore;
debugManager.flags.GpuTimestampRefreshTimeout.set(0);
// Recreate mock to apply debug flag
auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
osTime = MockOSTimeLinux::create(*rootDeviceEnvironment.osInterface);
osTime->setResolutionFunc(resolutionFuncTrue);
osTime->setGetTimeFunc(getTimeFuncTrue);
auto deviceTime = osTime->getDeviceTime();
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 0u);
TimeStampData gpuCpuTime;
osTime->getGpuCpuTime(&gpuCpuTime);
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u);
osTime->getGpuCpuTime(&gpuCpuTime);
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 2u);
}

View File

@@ -29,19 +29,10 @@ BOOL WINAPI queryPerformanceCounterMock(
class MockDeviceTimeWin : public MockDeviceTime {
public:
bool getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *osTime) override {
getGpuCpuTimeImplCalled++;
*pGpuCpuTime = gpuCpuTimeValue;
return getGpuCpuTimeImplResult;
return true;
}
double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override {
return deviceTimerResolution;
}
bool getGpuCpuTimeImplResult = true;
TimeStampData gpuCpuTimeValue{};
uint32_t getGpuCpuTimeImplCalled = 0;
double deviceTimerResolution = 1;
};
struct OSTimeWinTest : public ::testing::Test {
@@ -205,46 +196,3 @@ TEST_F(OSTimeWinTest, whenGettingMaxGpuTimeStampValueThenHwInfoBasedValueIsRetur
EXPECT_EQ(0ull, osTime->getMaxGpuTimeStamp());
}
}
TEST_F(OSTimeWinTest, whenGettingMaxGpuTimeStampValueWithinIntervalThenReuseFromPreviousCall) {
osTime->overrideQueryPerformanceCounterFunction(queryPerformanceCounterMock);
LARGE_INTEGER frequency = {};
frequency.QuadPart = NSEC_PER_SEC;
osTime->setFrequency(frequency);
auto deviceTime = new MockDeviceTimeWin();
osTime->deviceTime.reset(deviceTime);
auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
auto hwInfo = rootDeviceEnvironment.getHardwareInfo();
osTime->setDeviceTimerResolution(*hwInfo);
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 0u);
TimeStampData gpuCpuTime;
deviceTime->gpuCpuTimeValue = {1u, 1u};
valueToSet.QuadPart = 1;
osTime->getGpuCpuTime(&gpuCpuTime);
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u);
auto gpuTimestampBefore = gpuCpuTime.gpuTimeStamp;
auto cpuTimeBefore = gpuCpuTime.cpuTimeinNS;
valueToSet.QuadPart = 5;
osTime->getGpuCpuTime(&gpuCpuTime);
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u);
auto gpuTimestampAfter = gpuCpuTime.gpuTimeStamp;
auto cpuTimeAfter = gpuCpuTime.cpuTimeinNS;
auto cpuTimeDiff = cpuTimeAfter - cpuTimeBefore;
auto deviceTimerResolution = deviceTime->getDynamicDeviceTimerResolution(*hwInfo);
auto gpuTimestampDiff = static_cast<uint64_t>(cpuTimeDiff / deviceTimerResolution);
EXPECT_EQ(gpuTimestampAfter, gpuTimestampBefore + gpuTimestampDiff);
}
TEST_F(OSTimeWinTest, whenGetGpuCpuTimeFailedThenReturnFalse) {
TimeStampData gpuCpuTime;
auto deviceTime = new MockDeviceTimeWin();
osTime->deviceTime.reset(deviceTime);
deviceTime->getGpuCpuTimeImplResult = false;
EXPECT_FALSE(osTime->getGpuCpuTime(&gpuCpuTime));
}