mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-20 08:53:55 +08:00
Revert "performance: Reuse GPU timestamp instead of KMD escape"
This reverts commit 9ca2091725.
Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
e0a580fce7
commit
da9df9f0e7
@@ -25,9 +25,7 @@ TEST(MockOSTime, WhenSleepingThenDeviceAndHostTimerAreIncreased) {
|
||||
cl_ulong hostTimestamp[2] = {0, 0};
|
||||
|
||||
auto mDev = MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr);
|
||||
auto osTime = new MockOSTime();
|
||||
osTime->setDeviceTimerResolution(mDev->getHardwareInfo());
|
||||
mDev->setOSTime(osTime);
|
||||
mDev->setOSTime(new MockOSTime());
|
||||
|
||||
mDev->getDeviceAndHostTimer(
|
||||
&deviceTimestamp[0],
|
||||
|
||||
@@ -513,7 +513,6 @@ DECLARE_DEBUG_VARIABLE(int32_t, EnableBcsSwControlWa, -1, "Enable BCS WA via BCS
|
||||
DECLARE_DEBUG_VARIABLE(bool, EnableHostAllocationMemPolicy, false, "Enables Memory Policy for host allocation")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideHostAllocationMemPolicyMode, -1, "Override Memory Policy mode for host allocation -1: default (use the system configuration), 0: MPOL_DEFAULT, 1: MPOL_PREFERRED, 2: MPOL_BIND, 3: MPOL_INTERLEAVED, 4: MPOL_LOCAL, 5: MPOL_PREFERRED_MANY")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableFtrTile64Optimization, 0, "Control feature Tile64 Optimization flag passed to gmmlib. -1: pass as-is, 0: disable flag(default due to NEO-10623), 1: enable flag");
|
||||
// -1 keeps the built-in default of DeviceTime::timestampRefreshTimeoutMS (100 ms); any value >= 0 replaces it.
DECLARE_DEBUG_VARIABLE(int32_t, GpuTimestampRefreshTimeout, -1, "Set timeout to refresh cached GPU timestamp, -1: default 100 ms, >=0: timeout in ms")
|
||||
|
||||
/* IMPLICIT SCALING */
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableWalkerPartition, -1, "-1: default, 0: disable, 1: enable, Enables Walker Partitioning via WPARID.")
|
||||
|
||||
@@ -124,7 +124,6 @@ void RootDeviceEnvironment::initGmm() {
|
||||
void RootDeviceEnvironment::initOsTime() {
|
||||
if (!osTime) {
|
||||
osTime = OSTime::create(osInterface.get());
|
||||
osTime->setDeviceTimerResolution(*hwInfo);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -7,8 +7,6 @@
|
||||
|
||||
#include "shared/source/os_interface/os_time.h"
|
||||
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/helpers/debug_helpers.h"
|
||||
#include "shared/source/helpers/hw_info.h"
|
||||
|
||||
#include <mutex>
|
||||
@@ -19,12 +17,6 @@ double OSTime::getDeviceTimerResolution(HardwareInfo const &hwInfo) {
|
||||
return hwInfo.capabilityTable.defaultProfilingTimerResolution;
|
||||
};
|
||||
|
||||
DeviceTime::DeviceTime() {
|
||||
if (debugManager.flags.GpuTimestampRefreshTimeout.get() != -1) {
|
||||
timestampRefreshTimeoutMS = debugManager.flags.GpuTimestampRefreshTimeout.get();
|
||||
}
|
||||
}
|
||||
|
||||
bool DeviceTime::getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *osTime) {
|
||||
pGpuCpuTime->cpuTimeinNS = 0;
|
||||
pGpuCpuTime->gpuTimeStamp = 0;
|
||||
@@ -39,50 +31,8 @@ uint64_t DeviceTime::getDynamicDeviceTimerClock(HardwareInfo const &hwInfo) cons
|
||||
return static_cast<uint64_t>(1000000000.0 / OSTime::getDeviceTimerResolution(hwInfo));
|
||||
}
|
||||
|
||||
void DeviceTime::setDeviceTimerResolution(HardwareInfo const &hwInfo) {
|
||||
deviceTimerResolution = getDynamicDeviceTimerResolution(hwInfo);
|
||||
if (debugManager.flags.OverrideProfilingTimerResolution.get() != -1) {
|
||||
deviceTimerResolution = static_cast<double>(debugManager.flags.OverrideProfilingTimerResolution.get());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief If this method is called within the refresh timeout (timestampRefreshTimeoutMS, 100 ms by default), GPU timestamp
|
||||
* will be calculated based on CPU timestamp and previous GPU ticks
|
||||
* to reduce amount of internal KMD calls.
|
||||
*
|
||||
* @return returns false if internal call to KMD failed. True otherwise.
|
||||
*/
|
||||
bool DeviceTime::getGpuCpuTimestamps(TimeStampData *timeStamp, OSTime *osTime) {
|
||||
bool refreshTimestamps = false;
|
||||
|
||||
uint64_t cpuTimeinNS;
|
||||
osTime->getCpuTime(&cpuTimeinNS);
|
||||
auto cpuTimeDiffInNS = cpuTimeinNS - fetchedTimestamps.cpuTimeinNS;
|
||||
if (cpuTimeDiffInNS >= (NSEC_PER_MSEC * timestampRefreshTimeoutMS)) {
|
||||
refreshTimestamps = true;
|
||||
}
|
||||
|
||||
// Refresh on first call
|
||||
if (!initialGpuTimeStamp) {
|
||||
refreshTimestamps = true;
|
||||
}
|
||||
|
||||
if (refreshTimestamps) {
|
||||
if (!getGpuCpuTimeImpl(timeStamp, osTime)) {
|
||||
return false;
|
||||
}
|
||||
fetchedTimestamps = *timeStamp;
|
||||
} else {
|
||||
timeStamp->cpuTimeinNS = cpuTimeinNS;
|
||||
UNRECOVERABLE_IF(deviceTimerResolution == 0);
|
||||
timeStamp->gpuTimeStamp = fetchedTimestamps.gpuTimeStamp + static_cast<uint64_t>(cpuTimeDiffInNS / deviceTimerResolution);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DeviceTime::getGpuCpuTime(TimeStampData *pGpuCpuTime, OSTime *osTime) {
|
||||
if (!getGpuCpuTimestamps(pGpuCpuTime, osTime)) {
|
||||
if (!getGpuCpuTimeImpl(pGpuCpuTime, osTime)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
#include <optional>
|
||||
|
||||
#define NSEC_PER_SEC (1000000000ULL)
|
||||
#define NSEC_PER_MSEC (NSEC_PER_SEC / 1000)
|
||||
|
||||
namespace NEO {
|
||||
|
||||
class OSInterface;
|
||||
@@ -25,21 +25,15 @@ class OSTime;
|
||||
|
||||
class DeviceTime {
|
||||
public:
|
||||
DeviceTime();
|
||||
virtual ~DeviceTime() = default;
|
||||
bool getGpuCpuTime(TimeStampData *pGpuCpuTime, OSTime *osTime);
|
||||
virtual bool getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *osTime);
|
||||
virtual double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const;
|
||||
virtual uint64_t getDynamicDeviceTimerClock(HardwareInfo const &hwInfo) const;
|
||||
bool getGpuCpuTimestamps(TimeStampData *timeStamp, OSTime *osTime);
|
||||
void setDeviceTimerResolution(HardwareInfo const &hwInfo);
|
||||
|
||||
std::optional<uint64_t> initialGpuTimeStamp{};
|
||||
bool waitingForGpuTimeStampOverflow = false;
|
||||
uint64_t gpuTimeStampOverflowCounter = 0;
|
||||
double deviceTimerResolution = 0;
|
||||
uint32_t timestampRefreshTimeoutMS = 100u;
|
||||
TimeStampData fetchedTimestamps{};
|
||||
};
|
||||
|
||||
class OSTime {
|
||||
@@ -67,10 +61,6 @@ class OSTime {
|
||||
|
||||
uint64_t getMaxGpuTimeStamp() const { return maxGpuTimeStamp; }
|
||||
|
||||
void setDeviceTimerResolution(HardwareInfo const &hwInfo) const {
|
||||
deviceTime->setDeviceTimerResolution(hwInfo);
|
||||
}
|
||||
|
||||
protected:
|
||||
OSTime() = default;
|
||||
OSInterface *osInterface = nullptr;
|
||||
|
||||
@@ -18,28 +18,15 @@ class MockDeviceTimeDrm : public DeviceTimeDrm {
|
||||
using DeviceTimeDrm::pDrm;
|
||||
|
||||
bool getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *osTime) override {
|
||||
getGpuCpuTimeImplCalled++;
|
||||
if (callBaseGetGpuCpuTimeImpl) {
|
||||
return DeviceTimeDrm::getGpuCpuTimeImpl(pGpuCpuTime, osTime);
|
||||
}
|
||||
*pGpuCpuTime = gpuCpuTimeValue;
|
||||
return getGpuCpuTimeImplResult;
|
||||
}
|
||||
|
||||
double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override {
|
||||
if (callGetDynamicDeviceTimerResolution) {
|
||||
return DeviceTimeDrm::getDynamicDeviceTimerResolution(hwInfo);
|
||||
}
|
||||
return dynamicDeviceTimerResolutionValue;
|
||||
}
|
||||
|
||||
bool callBaseGetGpuCpuTimeImpl = true;
|
||||
bool getGpuCpuTimeImplResult = true;
|
||||
TimeStampData gpuCpuTimeValue{};
|
||||
uint32_t getGpuCpuTimeImplCalled = 0;
|
||||
|
||||
bool callGetDynamicDeviceTimerResolution = false;
|
||||
double dynamicDeviceTimerResolutionValue = 1.0;
|
||||
};
|
||||
|
||||
class MockOSTimeLinux : public OSTimeLinux {
|
||||
|
||||
@@ -50,11 +50,11 @@ const char *MockDevice::getProductAbbrev() const {
|
||||
MockDevice::MockDevice(ExecutionEnvironment *executionEnvironment, uint32_t rootDeviceIndex)
|
||||
: RootDevice(executionEnvironment, rootDeviceIndex) {
|
||||
UltDeviceFactory::initializeMemoryManager(*executionEnvironment);
|
||||
auto &hwInfo = getHardwareInfo();
|
||||
|
||||
if (!getOSTime()) {
|
||||
getRootDeviceEnvironmentRef().osTime = MockOSTime::create();
|
||||
getRootDeviceEnvironmentRef().osTime->setDeviceTimerResolution(hwInfo);
|
||||
}
|
||||
auto &hwInfo = getHardwareInfo();
|
||||
executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->setHwInfoAndInitHelpers(&hwInfo);
|
||||
executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->initGmm();
|
||||
if (!executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->memoryOperationsInterface) {
|
||||
|
||||
@@ -599,5 +599,4 @@ ForceTlbFlushWithTaskCountAfterCopy = -1
|
||||
ForceSynchronizedDispatchMode = -1
|
||||
DirectSubmissionControllerAdjustOnThrottleAndAcLineStatus = -1
|
||||
ReadOnlyAllocationsTypeMask = 0
|
||||
GpuTimestampRefreshTimeout = -1
|
||||
# Please don't edit below this line
|
||||
|
||||
@@ -9,7 +9,6 @@
|
||||
#include "shared/source/os_interface/linux/ioctl_helper.h"
|
||||
#include "shared/source/os_interface/linux/os_time_linux.h"
|
||||
#include "shared/source/os_interface/os_interface.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/mocks/linux/mock_os_time_linux.h"
|
||||
#include "shared/test/common/mocks/mock_execution_environment.h"
|
||||
#include "shared/test/common/os_interface/linux/device_command_stream_fixture.h"
|
||||
@@ -49,8 +48,6 @@ struct DrmTimeTest : public ::testing::Test {
|
||||
osTime = MockOSTimeLinux::create(*rootDeviceEnvironment.osInterface);
|
||||
osTime->setResolutionFunc(resolutionFuncTrue);
|
||||
osTime->setGetTimeFunc(getTimeFuncTrue);
|
||||
auto hwInfo = rootDeviceEnvironment.getMutableHardwareInfo();
|
||||
osTime->setDeviceTimerResolution(*hwInfo);
|
||||
deviceTime = osTime->getDeviceTime();
|
||||
}
|
||||
|
||||
@@ -205,7 +202,7 @@ TEST_F(DrmTimeTest, givenGpuTimestampResolutionQueryWhenIoctlFailsThenDefaultRes
|
||||
|
||||
drm->getParamRetValue = 0;
|
||||
drm->ioctlRes = -1;
|
||||
deviceTime->callGetDynamicDeviceTimerResolution = true;
|
||||
|
||||
auto result = osTime->getDynamicDeviceTimerResolution(*defaultHwInfo);
|
||||
EXPECT_DOUBLE_EQ(result, defaultResolution);
|
||||
}
|
||||
@@ -242,7 +239,7 @@ TEST_F(DrmTimeTest, givenGpuTimestampResolutionQueryWhenIoctlSuccedsThenCorrectR
|
||||
// 19200000 is frequency yielding 52.083ns resolution
|
||||
drm->getParamRetValue = 19200000;
|
||||
drm->ioctlRes = 0;
|
||||
deviceTime->callGetDynamicDeviceTimerResolution = true;
|
||||
|
||||
auto result = osTime->getDynamicDeviceTimerResolution(*defaultHwInfo);
|
||||
EXPECT_DOUBLE_EQ(result, 52.08333333333333);
|
||||
}
|
||||
@@ -285,46 +282,3 @@ TEST_F(DrmTimeTest, whenGettingMaxGpuTimeStampValueThenHwInfoBasedValueIsReturne
|
||||
EXPECT_EQ(0ull, osTime->getMaxGpuTimeStamp());
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(DrmTimeTest, whenGettingMaxGpuTimeStampValueWithinIntervalThenReuseFromPreviousCall) {
|
||||
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 0u);
|
||||
TimeStampData gpuCpuTime;
|
||||
osTime->getGpuCpuTime(&gpuCpuTime);
|
||||
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u);
|
||||
|
||||
auto gpuTimestampBefore = gpuCpuTime.gpuTimeStamp;
|
||||
auto cpuTimeBefore = actualTime;
|
||||
|
||||
osTime->getGpuCpuTime(&gpuCpuTime);
|
||||
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u);
|
||||
|
||||
auto gpuTimestampAfter = gpuCpuTime.gpuTimeStamp;
|
||||
auto cpuTimeAfter = actualTime;
|
||||
|
||||
auto cpuTimeDiff = cpuTimeAfter - cpuTimeBefore;
|
||||
auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
|
||||
auto hwInfo = rootDeviceEnvironment.getHardwareInfo();
|
||||
auto deviceTimerResolution = deviceTime->getDynamicDeviceTimerResolution(*hwInfo);
|
||||
auto gpuTimestampDiff = static_cast<uint64_t>(cpuTimeDiff / deviceTimerResolution);
|
||||
EXPECT_EQ(gpuTimestampAfter, gpuTimestampBefore + gpuTimestampDiff);
|
||||
}
|
||||
|
||||
TEST_F(DrmTimeTest, whenGettingMaxGpuTimeStampValueAfterIntervalThenCallToKmd) {
|
||||
DebugManagerStateRestore restore;
|
||||
debugManager.flags.GpuTimestampRefreshTimeout.set(0);
|
||||
|
||||
// Recreate mock to apply debug flag
|
||||
auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
|
||||
osTime = MockOSTimeLinux::create(*rootDeviceEnvironment.osInterface);
|
||||
osTime->setResolutionFunc(resolutionFuncTrue);
|
||||
osTime->setGetTimeFunc(getTimeFuncTrue);
|
||||
auto deviceTime = osTime->getDeviceTime();
|
||||
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 0u);
|
||||
|
||||
TimeStampData gpuCpuTime;
|
||||
osTime->getGpuCpuTime(&gpuCpuTime);
|
||||
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u);
|
||||
|
||||
osTime->getGpuCpuTime(&gpuCpuTime);
|
||||
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 2u);
|
||||
}
|
||||
|
||||
@@ -29,19 +29,10 @@ BOOL WINAPI queryPerformanceCounterMock(
|
||||
class MockDeviceTimeWin : public MockDeviceTime {
|
||||
public:
|
||||
bool getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *osTime) override {
|
||||
getGpuCpuTimeImplCalled++;
|
||||
*pGpuCpuTime = gpuCpuTimeValue;
|
||||
return getGpuCpuTimeImplResult;
|
||||
return true;
|
||||
}
|
||||
|
||||
double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override {
|
||||
return deviceTimerResolution;
|
||||
}
|
||||
|
||||
bool getGpuCpuTimeImplResult = true;
|
||||
TimeStampData gpuCpuTimeValue{};
|
||||
uint32_t getGpuCpuTimeImplCalled = 0;
|
||||
double deviceTimerResolution = 1;
|
||||
};
|
||||
|
||||
struct OSTimeWinTest : public ::testing::Test {
|
||||
@@ -205,46 +196,3 @@ TEST_F(OSTimeWinTest, whenGettingMaxGpuTimeStampValueThenHwInfoBasedValueIsRetur
|
||||
EXPECT_EQ(0ull, osTime->getMaxGpuTimeStamp());
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(OSTimeWinTest, whenGettingMaxGpuTimeStampValueWithinIntervalThenReuseFromPreviousCall) {
|
||||
osTime->overrideQueryPerformanceCounterFunction(queryPerformanceCounterMock);
|
||||
LARGE_INTEGER frequency = {};
|
||||
frequency.QuadPart = NSEC_PER_SEC;
|
||||
osTime->setFrequency(frequency);
|
||||
|
||||
auto deviceTime = new MockDeviceTimeWin();
|
||||
osTime->deviceTime.reset(deviceTime);
|
||||
auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
|
||||
auto hwInfo = rootDeviceEnvironment.getHardwareInfo();
|
||||
osTime->setDeviceTimerResolution(*hwInfo);
|
||||
|
||||
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 0u);
|
||||
TimeStampData gpuCpuTime;
|
||||
deviceTime->gpuCpuTimeValue = {1u, 1u};
|
||||
valueToSet.QuadPart = 1;
|
||||
osTime->getGpuCpuTime(&gpuCpuTime);
|
||||
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u);
|
||||
|
||||
auto gpuTimestampBefore = gpuCpuTime.gpuTimeStamp;
|
||||
auto cpuTimeBefore = gpuCpuTime.cpuTimeinNS;
|
||||
valueToSet.QuadPart = 5;
|
||||
osTime->getGpuCpuTime(&gpuCpuTime);
|
||||
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u);
|
||||
|
||||
auto gpuTimestampAfter = gpuCpuTime.gpuTimeStamp;
|
||||
auto cpuTimeAfter = gpuCpuTime.cpuTimeinNS;
|
||||
|
||||
auto cpuTimeDiff = cpuTimeAfter - cpuTimeBefore;
|
||||
|
||||
auto deviceTimerResolution = deviceTime->getDynamicDeviceTimerResolution(*hwInfo);
|
||||
auto gpuTimestampDiff = static_cast<uint64_t>(cpuTimeDiff / deviceTimerResolution);
|
||||
EXPECT_EQ(gpuTimestampAfter, gpuTimestampBefore + gpuTimestampDiff);
|
||||
}
|
||||
|
||||
TEST_F(OSTimeWinTest, whenGetGpuCpuTimeFailedThenReturnFalse) {
|
||||
TimeStampData gpuCpuTime;
|
||||
auto deviceTime = new MockDeviceTimeWin();
|
||||
osTime->deviceTime.reset(deviceTime);
|
||||
deviceTime->getGpuCpuTimeImplResult = false;
|
||||
EXPECT_FALSE(osTime->getGpuCpuTime(&gpuCpuTime));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user