feature: print global timestamp

Related-To: LOCI-4285

Signed-off-by: Joshua Santosh Ranjan <joshua.santosh.ranjan@intel.com>
This commit is contained in:
Joshua Santosh Ranjan
2023-05-15 16:10:27 +00:00
committed by Compute-Runtime-Automation
parent d0d5a03be7
commit 29682a4f8d
4 changed files with 81 additions and 1 deletions

View File

@@ -885,6 +885,17 @@ ze_result_t DeviceImp::getGlobalTimestamps(uint64_t *hostTimestamp, uint64_t *de
*deviceTimestamp = queueTimeStamp.gpuTimeStamp;
*hostTimestamp = queueTimeStamp.cpuTimeinNS;
// Optional debug output: when the PrintGlobalTimestampInNs flag is set, print
// the host timestamp and the device timestamp scaled by the profiling timer resolution.
if (NEO::DebugManager.flags.PrintGlobalTimestampInNs.get()) {
    const auto &capabilityTable = this->neoDevice->getHardwareInfo().capabilityTable;
    // Keep only as many low bits of the raw device timestamp as the narrower of the two counters provides.
    const auto validBits = std::min(capabilityTable.timestampValidBits, capabilityTable.kernelTimestampValidBits);
    uint64_t kernelTimestampMaxValueInCycles = std::numeric_limits<uint64_t>::max();
    if (validBits < 64u) {
        // A shift by 64 would be undefined; the all-ones default above covers that case.
        kernelTimestampMaxValueInCycles = (1ull << validBits) - 1;
    }
    const uint64_t deviceTsinNs = (*deviceTimestamp & kernelTimestampMaxValueInCycles) * this->neoDevice->getDeviceInfo().outProfilingTimerResolution;
    // uint64_t is not guaranteed to be unsigned long long (it is unsigned long on LP64 targets),
    // so cast explicitly to the type that %llu requires.
    NEO::printDebugString(true, stdout,
                          "Host timestamp in ns : %llu | Device timestamp in ns : %llu\n",
                          static_cast<unsigned long long>(*hostTimestamp),
                          static_cast<unsigned long long>(deviceTsinNs));
}
return ZE_RESULT_SUCCESS;
}

View File

@@ -1648,7 +1648,8 @@ TEST_F(GlobalTimestampTest, whenQueryingForTimerResolutionWithUseCyclesPerSecond
class FalseCpuDeviceTime : public NEO::DeviceTime {
public:
// Mock override: fills the output with the values held in mockCpuTimeInNs and
// mockGpuTimeInNs and always reports success. The OSTime argument is unused.
bool getCpuGpuTime(TimeStampData *pGpuCpuTime, NEO::OSTime *) override {
    // The earlier unconditional store of 0u was immediately overwritten, so it is dropped.
    pGpuCpuTime->cpuTimeinNS = mockCpuTimeInNs;
    pGpuCpuTime->gpuTimeStamp = mockGpuTimeInNs;
    return true;
}
double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override {
@@ -1657,6 +1658,8 @@ class FalseCpuDeviceTime : public NEO::DeviceTime {
// Mock override: derives the timer clock by dividing 1e9 by the value returned
// from OSTime::getDeviceTimerResolution for the given hardware info.
// NOTE(review): presumably the resolution is in ns per tick, making this ticks per second - confirm.
uint64_t getDynamicDeviceTimerClock(HardwareInfo const &hwInfo) const override {
    const auto timerResolution = OSTime::getDeviceTimerResolution(hwInfo);
    const auto timerClock = 1000000000.0 / timerResolution;
    return static_cast<uint64_t>(timerClock);
}
uint64_t mockCpuTimeInNs = 0u;
uint64_t mockGpuTimeInNs = 100u;
};
class FalseCpuTime : public NEO::OSTime {
@@ -1676,6 +1679,10 @@ class FalseCpuTime : public NEO::OSTime {
static std::unique_ptr<OSTime> create() {
return std::unique_ptr<OSTime>(new FalseCpuTime());
}
// Returns the owned device-time object downcast to the mock type so tests can
// adjust its mock timestamp members. The pointer stays owned by this object.
FalseCpuDeviceTime *getFalseCpuDeviceTime() {
    auto *ownedDeviceTime = this->deviceTime.get();
    return static_cast<FalseCpuDeviceTime *>(ownedDeviceTime);
}
};
TEST_F(GlobalTimestampTest, whenGetGlobalTimestampCalledAndGetCpuTimeIsFalseReturnArbitraryValues) {
@@ -1695,6 +1702,66 @@ TEST_F(GlobalTimestampTest, whenGetGlobalTimestampCalledAndGetCpuTimeIsFalseRetu
EXPECT_NE(0u, deviceTs);
}
// With PrintGlobalTimestampInNs enabled, getGlobalTimestamps must print the host
// timestamp and the device timestamp masked to the smaller valid-bit count
// (32 here) and multiplied by the profiling timer resolution.
TEST_F(DeviceTest, givenPrintGlobalTimestampIsSetWhenGetGlobalTimestampIsCalledThenOutputStringIsAsExpected) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.PrintGlobalTimestampInNs.set(true);

    auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironmentRef();

    // Install the mock OS time; the raw pointer stays valid because ownership
    // moves into the root device environment.
    auto mockOsTime = std::make_unique<FalseCpuTime>();
    auto *mockDeviceTime = mockOsTime->getFalseCpuDeviceTime();
    // Raw GPU timestamp with all 36 low bits set
    mockDeviceTime->mockGpuTimeInNs = 0xFFFFFFFFF;
    rootDeviceEnvironment.osTime = std::move(mockOsTime);

    auto &capabilityTable = rootDeviceEnvironment.getMutableHardwareInfo()->capabilityTable;
    capabilityTable.timestampValidBits = 36;
    capabilityTable.kernelTimestampValidBits = 32;

    uint64_t hostTs = 0u;
    uint64_t deviceTs = 0u;
    testing::internal::CaptureStdout();
    const ze_result_t result = device->getGlobalTimestamps(&hostTs, &deviceTs);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    const std::string output = testing::internal::GetCapturedStdout();

    // kernelTimestampValidBits (32) is the smaller count, so only the low 32 bits survive
    const auto maskedGpuTimestamp = mockDeviceTime->mockGpuTimeInNs & 0xFFFFFFFF;
    const auto timerResolution = static_cast<uint64_t>(neoDevice->getProfilingTimerResolution());
    std::string expectedString("Host timestamp in ns : 0 | Device timestamp in ns : ");
    expectedString += std::to_string(timerResolution * maskedGpuTimestamp);
    expectedString += "\n";
    EXPECT_STREQ(output.c_str(), expectedString.c_str());
}
// With PrintGlobalTimestampInNs enabled and both valid-bit counts set to 64,
// getGlobalTimestamps must print the device timestamp unmasked, multiplied by
// the profiling timer resolution.
TEST_F(DeviceTest, givenPrintGlobalTimestampIsSetAnd64bitTimestampWhenGetGlobalTimestampIsCalledThenOutputStringIsAsExpected) {
    // Flag setup comes first, matching the sibling 32-bit test.
    DebugManagerStateRestore restorer;
    DebugManager.flags.PrintGlobalTimestampInNs.set(true);
    uint64_t hostTs = 0u;
    uint64_t deviceTs = 0u;

    auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironmentRef();
    auto falseCpuTime = std::make_unique<FalseCpuTime>();
    auto cpuDeviceTime = falseCpuTime->getFalseCpuDeviceTime();
    // Using 36 bits for gpu timestamp
    cpuDeviceTime->mockGpuTimeInNs = 0xFFFFFFFFF;
    rootDeviceEnvironment.osTime = std::move(falseCpuTime);

    auto &capabilityTable = rootDeviceEnvironment.getMutableHardwareInfo()->capabilityTable;
    capabilityTable.timestampValidBits = 64;
    capabilityTable.kernelTimestampValidBits = 64;

    testing::internal::CaptureStdout();
    ze_result_t result = device->getGlobalTimestamps(&hostTs, &deviceTs);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    std::string output = testing::internal::GetCapturedStdout();

    // All 64 bits are valid, so the expected value uses the full mock timestamp.
    const std::string expectedString("Host timestamp in ns : 0 | Device timestamp in ns : " +
                                     std::to_string(static_cast<uint64_t>(neoDevice->getProfilingTimerResolution()) *
                                                    cpuDeviceTime->mockGpuTimeInNs) +
                                     "\n");
    // The leftover debug printf was removed; EXPECT_STREQ already reports both strings on mismatch.
    EXPECT_STREQ(output.c_str(), expectedString.c_str());
}
using DeviceGetMemoryTests = DeviceTest;
TEST_F(DeviceGetMemoryTests, whenCallingGetMemoryPropertiesWithCountZeroThenOneIsReturned) {