Tuning start and end timestamp

Change-Id: I1504c596cbb42de266b62aeb1886bf6fb6501ad9
Signed-off-by: Koska Andrzej<andrzej.koska@intel.com>
Related-To: NEO-4615
This commit is contained in:
Koska
2020-05-12 12:50:20 +02:00
committed by sys_ocldev
parent 99839ccff5
commit 196a08f225
5 changed files with 62 additions and 1 deletions

View File

@ -310,6 +310,13 @@ void Event::calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t con
auto gpuTimeStamp = queueTimeStamp.GPUTimeStamp;
int64_t c0 = queueTimeStamp.CPUTimeinNS - hwHelper.getGpuTimeStampInNS(gpuTimeStamp, frequency);
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency) + c0;
if (startTimeStamp < queueTimeStamp.CPUTimeinNS) {
c0 += static_cast<uint64_t>((1ULL << (hwHelper.getGlobalTimeStampBits())) * frequency);
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency) + c0;
}
/* calculation based on equation
CpuTime = GpuTime * scalar + const( == c0)
scalar = DeltaCpu( == dCpu) / DeltaGpu( == dGpu)
@ -328,7 +335,6 @@ void Event::calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t con
cpuDuration = static_cast<uint64_t>(gpuDuration * frequency);
cpuCompleteDuration = static_cast<uint64_t>(gpuCompleteDuration * frequency);
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency) + c0;
endTimeStamp = startTimeStamp + cpuDuration;
completeTimeStamp = startTimeStamp + cpuCompleteDuration;

View File

@ -697,6 +697,11 @@ TEST(HwHelperCacheFlushTest, givenEnableCacheFlushFlagIsReadPlatformSettingWhenP
EXPECT_TRUE(HwHelper::cacheFlushAfterWalkerSupported(device->getHardwareInfo()));
}
// Checks that the HwHelper obtained for renderCoreFamily reports 36 global timestamp bits.
HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, givenHwHelperWhenCallGetGlobalTimeStampBitsReturnsCorrectValue) {
    const auto expectedTimeStampBits = 36U;
    const auto reportedTimeStampBits = HwHelper::get(renderCoreFamily).getGlobalTimeStampBits();
    EXPECT_EQ(reportedTimeStampBits, expectedTimeStampBits);
}
TEST_F(HwHelperTest, givenEnableLocalMemoryDebugVarAndOsEnableLocalMemoryWhenSetThenGetEnableLocalMemoryReturnsCorrectValue) {
DebugManagerStateRestore dbgRestore;
VariableBackup<bool> orgOsEnableLocalMemory(&OSInterface::osEnableLocalMemory);

View File

@ -522,6 +522,48 @@ TEST(EventProfilingTest, givenRawTimestampsDebugModeWhenDataIsQueriedThenRawData
event.timeStampNode = nullptr;
}
// Scenario: the GPU global start timestamp (0) is lower than the GPU timestamp captured at
// queue time (1). After calcProfilingData() the START profiling value reported through the
// OpenCL API is expected to be strictly greater than the QUEUED value.
TEST(EventProfilingTest, givenRawTimestampsDebugModeWhenStartTimeStampLTQueueTimeStampThenIncreaseStartTimeStamp) {
    // NOTE(review): presumably restores the debug flags when the test scope ends - confirm.
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.ReturnRawGpuTimestamps.set(1);

    auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    MyOSTime::instanceNum = 0;
    device->setOSTime(new MyOSTime());
    EXPECT_EQ(1, MyOSTime::instanceNum);

    MockContext context(device.get());
    MockCommandQueue cmdQ(&context, device.get(), nullptr);
    cmdQ.setProfilingEnabled();
    cmdQ.device = device.get();

    // Hardware timestamps that the event reads through its timestamp node.
    HwTimeStamps timestamp;
    timestamp.GlobalStartTS = 0;
    timestamp.ContextStartTS = 20;
    timestamp.GlobalEndTS = 80;
    timestamp.ContextEndTS = 56;
    timestamp.GlobalCompleteTS = 0;
    timestamp.ContextCompleteTS = 70;

    MockTagNode<HwTimeStamps> timestampNode;
    timestampNode.tagForCpuAccess = &timestamp;

    MockEvent<Event> event(&cmdQ, CL_COMPLETE, 0, 0);
    cl_event clEvent = &event;

    // Queue-time snapshot: its GPU timestamp (1) is greater than GlobalStartTS (0).
    event.queueTimeStamp.CPUTimeinNS = 83;
    event.queueTimeStamp.GPUTimeStamp = 1;

    event.setCPUProfilingPath(false);
    event.timeStampNode = &timestampNode;
    event.calcProfilingData();

    // Zero-initialize the outputs and verify both queries succeed, so the comparison below
    // never operates on indeterminate values. EXPECT (non-fatal) is used on purpose: the
    // timestamp node must still be detached at the end of the test even if a query fails.
    cl_ulong queued = 0;
    cl_ulong start = 0;
    EXPECT_EQ(CL_SUCCESS, clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &queued, nullptr));
    EXPECT_EQ(CL_SUCCESS, clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr));

    EXPECT_LT(queued, start);

    // Detach the stack-allocated node before the event goes out of scope.
    event.timeStampNode = nullptr;
}
struct ProfilingWithPerfCountersTests : public PerformanceCountersFixture, ::testing::Test {
void SetUp() override {
PerformanceCountersFixture::SetUp();

View File

@ -92,6 +92,7 @@ class HwHelper {
virtual uint32_t getBindlessSurfaceExtendedMessageDescriptorValue(uint32_t surfStateOffset) const = 0;
virtual bool isSpecialWorkgroupSizeRequired(const HardwareInfo &hwInfo, bool isSimulation) const = 0;
virtual uint32_t getGlobalTimeStampBits() const = 0;
static uint32_t getSubDevicesCount(const HardwareInfo *pHwInfo);
static uint32_t getEnginesCount(const HardwareInfo &hwInfo);
@ -244,6 +245,8 @@ class HwHelperHw : public HwHelper {
bool isSpecialWorkgroupSizeRequired(const HardwareInfo &hwInfo, bool isSimulation) const override;
uint32_t getGlobalTimeStampBits() const override;
protected:
static const AuxTranslationMode defaultAuxTranslationMode;
HwHelperHw() = default;

View File

@ -20,6 +20,11 @@ uint32_t HwHelperHw<GfxFamily>::getComputeUnitsUsedForScratch(const HardwareInfo
pHwInfo->gtSystemInfo.ThreadCount / pHwInfo->gtSystemInfo.EUCount;
}
// Returns the number of global timestamp bits; this generic implementation always yields 36.
template <typename GfxFamily>
inline uint32_t HwHelperHw<GfxFamily>::getGlobalTimeStampBits() const {
    constexpr uint32_t globalTimeStampBits = 36;
    return globalTimeStampBits;
}
template <typename GfxFamily>
void HwHelperHw<GfxFamily>::setCapabilityCoherencyFlag(const HardwareInfo *pHwInfo, bool &coherencyFlag) {
coherencyFlag = true;