From 881895bd2afdd7a236ec0fb698f442d5d51d833a Mon Sep 17 00:00:00 2001 From: Mateusz Jablonski Date: Tue, 10 Apr 2018 16:19:13 +0200 Subject: [PATCH] Stop querying gpu frequency during each profiling data calculation change type of profiling timer resolution in device info to double Change-Id: I41a67ecf61cd3bdc5a997b1f083b9998063f4f7f --- .../compiler_interface/compiler_interface.cpp | 2 +- runtime/device/device_caps.cpp | 3 +- runtime/device/device_info.h | 3 +- runtime/device/device_info_map.h | 4 +- runtime/event/event.cpp | 2 +- unit_tests/gen8/test_device_caps.cpp | 2 +- unit_tests/gen9/bxt/test_device_caps_bxt.cpp | 2 +- unit_tests/gen9/glk/test_device_caps_glk.cpp | 2 +- unit_tests/gen9/skl/test_device_caps_skl.cpp | 2 +- unit_tests/profiling/profiling_tests.cpp | 41 ++++++++++++++++--- 10 files changed, 47 insertions(+), 16 deletions(-) diff --git a/runtime/compiler_interface/compiler_interface.cpp b/runtime/compiler_interface/compiler_interface.cpp index 349c7c76ab..a7babd9d55 100644 --- a/runtime/compiler_interface/compiler_interface.cpp +++ b/runtime/compiler_interface/compiler_interface.cpp @@ -418,7 +418,7 @@ CIF::RAII::UPtr_t CompilerInterface::createIgcT return nullptr; } - newDeviceCtx->SetProfilingTimerResolution(static_cast(device.getDeviceInfo().profilingTimerResolution)); + newDeviceCtx->SetProfilingTimerResolution(static_cast(device.getDeviceInfo().outProfilingTimerResolution)); auto igcPlatform = newDeviceCtx->GetPlatformHandle(); auto igcGtSystemInfo = newDeviceCtx->GetGTSystemInfoHandle(); auto igcFeWa = newDeviceCtx->GetIgcFeaturesAndWorkaroundsHandle(); diff --git a/runtime/device/device_caps.cpp b/runtime/device/device_caps.cpp index c85a6fa055..4c793ea6e6 100644 --- a/runtime/device/device_caps.cpp +++ b/runtime/device/device_caps.cpp @@ -208,7 +208,8 @@ void Device::initializeCaps() { deviceInfo.globalMemSize = alignDown(deviceInfo.globalMemSize, MemoryConstants::pageSize); deviceInfo.globalMemCacheType = CL_READ_WRITE_CACHE; - deviceInfo.profilingTimerResolution = static_cast(getProfilingTimerResolution()); + deviceInfo.profilingTimerResolution = getProfilingTimerResolution(); + deviceInfo.outProfilingTimerResolution = static_cast(deviceInfo.profilingTimerResolution); deviceInfo.memBaseAddressAlign = 1024; deviceInfo.minDataTypeAlignSize = 128; diff --git a/runtime/device/device_info.h b/runtime/device/device_info.h index 32964dc5de..2479c884ae 100644 --- a/runtime/device/device_info.h +++ b/runtime/device/device_info.h @@ -92,7 +92,8 @@ struct DeviceInfo { cl_device_local_mem_type localMemType; cl_ulong localMemSize; cl_bool errorCorrectionSupport; - size_t profilingTimerResolution; + double profilingTimerResolution; + size_t outProfilingTimerResolution; cl_bool endianLittle; cl_bool deviceAvailable; cl_bool compilerAvailable; diff --git a/runtime/device/device_info_map.h b/runtime/device/device_info_map.h index 18ecb51857..3c6f9ff7a4 100644 --- a/runtime/device/device_info_map.h +++ b/runtime/device/device_info_map.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, Intel Corporation + * Copyright (c) 2017 - 2018, Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -133,7 +133,7 @@ template<> struct Map : pu template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; -template<> struct Map : public MapBase {}; +template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; diff --git a/runtime/event/event.cpp b/runtime/event/event.cpp index 3646a6b1eb..3b7c77d782 100644 --- a/runtime/event/event.cpp +++ b/runtime/event/event.cpp @@ -259,7 +259,7 @@ bool Event::calcProfilingData() { int64_t c0 = 0; if (!dataCalculated && timeStampNode && !profilingCpuPath) { - double frequency = cmdQueue->getDevice().getProfilingTimerResolution(); + double frequency = cmdQueue->getDevice().getDeviceInfo().profilingTimerResolution; /* calculation based on equation CpuTime = GpuTime * scalar + const( == c0) scalar = DeltaCpu( == dCpu) / DeltaGpu( == dGpu) diff --git a/unit_tests/gen8/test_device_caps.cpp b/unit_tests/gen8/test_device_caps.cpp index 03c7d35cf6..5731f8ae08 100644 --- a/unit_tests/gen8/test_device_caps.cpp +++ b/unit_tests/gen8/test_device_caps.cpp @@ -77,7 +77,7 @@ GEN8TEST_F(Gen8DeviceCaps, image3DDimensions) { BDWTEST_F(Gen8DeviceCaps, BdwProfilingTimerResolution) { const auto &caps = pDevice->getDeviceInfo(); - EXPECT_EQ(83u, caps.profilingTimerResolution); + EXPECT_EQ(83u, caps.outProfilingTimerResolution); } typedef Test BdwUsDeviceIdTest; diff --git a/unit_tests/gen9/bxt/test_device_caps_bxt.cpp b/unit_tests/gen9/bxt/test_device_caps_bxt.cpp index c57c9319f6..36896bd0c7 100644 --- a/unit_tests/gen9/bxt/test_device_caps_bxt.cpp +++ b/unit_tests/gen9/bxt/test_device_caps_bxt.cpp @@ -35,7 +35,7 @@ BXTTEST_F(BxtDeviceCaps, reportsOcl12) { BXTTEST_F(BxtDeviceCaps, BxtProfilingTimerResolution) { const auto &caps = pDevice->getDeviceInfo(); - EXPECT_EQ(52u, caps.profilingTimerResolution); + EXPECT_EQ(52u, caps.outProfilingTimerResolution); } BXTTEST_F(BxtDeviceCaps, BxtClVersionSupport) { diff --git a/unit_tests/gen9/glk/test_device_caps_glk.cpp b/unit_tests/gen9/glk/test_device_caps_glk.cpp index 376a18c6eb..dc4fcb2218 100644 --- a/unit_tests/gen9/glk/test_device_caps_glk.cpp +++ b/unit_tests/gen9/glk/test_device_caps_glk.cpp @@ -29,7 +29,7 @@ typedef Test Gen9DeviceCaps; GLKTEST_F(Gen9DeviceCaps, GlkProfilingTimerResolution) { const auto &caps = pDevice->getDeviceInfo(); - EXPECT_EQ(52u, caps.profilingTimerResolution); + EXPECT_EQ(52u, caps.outProfilingTimerResolution); } GLKTEST_F(Gen9DeviceCaps, givenGlkDeviceWhenAskedForDoubleSupportThenTrueIsReturned) { diff --git a/unit_tests/gen9/skl/test_device_caps_skl.cpp b/unit_tests/gen9/skl/test_device_caps_skl.cpp index 009dcc9efa..1e83442b8c 100644 --- a/unit_tests/gen9/skl/test_device_caps_skl.cpp +++ b/unit_tests/gen9/skl/test_device_caps_skl.cpp @@ -35,7 +35,7 @@ SKLTEST_F(SklDeviceCaps, reportsOcl21) { SKLTEST_F(SklDeviceCaps, SklProfilingTimerResolution) { const auto &caps = pDevice->getDeviceInfo(); - EXPECT_EQ(83u, caps.profilingTimerResolution); + EXPECT_EQ(83u, caps.outProfilingTimerResolution); } SKLTEST_F(SklDeviceCaps, givenSklDeviceWhenAskedFor32BitSupportThenFalseIsReturned) { diff --git a/unit_tests/profiling/profiling_tests.cpp b/unit_tests/profiling/profiling_tests.cpp index 153257b3b3..3506eb2f08 100644 --- a/unit_tests/profiling/profiling_tests.cpp +++ b/unit_tests/profiling/profiling_tests.cpp @@ -402,13 +402,44 @@ struct MockTagNode : public TagNode { } }; -TEST(EventProfilingTest, calcProfilingDataSetsEndTimestampInCompleteTimestampWhenCompleteIsZero) { - MockDevice *device = DeviceHelper<>::create(); +class MyOSTime : public OSTime { + public: + static int instanceNum; + MyOSTime() { + instanceNum++; + } + double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override { + EXPECT_FALSE(true); + return 1.0; + } + bool getCpuGpuTime(TimeStampData *pGpuCpuTime) override { + EXPECT_FALSE(true); + return false; + } + bool getCpuTime(uint64_t *timeStamp) override { + EXPECT_FALSE(true); + return false; + }; + double getHostTimerResolution() const override { + EXPECT_FALSE(true); + return 0; + } + uint64_t getCpuRawTimestamp() override { + EXPECT_FALSE(true); + return 0; + } +}; +int MyOSTime::instanceNum = 0; +TEST(EventProfilingTest, givenEventWhenCompleteIsZeroThenCalcProfilingDataSetsEndTimestampInCompleteTimestampAndDoesntCallOsTimeMethods) { + std::unique_ptr device(DeviceHelper<>::create()); + MyOSTime::instanceNum = 0; + device->setOSTime(new MyOSTime()); + EXPECT_EQ(1, MyOSTime::instanceNum); MockContext context; cl_command_queue_properties props[5] = {0, 0, 0, 0, 0}; - MockCommandQueue cmdQ(&context, device, props); + MockCommandQueue cmdQ(&context, device.get(), props); cmdQ.setProfilingEnabled(); - cmdQ.device = device; + cmdQ.device = device.get(); HwTimeStamps timestamp; timestamp.GlobalStartTS = 10; @@ -429,7 +460,6 @@ TEST(EventProfilingTest, calcProfilingDataSetsEndTimestampInCompleteTimestampWhe EXPECT_EQ(timestamp.ContextEndTS, timestamp.ContextCompleteTS); cmdQ.device = nullptr; - delete device; } struct ProfilingWithPerfCountersTests : public ProfilingTests, @@ -774,5 +804,4 @@ HWTEST_F(ProfilingWithPerfCountersTests, GIVENCommandQueueWithProfilingPerfCount pCmdQ->setPerfCountersEnabled(false, UINT32_MAX); } - } // namespace OCLRT