From b6e76b91181da6dcaac615c0a9e9895fec662c5c Mon Sep 17 00:00:00 2001 From: Joshua Santosh Ranjan Date: Thu, 6 Jul 2023 08:35:07 +0000 Subject: [PATCH] fix: Move event reference time tracking into event class This would avoid recalculating reference timestamps when event is used with different command lists. Related-To: LOCI-4563 Signed-off-by: Joshua Santosh Ranjan --- level_zero/core/source/cmdlist/cmdlist.h | 1 - level_zero/core/source/cmdlist/cmdlist_hw.inl | 16 --- .../core/source/cmdlist/cmdlist_imp.cpp | 8 +- level_zero/core/source/cmdlist/cmdlist_imp.h | 1 - level_zero/core/source/event/event.cpp | 8 ++ level_zero/core/source/event/event.h | 6 +- level_zero/core/source/event/event_impl.inl | 13 +++ .../sources/cmdlist/test_cmdlist_7.cpp | 33 ------ .../unit_tests/sources/event/test_event.cpp | 100 +++++++++++++++++- .../debug_settings/debug_variables_base.inl | 3 +- shared/test/common/test_files/igdrcl.config | 2 +- 11 files changed, 126 insertions(+), 65 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index f4cc1652dd..9ab69b6f3b 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -394,7 +394,6 @@ struct CommandList : _ze_command_list_handle_t { uint32_t commandListPerThreadPrivateScratchSize = 0u; uint32_t partitionCount = 1; uint32_t defaultMocsIndex = 0; - uint64_t timestampRefreshIntervalInNanoSec = 0; bool isFlushTaskSubmissionEnabled = false; bool isSyncModeQueue = false; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index fa275cad8b..547305d233 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -134,8 +134,6 @@ ze_result_t CommandListCoreFamily::reset() { cmdListCurrentStartOffset = 0; mappedTsEventList.clear(); - previousSynchronizedTimestamp = {}; - return ZE_RESULT_SUCCESS; } @@ -236,20 +234,6 @@ ze_result_t 
CommandListCoreFamily::initialize(Device *device, NEO } createLogicalStateHelper(); - - const auto frequency = device->getNEODevice()->getDeviceInfo().profilingTimerResolution; - const auto maxKernelTsValue = maxNBitValue(hwInfo.capabilityTable.kernelTimestampValidBits); - if (hwInfo.capabilityTable.kernelTimestampValidBits < 64u) { - this->timestampRefreshIntervalInNanoSec = static_cast(maxKernelTsValue * frequency); - } else { - this->timestampRefreshIntervalInNanoSec = maxKernelTsValue; - } - if (NEO::DebugManager.flags.CommandListTimestampRefreshIntervalInMilliSec.get() != -1) { - constexpr uint32_t milliSecondsToNanoSeconds = 1000000u; - const uint32_t refreshTime = NEO::DebugManager.flags.CommandListTimestampRefreshIntervalInMilliSec.get(); - this->timestampRefreshIntervalInNanoSec = refreshTime * milliSecondsToNanoSeconds; - } - return returnType; } diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.cpp b/level_zero/core/source/cmdlist/cmdlist_imp.cpp index fb94da9b95..9d865031bd 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.cpp +++ b/level_zero/core/source/cmdlist/cmdlist_imp.cpp @@ -246,14 +246,8 @@ void CommandListImp::storeReferenceTsToMappedEvents(bool isClearEnabled) { if (mappedTsEventList.size()) { uint64_t currentCpuTimeStamp = 0; device->getNEODevice()->getOSTime()->getCpuTime(&currentCpuTimeStamp); - const auto recalculate = - (currentCpuTimeStamp - previousSynchronizedTimestamp.cpuTimeinNS) > timestampRefreshIntervalInNanoSec; - if (previousSynchronizedTimestamp.cpuTimeinNS == 0 || recalculate) { - device->getNEODevice()->getOSTime()->getCpuGpuTime(&previousSynchronizedTimestamp); - } - for (auto &event : mappedTsEventList) { - event->setReferenceTs(previousSynchronizedTimestamp); + event->setReferenceTs(currentCpuTimeStamp); } if (isClearEnabled) { diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.h b/level_zero/core/source/cmdlist/cmdlist_imp.h index fdabf91f8f..be3d1f0f94 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.h 
+++ b/level_zero/core/source/cmdlist/cmdlist_imp.h @@ -56,7 +56,6 @@ struct CommandListImp : CommandList { static constexpr bool cmdListDefaultMediaSamplerClockGate = false; static constexpr bool cmdListDefaultGlobalAtomics = false; std::vector mappedTsEventList{}; - NEO::TimeStampData previousSynchronizedTimestamp{}; }; } // namespace L0 diff --git a/level_zero/core/source/event/event.cpp b/level_zero/core/source/event/event.cpp index 0708ade1a4..a168b547a3 100644 --- a/level_zero/core/source/event/event.cpp +++ b/level_zero/core/source/event/event.cpp @@ -410,4 +410,12 @@ void Event::unsetCmdQueue(bool unregisterClient) { latestUsedCmdQueue = nullptr; } +void Event::setReferenceTs(uint64_t currentCpuTimeStamp) { + const auto recalculate = + (currentCpuTimeStamp - referenceTs.cpuTimeinNS) > timestampRefreshIntervalInNanoSec; + if (referenceTs.cpuTimeinNS == 0 || recalculate) { + device->getNEODevice()->getOSTime()->getCpuGpuTime(&referenceTs); + } +} + } // namespace L0 diff --git a/level_zero/core/source/event/event.h b/level_zero/core/source/event/event.h index 956f31af52..974b499d5e 100644 --- a/level_zero/core/source/event/event.h +++ b/level_zero/core/source/event/event.h @@ -221,9 +221,10 @@ struct Event : _ze_event_handle_t { uint32_t getInOrderExecSignalValue() const { return inOrderExecSignalValue; } uint32_t getInOrderAllocationOffset() const { return inOrderAllocationOffset; } void setLatestUsedCmdQueue(CommandQueue *newCmdQ); - void setReferenceTs(NEO::TimeStampData &timestamp) { - referenceTs = timestamp; + NEO::TimeStampData *peekReferenceTs() { + return &referenceTs; } + void setReferenceTs(uint64_t currentCpuTimeStamp); bool hasKerneMappedTsCapability = false; protected: @@ -282,6 +283,7 @@ struct Event : _ze_event_handle_t { bool signalAllEventPackets = false; bool isFromIpcPool = false; bool inOrderExecEvent = false; + uint64_t timestampRefreshIntervalInNanoSec = 0; }; struct EventPool : _ze_event_pool_handle_t { diff --git 
a/level_zero/core/source/event/event_impl.inl b/level_zero/core/source/event/event_impl.inl index 1604c07818..fa8f252635 100644 --- a/level_zero/core/source/event/event_impl.inl +++ b/level_zero/core/source/event/event_impl.inl @@ -67,6 +67,19 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device * event->resetDeviceCompletionData(true); } + const auto frequency = device->getNEODevice()->getDeviceInfo().profilingTimerResolution; + const auto maxKernelTsValue = maxNBitValue(hwInfo.capabilityTable.kernelTimestampValidBits); + if (hwInfo.capabilityTable.kernelTimestampValidBits < 64u) { + event->timestampRefreshIntervalInNanoSec = static_cast(maxKernelTsValue * frequency) / 2; + } else { + event->timestampRefreshIntervalInNanoSec = maxKernelTsValue / 2; + } + if (NEO::DebugManager.flags.EventTimestampRefreshIntervalInMilliSec.get() != -1) { + constexpr uint32_t milliSecondsToNanoSeconds = 1000000u; + const uint32_t refreshTime = NEO::DebugManager.flags.EventTimestampRefreshIntervalInMilliSec.get(); + event->timestampRefreshIntervalInNanoSec = refreshTime * milliSecondsToNanoSeconds; + } + return event; } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp index fdf8c6e30a..51dbd35c70 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp @@ -3137,42 +3137,9 @@ HWTEST2_F(CommandListMappedTimestampTest, givenEventIsAddedToMappedEventListWhen auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - neoDevice->setOSTime(new MockOSTime()); commandList->addToMappedEventList(event.get()); - commandList->storeReferenceTsToMappedEvents(true); - EXPECT_EQ(0u, commandList->peekMappedEventList().size()); -} - -HWTEST2_F(CommandListMappedTimestampTest, 
givenCommandListTimestampRefreshIntervalInMilliSecIsSetWhenStoringReferenceTimestampThenUpdatedRefreshIntervalIsUsed, IsPVC) { - - DebugManagerStateRestore restorer; - createKernel(); - ze_event_pool_desc_t eventPoolDesc = {}; - eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP; - eventPoolDesc.count = 2; - - ze_event_desc_t eventDesc = {}; - eventDesc.index = 0; - eventDesc.wait = 0; - eventDesc.signal = 0; - - ze_result_t returnValue; - std::unique_ptr eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); - std::unique_ptr event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); - - NEO::DebugManager.flags.CommandListTimestampRefreshIntervalInMilliSec.set(0); - auto commandList = std::make_unique>>(); - neoDevice->setOSTime(new MockOSTimeWithConfigurableCpuTimestamp()); - auto osTime = static_cast(neoDevice->getOSTime()); - commandList->initialize(device, NEO::EngineGroupType::Compute, 0u); - commandList->addToMappedEventList(event.get()); - osTime->mockCpuTime = 1; commandList->storeReferenceTsToMappedEvents(false); EXPECT_EQ(1u, commandList->peekMappedEventList().size()); - commandList->addToMappedEventList(event.get()); - commandList->storeReferenceTsToMappedEvents(false); - osTime->mockCpuTime = 2; - commandList->addToMappedEventList(event.get()); commandList->storeReferenceTsToMappedEvents(true); EXPECT_EQ(0u, commandList->peekMappedEventList().size()); } diff --git a/level_zero/core/test/unit_tests/sources/event/test_event.cpp b/level_zero/core/test/unit_tests/sources/event/test_event.cpp index 9018b13f75..c969edf20b 100644 --- a/level_zero/core/test/unit_tests/sources/event/test_event.cpp +++ b/level_zero/core/test/unit_tests/sources/event/test_event.cpp @@ -2124,8 +2124,9 @@ TEST_F(EventqueryKernelTimestampsExt, givenEventWithMappedTimestampCapabilityWhe const int64_t cpuReferenceTimeInNs = 3000; const auto maxKernelTsValue 
= maxNBitValue(32); - NEO::TimeStampData referenceTs{static_cast(gpuReferenceTimeInNs / deviceTsFrequency), cpuReferenceTimeInNs}; - event->setReferenceTs(referenceTs); + NEO::TimeStampData *referenceTs = event->peekReferenceTs(); + referenceTs->cpuTimeinNS = cpuReferenceTimeInNs; + referenceTs->gpuTimeStamp = static_cast(gpuReferenceTimeInNs / deviceTsFrequency); auto timeToTimeStamp = [&](uint32_t timeInNs) { return static_cast(timeInNs / deviceTsFrequency); @@ -2214,6 +2215,101 @@ TEST_F(EventqueryKernelTimestampsExt, givenEventWithMappedTimestampCapabilityWhe EXPECT_LE(results.pSynchronizedTimestampsBuffer[2].context.kernelEnd, expectedContextEnd + errorOffset); } +using HostMappedEventTests = Test; +HWTEST_F(HostMappedEventTests, givenMappedEventsWhenSettingRefereshTimestampThenCorrectRefreshIntervalIsCalculated) { + + ze_event_pool_desc_t eventPoolDesc = {}; + const auto deviceTsFrequency = device->getNEODevice()->getDeviceInfo().profilingTimerResolution; + const auto kernelTsValidBits = device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.kernelTimestampValidBits; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP; + + ze_result_t result = ZE_RESULT_SUCCESS; + std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + ASSERT_NE(nullptr, eventPool); + + uint64_t expectedTsRefreshIntervalInNanoSec = 0u; + if (kernelTsValidBits >= 64) { + expectedTsRefreshIntervalInNanoSec = maxNBitValue(kernelTsValidBits) / 2; + } else { + expectedTsRefreshIntervalInNanoSec = static_cast((maxNBitValue(kernelTsValidBits) * deviceTsFrequency) / 2); + } + + ze_event_desc_t eventDesc = {ZE_STRUCTURE_TYPE_EVENT_DESC}; + auto event = std::unique_ptr>(static_cast *>(L0::Event::create(eventPool.get(), &eventDesc, device))); + ASSERT_NE(nullptr, event); + + // Reset before setting + NEO::TimeStampData 
*resetReferenceTs = event->peekReferenceTs(); + resetReferenceTs->cpuTimeinNS = std::numeric_limits::max(); + resetReferenceTs->gpuTimeStamp = std::numeric_limits::max(); + + event->setReferenceTs(expectedTsRefreshIntervalInNanoSec + 1); + EXPECT_NE(resetReferenceTs->cpuTimeinNS, std::numeric_limits::max()); + EXPECT_NE(resetReferenceTs->gpuTimeStamp, std::numeric_limits::max()); +} + +HWTEST_F(HostMappedEventTests, givenEventTimestampRefreshIntervalInMilliSecIsSetThenCorrectRefreshIntervalIsCalculated) { + + const uint32_t refereshIntervalMillisec = 10; + DebugManagerStateRestore restorer; + NEO::DebugManager.flags.EventTimestampRefreshIntervalInMilliSec.set(refereshIntervalMillisec); + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP; + + ze_result_t result = ZE_RESULT_SUCCESS; + std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + ASSERT_NE(nullptr, eventPool); + + uint64_t expectedTsRefreshIntervalInNanoSec = refereshIntervalMillisec * 1000000; + + ze_event_desc_t eventDesc = {ZE_STRUCTURE_TYPE_EVENT_DESC}; + auto event = std::unique_ptr>(static_cast *>(L0::Event::create(eventPool.get(), &eventDesc, device))); + ASSERT_NE(nullptr, event); + + // Reset before setting + NEO::TimeStampData *resetReferenceTs = event->peekReferenceTs(); + resetReferenceTs->cpuTimeinNS = 0; + resetReferenceTs->gpuTimeStamp = 0; + + event->setReferenceTs(expectedTsRefreshIntervalInNanoSec + 1); + EXPECT_NE(resetReferenceTs->cpuTimeinNS, 0u); + EXPECT_NE(resetReferenceTs->gpuTimeStamp, 0u); +} + +HWTEST_F(HostMappedEventTests, givenEventTimestampRefreshIntervalInMilliSecIsSetThenRefreshIntervalIsNotCalculatedIfCpuTimeLessThanInterval) { + + const uint32_t refereshIntervalMillisec = 10; + DebugManagerStateRestore restorer; + 
NEO::DebugManager.flags.EventTimestampRefreshIntervalInMilliSec.set(refereshIntervalMillisec); + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP; + + ze_result_t result = ZE_RESULT_SUCCESS; + std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + ASSERT_NE(nullptr, eventPool); + + uint64_t expectedTsRefreshIntervalInNanoSec = refereshIntervalMillisec * 1000000; + + ze_event_desc_t eventDesc = {ZE_STRUCTURE_TYPE_EVENT_DESC}; + auto event = std::unique_ptr>(static_cast *>(L0::Event::create(eventPool.get(), &eventDesc, device))); + ASSERT_NE(nullptr, event); + + // Reset before setting + NEO::TimeStampData *resetReferenceTs = event->peekReferenceTs(); + resetReferenceTs->cpuTimeinNS = 1; + resetReferenceTs->gpuTimeStamp = 1; + + event->setReferenceTs(expectedTsRefreshIntervalInNanoSec - 2); + EXPECT_EQ(resetReferenceTs->cpuTimeinNS, 1u); + EXPECT_EQ(resetReferenceTs->gpuTimeStamp, 1u); +} + HWCMDTEST_F(IGFX_GEN9_CORE, TimestampEventCreate, givenEventTimestampsWhenQueryKernelTimestampThenCorrectDataAreSet) { typename MockTimestampPackets32::Packet data = {}; data.contextStart = 1u; diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 2eec9fe36b..5249bc54d7 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -557,9 +557,8 @@ DECLARE_DEBUG_VARIABLE(int32_t, EnableStateComputeModeTracking, -1, "-1: default DECLARE_DEBUG_VARIABLE(int32_t, EnableStateBaseAddressTracking, -1, "-1: default: enabled, 0: disabled, 1: enabled. This flag enables tracking state base address changes in command lists") DECLARE_DEBUG_VARIABLE(int32_t, SelectCmdListHeapAddressModel, -1, "-1: default, 0: private heaps, 1: stateless, 2: bindless, 3: bindful. 
This flag selects default command list heap address model. Values should match HeapAddressModel enum") DECLARE_DEBUG_VARIABLE(int32_t, EnableSetPair, -1, "Use SET_PAIR to pair two buffer objects behind the same file descriptor, -1: default, 0: disabled, 1: enabled") -DECLARE_DEBUG_VARIABLE(int32_t, CommandListTimestampRefreshIntervalInMilliSec, -1, "-1: use driver default, This value sets the refresh interval for getting synchronized GPU and CPU timestamp") DECLARE_DEBUG_VARIABLE(int32_t, ForcePreferredAllocationMethod, -1, "Sets preferred allocation method for Wddm paths; values = -1: driver default, 0: UseUmdSystemPtr, 1: AllocateByKmd") - +DECLARE_DEBUG_VARIABLE(int32_t, EventTimestampRefreshIntervalInMilliSec, -1, "-1: use driver default, This value sets the refresh interval for getting synchronized GPU and CPU timestamp") /* Binary Cache */ DECLARE_DEBUG_VARIABLE(bool, BinaryCacheTrace, false, "enable cl_cache to produce .trace files with information about hash computation") diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index 72ec3446fc..f422d7e65e 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -540,7 +540,7 @@ VfBarResourceAllocationWa = 1 EnableDynamicPostSyncAllocLayout = -1 PrintTimestampPacketUsage = -1 TrackNumCsrClientsOnSyncPoints = -1 -CommandListTimestampRefreshIntervalInMilliSec = -1 +EventTimestampRefreshIntervalInMilliSec = -1 SynchronizeEventBeforeReset = -1 RemoveRestrictionsOnNumberOfThreadsInGpgpuThreadGroup = 0 SkipDcFlushOnBarrierWithoutEvents = -1