fix: Move event reference time tracking into event class

This avoids recalculating reference timestamps
when an event is used with different command lists.

Related-To: LOCI-4563

Signed-off-by: Joshua Santosh Ranjan <joshua.santosh.ranjan@intel.com>
This commit is contained in:
Joshua Santosh Ranjan
2023-07-06 08:35:07 +00:00
committed by Compute-Runtime-Automation
parent 175ceb9bd1
commit b6e76b9118
11 changed files with 126 additions and 65 deletions

View File

@@ -394,7 +394,6 @@ struct CommandList : _ze_command_list_handle_t {
uint32_t commandListPerThreadPrivateScratchSize = 0u;
uint32_t partitionCount = 1;
uint32_t defaultMocsIndex = 0;
uint64_t timestampRefreshIntervalInNanoSec = 0;
bool isFlushTaskSubmissionEnabled = false;
bool isSyncModeQueue = false;

View File

@@ -134,8 +134,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
cmdListCurrentStartOffset = 0;
mappedTsEventList.clear();
previousSynchronizedTimestamp = {};
return ZE_RESULT_SUCCESS;
}
@@ -236,20 +234,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
}
createLogicalStateHelper();
const auto frequency = device->getNEODevice()->getDeviceInfo().profilingTimerResolution;
const auto maxKernelTsValue = maxNBitValue(hwInfo.capabilityTable.kernelTimestampValidBits);
if (hwInfo.capabilityTable.kernelTimestampValidBits < 64u) {
this->timestampRefreshIntervalInNanoSec = static_cast<uint64_t>(maxKernelTsValue * frequency);
} else {
this->timestampRefreshIntervalInNanoSec = maxKernelTsValue;
}
if (NEO::DebugManager.flags.CommandListTimestampRefreshIntervalInMilliSec.get() != -1) {
constexpr uint32_t milliSecondsToNanoSeconds = 1000000u;
const uint32_t refreshTime = NEO::DebugManager.flags.CommandListTimestampRefreshIntervalInMilliSec.get();
this->timestampRefreshIntervalInNanoSec = refreshTime * milliSecondsToNanoSeconds;
}
return returnType;
}

View File

@@ -246,14 +246,8 @@ void CommandListImp::storeReferenceTsToMappedEvents(bool isClearEnabled) {
if (mappedTsEventList.size()) {
uint64_t currentCpuTimeStamp = 0;
device->getNEODevice()->getOSTime()->getCpuTime(&currentCpuTimeStamp);
const auto recalculate =
(currentCpuTimeStamp - previousSynchronizedTimestamp.cpuTimeinNS) > timestampRefreshIntervalInNanoSec;
if (previousSynchronizedTimestamp.cpuTimeinNS == 0 || recalculate) {
device->getNEODevice()->getOSTime()->getCpuGpuTime(&previousSynchronizedTimestamp);
}
for (auto &event : mappedTsEventList) {
event->setReferenceTs(previousSynchronizedTimestamp);
event->setReferenceTs(currentCpuTimeStamp);
}
if (isClearEnabled) {

View File

@@ -56,7 +56,6 @@ struct CommandListImp : CommandList {
static constexpr bool cmdListDefaultMediaSamplerClockGate = false;
static constexpr bool cmdListDefaultGlobalAtomics = false;
std::vector<Event *> mappedTsEventList{};
NEO::TimeStampData previousSynchronizedTimestamp{};
};
} // namespace L0

View File

@@ -410,4 +410,12 @@ void Event::unsetCmdQueue(bool unregisterClient) {
latestUsedCmdQueue = nullptr;
}
// Refreshes the event's reference CPU/GPU timestamp pair when needed.
//
// currentCpuTimeStamp is used only for the staleness check. The stored pair is left
// untouched when a reference has already been captured (cpuTimeinNS != 0) and
// currentCpuTimeStamp is no more than timestampRefreshIntervalInNanoSec past the stored
// CPU time. Otherwise the pair is re-read through OSTime::getCpuGpuTime.
//
// NOTE(review): the difference is taken without guarding against
// currentCpuTimeStamp < referenceTs.cpuTimeinNS; with unsigned operands this presumably
// wraps to a large value and forces a refresh - confirm that this is intended.
void Event::setReferenceTs(uint64_t currentCpuTimeStamp) {
    const bool alreadyCaptured = (referenceTs.cpuTimeinNS != 0);
    const auto elapsedSinceReference = currentCpuTimeStamp - referenceTs.cpuTimeinNS;
    const bool stillFresh = !(elapsedSinceReference > timestampRefreshIntervalInNanoSec);

    if (alreadyCaptured && stillFresh) {
        return;
    }

    device->getNEODevice()->getOSTime()->getCpuGpuTime(&referenceTs);
}
} // namespace L0

View File

@@ -221,9 +221,10 @@ struct Event : _ze_event_handle_t {
uint32_t getInOrderExecSignalValue() const { return inOrderExecSignalValue; }
uint32_t getInOrderAllocationOffset() const { return inOrderAllocationOffset; }
void setLatestUsedCmdQueue(CommandQueue *newCmdQ);
void setReferenceTs(NEO::TimeStampData &timestamp) {
referenceTs = timestamp;
NEO::TimeStampData *peekReferenceTs() {
return &referenceTs;
}
void setReferenceTs(uint64_t currentCpuTimeStamp);
bool hasKerneMappedTsCapability = false;
protected:
@@ -282,6 +283,7 @@ struct Event : _ze_event_handle_t {
bool signalAllEventPackets = false;
bool isFromIpcPool = false;
bool inOrderExecEvent = false;
uint64_t timestampRefreshIntervalInNanoSec = 0;
};
struct EventPool : _ze_event_pool_handle_t {

View File

@@ -67,6 +67,19 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
event->resetDeviceCompletionData(true);
}
const auto frequency = device->getNEODevice()->getDeviceInfo().profilingTimerResolution;
const auto maxKernelTsValue = maxNBitValue(hwInfo.capabilityTable.kernelTimestampValidBits);
if (hwInfo.capabilityTable.kernelTimestampValidBits < 64u) {
event->timestampRefreshIntervalInNanoSec = static_cast<uint64_t>(maxKernelTsValue * frequency) / 2;
} else {
event->timestampRefreshIntervalInNanoSec = maxKernelTsValue / 2;
}
if (NEO::DebugManager.flags.EventTimestampRefreshIntervalInMilliSec.get() != -1) {
constexpr uint32_t milliSecondsToNanoSeconds = 1000000u;
const uint32_t refreshTime = NEO::DebugManager.flags.EventTimestampRefreshIntervalInMilliSec.get();
event->timestampRefreshIntervalInNanoSec = refreshTime * milliSecondsToNanoSeconds;
}
return event;
}

View File

@@ -3137,42 +3137,9 @@ HWTEST2_F(CommandListMappedTimestampTest, givenEventIsAddedToMappedEventListWhen
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
neoDevice->setOSTime(new MockOSTime());
commandList->addToMappedEventList(event.get());
commandList->storeReferenceTsToMappedEvents(true);
EXPECT_EQ(0u, commandList->peekMappedEventList().size());
}
// Exercises storeReferenceTsToMappedEvents with the
// CommandListTimestampRefreshIntervalInMilliSec debug flag set to 0 and checks that the
// mapped event list is kept when the call is made with `false` and emptied when it is
// made with `true`.
HWTEST2_F(CommandListMappedTimestampTest, givenCommandListTimestampRefreshIntervalInMilliSecIsSetWhenStoringReferenceTimestampThenUpdatedRefreshIntervalIsUsed, IsPVC) {
// Restores the debug flags modified below when the test scope ends (presumably via its destructor).
DebugManagerStateRestore restorer;
createKernel();
ze_event_pool_desc_t eventPoolDesc = {};
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP;
eventPoolDesc.count = 2;
ze_event_desc_t eventDesc = {};
eventDesc.index = 0;
eventDesc.wait = 0;
eventDesc.signal = 0;
ze_result_t returnValue;
std::unique_ptr<L0::EventPool> eventPool = std::unique_ptr<L0::EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
std::unique_ptr<L0::Event> event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
// The flag is set before the command list is initialized below.
NEO::DebugManager.flags.CommandListTimestampRefreshIntervalInMilliSec.set(0);
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
// Install a mock OS time; the test drives its CPU timestamp through mockCpuTime.
neoDevice->setOSTime(new MockOSTimeWithConfigurableCpuTimestamp());
auto osTime = static_cast<MockOSTimeWithConfigurableCpuTimestamp *>(neoDevice->getOSTime());
commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
commandList->addToMappedEventList(event.get());
osTime->mockCpuTime = 1;
// Called with `false`: the event is expected to stay in the mapped list.
commandList->storeReferenceTsToMappedEvents(false);
EXPECT_EQ(1u, commandList->peekMappedEventList().size());
commandList->addToMappedEventList(event.get());
commandList->storeReferenceTsToMappedEvents(false);
osTime->mockCpuTime = 2;
commandList->addToMappedEventList(event.get());
// Called with `true`: the mapped list is expected to be empty afterwards.
commandList->storeReferenceTsToMappedEvents(true);
EXPECT_EQ(0u, commandList->peekMappedEventList().size());
}

View File

@@ -2124,8 +2124,9 @@ TEST_F(EventqueryKernelTimestampsExt, givenEventWithMappedTimestampCapabilityWhe
const int64_t cpuReferenceTimeInNs = 3000;
const auto maxKernelTsValue = maxNBitValue(32);
NEO::TimeStampData referenceTs{static_cast<uint64_t>(gpuReferenceTimeInNs / deviceTsFrequency), cpuReferenceTimeInNs};
event->setReferenceTs(referenceTs);
NEO::TimeStampData *referenceTs = event->peekReferenceTs();
referenceTs->cpuTimeinNS = cpuReferenceTimeInNs;
referenceTs->gpuTimeStamp = static_cast<uint64_t>(gpuReferenceTimeInNs / deviceTsFrequency);
auto timeToTimeStamp = [&](uint32_t timeInNs) {
return static_cast<uint32_t>(timeInNs / deviceTsFrequency);
@@ -2214,6 +2215,101 @@ TEST_F(EventqueryKernelTimestampsExt, givenEventWithMappedTimestampCapabilityWhe
EXPECT_LE(results.pSynchronizedTimestampsBuffer[2].context.kernelEnd, expectedContextEnd + errorOffset);
}
using HostMappedEventTests = Test<DeviceFixture>;
// Checks that an event refreshes its reference timestamp when setReferenceTs is called
// with a CPU time one nanosecond past the expected refresh interval. The expected
// interval is derived here from kernelTimestampValidBits and the profiling timer
// resolution.
HWTEST_F(HostMappedEventTests, givenMappedEventsWhenSettingRefereshTimestampThenCorrectRefreshIntervalIsCalculated) {
    const auto timerResolution = device->getNEODevice()->getDeviceInfo().profilingTimerResolution;
    const auto validTsBits = device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.kernelTimestampValidBits;

    // Half of the maximum kernel timestamp value; scaled by the timer resolution
    // only when fewer than 64 timestamp bits are valid.
    uint64_t refreshIntervalNs = 0u;
    if (validTsBits < 64) {
        refreshIntervalNs = static_cast<uint64_t>((maxNBitValue(validTsBits) * timerResolution) / 2);
    } else {
        refreshIntervalNs = maxNBitValue(validTsBits) / 2;
    }

    ze_event_pool_desc_t poolDesc = {};
    poolDesc.count = 1;
    poolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP;
    ze_result_t status = ZE_RESULT_SUCCESS;
    std::unique_ptr<L0::EventPool> pool(EventPool::create(driverHandle.get(), context, 0, nullptr, &poolDesc, status));
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    ASSERT_NE(nullptr, pool);

    ze_event_desc_t desc = {ZE_STRUCTURE_TYPE_EVENT_DESC};
    std::unique_ptr<EventImp<uint64_t>> mappedEvent(static_cast<EventImp<uint64_t> *>(L0::Event::create<uint64_t>(pool.get(), &desc, device)));
    ASSERT_NE(nullptr, mappedEvent);

    // Seed both fields with a sentinel so that a refresh is observable.
    // NOTE(review): with the stored CPU time at the maximum value, the elapsed-time
    // difference in setReferenceTs presumably wraps around to interval + 2 - confirm.
    const auto sentinel = std::numeric_limits<uint64_t>::max();
    NEO::TimeStampData *storedTs = mappedEvent->peekReferenceTs();
    storedTs->cpuTimeinNS = sentinel;
    storedTs->gpuTimeStamp = sentinel;

    mappedEvent->setReferenceTs(refreshIntervalNs + 1);

    EXPECT_NE(storedTs->cpuTimeinNS, sentinel);
    EXPECT_NE(storedTs->gpuTimeStamp, sentinel);
}
// Sets the EventTimestampRefreshIntervalInMilliSec debug flag to 10 ms, creates an event
// and checks that setReferenceTs, called with a CPU time just past that interval,
// replaces the zeroed reference timestamp.
HWTEST_F(HostMappedEventTests, givenEventTimestampRefreshIntervalInMilliSecIsSetThenCorrectRefreshIntervalIsCalculated) {
    const uint32_t intervalMs = 10;
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.EventTimestampRefreshIntervalInMilliSec.set(intervalMs);

    ze_event_pool_desc_t poolDesc = {};
    poolDesc.count = 1;
    poolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP;
    ze_result_t status = ZE_RESULT_SUCCESS;
    std::unique_ptr<L0::EventPool> pool(EventPool::create(driverHandle.get(), context, 0, nullptr, &poolDesc, status));
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    ASSERT_NE(nullptr, pool);

    // Milliseconds converted to nanoseconds.
    uint64_t intervalNs = intervalMs * 1000000;

    ze_event_desc_t desc = {ZE_STRUCTURE_TYPE_EVENT_DESC};
    std::unique_ptr<EventImp<uint64_t>> mappedEvent(static_cast<EventImp<uint64_t> *>(L0::Event::create<uint64_t>(pool.get(), &desc, device)));
    ASSERT_NE(nullptr, mappedEvent);

    // Start from a zeroed reference timestamp.
    // NOTE(review): Event::setReferenceTs also refreshes whenever cpuTimeinNS == 0, so
    // with this starting value the refresh happens regardless of the configured
    // interval - consider a non-zero starting value to exercise the flag.
    NEO::TimeStampData *storedTs = mappedEvent->peekReferenceTs();
    storedTs->cpuTimeinNS = 0;
    storedTs->gpuTimeStamp = 0;

    mappedEvent->setReferenceTs(intervalNs + 1);

    EXPECT_NE(storedTs->cpuTimeinNS, 0u);
    EXPECT_NE(storedTs->gpuTimeStamp, 0u);
}
// Sets the EventTimestampRefreshIntervalInMilliSec debug flag to 10 ms and checks that
// setReferenceTs leaves an already-populated reference timestamp unchanged when the
// supplied CPU time is still within that interval of the stored one.
HWTEST_F(HostMappedEventTests, givenEventTimestampRefreshIntervalInMilliSecIsSetThenRefreshIntervalIsNotCalculatedIfCpuTimeLessThanInterval) {
    const uint32_t intervalMs = 10;
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.EventTimestampRefreshIntervalInMilliSec.set(intervalMs);

    ze_event_pool_desc_t poolDesc = {};
    poolDesc.count = 1;
    poolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP;
    ze_result_t status = ZE_RESULT_SUCCESS;
    std::unique_ptr<L0::EventPool> pool(EventPool::create(driverHandle.get(), context, 0, nullptr, &poolDesc, status));
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    ASSERT_NE(nullptr, pool);

    // Milliseconds converted to nanoseconds.
    uint64_t intervalNs = intervalMs * 1000000;

    ze_event_desc_t desc = {ZE_STRUCTURE_TYPE_EVENT_DESC};
    std::unique_ptr<EventImp<uint64_t>> mappedEvent(static_cast<EventImp<uint64_t> *>(L0::Event::create<uint64_t>(pool.get(), &desc, device)));
    ASSERT_NE(nullptr, mappedEvent);

    // Non-zero starting values, so the "never captured" path is not taken and only
    // the elapsed-time comparison decides whether a refresh happens.
    NEO::TimeStampData *storedTs = mappedEvent->peekReferenceTs();
    storedTs->cpuTimeinNS = 1;
    storedTs->gpuTimeStamp = 1;

    // Elapsed time is (intervalNs - 2) - 1, i.e. below the configured interval.
    mappedEvent->setReferenceTs(intervalNs - 2);

    EXPECT_EQ(storedTs->cpuTimeinNS, 1u);
    EXPECT_EQ(storedTs->gpuTimeStamp, 1u);
}
HWCMDTEST_F(IGFX_GEN9_CORE, TimestampEventCreate, givenEventTimestampsWhenQueryKernelTimestampThenCorrectDataAreSet) {
typename MockTimestampPackets32::Packet data = {};
data.contextStart = 1u;