mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 14:55:24 +08:00
fix: Move event reference time tracking into event class
This avoids recalculating reference timestamps when an event is used with different command lists. Related-To: LOCI-4563 Signed-off-by: Joshua Santosh Ranjan <joshua.santosh.ranjan@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
175ceb9bd1
commit
b6e76b9118
@@ -394,7 +394,6 @@ struct CommandList : _ze_command_list_handle_t {
|
||||
uint32_t commandListPerThreadPrivateScratchSize = 0u;
|
||||
uint32_t partitionCount = 1;
|
||||
uint32_t defaultMocsIndex = 0;
|
||||
uint64_t timestampRefreshIntervalInNanoSec = 0;
|
||||
|
||||
bool isFlushTaskSubmissionEnabled = false;
|
||||
bool isSyncModeQueue = false;
|
||||
|
||||
@@ -134,8 +134,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
|
||||
cmdListCurrentStartOffset = 0;
|
||||
|
||||
mappedTsEventList.clear();
|
||||
previousSynchronizedTimestamp = {};
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -236,20 +234,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
|
||||
}
|
||||
|
||||
createLogicalStateHelper();
|
||||
|
||||
const auto frequency = device->getNEODevice()->getDeviceInfo().profilingTimerResolution;
|
||||
const auto maxKernelTsValue = maxNBitValue(hwInfo.capabilityTable.kernelTimestampValidBits);
|
||||
if (hwInfo.capabilityTable.kernelTimestampValidBits < 64u) {
|
||||
this->timestampRefreshIntervalInNanoSec = static_cast<uint64_t>(maxKernelTsValue * frequency);
|
||||
} else {
|
||||
this->timestampRefreshIntervalInNanoSec = maxKernelTsValue;
|
||||
}
|
||||
if (NEO::DebugManager.flags.CommandListTimestampRefreshIntervalInMilliSec.get() != -1) {
|
||||
constexpr uint32_t milliSecondsToNanoSeconds = 1000000u;
|
||||
const uint32_t refreshTime = NEO::DebugManager.flags.CommandListTimestampRefreshIntervalInMilliSec.get();
|
||||
this->timestampRefreshIntervalInNanoSec = refreshTime * milliSecondsToNanoSeconds;
|
||||
}
|
||||
|
||||
return returnType;
|
||||
}
|
||||
|
||||
|
||||
@@ -246,14 +246,8 @@ void CommandListImp::storeReferenceTsToMappedEvents(bool isClearEnabled) {
|
||||
if (mappedTsEventList.size()) {
|
||||
uint64_t currentCpuTimeStamp = 0;
|
||||
device->getNEODevice()->getOSTime()->getCpuTime(&currentCpuTimeStamp);
|
||||
const auto recalculate =
|
||||
(currentCpuTimeStamp - previousSynchronizedTimestamp.cpuTimeinNS) > timestampRefreshIntervalInNanoSec;
|
||||
if (previousSynchronizedTimestamp.cpuTimeinNS == 0 || recalculate) {
|
||||
device->getNEODevice()->getOSTime()->getCpuGpuTime(&previousSynchronizedTimestamp);
|
||||
}
|
||||
|
||||
for (auto &event : mappedTsEventList) {
|
||||
event->setReferenceTs(previousSynchronizedTimestamp);
|
||||
event->setReferenceTs(currentCpuTimeStamp);
|
||||
}
|
||||
|
||||
if (isClearEnabled) {
|
||||
|
||||
@@ -56,7 +56,6 @@ struct CommandListImp : CommandList {
|
||||
static constexpr bool cmdListDefaultMediaSamplerClockGate = false;
|
||||
static constexpr bool cmdListDefaultGlobalAtomics = false;
|
||||
std::vector<Event *> mappedTsEventList{};
|
||||
NEO::TimeStampData previousSynchronizedTimestamp{};
|
||||
};
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -410,4 +410,12 @@ void Event::unsetCmdQueue(bool unregisterClient) {
|
||||
latestUsedCmdQueue = nullptr;
|
||||
}
|
||||
|
||||
void Event::setReferenceTs(uint64_t currentCpuTimeStamp) {
|
||||
const auto recalculate =
|
||||
(currentCpuTimeStamp - referenceTs.cpuTimeinNS) > timestampRefreshIntervalInNanoSec;
|
||||
if (referenceTs.cpuTimeinNS == 0 || recalculate) {
|
||||
device->getNEODevice()->getOSTime()->getCpuGpuTime(&referenceTs);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -221,9 +221,10 @@ struct Event : _ze_event_handle_t {
|
||||
uint32_t getInOrderExecSignalValue() const { return inOrderExecSignalValue; }
|
||||
uint32_t getInOrderAllocationOffset() const { return inOrderAllocationOffset; }
|
||||
void setLatestUsedCmdQueue(CommandQueue *newCmdQ);
|
||||
void setReferenceTs(NEO::TimeStampData &timestamp) {
|
||||
referenceTs = timestamp;
|
||||
NEO::TimeStampData *peekReferenceTs() {
|
||||
return &referenceTs;
|
||||
}
|
||||
void setReferenceTs(uint64_t currentCpuTimeStamp);
|
||||
bool hasKerneMappedTsCapability = false;
|
||||
|
||||
protected:
|
||||
@@ -282,6 +283,7 @@ struct Event : _ze_event_handle_t {
|
||||
bool signalAllEventPackets = false;
|
||||
bool isFromIpcPool = false;
|
||||
bool inOrderExecEvent = false;
|
||||
uint64_t timestampRefreshIntervalInNanoSec = 0;
|
||||
};
|
||||
|
||||
struct EventPool : _ze_event_pool_handle_t {
|
||||
|
||||
@@ -67,6 +67,19 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
|
||||
event->resetDeviceCompletionData(true);
|
||||
}
|
||||
|
||||
const auto frequency = device->getNEODevice()->getDeviceInfo().profilingTimerResolution;
|
||||
const auto maxKernelTsValue = maxNBitValue(hwInfo.capabilityTable.kernelTimestampValidBits);
|
||||
if (hwInfo.capabilityTable.kernelTimestampValidBits < 64u) {
|
||||
event->timestampRefreshIntervalInNanoSec = static_cast<uint64_t>(maxKernelTsValue * frequency) / 2;
|
||||
} else {
|
||||
event->timestampRefreshIntervalInNanoSec = maxKernelTsValue / 2;
|
||||
}
|
||||
if (NEO::DebugManager.flags.EventTimestampRefreshIntervalInMilliSec.get() != -1) {
|
||||
constexpr uint32_t milliSecondsToNanoSeconds = 1000000u;
|
||||
const uint32_t refreshTime = NEO::DebugManager.flags.EventTimestampRefreshIntervalInMilliSec.get();
|
||||
event->timestampRefreshIntervalInNanoSec = refreshTime * milliSecondsToNanoSeconds;
|
||||
}
|
||||
|
||||
return event;
|
||||
}
|
||||
|
||||
|
||||
@@ -3137,42 +3137,9 @@ HWTEST2_F(CommandListMappedTimestampTest, givenEventIsAddedToMappedEventListWhen
|
||||
|
||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
||||
neoDevice->setOSTime(new MockOSTime());
|
||||
commandList->addToMappedEventList(event.get());
|
||||
commandList->storeReferenceTsToMappedEvents(true);
|
||||
EXPECT_EQ(0u, commandList->peekMappedEventList().size());
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListMappedTimestampTest, givenCommandListTimestampRefreshIntervalInMilliSecIsSetWhenStoringReferenceTimestampThenUpdatedRefreshIntervalIsUsed, IsPVC) {
|
||||
|
||||
DebugManagerStateRestore restorer;
|
||||
createKernel();
|
||||
ze_event_pool_desc_t eventPoolDesc = {};
|
||||
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP;
|
||||
eventPoolDesc.count = 2;
|
||||
|
||||
ze_event_desc_t eventDesc = {};
|
||||
eventDesc.index = 0;
|
||||
eventDesc.wait = 0;
|
||||
eventDesc.signal = 0;
|
||||
|
||||
ze_result_t returnValue;
|
||||
std::unique_ptr<L0::EventPool> eventPool = std::unique_ptr<L0::EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
|
||||
std::unique_ptr<L0::Event> event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
|
||||
|
||||
NEO::DebugManager.flags.CommandListTimestampRefreshIntervalInMilliSec.set(0);
|
||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
neoDevice->setOSTime(new MockOSTimeWithConfigurableCpuTimestamp());
|
||||
auto osTime = static_cast<MockOSTimeWithConfigurableCpuTimestamp *>(neoDevice->getOSTime());
|
||||
commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
|
||||
commandList->addToMappedEventList(event.get());
|
||||
osTime->mockCpuTime = 1;
|
||||
commandList->storeReferenceTsToMappedEvents(false);
|
||||
EXPECT_EQ(1u, commandList->peekMappedEventList().size());
|
||||
commandList->addToMappedEventList(event.get());
|
||||
commandList->storeReferenceTsToMappedEvents(false);
|
||||
osTime->mockCpuTime = 2;
|
||||
commandList->addToMappedEventList(event.get());
|
||||
commandList->storeReferenceTsToMappedEvents(true);
|
||||
EXPECT_EQ(0u, commandList->peekMappedEventList().size());
|
||||
}
|
||||
|
||||
@@ -2124,8 +2124,9 @@ TEST_F(EventqueryKernelTimestampsExt, givenEventWithMappedTimestampCapabilityWhe
|
||||
const int64_t cpuReferenceTimeInNs = 3000;
|
||||
const auto maxKernelTsValue = maxNBitValue(32);
|
||||
|
||||
NEO::TimeStampData referenceTs{static_cast<uint64_t>(gpuReferenceTimeInNs / deviceTsFrequency), cpuReferenceTimeInNs};
|
||||
event->setReferenceTs(referenceTs);
|
||||
NEO::TimeStampData *referenceTs = event->peekReferenceTs();
|
||||
referenceTs->cpuTimeinNS = cpuReferenceTimeInNs;
|
||||
referenceTs->gpuTimeStamp = static_cast<uint64_t>(gpuReferenceTimeInNs / deviceTsFrequency);
|
||||
|
||||
auto timeToTimeStamp = [&](uint32_t timeInNs) {
|
||||
return static_cast<uint32_t>(timeInNs / deviceTsFrequency);
|
||||
@@ -2214,6 +2215,101 @@ TEST_F(EventqueryKernelTimestampsExt, givenEventWithMappedTimestampCapabilityWhe
|
||||
EXPECT_LE(results.pSynchronizedTimestampsBuffer[2].context.kernelEnd, expectedContextEnd + errorOffset);
|
||||
}
|
||||
|
||||
using HostMappedEventTests = Test<DeviceFixture>;
|
||||
HWTEST_F(HostMappedEventTests, givenMappedEventsWhenSettingRefereshTimestampThenCorrectRefreshIntervalIsCalculated) {
|
||||
|
||||
ze_event_pool_desc_t eventPoolDesc = {};
|
||||
const auto deviceTsFrequency = device->getNEODevice()->getDeviceInfo().profilingTimerResolution;
|
||||
const auto kernelTsValidBits = device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.kernelTimestampValidBits;
|
||||
eventPoolDesc.count = 1;
|
||||
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP;
|
||||
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
std::unique_ptr<L0::EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
ASSERT_NE(nullptr, eventPool);
|
||||
|
||||
uint64_t expectedTsRefreshIntervalInNanoSec = 0u;
|
||||
if (kernelTsValidBits >= 64) {
|
||||
expectedTsRefreshIntervalInNanoSec = maxNBitValue(kernelTsValidBits) / 2;
|
||||
} else {
|
||||
expectedTsRefreshIntervalInNanoSec = static_cast<uint64_t>((maxNBitValue(kernelTsValidBits) * deviceTsFrequency) / 2);
|
||||
}
|
||||
|
||||
ze_event_desc_t eventDesc = {ZE_STRUCTURE_TYPE_EVENT_DESC};
|
||||
auto event = std::unique_ptr<EventImp<uint64_t>>(static_cast<EventImp<uint64_t> *>(L0::Event::create<uint64_t>(eventPool.get(), &eventDesc, device)));
|
||||
ASSERT_NE(nullptr, event);
|
||||
|
||||
// Reset before setting
|
||||
NEO::TimeStampData *resetReferenceTs = event->peekReferenceTs();
|
||||
resetReferenceTs->cpuTimeinNS = std::numeric_limits<uint64_t>::max();
|
||||
resetReferenceTs->gpuTimeStamp = std::numeric_limits<uint64_t>::max();
|
||||
|
||||
event->setReferenceTs(expectedTsRefreshIntervalInNanoSec + 1);
|
||||
EXPECT_NE(resetReferenceTs->cpuTimeinNS, std::numeric_limits<uint64_t>::max());
|
||||
EXPECT_NE(resetReferenceTs->gpuTimeStamp, std::numeric_limits<uint64_t>::max());
|
||||
}
|
||||
|
||||
HWTEST_F(HostMappedEventTests, givenEventTimestampRefreshIntervalInMilliSecIsSetThenCorrectRefreshIntervalIsCalculated) {
|
||||
|
||||
const uint32_t refereshIntervalMillisec = 10;
|
||||
DebugManagerStateRestore restorer;
|
||||
NEO::DebugManager.flags.EventTimestampRefreshIntervalInMilliSec.set(refereshIntervalMillisec);
|
||||
ze_event_pool_desc_t eventPoolDesc = {};
|
||||
eventPoolDesc.count = 1;
|
||||
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP;
|
||||
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
std::unique_ptr<L0::EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
ASSERT_NE(nullptr, eventPool);
|
||||
|
||||
uint64_t expectedTsRefreshIntervalInNanoSec = refereshIntervalMillisec * 1000000;
|
||||
|
||||
ze_event_desc_t eventDesc = {ZE_STRUCTURE_TYPE_EVENT_DESC};
|
||||
auto event = std::unique_ptr<EventImp<uint64_t>>(static_cast<EventImp<uint64_t> *>(L0::Event::create<uint64_t>(eventPool.get(), &eventDesc, device)));
|
||||
ASSERT_NE(nullptr, event);
|
||||
|
||||
// Reset before setting
|
||||
NEO::TimeStampData *resetReferenceTs = event->peekReferenceTs();
|
||||
resetReferenceTs->cpuTimeinNS = 0;
|
||||
resetReferenceTs->gpuTimeStamp = 0;
|
||||
|
||||
event->setReferenceTs(expectedTsRefreshIntervalInNanoSec + 1);
|
||||
EXPECT_NE(resetReferenceTs->cpuTimeinNS, 0u);
|
||||
EXPECT_NE(resetReferenceTs->gpuTimeStamp, 0u);
|
||||
}
|
||||
|
||||
HWTEST_F(HostMappedEventTests, givenEventTimestampRefreshIntervalInMilliSecIsSetThenRefreshIntervalIsNotCalculatedIfCpuTimeLessThanInterval) {
|
||||
|
||||
const uint32_t refereshIntervalMillisec = 10;
|
||||
DebugManagerStateRestore restorer;
|
||||
NEO::DebugManager.flags.EventTimestampRefreshIntervalInMilliSec.set(refereshIntervalMillisec);
|
||||
ze_event_pool_desc_t eventPoolDesc = {};
|
||||
eventPoolDesc.count = 1;
|
||||
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP;
|
||||
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
std::unique_ptr<L0::EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
ASSERT_NE(nullptr, eventPool);
|
||||
|
||||
uint64_t expectedTsRefreshIntervalInNanoSec = refereshIntervalMillisec * 1000000;
|
||||
|
||||
ze_event_desc_t eventDesc = {ZE_STRUCTURE_TYPE_EVENT_DESC};
|
||||
auto event = std::unique_ptr<EventImp<uint64_t>>(static_cast<EventImp<uint64_t> *>(L0::Event::create<uint64_t>(eventPool.get(), &eventDesc, device)));
|
||||
ASSERT_NE(nullptr, event);
|
||||
|
||||
// Reset before setting
|
||||
NEO::TimeStampData *resetReferenceTs = event->peekReferenceTs();
|
||||
resetReferenceTs->cpuTimeinNS = 1;
|
||||
resetReferenceTs->gpuTimeStamp = 1;
|
||||
|
||||
event->setReferenceTs(expectedTsRefreshIntervalInNanoSec - 2);
|
||||
EXPECT_EQ(resetReferenceTs->cpuTimeinNS, 1u);
|
||||
EXPECT_EQ(resetReferenceTs->gpuTimeStamp, 1u);
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_GEN9_CORE, TimestampEventCreate, givenEventTimestampsWhenQueryKernelTimestampThenCorrectDataAreSet) {
|
||||
typename MockTimestampPackets32::Packet data = {};
|
||||
data.contextStart = 1u;
|
||||
|
||||
Reference in New Issue
Block a user