mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 23:03:02 +08:00
fix: Mapped time stamp retrieval with IPC
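Create shared allocations to maintain reference timestamps: the reference timestamp is kept in the event pool allocation, behind each event's packets, rather than in the Event object. Add a flag to the IPC pool data that records whether the kernel-mapped timestamp flag is set on the pool.
Related-To: NEO-12313
Signed-off-by: Bellekallu Rajkiran <bellekallu.rajkiran@intel.com>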
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
474b91aa36
commit
ad3110ff67
@@ -83,7 +83,7 @@ zexCounterBasedEventCreate2(ze_context_handle_t hContext, ze_device_handle_t hDe
|
||||
signalScope, // signalScope
|
||||
desc->waitScope, // waitScope
|
||||
timestampFlag, // timestampPool
|
||||
mappedTimestampFlag, // kerneMappedTsPoolFlag
|
||||
mappedTimestampFlag, // kernelMappedTsPoolFlag
|
||||
false, // importedIpcPool
|
||||
ipcFlag, // ipcPool
|
||||
};
|
||||
|
||||
@@ -329,7 +329,7 @@ void CommandListImp::storeReferenceTsToMappedEvents(bool isClearEnabled) {
|
||||
}
|
||||
|
||||
void CommandListImp::addToMappedEventList(Event *event) {
|
||||
if (event && event->hasKerneMappedTsCapability) {
|
||||
if (event && event->hasKernelMappedTsCapability) {
|
||||
if (std::find(mappedTsEventList.begin(), mappedTsEventList.end(), event) == mappedTsEventList.end()) {
|
||||
mappedTsEventList.push_back(event);
|
||||
}
|
||||
|
||||
@@ -210,9 +210,15 @@ void EventPool::initializeSizeParameters(uint32_t numDevices, ze_device_handle_t
|
||||
eventPackets = driver.getEventMaxPacketCount(numDevices, deviceHandles);
|
||||
maxKernelCount = driver.getEventMaxKernelCount(numDevices, deviceHandles);
|
||||
}
|
||||
setEventSize(static_cast<uint32_t>(alignUp(eventPackets * gfxCoreHelper.getSingleTimestampPacketSize(), eventAlignment)));
|
||||
|
||||
eventPoolSize = alignUp<size_t>(this->numEvents * eventSize, MemoryConstants::pageSize64k);
|
||||
auto eventSize = eventPackets * gfxCoreHelper.getSingleTimestampPacketSize();
|
||||
if (eventPoolFlags & ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP) {
|
||||
eventSize += sizeof(NEO::TimeStampData);
|
||||
}
|
||||
|
||||
setEventSize(static_cast<uint32_t>(alignUp(eventSize, eventAlignment)));
|
||||
|
||||
eventPoolSize = alignUp<size_t>(this->numEvents * this->eventSize, MemoryConstants::pageSize64k);
|
||||
}
|
||||
|
||||
EventPool *EventPool::create(DriverHandle *driver, Context *context, uint32_t numDevices, ze_device_handle_t *deviceHandles, const ze_event_pool_desc_t *desc, ze_result_t &result) {
|
||||
@@ -311,7 +317,7 @@ ze_result_t Event::openCounterBasedIpcHandle(const IpcCounterBasedEventData &ipc
|
||||
ipcData.signalScopeFlags, // signalScope
|
||||
ipcData.waitScopeFlags, // waitScope
|
||||
false, // timestampPool
|
||||
false, // kerneMappedTsPoolFlag
|
||||
false, // kernelMappedTsPoolFlag
|
||||
true, // importedIpcPool
|
||||
false, // ipcPool
|
||||
};
|
||||
@@ -385,6 +391,7 @@ ze_result_t EventPool::getIpcHandle(ze_ipc_event_pool_handle_t *ipcHandle) {
|
||||
poolData.isImplicitScalingCapable = this->isImplicitScalingCapable;
|
||||
poolData.maxEventPackets = this->getEventMaxPackets();
|
||||
poolData.numDevices = static_cast<uint32_t>(this->devices.size());
|
||||
poolData.isEventPoolKernelMappedTsFlagSet = this->isEventPoolKernelMappedTsFlagSet();
|
||||
|
||||
auto memoryManager = this->context->getDriverHandle()->getMemoryManager();
|
||||
auto allocation = this->eventPoolAllocations->getDefaultGraphicsAllocation();
|
||||
@@ -402,6 +409,9 @@ ze_result_t EventPool::openEventPoolIpcHandle(const ze_ipc_event_pool_handle_t &
|
||||
const IpcEventPoolData &poolData = *reinterpret_cast<const IpcEventPoolData *>(ipcEventPoolHandle.data);
|
||||
|
||||
ze_event_pool_desc_t desc = {ZE_STRUCTURE_TYPE_EVENT_POOL_DESC};
|
||||
if (poolData.isEventPoolKernelMappedTsFlagSet) {
|
||||
desc.flags |= ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP;
|
||||
}
|
||||
desc.count = static_cast<uint32_t>(poolData.numEvents);
|
||||
auto eventPool = std::make_unique<EventPool>(&desc);
|
||||
eventPool->isDeviceEventPoolAllocation = poolData.isDeviceEventPoolAllocation;
|
||||
@@ -640,10 +650,11 @@ void Event::unsetCmdQueue() {
|
||||
}
|
||||
|
||||
void Event::setReferenceTs(uint64_t currentCpuTimeStamp) {
|
||||
NEO::TimeStampData *referenceTs = static_cast<NEO::TimeStampData *>(ptrOffset(getHostAddress(), maxPacketCount * singlePacketSize));
|
||||
const auto recalculate =
|
||||
(currentCpuTimeStamp - referenceTs.cpuTimeinNS) > timestampRefreshIntervalInNanoSec;
|
||||
if (referenceTs.cpuTimeinNS == 0 || recalculate) {
|
||||
device->getNEODevice()->getOSTime()->getGpuCpuTime(&referenceTs, true);
|
||||
(currentCpuTimeStamp - referenceTs->cpuTimeinNS) > timestampRefreshIntervalInNanoSec;
|
||||
if (referenceTs->cpuTimeinNS == 0 || recalculate) {
|
||||
device->getNEODevice()->getOSTime()->getGpuCpuTime(referenceTs, true);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#pragma once
|
||||
#include "shared/source/helpers/common_types.h"
|
||||
#include "shared/source/helpers/constants.h"
|
||||
#include "shared/source/helpers/ptr_math.h"
|
||||
#include "shared/source/helpers/timestamp_packet_constants.h"
|
||||
#include "shared/source/helpers/timestamp_packet_container.h"
|
||||
#include "shared/source/memory_manager/multi_graphics_allocation.h"
|
||||
@@ -61,6 +62,7 @@ struct IpcEventPoolData {
|
||||
bool isDeviceEventPoolAllocation = false;
|
||||
bool isHostVisibleEventPoolAllocation = false;
|
||||
bool isImplicitScalingCapable = false;
|
||||
bool isEventPoolKernelMappedTsFlagSet = false;
|
||||
};
|
||||
|
||||
struct IpcCounterBasedEventData {
|
||||
@@ -95,7 +97,7 @@ struct EventDescriptor {
|
||||
uint32_t signalScope = 0;
|
||||
uint32_t waitScope = 0;
|
||||
bool timestampPool = false;
|
||||
bool kerneMappedTsPoolFlag = false;
|
||||
bool kernelMappedTsPoolFlag = false;
|
||||
bool importedIpcPool = false;
|
||||
bool ipcPool = false;
|
||||
};
|
||||
@@ -303,11 +305,11 @@ struct Event : _ze_event_handle_t {
|
||||
uint32_t getInOrderAllocationOffset() const { return inOrderAllocationOffset; }
|
||||
void setLatestUsedCmdQueue(CommandQueue *newCmdQ);
|
||||
// Returns a pointer to the reference timestamp slot located
// getMaxPacketsCount() * getSinglePacketSize() bytes past getHostAddress().
// NOTE(review): the slot is only meaningful when the backing memory actually
// extends that far (presumably pools created with the kernel-mapped timestamp
// flag) - confirm before dereferencing for other events.
NEO::TimeStampData *peekReferenceTs() {
    return static_cast<NEO::TimeStampData *>(ptrOffset(getHostAddress(), getMaxPacketsCount() * getSinglePacketSize()));
}
|
||||
void setReferenceTs(uint64_t currentCpuTimeStamp);
|
||||
const CommandQueue *getLatestUsedCmdQueue() const { return latestUsedCmdQueue; }
|
||||
bool hasKerneMappedTsCapability = false;
|
||||
bool hasKernelMappedTsCapability = false;
|
||||
std::shared_ptr<NEO::InOrderExecInfo> &getInOrderExecInfo() { return inOrderExecInfo; }
|
||||
void enableKmdWaitMode() { kmdWaitMode = true; }
|
||||
void enableInterruptMode() { interruptMode = true; }
|
||||
@@ -348,7 +350,6 @@ struct Event : _ze_event_handle_t {
|
||||
uint64_t globalEndTS = 1;
|
||||
uint64_t contextStartTS = 1;
|
||||
uint64_t contextEndTS = 1;
|
||||
NEO::TimeStampData referenceTs{};
|
||||
|
||||
uint64_t inOrderExecSignalValue = 0;
|
||||
uint32_t inOrderAllocationOffset = 0;
|
||||
@@ -446,7 +447,7 @@ struct EventPool : _ze_event_pool_handle_t {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool isEventPoolKerneMappedTsFlagSet() const {
|
||||
bool isEventPoolKernelMappedTsFlagSet() const {
|
||||
if (eventPoolFlags & ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -41,7 +41,7 @@ Event *Event::create(const EventDescriptor &eventDescriptor, Device *device, ze_
|
||||
event->setEventTimestampFlag(true);
|
||||
event->setSinglePacketSize(NEO::TimestampPackets<TagSizeT, NEO::TimestampPacketConstants::preferredPacketCount>::getSinglePacketSize());
|
||||
}
|
||||
event->hasKerneMappedTsCapability = eventDescriptor.kerneMappedTsPoolFlag;
|
||||
event->hasKernelMappedTsCapability = eventDescriptor.kernelMappedTsPoolFlag;
|
||||
|
||||
event->signalAllEventPackets = L0GfxCoreHelper::useSignalAllEventPackets(hwInfo);
|
||||
|
||||
@@ -118,19 +118,19 @@ Event *Event::create(const EventDescriptor &eventDescriptor, Device *device, ze_
|
||||
template <typename TagSizeT>
|
||||
Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *device) {
|
||||
EventDescriptor eventDescriptor = {
|
||||
&eventPool->getAllocation(), // eventPoolAllocation
|
||||
desc->pNext, // extensions
|
||||
eventPool->getEventSize(), // totalEventSize
|
||||
eventPool->getMaxKernelCount(), // maxKernelCount
|
||||
eventPool->getEventMaxPackets(), // maxPacketsCount
|
||||
eventPool->getCounterBasedFlags(), // counterBasedFlags
|
||||
desc->index, // index
|
||||
desc->signal, // signalScope
|
||||
desc->wait, // waitScope
|
||||
eventPool->isEventPoolTimestampFlagSet(), // timestampPool
|
||||
eventPool->isEventPoolKerneMappedTsFlagSet(), // kerneMappedTsPoolFlag
|
||||
eventPool->getImportedIpcPool(), // importedIpcPool
|
||||
eventPool->isIpcPoolFlagSet(), // ipcPool
|
||||
&eventPool->getAllocation(), // eventPoolAllocation
|
||||
desc->pNext, // extensions
|
||||
eventPool->getEventSize(), // totalEventSize
|
||||
eventPool->getMaxKernelCount(), // maxKernelCount
|
||||
eventPool->getEventMaxPackets(), // maxPacketsCount
|
||||
eventPool->getCounterBasedFlags(), // counterBasedFlags
|
||||
desc->index, // index
|
||||
desc->signal, // signalScope
|
||||
desc->wait, // waitScope
|
||||
eventPool->isEventPoolTimestampFlagSet(), // timestampPool
|
||||
eventPool->isEventPoolKernelMappedTsFlagSet(), // kernelMappedTsPoolFlag
|
||||
eventPool->getImportedIpcPool(), // importedIpcPool
|
||||
eventPool->isIpcPoolFlagSet(), // ipcPool
|
||||
};
|
||||
|
||||
if (eventPool->getCounterBasedFlags() != 0 && standaloneInOrderTimestampAllocationEnabled()) {
|
||||
@@ -886,7 +886,8 @@ void EventImp<TagSizeT>::getSynchronizedKernelTimestamps(ze_synchronized_timesta
|
||||
return static_cast<uint64_t>((deviceTs & maxClampedTsValue) * resolution);
|
||||
};
|
||||
|
||||
auto deviceTsInNs = convertDeviceTsToNanoseconds(referenceTs.gpuTimeStamp);
|
||||
NEO::TimeStampData *referenceTs = static_cast<NEO::TimeStampData *>(ptrOffset(getHostAddress(), maxPacketCount * singlePacketSize));
|
||||
auto deviceTsInNs = convertDeviceTsToNanoseconds(referenceTs->gpuTimeStamp);
|
||||
|
||||
auto getDuration = [&](uint64_t startTs, uint64_t endTs) {
|
||||
const uint64_t maxValue = maxKernelTsValue;
|
||||
@@ -901,7 +902,7 @@ void EventImp<TagSizeT>::getSynchronizedKernelTimestamps(ze_synchronized_timesta
|
||||
}
|
||||
};
|
||||
|
||||
const auto &referenceHostTsInNs = referenceTs.cpuTimeinNS;
|
||||
const auto &referenceHostTsInNs = referenceTs->cpuTimeinNS;
|
||||
|
||||
// High Level Approach:
|
||||
// startTimeStamp = (referenceHostTsInNs - submitDeviceTs) + kernelDeviceTsStart
|
||||
@@ -955,7 +956,7 @@ ze_result_t EventImp<TagSizeT>::queryKernelTimestampsExt(Device *device, uint32_
|
||||
|
||||
ze_result_t status = queryTimestampsExp(device, pCount, pResults->pKernelTimestampsBuffer);
|
||||
|
||||
if (status == ZE_RESULT_SUCCESS && hasKerneMappedTsCapability) {
|
||||
if (status == ZE_RESULT_SUCCESS && hasKernelMappedTsCapability) {
|
||||
getSynchronizedKernelTimestamps(pResults->pSynchronizedTimestampsBuffer, *pCount, pResults->pKernelTimestampsBuffer);
|
||||
}
|
||||
return status;
|
||||
|
||||
@@ -98,7 +98,7 @@ struct InOrderCmdListFixture : public ::Test<ModuleFixture> {
|
||||
0, // signalScope
|
||||
0, // waitScope
|
||||
false, // timestampPool
|
||||
false, // kerneMappedTsPoolFlag
|
||||
false, // kernelMappedTsPoolFlag
|
||||
false, // importedIpcPool
|
||||
false, // ipcPool
|
||||
};
|
||||
|
||||
@@ -2264,7 +2264,7 @@ TEST_F(EventqueryKernelTimestampsExt, givenEventWithStaticPartitionOffThenQueryK
|
||||
DebugManagerStateRestore restore;
|
||||
NEO::debugManager.flags.EnableStaticPartitioning.set(0);
|
||||
|
||||
event->hasKerneMappedTsCapability = true;
|
||||
event->hasKernelMappedTsCapability = true;
|
||||
|
||||
std::vector<ze_kernel_timestamp_result_t> kernelTsBuffer(2);
|
||||
ze_event_query_kernel_timestamps_results_ext_properties_t results{};
|
||||
@@ -2282,7 +2282,7 @@ TEST_F(EventqueryKernelTimestampsExt, givenEventStatusNotReadyThenQueryKernelTim
|
||||
DebugManagerStateRestore restore;
|
||||
NEO::debugManager.flags.EnableStaticPartitioning.set(0);
|
||||
|
||||
event->hasKerneMappedTsCapability = true;
|
||||
event->hasKernelMappedTsCapability = true;
|
||||
|
||||
std::vector<ze_kernel_timestamp_result_t> kernelTsBuffer(2);
|
||||
ze_event_query_kernel_timestamps_results_ext_properties_t results{};
|
||||
@@ -2297,43 +2297,48 @@ TEST_F(EventqueryKernelTimestampsExt, givenEventStatusNotReadyThenQueryKernelTim
|
||||
|
||||
TEST_F(EventqueryKernelTimestampsExt, givenEventWithMappedTimestampCapabilityWhenQueryKernelTimestampsExtIsCalledCorrectValuesAreReturned) {
|
||||
|
||||
// Host-memory stand-in for a single event: three timestamp packets followed by
// the reference timestamp. The test installs it as event->hostAddressFromPool
// and sets maxPacketCount to 3.
// NOTE(review): relies on sizeof(Packet) matching the event's single packet size
// and on no padding before referenceTs, so that peekReferenceTs() lands on this
// field - confirm.
struct MappedTimeStampData {
|
||||
typename MockTimestampPackets32::Packet packetData[3];
|
||||
NEO::TimeStampData referenceTs{};
|
||||
} mappedTimeStampData;
|
||||
|
||||
auto &hwInfo = device->getNEODevice()->getHardwareInfo();
|
||||
typename MockTimestampPackets32::Packet packetData[3];
|
||||
device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.kernelTimestampValidBits = 32;
|
||||
event->setPacketsInUse(3u);
|
||||
event->hasKerneMappedTsCapability = true;
|
||||
event->hasKernelMappedTsCapability = true;
|
||||
const auto deviceTsFrequency = device->getNEODevice()->getDeviceInfo().profilingTimerResolution;
|
||||
const int64_t gpuReferenceTimeInNs = 2000;
|
||||
const int64_t cpuReferenceTimeInNs = 3000;
|
||||
const auto maxKernelTsValue = maxNBitValue(hwInfo.capabilityTable.kernelTimestampValidBits);
|
||||
|
||||
NEO::TimeStampData *referenceTs = event->peekReferenceTs();
|
||||
referenceTs->cpuTimeinNS = cpuReferenceTimeInNs;
|
||||
referenceTs->gpuTimeStamp = static_cast<uint64_t>(gpuReferenceTimeInNs / deviceTsFrequency);
|
||||
|
||||
auto timeToTimeStamp = [&](uint32_t timeInNs) {
|
||||
return static_cast<uint32_t>(timeInNs / deviceTsFrequency);
|
||||
};
|
||||
|
||||
packetData[0].contextStart = 50u;
|
||||
packetData[0].contextEnd = 100u;
|
||||
packetData[0].globalStart = timeToTimeStamp(4000u);
|
||||
packetData[0].globalEnd = timeToTimeStamp(5000u);
|
||||
mappedTimeStampData.packetData[0].contextStart = 50u;
|
||||
mappedTimeStampData.packetData[0].contextEnd = 100u;
|
||||
mappedTimeStampData.packetData[0].globalStart = timeToTimeStamp(4000u);
|
||||
mappedTimeStampData.packetData[0].globalEnd = timeToTimeStamp(5000u);
|
||||
|
||||
// Device Ts overflow case
|
||||
packetData[1].contextStart = 20u;
|
||||
packetData[1].contextEnd = 30u;
|
||||
packetData[1].globalStart = timeToTimeStamp(500u);
|
||||
packetData[1].globalEnd = timeToTimeStamp(1500u);
|
||||
mappedTimeStampData.packetData[1].contextStart = 20u;
|
||||
mappedTimeStampData.packetData[1].contextEnd = 30u;
|
||||
mappedTimeStampData.packetData[1].globalStart = timeToTimeStamp(500u);
|
||||
mappedTimeStampData.packetData[1].globalEnd = timeToTimeStamp(1500u);
|
||||
|
||||
packetData[2].contextStart = 20u;
|
||||
packetData[2].contextEnd = 30u;
|
||||
packetData[2].globalStart = timeToTimeStamp(5000u);
|
||||
packetData[2].globalEnd = timeToTimeStamp(500u);
|
||||
mappedTimeStampData.packetData[2].contextStart = 20u;
|
||||
mappedTimeStampData.packetData[2].contextEnd = 30u;
|
||||
mappedTimeStampData.packetData[2].globalStart = timeToTimeStamp(5000u);
|
||||
mappedTimeStampData.packetData[2].globalEnd = timeToTimeStamp(500u);
|
||||
|
||||
event->hostAddressFromPool = packetData;
|
||||
event->hostAddressFromPool = &mappedTimeStampData;
|
||||
event->maxPacketCount = 3;
|
||||
uint32_t count = 0;
|
||||
|
||||
NEO::TimeStampData *referenceTs = event->peekReferenceTs();
|
||||
referenceTs->cpuTimeinNS = cpuReferenceTimeInNs;
|
||||
referenceTs->gpuTimeStamp = static_cast<uint64_t>(gpuReferenceTimeInNs / deviceTsFrequency);
|
||||
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, event->queryKernelTimestampsExt(device, &count, nullptr));
|
||||
EXPECT_EQ(count, 3u);
|
||||
|
||||
@@ -2355,7 +2360,7 @@ TEST_F(EventqueryKernelTimestampsExt, givenEventWithMappedTimestampCapabilityWhe
|
||||
EXPECT_LE(results.pSynchronizedTimestampsBuffer[0].global.kernelEnd, expectedGlobalEnd + errorOffset);
|
||||
|
||||
auto expectedContextStart = expectedGlobalStart;
|
||||
auto expectedContextEnd = expectedContextStart + (packetData[0].contextEnd - packetData[0].contextStart) * deviceTsFrequency;
|
||||
auto expectedContextEnd = expectedContextStart + (mappedTimeStampData.packetData[0].contextEnd - mappedTimeStampData.packetData[0].contextStart) * deviceTsFrequency;
|
||||
EXPECT_GE(results.pSynchronizedTimestampsBuffer[0].context.kernelStart, expectedContextStart - errorOffset);
|
||||
EXPECT_LE(results.pSynchronizedTimestampsBuffer[0].context.kernelStart, expectedContextStart + errorOffset);
|
||||
EXPECT_GE(results.pSynchronizedTimestampsBuffer[0].context.kernelEnd, expectedContextEnd - errorOffset);
|
||||
@@ -2371,7 +2376,7 @@ TEST_F(EventqueryKernelTimestampsExt, givenEventWithMappedTimestampCapabilityWhe
|
||||
EXPECT_LE(results.pSynchronizedTimestampsBuffer[1].global.kernelEnd, expectedGlobalEnd + errorOffset);
|
||||
|
||||
expectedContextStart = expectedGlobalStart;
|
||||
expectedContextEnd = expectedContextStart + (packetData[1].contextEnd - packetData[1].contextStart) * deviceTsFrequency;
|
||||
expectedContextEnd = expectedContextStart + (mappedTimeStampData.packetData[1].contextEnd - mappedTimeStampData.packetData[1].contextStart) * deviceTsFrequency;
|
||||
EXPECT_GE(results.pSynchronizedTimestampsBuffer[1].context.kernelStart, expectedContextStart - errorOffset);
|
||||
EXPECT_LE(results.pSynchronizedTimestampsBuffer[1].context.kernelStart, expectedContextStart + errorOffset);
|
||||
EXPECT_GE(results.pSynchronizedTimestampsBuffer[1].context.kernelEnd, expectedContextEnd - errorOffset);
|
||||
@@ -2386,7 +2391,7 @@ TEST_F(EventqueryKernelTimestampsExt, givenEventWithMappedTimestampCapabilityWhe
|
||||
EXPECT_LE(results.pSynchronizedTimestampsBuffer[2].global.kernelEnd, expectedGlobalEnd + errorOffset);
|
||||
|
||||
expectedContextStart = expectedGlobalStart;
|
||||
expectedContextEnd = expectedContextStart + (packetData[2].contextEnd - packetData[1].contextStart) * deviceTsFrequency;
|
||||
// Context duration of packet 2: take both operands from packetData[2] so the
// expectation tracks the packet whose results are checked below.
expectedContextEnd = expectedContextStart + (mappedTimeStampData.packetData[2].contextEnd - mappedTimeStampData.packetData[2].contextStart) * deviceTsFrequency;
|
||||
EXPECT_GE(results.pSynchronizedTimestampsBuffer[2].context.kernelStart, expectedContextStart - errorOffset);
|
||||
EXPECT_LE(results.pSynchronizedTimestampsBuffer[2].context.kernelStart, expectedContextStart + errorOffset);
|
||||
EXPECT_GE(results.pSynchronizedTimestampsBuffer[2].context.kernelEnd, expectedContextEnd - errorOffset);
|
||||
|
||||
Reference in New Issue
Block a user