fix: use kernel timestamp properties instead of global timestamp
Related-To: NEO-11555
Signed-off-by: Joshua Santosh Ranjan <joshua.santosh.ranjan@intel.com>
This commit is contained in:
parent
ad374fbd8f
commit
deefea51ee
|
@ -10,6 +10,7 @@
|
|||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/device/sub_device.h"
|
||||
#include "shared/source/helpers/basic_math.h"
|
||||
#include "shared/source/helpers/hw_info.h"
|
||||
#include "shared/source/memory_manager/internal_allocation_storage.h"
|
||||
#include "shared/source/memory_manager/memory_operations_handler.h"
|
||||
|
@ -738,12 +739,21 @@ template <typename TagSizeT>
|
|||
void EventImp<TagSizeT>::getSynchronizedKernelTimestamps(ze_synchronized_timestamp_result_ext_t *pSynchronizedTimestampsBuffer,
|
||||
const uint32_t count, const ze_kernel_timestamp_result_t *pKernelTimestampsBuffer) {
|
||||
|
||||
auto &gfxCoreHelper = device->getNEODevice()->getGfxCoreHelper();
|
||||
auto &hwInfo = device->getNEODevice()->getHardwareInfo();
|
||||
const auto resolution = device->getNEODevice()->getDeviceInfo().profilingTimerResolution;
|
||||
auto deviceTsInNs = gfxCoreHelper.getGpuTimeStampInNS(referenceTs.gpuTimeStamp, resolution);
|
||||
const auto maxKernelTsValue = maxNBitValue(hwInfo.capabilityTable.kernelTimestampValidBits);
|
||||
|
||||
const auto numBitsForResolution = Math::log2(static_cast<uint64_t>(resolution)) + 1u;
|
||||
const auto clampedBitsCount = std::min(hwInfo.capabilityTable.kernelTimestampValidBits, 64u - numBitsForResolution);
|
||||
const auto maxClampedTsValue = maxNBitValue(clampedBitsCount);
|
||||
|
||||
auto convertDeviceTsToNanoseconds = [&resolution, &maxClampedTsValue](uint64_t deviceTs) {
|
||||
// Use clamped maximum to avoid overflows
|
||||
return static_cast<uint64_t>((deviceTs & maxClampedTsValue) * resolution);
|
||||
};
|
||||
|
||||
auto deviceTsInNs = convertDeviceTsToNanoseconds(referenceTs.gpuTimeStamp);
|
||||
|
||||
auto getDuration = [&](uint64_t startTs, uint64_t endTs) {
|
||||
const uint64_t maxValue = maxKernelTsValue;
|
||||
startTs &= maxValue;
|
||||
|
@ -772,8 +782,8 @@ void EventImp<TagSizeT>::getSynchronizedKernelTimestamps(ze_synchronized_timesta
|
|||
int64_t offset = tsOffsetInNs;
|
||||
uint64_t startTimeStampInNs = static_cast<uint64_t>(deviceTs->kernelStart * resolution) + offset;
|
||||
if (startTimeStampInNs < referenceHostTsInNs) {
|
||||
offset += static_cast<uint64_t>(maxNBitValue(gfxCoreHelper.getGlobalTimeStampBits()) * resolution);
|
||||
startTimeStampInNs = static_cast<uint64_t>(deviceTs->kernelStart * resolution) + offset;
|
||||
offset += static_cast<uint64_t>(convertDeviceTsToNanoseconds(maxKernelTsValue));
|
||||
startTimeStampInNs = static_cast<uint64_t>(convertDeviceTsToNanoseconds(deviceTs->kernelStart) + offset);
|
||||
}
|
||||
|
||||
// Get the kernel timestamp duration
|
||||
|
@ -805,6 +815,10 @@ ze_result_t EventImp<TagSizeT>::queryKernelTimestampsExt(Device *device, uint32_
|
|||
return queryTimestampsExp(device, pCount, nullptr);
|
||||
}
|
||||
|
||||
if (queryStatus() != ZE_RESULT_SUCCESS) {
|
||||
return ZE_RESULT_NOT_READY;
|
||||
}
|
||||
|
||||
ze_result_t status = queryTimestampsExp(device, pCount, pResults->pKernelTimestampsBuffer);
|
||||
|
||||
if (status == ZE_RESULT_SUCCESS && hasKerneMappedTsCapability) {
|
||||
|
|
|
@ -698,6 +698,11 @@ bool testKernelMappedTimestampMap(int argc, char *argv[],
|
|||
SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList));
|
||||
}
|
||||
|
||||
uint64_t referenceHostTs, referenceDeviceTs = 0;
|
||||
SUCCESS_OR_TERMINATE(zeDeviceGetGlobalTimestamps(device, &referenceHostTs, &referenceDeviceTs));
|
||||
std::cout << "ReferenceDeviceTs: " << referenceDeviceTs << "| ReferenceHostTs: " << referenceHostTs << "\n";
|
||||
previousMaximumSyncTs = referenceHostTs;
|
||||
|
||||
for (uint32_t i = 0; i < 10; i++) {
|
||||
|
||||
if (!useImmediate) {
|
||||
|
@ -722,7 +727,6 @@ bool testKernelMappedTimestampMap(int argc, char *argv[],
|
|||
if (verboseLevel == 1) {
|
||||
std::cout << "[iter(" << i << ")][event(" << j << ")]====>\n";
|
||||
}
|
||||
SUCCESS_OR_TERMINATE(zeEventQueryStatus(kernelTsEvent[j]));
|
||||
SUCCESS_OR_TERMINATE(zeEventQueryKernelTimestampsExt(kernelTsEvent[j], device, &count, nullptr));
|
||||
if (count == 0) {
|
||||
return false;
|
||||
|
@ -754,12 +758,21 @@ bool testKernelMappedTimestampMap(int argc, char *argv[],
|
|||
<< "[global-ts(" << ts.global.kernelStart << " , " << ts.global.kernelEnd << " ) "
|
||||
<< "| syncTs( " << syncTs.global.kernelStart << " , " << syncTs.global.kernelEnd << " )] "
|
||||
<< "# [context-ts( " << ts.context.kernelStart << " , " << ts.context.kernelEnd << " ) "
|
||||
<< "| syncTs ( " << syncTs.context.kernelStart << " , " << syncTs.context.kernelEnd << " )]\n";
|
||||
<< "| syncTs ( " << syncTs.context.kernelStart << " , " << syncTs.context.kernelEnd << " )]"
|
||||
<< "| timeTaken (" << currentMinimumSyncTs - previousMaximumSyncTs << " ns)"
|
||||
<< "\n";
|
||||
}
|
||||
|
||||
if (verboseLevel == 2) {
|
||||
std::cout << "KernelSyncTs: " << syncTs.global.kernelStart << " , " << syncTs.global.kernelEnd
|
||||
<< " | ContextSyncTs: " << syncTs.context.kernelStart << " , " << syncTs.context.kernelEnd << "\n";
|
||||
<< " | ContextSyncTs: " << syncTs.context.kernelStart << " , " << syncTs.context.kernelEnd
|
||||
<< "| timeTaken (" << currentMinimumSyncTs - previousMaximumSyncTs << " ns)"
|
||||
<< "\n";
|
||||
}
|
||||
|
||||
if ((currentMinimumSyncTs - previousMaximumSyncTs) > 10 * 1E9) {
|
||||
std::cout << "\n\n!!FAILED: Time Taken Too long! (Current Minimum Ts : " << currentMinimumSyncTs << " | Previous Maximum Ts : " << previousMaximumSyncTs << ")\n\n";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
SUCCESS_OR_TERMINATE(zeEventHostReset(kernelTsEvent[j]));
|
||||
|
|
|
@ -2212,6 +2212,7 @@ TEST_F(EventqueryKernelTimestampsExt, givenpCountLargerThanSupportedWhenCallingQ
|
|||
ze_event_query_kernel_timestamps_results_ext_properties_t results{};
|
||||
results.pKernelTimestampsBuffer = kernelTsBuffer.data();
|
||||
results.pSynchronizedTimestampsBuffer = nullptr;
|
||||
event->hostSignal(false);
|
||||
|
||||
auto result = event->queryKernelTimestampsExt(device, &pCount, &results);
|
||||
|
||||
|
@ -2229,6 +2230,7 @@ TEST_F(EventqueryKernelTimestampsExt, givenEventWithStaticPartitionOffThenQueryK
|
|||
ze_event_query_kernel_timestamps_results_ext_properties_t results{};
|
||||
results.pKernelTimestampsBuffer = kernelTsBuffer.data();
|
||||
results.pSynchronizedTimestampsBuffer = nullptr;
|
||||
event->hostSignal(false);
|
||||
|
||||
uint32_t pCount = 10;
|
||||
auto result = event->queryKernelTimestampsExt(device, &pCount, &results);
|
||||
|
@ -2236,17 +2238,34 @@ TEST_F(EventqueryKernelTimestampsExt, givenEventWithStaticPartitionOffThenQueryK
|
|||
EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result);
|
||||
}
|
||||
|
||||
// Checks that queryKernelTimestampsExt() reports ZE_RESULT_NOT_READY when it is
// called with a results buffer on an event that this test body never signals.
TEST_F(EventqueryKernelTimestampsExt, givenEventStatusNotReadyThenQueryKernelTimestampsExtReturnsNotReady) {
|
||||
// NOTE(review): presumably restores the debug flags changed below when it goes out of scope - confirm.
DebugManagerStateRestore restore;
|
||||
// Sets the EnableStaticPartitioning debug flag to 0 for this test.
NEO::debugManager.flags.EnableStaticPartitioning.set(0);
|
||||
|
||||
// Mark the event as having the kernel-mapped timestamp capability.
event->hasKerneMappedTsCapability = true;
|
||||
|
||||
// Two-entry kernel timestamp buffer; no synchronized timestamp buffer is supplied.
std::vector<ze_kernel_timestamp_result_t> kernelTsBuffer(2);
|
||||
ze_event_query_kernel_timestamps_results_ext_properties_t results{};
|
||||
results.pKernelTimestampsBuffer = kernelTsBuffer.data();
|
||||
results.pSynchronizedTimestampsBuffer = nullptr;
|
||||
|
||||
// pCount (10) is larger than the 2-entry buffer above.
// NOTE(review): presumably harmless because the call is expected to return
// before any timestamps are written - confirm against queryKernelTimestampsExt.
uint32_t pCount = 10;
|
||||
// The event is not signaled anywhere in this test body before the query.
auto result = event->queryKernelTimestampsExt(device, &pCount, &results);
|
||||
|
||||
// The query must report that the event is not ready yet.
EXPECT_EQ(ZE_RESULT_NOT_READY, result);
|
||||
}
|
||||
|
||||
TEST_F(EventqueryKernelTimestampsExt, givenEventWithMappedTimestampCapabilityWhenQueryKernelTimestampsExtIsCalledCorrectValuesAreReturned) {
|
||||
|
||||
auto &hwInfo = device->getNEODevice()->getHardwareInfo();
|
||||
typename MockTimestampPackets32::Packet packetData[3];
|
||||
device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.kernelTimestampValidBits = 32;
|
||||
auto &gfxCoreHelper = device->getNEODevice()->getGfxCoreHelper();
|
||||
event->setPacketsInUse(3u);
|
||||
event->hasKerneMappedTsCapability = true;
|
||||
const auto deviceTsFrequency = device->getNEODevice()->getDeviceInfo().profilingTimerResolution;
|
||||
const int64_t gpuReferenceTimeInNs = 2000;
|
||||
const int64_t cpuReferenceTimeInNs = 3000;
|
||||
const auto maxKernelTsValue = maxNBitValue(32);
|
||||
const auto maxKernelTsValue = maxNBitValue(hwInfo.capabilityTable.kernelTimestampValidBits);
|
||||
|
||||
NEO::TimeStampData *referenceTs = event->peekReferenceTs();
|
||||
referenceTs->cpuTimeinNS = cpuReferenceTimeInNs;
|
||||
|
@ -2285,11 +2304,6 @@ TEST_F(EventqueryKernelTimestampsExt, givenEventWithMappedTimestampCapabilityWhe
|
|||
results.pKernelTimestampsBuffer = kernelTsBuffer.data();
|
||||
results.pSynchronizedTimestampsBuffer = synchronizedTsBuffer.data();
|
||||
|
||||
for (uint32_t packetId = 0; packetId < count; packetId++) {
|
||||
event->kernelEventCompletionData[0].assignDataToAllTimestamps(packetId, event->hostAddress);
|
||||
event->hostAddress = ptrOffset(event->hostAddress, NEO::TimestampPackets<uint32_t, NEO::TimestampPacketConstants::preferredPacketCount>::getSinglePacketSize());
|
||||
}
|
||||
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, event->queryKernelTimestampsExt(device, &count, &results));
|
||||
uint64_t errorOffset = 5;
|
||||
// Packet 1
|
||||
|
@ -2309,7 +2323,7 @@ TEST_F(EventqueryKernelTimestampsExt, givenEventWithMappedTimestampCapabilityWhe
|
|||
|
||||
// Packet 2
|
||||
expectedGlobalStart = (cpuReferenceTimeInNs - gpuReferenceTimeInNs) + 500u +
|
||||
static_cast<uint64_t>(maxNBitValue(gfxCoreHelper.getGlobalTimeStampBits()) * deviceTsFrequency);
|
||||
static_cast<uint64_t>(maxNBitValue(hwInfo.capabilityTable.kernelTimestampValidBits) * deviceTsFrequency);
|
||||
expectedGlobalEnd = expectedGlobalStart + (1500 - 500);
|
||||
EXPECT_GE(results.pSynchronizedTimestampsBuffer[1].global.kernelStart, expectedGlobalStart - errorOffset);
|
||||
EXPECT_LE(results.pSynchronizedTimestampsBuffer[1].global.kernelStart, expectedGlobalStart + errorOffset);
|
||||
|
|
Loading…
Reference in New Issue