mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-10 23:24:53 +08:00
performance: Reuse GPU timestamp instead of KMD escape
This can be enabled only when the related debug flag is set. Related-To: NEO-10615 Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
c1004b77bf
commit
83e8ae4a20
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -889,11 +889,12 @@ TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledAndGlobalStartTSSmalle
|
||||
MockCommandQueue cmdQ(mockContext, pClDevice, props, false);
|
||||
MockEvent<Event> event(&cmdQ, CL_COMPLETE, 0, 0);
|
||||
auto resolution = pClDevice->getDevice().getDeviceInfo().profilingTimerResolution;
|
||||
auto osTime = pClDevice->getDevice().getOSTime();
|
||||
|
||||
HwTimeStamps timestamp{};
|
||||
timestamp.globalStartTS = 3;
|
||||
event.queueTimeStamp.gpuTimeStamp = 2;
|
||||
event.submitTimeStamp.gpuTimeStamp = 4;
|
||||
event.submitTimeStamp.gpuTimeStamp = osTime->getTimestampRefreshTimeout() + 4;
|
||||
event.submitTimeStamp.gpuTimeInNs = static_cast<uint64_t>(4 * resolution);
|
||||
TagNode<HwTimeStamps> timestampNode{};
|
||||
timestampNode.tagForCpuAccess = &timestamp;
|
||||
@@ -909,6 +910,76 @@ TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledAndGlobalStartTSSmalle
|
||||
event.timeStampNode = nullptr;
|
||||
}
|
||||
|
||||
// With EnableReusingGpuTimestamps set, checks the profiling values reported for an
// event whose device start timestamp (3) falls between the recorded queue (2) and
// submit (4) GPU timestamps: start is reported as-is, while submit and queue are
// expected one and two ticks below start respectively (all scaled by the timer
// resolution).
TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledAndGlobalStartTSSmallerThanQueueTSWithinRecalculationLimitWhenCalculateStartTimestampThenAdjustTimestmaps) {
    // NOTE(review): presumably restores the debug flags when the test scope ends - confirm.
    DebugManagerStateRestore dbgRestore;
    debugManager.flags.EnableReusingGpuTimestamps.set(true);

    // This test builds its own context/device instead of using the fixture's.
    MockContext context{};
    auto mockDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));

    // Profiling must be enabled on the queue for the profiling queries below.
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0};
    MockCommandQueue cmdQ(&context, mockDevice.get(), props, false);
    MockEvent<Event> event(&cmdQ, CL_COMPLETE, 0, 0);
    auto resolution = mockDevice->getDevice().getDeviceInfo().profilingTimerResolution;

    // Device-side start is earlier than the submit timestamp captured on the host side.
    HwTimeStamps timestamp{};
    timestamp.globalStartTS = 3;
    event.queueTimeStamp.gpuTimeStamp = 2;
    event.submitTimeStamp.gpuTimeStamp = 4;
    event.submitTimeStamp.gpuTimeInNs = static_cast<uint64_t>(4 * resolution);

    // Hand the event a stack-allocated timestamp node that points at the values above.
    TagNode<HwTimeStamps> timestampNode{};
    timestampNode.tagForCpuAccess = &timestamp;
    event.timeStampNode = &timestampNode;

    uint64_t start = 0u;
    uint64_t submit = 0u;
    uint64_t queue = 0u;
    event.getEventProfilingInfo(CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr);
    event.getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &submit, nullptr);
    event.getEventProfilingInfo(CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &queue, nullptr);

    EXPECT_EQ(start, static_cast<uint64_t>(timestamp.globalStartTS * resolution));
    EXPECT_EQ(submit, static_cast<uint64_t>((timestamp.globalStartTS - 1) * resolution));
    EXPECT_EQ(queue, static_cast<uint64_t>((timestamp.globalStartTS - 2) * resolution));

    // Detach the stack-allocated node before the event is destroyed
    // (presumably so the event does not try to release it - confirm).
    event.timeStampNode = nullptr;
}
|
||||
|
||||
// With EnableReusingGpuTimestamps set, checks the profiling values reported for an
// event whose device start timestamp (2) equals the recorded queue GPU timestamp (2)
// and is below the recorded submit GPU timestamp (4): start is reported as-is
// (scaled by the timer resolution), while submit and queue are both expected to be 0.
TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledAndGlobalStartTSSmallerThanQueueTSWithinRecalculationLimitAndStartTSBelowOneWhenCalculateStartTimestampThenAdjustTimestmaps) {
    // NOTE(review): presumably restores the debug flags when the test scope ends - confirm.
    DebugManagerStateRestore dbgRestore;
    debugManager.flags.EnableReusingGpuTimestamps.set(true);

    // This test builds its own context/device instead of using the fixture's.
    MockContext context{};
    auto mockDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));

    // Profiling must be enabled on the queue for the profiling queries below.
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0};
    MockCommandQueue cmdQ(&context, mockDevice.get(), props, false);
    MockEvent<Event> event(&cmdQ, CL_COMPLETE, 0, 0);
    auto resolution = mockDevice->getDevice().getDeviceInfo().profilingTimerResolution;

    // Start value is small enough that the expected submit/queue values bottom out at 0.
    HwTimeStamps timestamp{};
    timestamp.globalStartTS = 2;
    event.queueTimeStamp.gpuTimeStamp = 2;
    event.submitTimeStamp.gpuTimeStamp = 4;
    event.submitTimeStamp.gpuTimeInNs = static_cast<uint64_t>(4 * resolution);

    // Hand the event a stack-allocated timestamp node that points at the values above.
    TagNode<HwTimeStamps> timestampNode{};
    timestampNode.tagForCpuAccess = &timestamp;
    event.timeStampNode = &timestampNode;

    uint64_t start = 0u;
    uint64_t submit = 0u;
    uint64_t queue = 0u;
    event.getEventProfilingInfo(CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr);
    event.getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &submit, nullptr);
    event.getEventProfilingInfo(CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &queue, nullptr);

    EXPECT_EQ(start, static_cast<uint64_t>(timestamp.globalStartTS * resolution));
    EXPECT_EQ(submit, 0ul);
    EXPECT_EQ(queue, 0ul);

    // Detach the stack-allocated node before the event is destroyed
    // (presumably so the event does not try to release it - confirm).
    event.timeStampNode = nullptr;
}
|
||||
|
||||
TEST_F(InternalsEventTest, givenGpuHangWhenEventWaitReportsHangThenWaititingIsAbortedAndUnfinishedEventsHaveExecutionStatusEqualsToAbortedDueToGpuHang) {
|
||||
MockCommandQueue cmdQ(mockContext, pClDevice, nullptr, false);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user