Use device-based timestamps by default

Related-To: NEO-7042
Signed-off-by: Katarzyna Cencelewska <katarzyna.cencelewska@intel.com>
This commit is contained in:
Katarzyna Cencelewska
2022-11-07 18:57:57 +00:00
committed by Compute-Runtime-Automation
parent b435c061c7
commit 2d229f3fea
7 changed files with 51 additions and 35 deletions

View File

@ -1280,10 +1280,10 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndTimestampFlagS
auto result = event->queryKernelTimestamp(&resultTimestamp);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
EXPECT_EQ(resultTimestamp.context.kernelStart, NEO::MockDeviceTimeWithConstTimestamp::GPU_TIMESTAMP);
EXPECT_EQ(resultTimestamp.global.kernelStart, NEO::MockDeviceTimeWithConstTimestamp::GPU_TIMESTAMP);
EXPECT_EQ(resultTimestamp.context.kernelEnd, NEO::MockDeviceTimeWithConstTimestamp::GPU_TIMESTAMP);
EXPECT_EQ(resultTimestamp.global.kernelEnd, NEO::MockDeviceTimeWithConstTimestamp::GPU_TIMESTAMP);
EXPECT_EQ(resultTimestamp.context.kernelStart, NEO::MockDeviceTimeWithConstTimestamp::gpuTimestamp);
EXPECT_EQ(resultTimestamp.global.kernelStart, NEO::MockDeviceTimeWithConstTimestamp::gpuTimestamp);
EXPECT_EQ(resultTimestamp.context.kernelEnd, NEO::MockDeviceTimeWithConstTimestamp::gpuTimestamp);
EXPECT_EQ(resultTimestamp.global.kernelEnd, NEO::MockDeviceTimeWithConstTimestamp::gpuTimestamp);
}
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndTimestampFlagNotSetWhenCpuMemcpyThenDontSetGpuTimestamps, IsXeHpcCore) {
@ -1308,7 +1308,7 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndTimestampFlagN
auto result = event->queryKernelTimestamp(&resultTimestamp);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
EXPECT_NE(resultTimestamp.context.kernelEnd, NEO::MockDeviceTimeWithConstTimestamp::GPU_TIMESTAMP);
EXPECT_NE(resultTimestamp.context.kernelEnd, NEO::MockDeviceTimeWithConstTimestamp::gpuTimestamp);
}
using CreateCommandListXeHpcTest = Test<DeviceFixture>;

View File

@ -167,8 +167,6 @@ HWTEST_F(EnqueueHandlerTimestampEnabledTest, givenProflingAndTimeStampPacketsEna
EXPECT_NE(ev->submitTimeStamp.CPUTimeinNS, 0u);
EXPECT_EQ(ev->submitTimeStamp.GPUTimeStamp, 0u);
DebugManagerStateRestore dbgState;
DebugManager.flags.EnableDeviceBasedTimestamps.set(true);
ev->queueTimeStamp.GPUTimeStamp = 1000;
ev->calculateSubmitTimestampData();
@ -207,8 +205,6 @@ HWTEST_F(EnqueueHandlerTimestampDisabledTest, givenProflingEnabledTimeStampPacke
EXPECT_NE(ev->submitTimeStamp.CPUTimeinNS, 0u);
EXPECT_EQ(ev->submitTimeStamp.GPUTimeStamp, 0u);
DebugManagerStateRestore dbgState;
DebugManager.flags.EnableDeviceBasedTimestamps.set(true);
ev->queueTimeStamp.GPUTimeStamp = 1000;
ev->calculateSubmitTimestampData();

View File

@ -69,6 +69,7 @@ TEST(MockOSTime, WhenGettingTimersThenDiffBetweenQueriesWithinAllowedError) {
hostOnlyDiff = hostOnlyTimestamp[1] - hostOnlyTimestamp[0];
EXPECT_LT(deviceTimestamp[0], deviceTimestamp[1]);
EXPECT_LT(hostTimestamp[0], hostOnlyTimestamp[0]);
EXPECT_LT(hostTimestamp[1], hostOnlyTimestamp[1]);
@ -115,27 +116,26 @@ TEST(MockOSTime, GivenNullWhenSettingOsTimeThenResolutionIsZero) {
}
// With EnableDeviceBasedTimestamps set to 0, getDeviceAndHostTimer is expected to
// report the mock's constant CPU time as the device timestamp, equal to the host timestamp.
TEST(MockOSTime, givenDeviceTimestampBaseNotEnabledWhenGetDeviceAndHostTimerThenCpuTimestampIsReturned) {
// NOTE(review): presumably restores the debug flags when it goes out of scope - confirm.
DebugManagerStateRestore dbgRestore;
// Explicitly turn the flag off for this test.
DebugManager.flags.EnableDeviceBasedTimestamps.set(0);
auto mockDevice = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
// Install the OS-time mock that returns constant timestamps.
mockDevice->setOSTime(new MockOSTimeWithConstTimestamp());
uint64_t deviceTS = 0u, hostTS = 0u;
mockDevice->getDeviceAndHostTimer(&deviceTS, &hostTS);
// NOTE(review): the next two assertions look like the old (CPU_TIME_IN_NS) and new
// (cpuTimeInNs) spelling of the same constant - confirm only one is meant to remain.
EXPECT_EQ(deviceTS, MockDeviceTimeWithConstTimestamp::CPU_TIME_IN_NS);
EXPECT_EQ(deviceTS, MockDeviceTimeWithConstTimestamp::cpuTimeInNs);
// Device and host timestamps must match when the device timer is not used.
EXPECT_EQ(deviceTS, hostTS);
}
// With device-based timestamps enabled, getDeviceAndHostTimer is expected to report the
// mock's constant GPU timestamp as the device timestamp, different from the host timestamp.
TEST(MockOSTime, givenDeviceTimestampBaseEnabledWhenGetDeviceAndHostTimerThenGpuTimestampIsReturned) {
// NOTE(review): presumably restores the debug flags when it goes out of scope - confirm.
DebugManagerStateRestore dbgRestorer;
DebugManager.flags.EnableDeviceBasedTimestamps.set(true);
auto mockDevice = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
// Install the OS-time mock that returns constant timestamps.
mockDevice->setOSTime(new MockOSTimeWithConstTimestamp());
uint64_t deviceTS = 0u, hostTS = 0u;
mockDevice->getDeviceAndHostTimer(&deviceTS, &hostTS);
// NOTE(review): the next two assertions look like the old (GPU_TIMESTAMP) and new
// (gpuTimestamp) spelling of the same constant - confirm only one is meant to remain.
EXPECT_EQ(deviceTS, MockDeviceTimeWithConstTimestamp::GPU_TIMESTAMP);
EXPECT_EQ(deviceTS, MockDeviceTimeWithConstTimestamp::gpuTimestamp);
// The device timestamp must differ from the host timestamp here.
EXPECT_NE(deviceTS, hostTS);
}

View File

@ -794,6 +794,8 @@ TEST_F(InternalsEventTest, GivenProfilingWhenUserEventCreatedThenProfilingNotSet
}
TEST_F(InternalsEventTest, givenDeviceTimestampBaseNotEnabledWhenGetEventProfilingInfoThenCpuTimestampIsReturned) {
DebugManagerStateRestore dbgRestore;
DebugManager.flags.EnableDeviceBasedTimestamps.set(0);
pClDevice->setOSTime(new MockOSTimeWithConstTimestamp());
const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0};
MockCommandQueue cmdQ(mockContext, pClDevice, props, false);
@ -805,18 +807,41 @@ TEST_F(InternalsEventTest, givenDeviceTimestampBaseNotEnabledWhenGetEventProfili
uint64_t submitTime = 0ULL;
event.getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(uint64_t), &submitTime, 0);
EXPECT_EQ(submitTime, MockDeviceTimeWithConstTimestamp::CPU_TIME_IN_NS);
EXPECT_EQ(submitTime, MockDeviceTimeWithConstTimestamp::cpuTimeInNs);
}
// Checks the CL_PROFILING_COMMAND_START value reported when EnableDeviceBasedTimestamps
// is set to 0: the test expects the GPU start timestamp scaled by the profiling timer
// resolution, shifted by the CPU/GPU offset derived from the queue timestamp.
TEST_F(InternalsEventTest, givenDeviceTimestampBaseNotEnabledWhenCalculateStartTimestampThenCorrectTimeIsReturned) {
    // NOTE(review): presumably restores the debug flags when it goes out of scope - confirm.
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableDeviceBasedTimestamps.set(0);

    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0};
    MockCommandQueue cmdQ(mockContext, pClDevice, props, false);
    MockEvent<Event> event(&cmdQ, CL_COMPLETE, 0, 0);

    // Inputs: GPU start timestamp of 2, queued at GPU timestamp 1 / CPU time 100 ns.
    HwTimeStamps timestamp{};
    timestamp.GlobalStartTS = 2;
    event.queueTimeStamp.GPUTimeStamp = 1;
    event.queueTimeStamp.CPUTimeinNS = 100;

    // Attach a stack-allocated tag node that points at the timestamps above.
    TagNode<HwTimeStamps> timestampNode{};
    timestampNode.tagForCpuAccess = &timestamp;
    event.timeStampNode = &timestampNode;

    // Zero-initialized so that a query which writes nothing is compared as 0
    // instead of reading an indeterminate value.
    uint64_t start = 0u;
    event.getEventProfilingInfo(CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr);

    auto resolution = pClDevice->getDevice().getDeviceInfo().profilingTimerResolution;
    auto &hwHelper = HwHelper::get(pClDevice->getHardwareInfo().platform.eRenderCoreFamily);
    // c0: CPU time at queue minus the queue GPU timestamp converted to nanoseconds.
    auto c0 = event.queueTimeStamp.CPUTimeinNS - hwHelper.getGpuTimeStampInNS(event.queueTimeStamp.GPUTimeStamp, resolution);
    EXPECT_EQ(start, static_cast<uint64_t>(timestamp.GlobalStartTS * resolution) + c0);

    // Detach the stack-allocated node before it goes out of scope.
    // NOTE(review): presumably so the event's teardown does not touch it - confirm.
    event.timeStampNode = nullptr;
}
TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledWhenGetEventProfilingInfoThenGpuTimestampIsReturned) {
DebugManagerStateRestore dbgRestorer;
DebugManager.flags.EnableDeviceBasedTimestamps.set(true);
pClDevice->setOSTime(new MockOSTimeWithConstTimestamp());
const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0};
MockCommandQueue cmdQ(mockContext, pClDevice, props, false);
MockEvent<Event> event(&cmdQ, CL_COMMAND_MARKER, 0, 0);
event.queueTimeStamp.GPUTimeStamp = MockDeviceTimeWithConstTimestamp::GPU_TIMESTAMP;
event.queueTimeStamp.GPUTimeStamp = MockDeviceTimeWithConstTimestamp::gpuTimestamp;
event.setCommand(std::unique_ptr<Command>(new CommandWithoutKernel(cmdQ)));
event.submitCommand(false);
@ -824,13 +849,10 @@ TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledWhenGetEventProfilingI
event.getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(uint64_t), &submitTime, 0);
auto resolution = pClDevice->getDevice().getDeviceInfo().profilingTimerResolution;
EXPECT_EQ(submitTime, static_cast<uint64_t>(MockDeviceTimeWithConstTimestamp::GPU_TIMESTAMP * resolution));
EXPECT_EQ(submitTime, static_cast<uint64_t>(MockDeviceTimeWithConstTimestamp::gpuTimestamp * resolution));
}
TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledWhenCalculateStartTimestampThenCorrectTimeIsReturned) {
DebugManagerStateRestore dbgRestorer;
DebugManager.flags.EnableDeviceBasedTimestamps.set(true);
const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0};
MockCommandQueue cmdQ(mockContext, pClDevice, props, false);
MockEvent<Event> event(&cmdQ, CL_COMPLETE, 0, 0);
@ -852,9 +874,6 @@ TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledWhenCalculateStartTime
}
TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledAndGlobalStartTSSmallerThanQueueTSWhenCalculateStartTimestampThenCorrectTimeIsReturned) {
DebugManagerStateRestore dbgRestorer;
DebugManager.flags.EnableDeviceBasedTimestamps.set(true);
const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0};
MockCommandQueue cmdQ(mockContext, pClDevice, props, false);
MockEvent<Event> event(&cmdQ, CL_COMPLETE, 0, 0);

View File

@ -451,7 +451,7 @@ DECLARE_DEBUG_VARIABLE(bool, ExperimentalEnableTileAttach, true, "Experimentally
/*DRIVER TOGGLES*/
DECLARE_DEBUG_VARIABLE(bool, UseMaxSimdSizeToDeduceMaxWorkgroupSize, false, "With this flag on, max workgroup size is deduced using SIMD32 instead of SIMD8, this causes the max wkg size to be 4 times bigger")
DECLARE_DEBUG_VARIABLE(bool, ReturnRawGpuTimestamps, false, "Driver returns raw GPU timestamps instead of calculated ones.")
DECLARE_DEBUG_VARIABLE(bool, EnableDeviceBasedTimestamps, false, "Driver returns timestamps in nanoseconds based on device timer.")
DECLARE_DEBUG_VARIABLE(bool, EnableDeviceBasedTimestamps, true, "Driver returns timestamps in nanoseconds based on device timer.")
DECLARE_DEBUG_VARIABLE(bool, UseCommandBufferHeaderSizeForWddmQueueSubmission, true, "0: Page size (4096), 1: sizeof(COMMAND_BUFFER_HEADER)")
DECLARE_DEBUG_VARIABLE(bool, DisableDeepBind, false, "Disable passing RTLD_DEEPBIND flag to all dlopen calls.")
DECLARE_DEBUG_VARIABLE(bool, UseUmKmDataTranslator, false, "Use helper library for UMD<->KMD (WDDM) struct layout compatibility")

View File

@ -10,10 +10,11 @@
namespace NEO {
static int PerfTicks = 0;
constexpr uint64_t convertToNs = 100;
class MockDeviceTime : public DeviceTime {
bool getCpuGpuTime(TimeStampData *pGpuCpuTime, OSTime *osTime) override {
pGpuCpuTime->GPUTimeStamp = ++PerfTicks;
pGpuCpuTime->CPUTimeinNS = PerfTicks;
pGpuCpuTime->CPUTimeinNS = PerfTicks * convertToNs;
return true;
}
@ -33,7 +34,7 @@ class MockOSTime : public OSTime {
}
bool getCpuTime(uint64_t *timeStamp) override {
*timeStamp = ++PerfTicks;
*timeStamp = ++PerfTicks * convertToNs;
return true;
};
double getHostTimerResolution() const override {
@ -50,12 +51,12 @@ class MockOSTime : public OSTime {
class MockDeviceTimeWithConstTimestamp : public DeviceTime {
public:
static constexpr uint64_t CPU_TIME_IN_NS = 1u; // NOLINT(readability-identifier-naming)
static constexpr uint64_t GPU_TIMESTAMP = 2u; // NOLINT(readability-identifier-naming)
static constexpr uint64_t cpuTimeInNs = 1u;
static constexpr uint64_t gpuTimestamp = 2u;
bool getCpuGpuTime(TimeStampData *pGpuCpuTime, OSTime *osTime) override {
pGpuCpuTime->GPUTimeStamp = GPU_TIMESTAMP;
pGpuCpuTime->CPUTimeinNS = CPU_TIME_IN_NS;
pGpuCpuTime->GPUTimeStamp = gpuTimestamp;
pGpuCpuTime->CPUTimeinNS = cpuTimeInNs;
return true;
}
@ -75,7 +76,7 @@ class MockOSTimeWithConstTimestamp : public OSTime {
}
bool getCpuTime(uint64_t *timeStamp) override {
*timeStamp = MockDeviceTimeWithConstTimestamp::CPU_TIME_IN_NS;
*timeStamp = MockDeviceTimeWithConstTimestamp::cpuTimeInNs;
return true;
}

View File

@ -195,7 +195,7 @@ EnableTimestampPacket = -1
AllocateSharedAllocationsWithCpuAndGpuStorage = -1
UseMaxSimdSizeToDeduceMaxWorkgroupSize = 0
ReturnRawGpuTimestamps = 0
EnableDeviceBasedTimestamps = 0
EnableDeviceBasedTimestamps = 1
MaxHwThreadsPercent = 0
MinHwThreadsUnoccupied = 0
LimitBlitterMaxWidth = -1