feature: Submission for GlobalTimestamp as default

Related-To: NEO-11908

Signed-off-by: Chandio, Bibrak Qamar <bibrak.qamar.chandio@intel.com>
This commit is contained in:
Chandio, Bibrak Qamar
2025-01-16 22:08:53 +00:00
committed by Compute-Runtime-Automation
parent 215ac6374b
commit 3d3c5ddf9f
12 changed files with 61 additions and 34 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (C) 2020-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -108,7 +108,7 @@ ze_result_t zeDeviceGetGlobalTimestamps(
ze_device_handle_t hDevice,
uint64_t *hostTimestamp,
uint64_t *deviceTimestamp) {
return L0::Device::fromHandle(hDevice)->getGlobalTimestamps(hostTimestamp, deviceTimestamp);
return L0::Device::fromHandle(hDevice)->getGlobalTimestamps(hostTimestamp, deviceTimestamp, true);
}
ze_result_t zeDeviceReserveCacheExt(

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2024 Intel Corporation
* Copyright (C) 2020-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -87,7 +87,7 @@ struct Device : _ze_device_handle_t {
virtual ze_result_t imageGetProperties(const ze_image_desc_t *desc, ze_image_properties_t *pImageProperties) = 0;
virtual ze_result_t getDeviceImageProperties(ze_device_image_properties_t *pDeviceImageProperties) = 0;
virtual ze_result_t getExternalMemoryProperties(ze_device_external_memory_properties_t *pExternalMemoryProperties) = 0;
virtual ze_result_t getGlobalTimestamps(uint64_t *hostTimestamp, uint64_t *deviceTimestamp) = 0;
virtual ze_result_t getGlobalTimestamps(uint64_t *hostTimestamp, uint64_t *deviceTimestamp, const bool useSubmissionMethod) = 0;
virtual ze_result_t getCommandQueueGroupProperties(uint32_t *pCount,
ze_command_queue_group_properties_t *pCommandQueueGroupProperties) = 0;

View File

@@ -1092,14 +1092,18 @@ ze_result_t DeviceImp::getProperties(ze_device_properties_t *pDeviceProperties)
return ZE_RESULT_SUCCESS;
}
ze_result_t DeviceImp::getGlobalTimestamps(uint64_t *hostTimestamp, uint64_t *deviceTimestamp) {
ze_result_t DeviceImp::getGlobalTimestamps(uint64_t *hostTimestamp, uint64_t *deviceTimestamp, const bool useSubmissionMethod) {
bool method = 0;
if (NEO::debugManager.flags.EnableGlobalTimestampViaSubmission.get() != -1) {
method = NEO::debugManager.flags.EnableGlobalTimestampViaSubmission.get();
bool getGlobalTimestampUsingSubmissionMethod = true;
if (!useSubmissionMethod) {
// Must be forced to false here: when `useSubmissionMethod` is false, it takes precedence over the debug variable `EnableGlobalTimestampViaSubmission`.
getGlobalTimestampUsingSubmissionMethod = false;
} else if (NEO::debugManager.flags.EnableGlobalTimestampViaSubmission.get() != -1) {
getGlobalTimestampUsingSubmissionMethod = !!NEO::debugManager.flags.EnableGlobalTimestampViaSubmission.get();
}
if (method == 0) {
if (!getGlobalTimestampUsingSubmissionMethod) {
auto ret = getGlobalTimestampsUsingOsInterface(hostTimestamp, deviceTimestamp);
if (ret != ZE_RESULT_ERROR_UNSUPPORTED_FEATURE) {
return ret;

View File

@@ -71,7 +71,7 @@ struct DeviceImp : public Device, NEO::NonCopyableOrMovableClass {
ze_result_t getCommandQueueGroupProperties(uint32_t *pCount,
ze_command_queue_group_properties_t *pCommandQueueGroupProperties) override;
ze_result_t getExternalMemoryProperties(ze_device_external_memory_properties_t *pExternalMemoryProperties) override;
ze_result_t getGlobalTimestamps(uint64_t *hostTimestamp, uint64_t *deviceTimestamp) override;
ze_result_t getGlobalTimestamps(uint64_t *hostTimestamp, uint64_t *deviceTimestamp, const bool useSubmissionMethod) override;
ze_result_t getDebugProperties(zet_device_debug_properties_t *pDebugProperties) override;
ze_result_t systemBarrier() override;

View File

@@ -569,7 +569,7 @@ NEO::GraphicsAllocation *Event::getAllocation(Device *device) const {
void Event::setGpuStartTimestamp() {
if (isEventTimestampFlagSet()) {
this->device->getGlobalTimestamps(&cpuStartTimestamp, &gpuStartTimestamp);
this->device->getGlobalTimestamps(&cpuStartTimestamp, &gpuStartTimestamp, false);
cpuStartTimestamp = cpuStartTimestamp / this->device->getNEODevice()->getDeviceInfo().outProfilingTimerResolution;
}
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2024 Intel Corporation
* Copyright (C) 2020-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -48,7 +48,7 @@ struct MockDevice : public Device {
ADDMETHOD_NOBASE(getCommandQueueGroupProperties, ze_result_t, ZE_RESULT_SUCCESS, (uint32_t * pCount, ze_command_queue_group_properties_t *pCommandQueueGroupProperties));
ADDMETHOD_NOBASE(getDeviceImageProperties, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_image_properties_t * pDeviceImageProperties));
ADDMETHOD_NOBASE(getExternalMemoryProperties, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_external_memory_properties_t * pExternalMemoryProperties));
ADDMETHOD_NOBASE(getGlobalTimestamps, ze_result_t, ZE_RESULT_SUCCESS, (uint64_t * hostTimestamp, uint64_t *deviceTimestamp));
ADDMETHOD_NOBASE(getGlobalTimestamps, ze_result_t, ZE_RESULT_SUCCESS, (uint64_t * hostTimestamp, uint64_t *deviceTimestamp, const bool useSubmissionMethod));
ADDMETHOD_NOBASE(systemBarrier, ze_result_t, ZE_RESULT_SUCCESS, ());
ADDMETHOD_NOBASE(getRootDevice, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_handle_t * phRootDevice));
// Runtime internal methods

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2023-2024 Intel Corporation
* Copyright (C) 2023-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -1244,6 +1244,10 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndCpuMemcpyWitho
}
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndTimestampFlagSetWhenCpuMemcpyThenSetCorrectGpuTimestamps, MatchAny) {
// This test is designed to verify timestamps obtained through the OS interface, so the submission method is disabled explicitly.
debugManager.flags.EnableGlobalTimestampViaSubmission.set(0);
ze_command_queue_desc_t queueDesc = {};
auto queue = std::make_unique<Mock<CommandQueue>>(device, device->getNEODevice()->getDefaultEngine().commandStreamReceiver, &queueDesc);
MockAppendMemoryLockedCopyTestImmediateCmdList<gfxCoreFamily> cmdList;

View File

@@ -5037,6 +5037,7 @@ HWTEST2_F(InOrderCmdListTests, givenAubModeWhenSyncCalledAlwaysPollForCompletion
}
HWTEST2_F(InOrderCmdListTests, givenProfilingEventWhenDoingCpuCopyThenSetProfilingData, IsAtLeastXeHpCore) {
auto immCmdList = createImmCmdList<gfxCoreFamily>();
immCmdList->copyThroughLockedPtrEnabled = true;

View File

@@ -1692,20 +1692,20 @@ TEST_F(DeviceTest, givenInvalidPciBusInfoWhenPciPropertiesIsCalledThenUninitiali
}
}
TEST_F(DeviceTest, whenGetGlobalTimestampIsCalledWithOsInterfaceThenSuccessIsReturnedAndValuesSetCorrectly) {
TEST_F(DeviceTest, whenGetGlobalTimestampIsCalledWithOsInterfaceViaDebugVariableThenSuccessIsReturnedAndValuesSetCorrectly) {
uint64_t hostTs = 0u;
uint64_t deviceTs = 0u;
debugManager.flags.EnableGlobalTimestampViaSubmission.set(0);
ze_result_t result = device->getGlobalTimestamps(&hostTs, &deviceTs);
ze_result_t result = device->getGlobalTimestamps(&hostTs, &deviceTs, true);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(0u, hostTs);
EXPECT_NE(0u, deviceTs);
}
TEST_F(DeviceTest, whenGetGlobalTimestampIsCalledWithSubmissionThenSuccessIsReturned) {
TEST_F(DeviceTest, whenGetGlobalTimestampIsCalledWithSubmissionViaDebugVariableThenSuccessIsReturned) {
debugManager.flags.EnableGlobalTimestampViaSubmission.set(1);
@@ -1713,12 +1713,28 @@ TEST_F(DeviceTest, whenGetGlobalTimestampIsCalledWithSubmissionThenSuccessIsRetu
uint64_t deviceTs = 0u;
// First time to hit the if case of initialization of internal structures.
ze_result_t result = device->getGlobalTimestamps(&hostTs, &deviceTs);
ze_result_t result = device->getGlobalTimestamps(&hostTs, &deviceTs, true);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(0u, hostTs);
// Second time to hit the false case for initialization of internal structures as they are already initialized.
result = device->getGlobalTimestamps(&hostTs, &deviceTs);
result = device->getGlobalTimestamps(&hostTs, &deviceTs, true);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(0u, hostTs);
}
// Calls getGlobalTimestamps twice with useSubmissionMethod == true, without
// setting EnableGlobalTimestampViaSubmission inside this test body.
// Each call must return ZE_RESULT_SUCCESS and write a non-zero host timestamp;
// deviceTs is passed in but its value is not asserted on.
TEST_F(DeviceTest, whenGetGlobalTimestampIsCalledWithUseSubmissionAsTrueThenSuccessIsReturned) {
uint64_t hostTs = 0u;
uint64_t deviceTs = 0u;
// First call: intended to take the branch that initializes the internal structures.
ze_result_t result = device->getGlobalTimestamps(&hostTs, &deviceTs, true);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(0u, hostTs);
// Second call: intended to skip initialization, since the internal structures already exist.
result = device->getGlobalTimestamps(&hostTs, &deviceTs, true);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(0u, hostTs);
}
@@ -1736,7 +1752,7 @@ TEST_F(DeviceTest, givenAppendWriteGlobalTimestampFailsWhenGetGlobalTimestampsUs
uint64_t deviceTs = 0u;
// First time to hit the if case of initialization of internal structures.
ze_result_t result = device->getGlobalTimestamps(&hostTs, &deviceTs);
ze_result_t result = device->getGlobalTimestamps(&hostTs, &deviceTs, true);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(0u, hostTs);
@@ -1750,7 +1766,7 @@ TEST_F(DeviceTest, givenAppendWriteGlobalTimestampFailsWhenGetGlobalTimestampsUs
L0::CommandList::fromHandle(deviceImp->globalTimestampCommandList)->appendWriteGlobalTimestamp(nullptr, nullptr, 0, nullptr);
result = device->getGlobalTimestamps(&hostTs, &deviceTs);
result = device->getGlobalTimestamps(&hostTs, &deviceTs, true);
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, result);
// Swap back the command list.
@@ -1789,7 +1805,7 @@ TEST_F(DeviceTest, givenCreateHostUnifiedMemoryAllocationFailsWhenGetGlobalTimes
uint64_t hostTs = 0u;
uint64_t deviceTs = 0u;
ze_result_t result = device->getGlobalTimestamps(&hostTs, &deviceTs);
ze_result_t result = device->getGlobalTimestamps(&hostTs, &deviceTs, true);
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, result);
mockDriverHandleImp->setSVMAllocsManager(nullptr);
@@ -1820,6 +1836,7 @@ struct GlobalTimestampTest : public ::testing::Test {
};
TEST_F(GlobalTimestampTest, whenGetGlobalTimestampCalledAndGetGpuCpuTimeIsDeviceLostReturnError) {
uint64_t hostTs = 0u;
uint64_t deviceTs = 0u;
@@ -1830,7 +1847,7 @@ TEST_F(GlobalTimestampTest, whenGetGlobalTimestampCalledAndGetGpuCpuTimeIsDevice
driverHandle->initialize(std::move(devices));
device = driverHandle->devices[0];
ze_result_t result = device->getGlobalTimestamps(&hostTs, &deviceTs);
ze_result_t result = device->getGlobalTimestamps(&hostTs, &deviceTs, false);
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, result);
}
@@ -1846,7 +1863,7 @@ TEST_F(GlobalTimestampTest, whenGetGlobalTimestampCalledAndGetGpuCpuTimeIsUnsupp
uint64_t hostTs = 0u;
uint64_t deviceTs = 0u;
ze_result_t result = device->getGlobalTimestamps(&hostTs, &deviceTs);
ze_result_t result = device->getGlobalTimestamps(&hostTs, &deviceTs, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}
@@ -1972,11 +1989,12 @@ TEST_F(GlobalTimestampTest, whenGetGlobalTimestampsUsingSubmissionAndGetCpuTimeH
uint64_t hostTs = 0u;
uint64_t deviceTs = 0u;
ze_result_t result = device->getGlobalTimestamps(&hostTs, &deviceTs);
ze_result_t result = device->getGlobalTimestamps(&hostTs, &deviceTs, true);
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, result);
}
TEST_F(GlobalTimestampTest, whenGetGlobalTimestampCalledAndGetCpuTimeIsFalseReturnArbitraryValues) {
uint64_t hostTs = 0u;
uint64_t deviceTs = 0u;
@@ -1987,14 +2005,14 @@ TEST_F(GlobalTimestampTest, whenGetGlobalTimestampCalledAndGetCpuTimeIsFalseRetu
driverHandle->initialize(std::move(devices));
device = driverHandle->devices[0];
ze_result_t result = device->getGlobalTimestamps(&hostTs, &deviceTs);
ze_result_t result = device->getGlobalTimestamps(&hostTs, &deviceTs, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(0u, hostTs);
EXPECT_NE(0u, deviceTs);
}
TEST_F(DeviceTest, givenPrintGlobalTimestampIsSetWhenGetGlobalTimestampIsCalledThenOutputStringIsAsExpected) {
DebugManagerStateRestore restorer;
debugManager.flags.PrintGlobalTimestampInNs.set(true);
uint64_t hostTs = 0u;
uint64_t deviceTs = 0u;
@@ -2011,7 +2029,7 @@ TEST_F(DeviceTest, givenPrintGlobalTimestampIsSetWhenGetGlobalTimestampIsCalledT
capabilityTable.kernelTimestampValidBits = 32;
testing::internal::CaptureStdout();
ze_result_t result = device->getGlobalTimestamps(&hostTs, &deviceTs);
ze_result_t result = device->getGlobalTimestamps(&hostTs, &deviceTs, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
std::string output = testing::internal::GetCapturedStdout();
// Considering kernelTimestampValidBits(32)
@@ -2042,7 +2060,7 @@ TEST_F(DeviceTest, givenPrintGlobalTimestampIsSetAnd64bitTimestampWhenGetGlobalT
uint64_t deviceTs = 0u;
testing::internal::CaptureStdout();
ze_result_t result = device->getGlobalTimestamps(&hostTs, &deviceTs);
ze_result_t result = device->getGlobalTimestamps(&hostTs, &deviceTs, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
std::string output = testing::internal::GetCapturedStdout();
const std::string expectedString("Host timestamp in ns : 0 | Device timestamp in ns : " +

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022-2024 Intel Corporation
* Copyright (C) 2022-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -348,7 +348,7 @@ ze_result_t getDeviceTimestamps(DeviceImp *deviceImp, const ze_bool_t synchroniz
uint64_t hostTimestamp;
uint64_t deviceTimestamp;
result = deviceImp->getGlobalTimestamps(&hostTimestamp, &deviceTimestamp);
result = deviceImp->getGlobalTimestamps(&hostTimestamp, &deviceTimestamp, false);
if (result != ZE_RESULT_SUCCESS) {
*globalTimestamp = 0;
*metricTimestamp = 0;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2024 Intel Corporation
* Copyright (C) 2020-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -1001,7 +1001,7 @@ ze_result_t OaMetricGroupImp::getMetricTimestampsExp(const ze_bool_t synchronize
uint64_t hostTimestamp;
uint64_t deviceTimestamp;
result = deviceImp->getGlobalTimestamps(&hostTimestamp, &deviceTimestamp);
result = deviceImp->getGlobalTimestamps(&hostTimestamp, &deviceTimestamp, false);
if (result != ZE_RESULT_SUCCESS) {
*globalTimestamp = 0;
*metricTimestamp = 0;

View File

@@ -672,7 +672,7 @@ DECLARE_DEBUG_VARIABLE(int64_t, ReadOnlyAllocationsTypeMask, 0, "0: default, >0
DECLARE_DEBUG_VARIABLE(bool, IgnoreZebinUnknownAttributes, false, "enable to treat unknown zebin attributes as warning instead of error");
DECLARE_DEBUG_VARIABLE(int64_t, FinalizerInputType, 0, "0: default (N/A), input type for finalizer")
DECLARE_DEBUG_VARIABLE(std::string, FinalizerLibraryName, std::string("unk"), "Library name for finalizer")
DECLARE_DEBUG_VARIABLE(int32_t, EnableGlobalTimestampViaSubmission, -1, "-1: OS Interface, 0: OS Interface, 1: Submission. This flag sets the type of method to get timestamp for getGlobalTimestamps");
DECLARE_DEBUG_VARIABLE(int32_t, EnableGlobalTimestampViaSubmission, -1, "-1: default (submission method), 0: os interface method, 1: submission method. This flag sets the type of method to get timestamp for getGlobalTimestamps");
/* Binary Cache */
DECLARE_DEBUG_VARIABLE(bool, BinaryCacheTrace, false, "enable cl_cache to produce .trace files with information about hash computation")