mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-06 10:26:29 +08:00
Tuning start and end timestamp
Change-Id: I1504c596cbb42de266b62aeb1886bf6fb6501ad9 Signed-off-by: Koska Andrzej<andrzej.koska@intel.com> Related-To: NEO-4615
This commit is contained in:
@@ -310,6 +310,13 @@ void Event::calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t con
|
|||||||
auto gpuTimeStamp = queueTimeStamp.GPUTimeStamp;
|
auto gpuTimeStamp = queueTimeStamp.GPUTimeStamp;
|
||||||
|
|
||||||
int64_t c0 = queueTimeStamp.CPUTimeinNS - hwHelper.getGpuTimeStampInNS(gpuTimeStamp, frequency);
|
int64_t c0 = queueTimeStamp.CPUTimeinNS - hwHelper.getGpuTimeStampInNS(gpuTimeStamp, frequency);
|
||||||
|
|
||||||
|
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency) + c0;
|
||||||
|
if (startTimeStamp < queueTimeStamp.CPUTimeinNS) {
|
||||||
|
c0 += static_cast<uint64_t>((1ULL << (hwHelper.getGlobalTimeStampBits())) * frequency);
|
||||||
|
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency) + c0;
|
||||||
|
}
|
||||||
|
|
||||||
/* calculation based on equation
|
/* calculation based on equation
|
||||||
CpuTime = GpuTime * scalar + const( == c0)
|
CpuTime = GpuTime * scalar + const( == c0)
|
||||||
scalar = DeltaCpu( == dCpu) / DeltaGpu( == dGpu)
|
scalar = DeltaCpu( == dCpu) / DeltaGpu( == dGpu)
|
||||||
@@ -328,7 +335,6 @@ void Event::calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t con
|
|||||||
cpuDuration = static_cast<uint64_t>(gpuDuration * frequency);
|
cpuDuration = static_cast<uint64_t>(gpuDuration * frequency);
|
||||||
cpuCompleteDuration = static_cast<uint64_t>(gpuCompleteDuration * frequency);
|
cpuCompleteDuration = static_cast<uint64_t>(gpuCompleteDuration * frequency);
|
||||||
|
|
||||||
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency) + c0;
|
|
||||||
endTimeStamp = startTimeStamp + cpuDuration;
|
endTimeStamp = startTimeStamp + cpuDuration;
|
||||||
completeTimeStamp = startTimeStamp + cpuCompleteDuration;
|
completeTimeStamp = startTimeStamp + cpuCompleteDuration;
|
||||||
|
|
||||||
|
|||||||
@@ -697,6 +697,11 @@ TEST(HwHelperCacheFlushTest, givenEnableCacheFlushFlagIsReadPlatformSettingWhenP
|
|||||||
EXPECT_TRUE(HwHelper::cacheFlushAfterWalkerSupported(device->getHardwareInfo()));
|
EXPECT_TRUE(HwHelper::cacheFlushAfterWalkerSupported(device->getHardwareInfo()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, givenHwHelperWhenCallGetGlobalTimeStampBitsReturnsCorrectValue) {
|
||||||
|
auto &helper = HwHelper::get(renderCoreFamily);
|
||||||
|
EXPECT_EQ(helper.getGlobalTimeStampBits(), 36U);
|
||||||
|
}
|
||||||
|
|
||||||
TEST_F(HwHelperTest, givenEnableLocalMemoryDebugVarAndOsEnableLocalMemoryWhenSetThenGetEnableLocalMemoryReturnsCorrectValue) {
|
TEST_F(HwHelperTest, givenEnableLocalMemoryDebugVarAndOsEnableLocalMemoryWhenSetThenGetEnableLocalMemoryReturnsCorrectValue) {
|
||||||
DebugManagerStateRestore dbgRestore;
|
DebugManagerStateRestore dbgRestore;
|
||||||
VariableBackup<bool> orgOsEnableLocalMemory(&OSInterface::osEnableLocalMemory);
|
VariableBackup<bool> orgOsEnableLocalMemory(&OSInterface::osEnableLocalMemory);
|
||||||
|
|||||||
@@ -522,6 +522,48 @@ TEST(EventProfilingTest, givenRawTimestampsDebugModeWhenDataIsQueriedThenRawData
|
|||||||
event.timeStampNode = nullptr;
|
event.timeStampNode = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(EventProfilingTest, givenRawTimestampsDebugModeWhenStartTimeStampLTQueueTimeStampThenIncreaseStartTimeStamp) {
|
||||||
|
DebugManagerStateRestore stateRestore;
|
||||||
|
DebugManager.flags.ReturnRawGpuTimestamps.set(1);
|
||||||
|
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
|
||||||
|
MyOSTime::instanceNum = 0;
|
||||||
|
device->setOSTime(new MyOSTime());
|
||||||
|
EXPECT_EQ(1, MyOSTime::instanceNum);
|
||||||
|
MockContext context(device.get());
|
||||||
|
MockCommandQueue cmdQ(&context, device.get(), nullptr);
|
||||||
|
cmdQ.setProfilingEnabled();
|
||||||
|
cmdQ.device = device.get();
|
||||||
|
|
||||||
|
HwTimeStamps timestamp;
|
||||||
|
timestamp.GlobalStartTS = 0;
|
||||||
|
timestamp.ContextStartTS = 20;
|
||||||
|
timestamp.GlobalEndTS = 80;
|
||||||
|
timestamp.ContextEndTS = 56;
|
||||||
|
timestamp.GlobalCompleteTS = 0;
|
||||||
|
timestamp.ContextCompleteTS = 70;
|
||||||
|
|
||||||
|
MockTagNode<HwTimeStamps> timestampNode;
|
||||||
|
timestampNode.tagForCpuAccess = &timestamp;
|
||||||
|
|
||||||
|
MockEvent<Event> event(&cmdQ, CL_COMPLETE, 0, 0);
|
||||||
|
cl_event clEvent = &event;
|
||||||
|
|
||||||
|
event.queueTimeStamp.CPUTimeinNS = 83;
|
||||||
|
event.queueTimeStamp.GPUTimeStamp = 1;
|
||||||
|
|
||||||
|
event.setCPUProfilingPath(false);
|
||||||
|
event.timeStampNode = &timestampNode;
|
||||||
|
event.calcProfilingData();
|
||||||
|
|
||||||
|
cl_ulong queued, start;
|
||||||
|
|
||||||
|
clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &queued, nullptr);
|
||||||
|
clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr);
|
||||||
|
|
||||||
|
EXPECT_LT(queued, start);
|
||||||
|
event.timeStampNode = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
struct ProfilingWithPerfCountersTests : public PerformanceCountersFixture, ::testing::Test {
|
struct ProfilingWithPerfCountersTests : public PerformanceCountersFixture, ::testing::Test {
|
||||||
void SetUp() override {
|
void SetUp() override {
|
||||||
PerformanceCountersFixture::SetUp();
|
PerformanceCountersFixture::SetUp();
|
||||||
|
|||||||
@@ -92,6 +92,7 @@ class HwHelper {
|
|||||||
virtual uint32_t getBindlessSurfaceExtendedMessageDescriptorValue(uint32_t surfStateOffset) const = 0;
|
virtual uint32_t getBindlessSurfaceExtendedMessageDescriptorValue(uint32_t surfStateOffset) const = 0;
|
||||||
|
|
||||||
virtual bool isSpecialWorkgroupSizeRequired(const HardwareInfo &hwInfo, bool isSimulation) const = 0;
|
virtual bool isSpecialWorkgroupSizeRequired(const HardwareInfo &hwInfo, bool isSimulation) const = 0;
|
||||||
|
virtual uint32_t getGlobalTimeStampBits() const = 0;
|
||||||
|
|
||||||
static uint32_t getSubDevicesCount(const HardwareInfo *pHwInfo);
|
static uint32_t getSubDevicesCount(const HardwareInfo *pHwInfo);
|
||||||
static uint32_t getEnginesCount(const HardwareInfo &hwInfo);
|
static uint32_t getEnginesCount(const HardwareInfo &hwInfo);
|
||||||
@@ -244,6 +245,8 @@ class HwHelperHw : public HwHelper {
|
|||||||
|
|
||||||
bool isSpecialWorkgroupSizeRequired(const HardwareInfo &hwInfo, bool isSimulation) const override;
|
bool isSpecialWorkgroupSizeRequired(const HardwareInfo &hwInfo, bool isSimulation) const override;
|
||||||
|
|
||||||
|
uint32_t getGlobalTimeStampBits() const override;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
static const AuxTranslationMode defaultAuxTranslationMode;
|
static const AuxTranslationMode defaultAuxTranslationMode;
|
||||||
HwHelperHw() = default;
|
HwHelperHw() = default;
|
||||||
|
|||||||
@@ -20,6 +20,11 @@ uint32_t HwHelperHw<GfxFamily>::getComputeUnitsUsedForScratch(const HardwareInfo
|
|||||||
pHwInfo->gtSystemInfo.ThreadCount / pHwInfo->gtSystemInfo.EUCount;
|
pHwInfo->gtSystemInfo.ThreadCount / pHwInfo->gtSystemInfo.EUCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename GfxFamily>
|
||||||
|
inline uint32_t HwHelperHw<GfxFamily>::getGlobalTimeStampBits() const {
|
||||||
|
return 36;
|
||||||
|
}
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
void HwHelperHw<GfxFamily>::setCapabilityCoherencyFlag(const HardwareInfo *pHwInfo, bool &coherencyFlag) {
|
void HwHelperHw<GfxFamily>::setCapabilityCoherencyFlag(const HardwareInfo *pHwInfo, bool &coherencyFlag) {
|
||||||
coherencyFlag = true;
|
coherencyFlag = true;
|
||||||
|
|||||||
Reference in New Issue
Block a user