mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-20 13:11:34 +08:00
Tuning start and end timestamp
Change-Id: I1504c596cbb42de266b62aeb1886bf6fb6501ad9
Signed-off-by: Koska Andrzej <andrzej.koska@intel.com>
Related-To: NEO-4615
This commit is contained in:
@ -310,6 +310,13 @@ void Event::calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t con
|
||||
auto gpuTimeStamp = queueTimeStamp.GPUTimeStamp;
|
||||
|
||||
int64_t c0 = queueTimeStamp.CPUTimeinNS - hwHelper.getGpuTimeStampInNS(gpuTimeStamp, frequency);
|
||||
|
||||
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency) + c0;
|
||||
if (startTimeStamp < queueTimeStamp.CPUTimeinNS) {
|
||||
c0 += static_cast<uint64_t>((1ULL << (hwHelper.getGlobalTimeStampBits())) * frequency);
|
||||
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency) + c0;
|
||||
}
|
||||
|
||||
/* calculation based on equation
|
||||
CpuTime = GpuTime * scalar + const( == c0)
|
||||
scalar = DeltaCpu( == dCpu) / DeltaGpu( == dGpu)
|
||||
@ -328,7 +335,6 @@ void Event::calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t con
|
||||
cpuDuration = static_cast<uint64_t>(gpuDuration * frequency);
|
||||
cpuCompleteDuration = static_cast<uint64_t>(gpuCompleteDuration * frequency);
|
||||
|
||||
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency) + c0;
|
||||
endTimeStamp = startTimeStamp + cpuDuration;
|
||||
completeTimeStamp = startTimeStamp + cpuCompleteDuration;
|
||||
|
||||
|
@ -697,6 +697,11 @@ TEST(HwHelperCacheFlushTest, givenEnableCacheFlushFlagIsReadPlatformSettingWhenP
|
||||
EXPECT_TRUE(HwHelper::cacheFlushAfterWalkerSupported(device->getHardwareInfo()));
|
||||
}
|
||||
|
||||
// Queries the HwHelper obtained for renderCoreFamily and expects it to report
// a global timestamp width of 36 bits.
HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, givenHwHelperWhenCallGetGlobalTimeStampBitsReturnsCorrectValue) {
    auto &hwHelper = HwHelper::get(renderCoreFamily);
    const auto reportedBits = hwHelper.getGlobalTimeStampBits();
    EXPECT_EQ(reportedBits, 36U);
}
|
||||
|
||||
TEST_F(HwHelperTest, givenEnableLocalMemoryDebugVarAndOsEnableLocalMemoryWhenSetThenGetEnableLocalMemoryReturnsCorrectValue) {
|
||||
DebugManagerStateRestore dbgRestore;
|
||||
VariableBackup<bool> orgOsEnableLocalMemory(&OSInterface::osEnableLocalMemory);
|
||||
|
@ -522,6 +522,48 @@ TEST(EventProfilingTest, givenRawTimestampsDebugModeWhenDataIsQueriedThenRawData
|
||||
event.timeStampNode = nullptr;
|
||||
}
|
||||
|
||||
// Sets up an event whose queue CPU time (83) is larger than the GPU global
// start timestamp (0), runs calcProfilingData() on the GPU profiling path and
// expects the reported CL_PROFILING_COMMAND_START value to be greater than the
// reported CL_PROFILING_COMMAND_QUEUED value.
TEST(EventProfilingTest, givenRawTimestampsDebugModeWhenStartTimeStampLTQueueTimeStampThenIncreaseStartTimeStamp) {
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.ReturnRawGpuTimestamps.set(1);
    auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    MyOSTime::instanceNum = 0;
    device->setOSTime(new MyOSTime());
    EXPECT_EQ(1, MyOSTime::instanceNum);
    MockContext context(device.get());
    MockCommandQueue cmdQ(&context, device.get(), nullptr);
    cmdQ.setProfilingEnabled();
    cmdQ.device = device.get();

    HwTimeStamps timestamp;
    timestamp.GlobalStartTS = 0;
    timestamp.ContextStartTS = 20;
    timestamp.GlobalEndTS = 80;
    timestamp.ContextEndTS = 56;
    timestamp.GlobalCompleteTS = 0;
    timestamp.ContextCompleteTS = 70;

    MockTagNode<HwTimeStamps> timestampNode;
    timestampNode.tagForCpuAccess = &timestamp;

    MockEvent<Event> event(&cmdQ, CL_COMPLETE, 0, 0);
    cl_event clEvent = &event;

    event.queueTimeStamp.CPUTimeinNS = 83;
    event.queueTimeStamp.GPUTimeStamp = 1;

    // Use the GPU timestamp path rather than the CPU profiling path.
    event.setCPUProfilingPath(false);
    event.timeStampNode = &timestampNode;
    event.calcProfilingData();

    // Zero-initialize the outputs and check each query result so that a failed
    // query cannot make the comparison below operate on indeterminate values.
    cl_ulong queued = 0;
    cl_ulong start = 0;

    EXPECT_EQ(CL_SUCCESS, clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &queued, nullptr));
    EXPECT_EQ(CL_SUCCESS, clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr));

    EXPECT_LT(queued, start);

    // NOTE(review): presumably detaches the stack-allocated node so the event
    // does not touch it on destruction - confirm against Event's destructor.
    event.timeStampNode = nullptr;
}
|
||||
|
||||
struct ProfilingWithPerfCountersTests : public PerformanceCountersFixture, ::testing::Test {
|
||||
void SetUp() override {
|
||||
PerformanceCountersFixture::SetUp();
|
||||
|
@ -92,6 +92,7 @@ class HwHelper {
|
||||
virtual uint32_t getBindlessSurfaceExtendedMessageDescriptorValue(uint32_t surfStateOffset) const = 0;
|
||||
|
||||
virtual bool isSpecialWorkgroupSizeRequired(const HardwareInfo &hwInfo, bool isSimulation) const = 0;
|
||||
virtual uint32_t getGlobalTimeStampBits() const = 0;
|
||||
|
||||
static uint32_t getSubDevicesCount(const HardwareInfo *pHwInfo);
|
||||
static uint32_t getEnginesCount(const HardwareInfo &hwInfo);
|
||||
@ -244,6 +245,8 @@ class HwHelperHw : public HwHelper {
|
||||
|
||||
bool isSpecialWorkgroupSizeRequired(const HardwareInfo &hwInfo, bool isSimulation) const override;
|
||||
|
||||
uint32_t getGlobalTimeStampBits() const override;
|
||||
|
||||
protected:
|
||||
static const AuxTranslationMode defaultAuxTranslationMode;
|
||||
HwHelperHw() = default;
|
||||
|
@ -20,6 +20,11 @@ uint32_t HwHelperHw<GfxFamily>::getComputeUnitsUsedForScratch(const HardwareInfo
|
||||
pHwInfo->gtSystemInfo.ThreadCount / pHwInfo->gtSystemInfo.EUCount;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline uint32_t HwHelperHw<GfxFamily>::getGlobalTimeStampBits() const {
|
||||
return 36;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void HwHelperHw<GfxFamily>::setCapabilityCoherencyFlag(const HardwareInfo *pHwInfo, bool &coherencyFlag) {
|
||||
coherencyFlag = true;
|
||||
|
Reference in New Issue
Block a user