mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Stop querying GPU frequency during each profiling data calculation
Change the type of the profiling timer resolution in device info to double. Change-Id: I41a67ecf61cd3bdc5a997b1f083b9998063f4f7f
This commit is contained in:

committed by
sys_ocldev

parent
a3b782ffc7
commit
881895bd2a
@ -418,7 +418,7 @@ CIF::RAII::UPtr_t<IGC::IgcOclTranslationCtxTagOCL> CompilerInterface::createIgcT
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
newDeviceCtx->SetProfilingTimerResolution(static_cast<float>(device.getDeviceInfo().profilingTimerResolution));
|
||||
newDeviceCtx->SetProfilingTimerResolution(static_cast<float>(device.getDeviceInfo().outProfilingTimerResolution));
|
||||
auto igcPlatform = newDeviceCtx->GetPlatformHandle();
|
||||
auto igcGtSystemInfo = newDeviceCtx->GetGTSystemInfoHandle();
|
||||
auto igcFeWa = newDeviceCtx->GetIgcFeaturesAndWorkaroundsHandle();
|
||||
|
@ -208,7 +208,8 @@ void Device::initializeCaps() {
|
||||
deviceInfo.globalMemSize = alignDown(deviceInfo.globalMemSize, MemoryConstants::pageSize);
|
||||
|
||||
deviceInfo.globalMemCacheType = CL_READ_WRITE_CACHE;
|
||||
deviceInfo.profilingTimerResolution = static_cast<size_t>(getProfilingTimerResolution());
|
||||
deviceInfo.profilingTimerResolution = getProfilingTimerResolution();
|
||||
deviceInfo.outProfilingTimerResolution = static_cast<size_t>(deviceInfo.profilingTimerResolution);
|
||||
deviceInfo.memBaseAddressAlign = 1024;
|
||||
deviceInfo.minDataTypeAlignSize = 128;
|
||||
|
||||
|
@ -92,7 +92,8 @@ struct DeviceInfo {
|
||||
cl_device_local_mem_type localMemType;
|
||||
cl_ulong localMemSize;
|
||||
cl_bool errorCorrectionSupport;
|
||||
size_t profilingTimerResolution;
|
||||
double profilingTimerResolution;
|
||||
size_t outProfilingTimerResolution;
|
||||
cl_bool endianLittle;
|
||||
cl_bool deviceAvailable;
|
||||
cl_bool compilerAvailable;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
* Copyright (c) 2017 - 2018, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@ -133,7 +133,7 @@ template<> struct Map<CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG > : pu
|
||||
template<> struct Map<CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT > : public MapBase<CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, uint32_t, &DeviceInfo::preferredVectorWidthShort> {};
|
||||
template<> struct Map<CL_DEVICE_PRINTF_BUFFER_SIZE > : public MapBase<CL_DEVICE_PRINTF_BUFFER_SIZE, size_t, &DeviceInfo::printfBufferSize> {};
|
||||
template<> struct Map<CL_DEVICE_PROFILE > : public MapBase<CL_DEVICE_PROFILE, const char *, &DeviceInfo::profile> {};
|
||||
template<> struct Map<CL_DEVICE_PROFILING_TIMER_RESOLUTION > : public MapBase<CL_DEVICE_PROFILING_TIMER_RESOLUTION, size_t, &DeviceInfo::profilingTimerResolution> {};
|
||||
template<> struct Map<CL_DEVICE_PROFILING_TIMER_RESOLUTION > : public MapBase<CL_DEVICE_PROFILING_TIMER_RESOLUTION, size_t, &DeviceInfo::outProfilingTimerResolution> {};
|
||||
template<> struct Map<CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE > : public MapBase<CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, uint32_t, &DeviceInfo::queueOnDeviceMaxSize> {};
|
||||
template<> struct Map<CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE > : public MapBase<CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, uint32_t, &DeviceInfo::queueOnDevicePreferredSize> {};
|
||||
template<> struct Map<CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES > : public MapBase<CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, uint64_t, &DeviceInfo::queueOnDeviceProperties> {};
|
||||
|
@ -259,7 +259,7 @@ bool Event::calcProfilingData() {
|
||||
|
||||
int64_t c0 = 0;
|
||||
if (!dataCalculated && timeStampNode && !profilingCpuPath) {
|
||||
double frequency = cmdQueue->getDevice().getProfilingTimerResolution();
|
||||
double frequency = cmdQueue->getDevice().getDeviceInfo().profilingTimerResolution;
|
||||
/* calculation based on equation
|
||||
CpuTime = GpuTime * scalar + const( == c0)
|
||||
scalar = DeltaCpu( == dCpu) / DeltaGpu( == dGpu)
|
||||
|
@ -77,7 +77,7 @@ GEN8TEST_F(Gen8DeviceCaps, image3DDimensions) {
|
||||
|
||||
BDWTEST_F(Gen8DeviceCaps, BdwProfilingTimerResolution) {
|
||||
const auto &caps = pDevice->getDeviceInfo();
|
||||
EXPECT_EQ(83u, caps.profilingTimerResolution);
|
||||
EXPECT_EQ(83u, caps.outProfilingTimerResolution);
|
||||
}
|
||||
|
||||
typedef Test<DeviceFixture> BdwUsDeviceIdTest;
|
||||
|
@ -35,7 +35,7 @@ BXTTEST_F(BxtDeviceCaps, reportsOcl12) {
|
||||
|
||||
BXTTEST_F(BxtDeviceCaps, BxtProfilingTimerResolution) {
|
||||
const auto &caps = pDevice->getDeviceInfo();
|
||||
EXPECT_EQ(52u, caps.profilingTimerResolution);
|
||||
EXPECT_EQ(52u, caps.outProfilingTimerResolution);
|
||||
}
|
||||
|
||||
BXTTEST_F(BxtDeviceCaps, BxtClVersionSupport) {
|
||||
|
@ -29,7 +29,7 @@ typedef Test<DeviceFixture> Gen9DeviceCaps;
|
||||
|
||||
GLKTEST_F(Gen9DeviceCaps, GlkProfilingTimerResolution) {
|
||||
const auto &caps = pDevice->getDeviceInfo();
|
||||
EXPECT_EQ(52u, caps.profilingTimerResolution);
|
||||
EXPECT_EQ(52u, caps.outProfilingTimerResolution);
|
||||
}
|
||||
|
||||
GLKTEST_F(Gen9DeviceCaps, givenGlkDeviceWhenAskedForDoubleSupportThenTrueIsReturned) {
|
||||
|
@ -35,7 +35,7 @@ SKLTEST_F(SklDeviceCaps, reportsOcl21) {
|
||||
|
||||
SKLTEST_F(SklDeviceCaps, SklProfilingTimerResolution) {
|
||||
const auto &caps = pDevice->getDeviceInfo();
|
||||
EXPECT_EQ(83u, caps.profilingTimerResolution);
|
||||
EXPECT_EQ(83u, caps.outProfilingTimerResolution);
|
||||
}
|
||||
|
||||
SKLTEST_F(SklDeviceCaps, givenSklDeviceWhenAskedFor32BitSupportThenFalseIsReturned) {
|
||||
|
@ -402,13 +402,44 @@ struct MockTagNode : public TagNode<TagType> {
|
||||
}
|
||||
};
|
||||
|
||||
TEST(EventProfilingTest, calcProfilingDataSetsEndTimestampInCompleteTimestampWhenCompleteIsZero) {
|
||||
MockDevice *device = DeviceHelper<>::create();
|
||||
// Test double derived from OSTime. Construction increments instanceNum, and
// every overridden method raises an always-failing gtest expectation
// (EXPECT_FALSE(true)) before returning a fixed placeholder value, so a test
// that installs this object fails if any of these methods is invoked.
class MyOSTime : public OSTime {
|
||||
public:
|
||||
// Count of MyOSTime constructions; incremented by the constructor.
static int instanceNum;
|
||||
MyOSTime() {
|
||||
instanceNum++;
|
||||
}
|
||||
// Flags a failure if called; returns 1.0 as a placeholder.
double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override {
|
||||
EXPECT_FALSE(true);
|
||||
return 1.0;
|
||||
}
|
||||
// Flags a failure if called; returns false and does not write to pGpuCpuTime.
bool getCpuGpuTime(TimeStampData *pGpuCpuTime) override {
|
||||
EXPECT_FALSE(true);
|
||||
return false;
|
||||
}
|
||||
// Flags a failure if called; returns false and does not write to timeStamp.
bool getCpuTime(uint64_t *timeStamp) override {
|
||||
EXPECT_FALSE(true);
|
||||
return false;
|
||||
// NOTE(review): the ';' after this member function body is redundant.
};
|
||||
// Flags a failure if called; returns 0 as a placeholder.
double getHostTimerResolution() const override {
|
||||
EXPECT_FALSE(true);
|
||||
return 0;
|
||||
}
|
||||
// Flags a failure if called; returns 0 as a placeholder.
uint64_t getCpuRawTimestamp() override {
|
||||
EXPECT_FALSE(true);
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
// Out-of-class definition of the static construction counter, starting at zero.
int MyOSTime::instanceNum = 0;
|
||||
TEST(EventProfilingTest, givenEventWhenCompleteIsZeroThenCalcProfilingDataSetsEndTimestampInCompleteTimestampAndDoesntCallOsTimeMethods) {
|
||||
std::unique_ptr<MockDevice> device(DeviceHelper<>::create());
|
||||
MyOSTime::instanceNum = 0;
|
||||
device->setOSTime(new MyOSTime());
|
||||
EXPECT_EQ(1, MyOSTime::instanceNum);
|
||||
MockContext context;
|
||||
cl_command_queue_properties props[5] = {0, 0, 0, 0, 0};
|
||||
MockCommandQueue cmdQ(&context, device, props);
|
||||
MockCommandQueue cmdQ(&context, device.get(), props);
|
||||
cmdQ.setProfilingEnabled();
|
||||
cmdQ.device = device;
|
||||
cmdQ.device = device.get();
|
||||
|
||||
HwTimeStamps timestamp;
|
||||
timestamp.GlobalStartTS = 10;
|
||||
@ -429,7 +460,6 @@ TEST(EventProfilingTest, calcProfilingDataSetsEndTimestampInCompleteTimestampWhe
|
||||
|
||||
EXPECT_EQ(timestamp.ContextEndTS, timestamp.ContextCompleteTS);
|
||||
cmdQ.device = nullptr;
|
||||
delete device;
|
||||
}
|
||||
|
||||
struct ProfilingWithPerfCountersTests : public ProfilingTests,
|
||||
@ -774,5 +804,4 @@ HWTEST_F(ProfilingWithPerfCountersTests, GIVENCommandQueueWithProfilingPerfCount
|
||||
|
||||
pCmdQ->setPerfCountersEnabled(false, UINT32_MAX);
|
||||
}
|
||||
|
||||
} // namespace OCLRT
|
||||
|
Reference in New Issue
Block a user