Stop querying GPU frequency during each profiling data calculation

Change the type of the profiling timer resolution in device info to double

Change-Id: I41a67ecf61cd3bdc5a997b1f083b9998063f4f7f
This commit is contained in:
Mateusz Jablonski
2018-04-10 16:19:13 +02:00
committed by sys_ocldev
parent a3b782ffc7
commit 881895bd2a
10 changed files with 47 additions and 16 deletions

View File

@ -418,7 +418,7 @@ CIF::RAII::UPtr_t<IGC::IgcOclTranslationCtxTagOCL> CompilerInterface::createIgcT
return nullptr;
}
newDeviceCtx->SetProfilingTimerResolution(static_cast<float>(device.getDeviceInfo().profilingTimerResolution));
newDeviceCtx->SetProfilingTimerResolution(static_cast<float>(device.getDeviceInfo().outProfilingTimerResolution));
auto igcPlatform = newDeviceCtx->GetPlatformHandle();
auto igcGtSystemInfo = newDeviceCtx->GetGTSystemInfoHandle();
auto igcFeWa = newDeviceCtx->GetIgcFeaturesAndWorkaroundsHandle();

View File

@ -208,7 +208,8 @@ void Device::initializeCaps() {
deviceInfo.globalMemSize = alignDown(deviceInfo.globalMemSize, MemoryConstants::pageSize);
deviceInfo.globalMemCacheType = CL_READ_WRITE_CACHE;
deviceInfo.profilingTimerResolution = static_cast<size_t>(getProfilingTimerResolution());
deviceInfo.profilingTimerResolution = getProfilingTimerResolution();
deviceInfo.outProfilingTimerResolution = static_cast<size_t>(deviceInfo.profilingTimerResolution);
deviceInfo.memBaseAddressAlign = 1024;
deviceInfo.minDataTypeAlignSize = 128;

View File

@ -92,7 +92,8 @@ struct DeviceInfo {
cl_device_local_mem_type localMemType;
cl_ulong localMemSize;
cl_bool errorCorrectionSupport;
size_t profilingTimerResolution;
double profilingTimerResolution;
size_t outProfilingTimerResolution;
cl_bool endianLittle;
cl_bool deviceAvailable;
cl_bool compilerAvailable;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Intel Corporation
* Copyright (c) 2017 - 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -133,7 +133,7 @@ template<> struct Map<CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG > : pu
template<> struct Map<CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT > : public MapBase<CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, uint32_t, &DeviceInfo::preferredVectorWidthShort> {};
template<> struct Map<CL_DEVICE_PRINTF_BUFFER_SIZE > : public MapBase<CL_DEVICE_PRINTF_BUFFER_SIZE, size_t, &DeviceInfo::printfBufferSize> {};
template<> struct Map<CL_DEVICE_PROFILE > : public MapBase<CL_DEVICE_PROFILE, const char *, &DeviceInfo::profile> {};
template<> struct Map<CL_DEVICE_PROFILING_TIMER_RESOLUTION > : public MapBase<CL_DEVICE_PROFILING_TIMER_RESOLUTION, size_t, &DeviceInfo::profilingTimerResolution> {};
template<> struct Map<CL_DEVICE_PROFILING_TIMER_RESOLUTION > : public MapBase<CL_DEVICE_PROFILING_TIMER_RESOLUTION, size_t, &DeviceInfo::outProfilingTimerResolution> {};
template<> struct Map<CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE > : public MapBase<CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, uint32_t, &DeviceInfo::queueOnDeviceMaxSize> {};
template<> struct Map<CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE > : public MapBase<CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, uint32_t, &DeviceInfo::queueOnDevicePreferredSize> {};
template<> struct Map<CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES > : public MapBase<CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, uint64_t, &DeviceInfo::queueOnDeviceProperties> {};

View File

@ -259,7 +259,7 @@ bool Event::calcProfilingData() {
int64_t c0 = 0;
if (!dataCalculated && timeStampNode && !profilingCpuPath) {
double frequency = cmdQueue->getDevice().getProfilingTimerResolution();
double frequency = cmdQueue->getDevice().getDeviceInfo().profilingTimerResolution;
/* calculation based on equation
CpuTime = GpuTime * scalar + const( == c0)
scalar = DeltaCpu( == dCpu) / DeltaGpu( == dGpu)

View File

@ -77,7 +77,7 @@ GEN8TEST_F(Gen8DeviceCaps, image3DDimensions) {
BDWTEST_F(Gen8DeviceCaps, BdwProfilingTimerResolution) {
const auto &caps = pDevice->getDeviceInfo();
EXPECT_EQ(83u, caps.profilingTimerResolution);
EXPECT_EQ(83u, caps.outProfilingTimerResolution);
}
typedef Test<DeviceFixture> BdwUsDeviceIdTest;

View File

@ -35,7 +35,7 @@ BXTTEST_F(BxtDeviceCaps, reportsOcl12) {
BXTTEST_F(BxtDeviceCaps, BxtProfilingTimerResolution) {
const auto &caps = pDevice->getDeviceInfo();
EXPECT_EQ(52u, caps.profilingTimerResolution);
EXPECT_EQ(52u, caps.outProfilingTimerResolution);
}
BXTTEST_F(BxtDeviceCaps, BxtClVersionSupport) {

View File

@ -29,7 +29,7 @@ typedef Test<DeviceFixture> Gen9DeviceCaps;
GLKTEST_F(Gen9DeviceCaps, GlkProfilingTimerResolution) {
const auto &caps = pDevice->getDeviceInfo();
EXPECT_EQ(52u, caps.profilingTimerResolution);
EXPECT_EQ(52u, caps.outProfilingTimerResolution);
}
GLKTEST_F(Gen9DeviceCaps, givenGlkDeviceWhenAskedForDoubleSupportThenTrueIsReturned) {

View File

@ -35,7 +35,7 @@ SKLTEST_F(SklDeviceCaps, reportsOcl21) {
SKLTEST_F(SklDeviceCaps, SklProfilingTimerResolution) {
const auto &caps = pDevice->getDeviceInfo();
EXPECT_EQ(83u, caps.profilingTimerResolution);
EXPECT_EQ(83u, caps.outProfilingTimerResolution);
}
SKLTEST_F(SklDeviceCaps, givenSklDeviceWhenAskedFor32BitSupportThenFalseIsReturned) {

View File

@ -402,13 +402,44 @@ struct MockTagNode : public TagNode<TagType> {
}
};
TEST(EventProfilingTest, calcProfilingDataSetsEndTimestampInCompleteTimestampWhenCompleteIsZero) {
MockDevice *device = DeviceHelper<>::create();
// Test double derived from OSTime. Every overridden method records a test
// failure (EXPECT_FALSE(true)) when it is called and then returns a placeholder
// value, so a test that installs this object can check that the code under test
// does not call any of these OSTime methods.
class MyOSTime : public OSTime {
public:
// Count of constructed MyOSTime objects; incremented once per constructor call.
static int instanceNum;
MyOSTime() {
instanceNum++;
}
// Flags a failure if called; returns 1.0 as a placeholder.
double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override {
EXPECT_FALSE(true);
return 1.0;
}
// Flags a failure if called; returns false without touching pGpuCpuTime.
bool getCpuGpuTime(TimeStampData *pGpuCpuTime) override {
EXPECT_FALSE(true);
return false;
}
// Flags a failure if called; returns false without touching timeStamp.
bool getCpuTime(uint64_t *timeStamp) override {
EXPECT_FALSE(true);
return false;
// NOTE(review): the ';' after the closing brace below is redundant but harmless.
};
// Flags a failure if called; returns 0 as a placeholder.
double getHostTimerResolution() const override {
EXPECT_FALSE(true);
return 0;
}
// Flags a failure if called; returns 0 as a placeholder.
uint64_t getCpuRawTimestamp() override {
EXPECT_FALSE(true);
return 0;
}
};
// Out-of-class definition of the construction counter, starting at zero.
int MyOSTime::instanceNum = 0;
TEST(EventProfilingTest, givenEventWhenCompleteIsZeroThenCalcProfilingDataSetsEndTimestampInCompleteTimestampAndDoesntCallOsTimeMethods) {
std::unique_ptr<MockDevice> device(DeviceHelper<>::create());
MyOSTime::instanceNum = 0;
device->setOSTime(new MyOSTime());
EXPECT_EQ(1, MyOSTime::instanceNum);
MockContext context;
cl_command_queue_properties props[5] = {0, 0, 0, 0, 0};
MockCommandQueue cmdQ(&context, device, props);
MockCommandQueue cmdQ(&context, device.get(), props);
cmdQ.setProfilingEnabled();
cmdQ.device = device;
cmdQ.device = device.get();
HwTimeStamps timestamp;
timestamp.GlobalStartTS = 10;
@ -429,7 +460,6 @@ TEST(EventProfilingTest, calcProfilingDataSetsEndTimestampInCompleteTimestampWhe
EXPECT_EQ(timestamp.ContextEndTS, timestamp.ContextCompleteTS);
cmdQ.device = nullptr;
delete device;
}
struct ProfilingWithPerfCountersTests : public ProfilingTests,
@ -774,5 +804,4 @@ HWTEST_F(ProfilingWithPerfCountersTests, GIVENCommandQueueWithProfilingPerfCount
pCmdQ->setPerfCountersEnabled(false, UINT32_MAX);
}
} // namespace OCLRT