Fix event profiling for marker commands

Related-To: NEO-5799

Signed-off-by: Konstanty Misiak <konstanty.misiak@intel.com>
This commit is contained in:
Konstanty Misiak
2021-06-22 13:16:27 +00:00
committed by Compute-Runtime-Automation
parent 816e95443f
commit ad19eda689
16 changed files with 389 additions and 35 deletions

View File

@@ -160,21 +160,19 @@ cl_int Event::getEventProfilingInfo(cl_profiling_info paramName,
return CL_PROFILING_INFO_NOT_AVAILABLE;
}
uint64_t timestamp = 0u;
// if paramValue is NULL, it is ignored
switch (paramName) {
case CL_PROFILING_COMMAND_QUEUED:
src = &queueTimeStamp.CPUTimeinNS;
if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
src = &queueTimeStamp.GPUTimeStamp;
}
timestamp = getTimeInNSFromTimestampData(queueTimeStamp);
src = &timestamp;
srcSize = sizeof(cl_ulong);
break;
case CL_PROFILING_COMMAND_SUBMIT:
src = &submitTimeStamp.CPUTimeinNS;
if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
src = &submitTimeStamp.GPUTimeStamp;
}
timestamp = getTimeInNSFromTimestampData(submitTimeStamp);
src = &timestamp;
srcSize = sizeof(cl_ulong);
break;
@@ -249,6 +247,26 @@ cl_ulong Event::getDelta(cl_ulong startTime,
return Delta;
}
// Selects the value to report for a recorded timestamp sample.
//
// Order of precedence:
//   1. CPU profiling path                      -> timestamp.CPUTimeinNS
//   2. ReturnRawGpuTimestamps debug flag       -> timestamp.GPUTimeStamp, unconverted
//   3. a command queue is attached and the
//      EnableDeviceBasedTimestamps flag is set -> GPU timestamp converted by the
//                                                 HW helper using the device's
//                                                 profilingTimerResolution
//   4. anything else                           -> timestamp.CPUTimeinNS
uint64_t Event::getTimeInNSFromTimestampData(const TimeStampData &timestamp) const {
    if (!isCPUProfilingPath()) {
        // The raw-GPU debug override is consulted before any device-based conversion.
        if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
            return timestamp.GPUTimeStamp;
        }

        // The conversion needs the queue's device, so it is skipped when no queue is attached.
        const bool useDeviceClock = cmdQueue && DebugManager.flags.EnableDeviceBasedTimestamps.get();
        if (useDeviceClock) {
            auto &queueDevice = cmdQueue->getDevice();
            auto &gfxHelper = HwHelper::get(queueDevice.getHardwareInfo().platform.eRenderCoreFamily);
            double timerResolution = queueDevice.getDeviceInfo().profilingTimerResolution;
            return gfxHelper.getGpuTimeStampInNS(timestamp.GPUTimeStamp, timerResolution);
        }
    }

    // CPU profiling path, or none of the GPU-based options applied.
    return timestamp.CPUTimeinNS;
}
bool Event::calcProfilingData() {
if (!dataCalculated && !profilingCpuPath) {
if (timestampPacketContainer && timestampPacketContainer->peekNodes().size() > 0) {
@@ -294,23 +312,29 @@ bool Event::calcProfilingData() {
}
void Event::calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t contextEndTS, uint64_t *contextCompleteTS, uint64_t globalStartTS) {
uint64_t gpuDuration = 0;
uint64_t cpuDuration = 0;
uint64_t gpuCompleteDuration = 0;
uint64_t cpuCompleteDuration = 0;
auto &hwHelper = HwHelper::get(this->cmdQueue->getDevice().getHardwareInfo().platform.eRenderCoreFamily);
auto frequency = cmdQueue->getDevice().getDeviceInfo().profilingTimerResolution;
auto gpuTimeStamp = queueTimeStamp.GPUTimeStamp;
auto &device = this->cmdQueue->getDevice();
auto &hwHelper = HwHelper::get(device.getHardwareInfo().platform.eRenderCoreFamily);
auto frequency = device.getDeviceInfo().profilingTimerResolution;
auto gpuQueueTimeStamp = hwHelper.getGpuTimeStampInNS(queueTimeStamp.GPUTimeStamp, frequency);
int64_t c0 = queueTimeStamp.CPUTimeinNS - hwHelper.getGpuTimeStampInNS(gpuTimeStamp, frequency);
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency) + c0;
if (startTimeStamp < queueTimeStamp.CPUTimeinNS) {
c0 += static_cast<uint64_t>((1ULL << (hwHelper.getGlobalTimeStampBits())) * frequency);
if (DebugManager.flags.EnableDeviceBasedTimestamps.get()) {
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency);
if (startTimeStamp < gpuQueueTimeStamp) {
startTimeStamp += static_cast<uint64_t>((1ULL << hwHelper.getGlobalTimeStampBits()) * frequency);
}
} else {
int64_t c0 = queueTimeStamp.CPUTimeinNS - gpuQueueTimeStamp;
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency) + c0;
if (startTimeStamp < queueTimeStamp.CPUTimeinNS) {
c0 += static_cast<uint64_t>((1ULL << (hwHelper.getGlobalTimeStampBits())) * frequency);
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency) + c0;
}
}
/* calculation based on equation

View File

@@ -326,6 +326,7 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
}
}
// Returns the value to report for the given timestamp sample: its CPU time,
// its raw GPU timestamp, or its GPU timestamp converted by the HW helper,
// depending on the profiling path, the debug flags and whether a command queue is set.
uint64_t getTimeInNSFromTimestampData(const TimeStampData &timestamp) const;
bool calcProfilingData();
MOCKABLE_VIRTUAL void calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t contextEndTS, uint64_t *contextCompleteTS, uint64_t globalStartTS);
MOCKABLE_VIRTUAL void synchronizeTaskCount() {