Fix event profiling for marker commands

Related-To: NEO-5799

Signed-off-by: Konstanty Misiak <konstanty.misiak@intel.com>
This commit is contained in:
Konstanty Misiak
2021-06-22 13:16:27 +00:00
committed by Compute-Runtime-Automation
parent 6fb1be80c8
commit 1df1094465
10 changed files with 136 additions and 40 deletions

View File

@@ -160,21 +160,19 @@ cl_int Event::getEventProfilingInfo(cl_profiling_info paramName,
return CL_PROFILING_INFO_NOT_AVAILABLE;
}
uint64_t timestamp;
// if paramValue is NULL, it is ignored
switch (paramName) {
case CL_PROFILING_COMMAND_QUEUED:
src = &queueTimeStamp.CPUTimeinNS;
if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
src = &queueTimeStamp.GPUTimeStamp;
}
timestamp = getTimeInNSFromTimestampData(queueTimeStamp);
src = &timestamp;
srcSize = sizeof(cl_ulong);
break;
case CL_PROFILING_COMMAND_SUBMIT:
src = &submitTimeStamp.CPUTimeinNS;
if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
src = &submitTimeStamp.GPUTimeStamp;
}
timestamp = getTimeInNSFromTimestampData(submitTimeStamp);
src = &timestamp;
srcSize = sizeof(cl_ulong);
break;
@@ -249,6 +247,20 @@ cl_ulong Event::getDelta(cl_ulong startTime,
return Delta;
}
uint64_t Event::getTimeInNSFromTimestampData(const TimeStampData &timestamp) const {
if (isCPUProfilingPath()) {
return timestamp.CPUTimeinNS;
}
if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
return timestamp.GPUTimeStamp;
}
double resolution = cmdQueue ? cmdQueue->getDevice().getDeviceInfo().profilingTimerResolution : 0.0;
return static_cast<uint64_t>(timestamp.GPUTimeStamp * resolution);
}
bool Event::calcProfilingData() {
if (!dataCalculated && !profilingCpuPath) {
if (timestampPacketContainer && timestampPacketContainer->peekNodes().size() > 0) {
@@ -294,32 +306,22 @@ bool Event::calcProfilingData() {
}
void Event::calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t contextEndTS, uint64_t *contextCompleteTS, uint64_t globalStartTS) {
uint64_t gpuDuration = 0;
uint64_t cpuDuration = 0;
uint64_t gpuCompleteDuration = 0;
uint64_t cpuCompleteDuration = 0;
auto &hwHelper = HwHelper::get(this->cmdQueue->getDevice().getHardwareInfo().platform.eRenderCoreFamily);
auto frequency = cmdQueue->getDevice().getDeviceInfo().profilingTimerResolution;
auto gpuTimeStamp = queueTimeStamp.GPUTimeStamp;
auto &device = this->cmdQueue->getDevice();
auto &hwHelper = HwHelper::get(device.getHardwareInfo().platform.eRenderCoreFamily);
auto frequency = device.getDeviceInfo().profilingTimerResolution;
int64_t c0 = queueTimeStamp.CPUTimeinNS - hwHelper.getGpuTimeStampInNS(gpuTimeStamp, frequency);
startTimeStamp = hwHelper.getGpuTimeStampInNS(globalStartTS, frequency);
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency) + c0;
if (startTimeStamp < queueTimeStamp.CPUTimeinNS) {
c0 += static_cast<uint64_t>((1ULL << (hwHelper.getGlobalTimeStampBits())) * frequency);
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency) + c0;
if (startTimeStamp < queueTimeStamp.GPUTimeStamp) {
startTimeStamp += static_cast<uint64_t>((1ULL << (hwHelper.getGlobalTimeStampBits())) * frequency);
}
/* calculation based on equation
CpuTime = GpuTime * scalar + const( == c0)
scalar = DeltaCpu( == dCpu) / DeltaGpu( == dGpu)
to determine the value of the const we can use one pair of values
const = CpuTimeQueue - GpuTimeQueue * scalar
*/
//If device enqueue has not updated complete timestamp, assign end timestamp
gpuDuration = getDelta(contextStartTS, contextEndTS);
if (*contextCompleteTS == 0) {

View File

@@ -326,6 +326,7 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
}
}
uint64_t getTimeInNSFromTimestampData(const TimeStampData &timestamp) const;
bool calcProfilingData();
MOCKABLE_VIRTUAL void calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t contextEndTS, uint64_t *contextCompleteTS, uint64_t globalStartTS);
MOCKABLE_VIRTUAL void synchronizeTaskCount() {