mirror of
https://github.com/intel/compute-runtime.git
synced 2025-11-10 05:49:51 +08:00
Fix event profiling for marker commands
Related-To: NEO-5799 Signed-off-by: Konstanty Misiak <konstanty.misiak@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
6fb1be80c8
commit
1df1094465
@@ -160,21 +160,19 @@ cl_int Event::getEventProfilingInfo(cl_profiling_info paramName,
|
||||
return CL_PROFILING_INFO_NOT_AVAILABLE;
|
||||
}
|
||||
|
||||
uint64_t timestamp;
|
||||
|
||||
// if paramValue is NULL, it is ignored
|
||||
switch (paramName) {
|
||||
case CL_PROFILING_COMMAND_QUEUED:
|
||||
src = &queueTimeStamp.CPUTimeinNS;
|
||||
if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
|
||||
src = &queueTimeStamp.GPUTimeStamp;
|
||||
}
|
||||
timestamp = getTimeInNSFromTimestampData(queueTimeStamp);
|
||||
src = &timestamp;
|
||||
srcSize = sizeof(cl_ulong);
|
||||
break;
|
||||
|
||||
case CL_PROFILING_COMMAND_SUBMIT:
|
||||
src = &submitTimeStamp.CPUTimeinNS;
|
||||
if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
|
||||
src = &submitTimeStamp.GPUTimeStamp;
|
||||
}
|
||||
timestamp = getTimeInNSFromTimestampData(submitTimeStamp);
|
||||
src = &timestamp;
|
||||
srcSize = sizeof(cl_ulong);
|
||||
break;
|
||||
|
||||
@@ -249,6 +247,20 @@ cl_ulong Event::getDelta(cl_ulong startTime,
|
||||
return Delta;
|
||||
}
|
||||
|
||||
uint64_t Event::getTimeInNSFromTimestampData(const TimeStampData ×tamp) const {
|
||||
if (isCPUProfilingPath()) {
|
||||
return timestamp.CPUTimeinNS;
|
||||
}
|
||||
|
||||
if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
|
||||
return timestamp.GPUTimeStamp;
|
||||
}
|
||||
|
||||
double resolution = cmdQueue ? cmdQueue->getDevice().getDeviceInfo().profilingTimerResolution : 0.0;
|
||||
|
||||
return static_cast<uint64_t>(timestamp.GPUTimeStamp * resolution);
|
||||
}
|
||||
|
||||
bool Event::calcProfilingData() {
|
||||
if (!dataCalculated && !profilingCpuPath) {
|
||||
if (timestampPacketContainer && timestampPacketContainer->peekNodes().size() > 0) {
|
||||
@@ -294,32 +306,22 @@ bool Event::calcProfilingData() {
|
||||
}
|
||||
|
||||
void Event::calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t contextEndTS, uint64_t *contextCompleteTS, uint64_t globalStartTS) {
|
||||
|
||||
uint64_t gpuDuration = 0;
|
||||
uint64_t cpuDuration = 0;
|
||||
|
||||
uint64_t gpuCompleteDuration = 0;
|
||||
uint64_t cpuCompleteDuration = 0;
|
||||
|
||||
auto &hwHelper = HwHelper::get(this->cmdQueue->getDevice().getHardwareInfo().platform.eRenderCoreFamily);
|
||||
auto frequency = cmdQueue->getDevice().getDeviceInfo().profilingTimerResolution;
|
||||
auto gpuTimeStamp = queueTimeStamp.GPUTimeStamp;
|
||||
auto &device = this->cmdQueue->getDevice();
|
||||
auto &hwHelper = HwHelper::get(device.getHardwareInfo().platform.eRenderCoreFamily);
|
||||
auto frequency = device.getDeviceInfo().profilingTimerResolution;
|
||||
|
||||
int64_t c0 = queueTimeStamp.CPUTimeinNS - hwHelper.getGpuTimeStampInNS(gpuTimeStamp, frequency);
|
||||
startTimeStamp = hwHelper.getGpuTimeStampInNS(globalStartTS, frequency);
|
||||
|
||||
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency) + c0;
|
||||
if (startTimeStamp < queueTimeStamp.CPUTimeinNS) {
|
||||
c0 += static_cast<uint64_t>((1ULL << (hwHelper.getGlobalTimeStampBits())) * frequency);
|
||||
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency) + c0;
|
||||
if (startTimeStamp < queueTimeStamp.GPUTimeStamp) {
|
||||
startTimeStamp += static_cast<uint64_t>((1ULL << (hwHelper.getGlobalTimeStampBits())) * frequency);
|
||||
}
|
||||
|
||||
/* calculation based on equation
|
||||
CpuTime = GpuTime * scalar + const( == c0)
|
||||
scalar = DeltaCpu( == dCpu) / DeltaGpu( == dGpu)
|
||||
to determine the value of the const we can use one pair of values
|
||||
const = CpuTimeQueue - GpuTimeQueue * scalar
|
||||
*/
|
||||
|
||||
//If device enqueue has not updated complete timestamp, assign end timestamp
|
||||
gpuDuration = getDelta(contextStartTS, contextEndTS);
|
||||
if (*contextCompleteTS == 0) {
|
||||
|
||||
@@ -326,6 +326,7 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t getTimeInNSFromTimestampData(const TimeStampData &timestamp) const;
|
||||
bool calcProfilingData();
|
||||
MOCKABLE_VIRTUAL void calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t contextEndTS, uint64_t *contextCompleteTS, uint64_t globalStartTS);
|
||||
MOCKABLE_VIRTUAL void synchronizeTaskCount() {
|
||||
|
||||
Reference in New Issue
Block a user