mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-04 15:53:45 +08:00
Fix event profiling for marker commands
Related-To: NEO-5799 Signed-off-by: Konstanty Misiak <konstanty.misiak@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
816e95443f
commit
ad19eda689
@@ -160,21 +160,19 @@ cl_int Event::getEventProfilingInfo(cl_profiling_info paramName,
|
||||
return CL_PROFILING_INFO_NOT_AVAILABLE;
|
||||
}
|
||||
|
||||
uint64_t timestamp = 0u;
|
||||
|
||||
// if paramValue is NULL, it is ignored
|
||||
switch (paramName) {
|
||||
case CL_PROFILING_COMMAND_QUEUED:
|
||||
src = &queueTimeStamp.CPUTimeinNS;
|
||||
if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
|
||||
src = &queueTimeStamp.GPUTimeStamp;
|
||||
}
|
||||
timestamp = getTimeInNSFromTimestampData(queueTimeStamp);
|
||||
src = &timestamp;
|
||||
srcSize = sizeof(cl_ulong);
|
||||
break;
|
||||
|
||||
case CL_PROFILING_COMMAND_SUBMIT:
|
||||
src = &submitTimeStamp.CPUTimeinNS;
|
||||
if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
|
||||
src = &submitTimeStamp.GPUTimeStamp;
|
||||
}
|
||||
timestamp = getTimeInNSFromTimestampData(submitTimeStamp);
|
||||
src = &timestamp;
|
||||
srcSize = sizeof(cl_ulong);
|
||||
break;
|
||||
|
||||
@@ -249,6 +247,26 @@ cl_ulong Event::getDelta(cl_ulong startTime,
|
||||
return Delta;
|
||||
}
|
||||
|
||||
uint64_t Event::getTimeInNSFromTimestampData(const TimeStampData ×tamp) const {
|
||||
if (isCPUProfilingPath()) {
|
||||
return timestamp.CPUTimeinNS;
|
||||
}
|
||||
|
||||
if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
|
||||
return timestamp.GPUTimeStamp;
|
||||
}
|
||||
|
||||
if (cmdQueue && DebugManager.flags.EnableDeviceBasedTimestamps.get()) {
|
||||
auto &device = cmdQueue->getDevice();
|
||||
auto &hwHelper = HwHelper::get(device.getHardwareInfo().platform.eRenderCoreFamily);
|
||||
double resolution = device.getDeviceInfo().profilingTimerResolution;
|
||||
|
||||
return hwHelper.getGpuTimeStampInNS(timestamp.GPUTimeStamp, resolution);
|
||||
}
|
||||
|
||||
return timestamp.CPUTimeinNS;
|
||||
}
|
||||
|
||||
bool Event::calcProfilingData() {
|
||||
if (!dataCalculated && !profilingCpuPath) {
|
||||
if (timestampPacketContainer && timestampPacketContainer->peekNodes().size() > 0) {
|
||||
@@ -294,23 +312,29 @@ bool Event::calcProfilingData() {
|
||||
}
|
||||
|
||||
void Event::calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t contextEndTS, uint64_t *contextCompleteTS, uint64_t globalStartTS) {
|
||||
|
||||
uint64_t gpuDuration = 0;
|
||||
uint64_t cpuDuration = 0;
|
||||
|
||||
uint64_t gpuCompleteDuration = 0;
|
||||
uint64_t cpuCompleteDuration = 0;
|
||||
|
||||
auto &hwHelper = HwHelper::get(this->cmdQueue->getDevice().getHardwareInfo().platform.eRenderCoreFamily);
|
||||
auto frequency = cmdQueue->getDevice().getDeviceInfo().profilingTimerResolution;
|
||||
auto gpuTimeStamp = queueTimeStamp.GPUTimeStamp;
|
||||
auto &device = this->cmdQueue->getDevice();
|
||||
auto &hwHelper = HwHelper::get(device.getHardwareInfo().platform.eRenderCoreFamily);
|
||||
auto frequency = device.getDeviceInfo().profilingTimerResolution;
|
||||
auto gpuQueueTimeStamp = hwHelper.getGpuTimeStampInNS(queueTimeStamp.GPUTimeStamp, frequency);
|
||||
|
||||
int64_t c0 = queueTimeStamp.CPUTimeinNS - hwHelper.getGpuTimeStampInNS(gpuTimeStamp, frequency);
|
||||
|
||||
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency) + c0;
|
||||
if (startTimeStamp < queueTimeStamp.CPUTimeinNS) {
|
||||
c0 += static_cast<uint64_t>((1ULL << (hwHelper.getGlobalTimeStampBits())) * frequency);
|
||||
if (DebugManager.flags.EnableDeviceBasedTimestamps.get()) {
|
||||
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency);
|
||||
if (startTimeStamp < gpuQueueTimeStamp) {
|
||||
startTimeStamp += static_cast<uint64_t>((1ULL << hwHelper.getGlobalTimeStampBits()) * frequency);
|
||||
}
|
||||
} else {
|
||||
int64_t c0 = queueTimeStamp.CPUTimeinNS - gpuQueueTimeStamp;
|
||||
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency) + c0;
|
||||
if (startTimeStamp < queueTimeStamp.CPUTimeinNS) {
|
||||
c0 += static_cast<uint64_t>((1ULL << (hwHelper.getGlobalTimeStampBits())) * frequency);
|
||||
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency) + c0;
|
||||
}
|
||||
}
|
||||
|
||||
/* calculation based on equation
|
||||
|
||||
@@ -326,6 +326,7 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t getTimeInNSFromTimestampData(const TimeStampData &timestamp) const;
|
||||
bool calcProfilingData();
|
||||
MOCKABLE_VIRTUAL void calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t contextEndTS, uint64_t *contextCompleteTS, uint64_t globalStartTS);
|
||||
MOCKABLE_VIRTUAL void synchronizeTaskCount() {
|
||||
|
||||
Reference in New Issue
Block a user