Optimize profiling calls.

- Do not call the KMD query for the submitted timestamp; only the CPU
timestamp is needed here.

Change-Id: Id60c1e367d9430d893fb3a253ffc058f45fc9609
Signed-off-by: Michal Mrozek <michal.mrozek@intel.com>
This commit is contained in:
Michal Mrozek
2020-10-23 15:38:35 +02:00
committed by sys_ocldev
parent 39a930e122
commit b039f5cc76
3 changed files with 32 additions and 4 deletions

View File

@@ -698,9 +698,9 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
DEBUG_BREAK_IF(device->getDeviceInfo().preemptionSupported != false);
}
TimeStampData submitTimeStamp;
TimeStampData submitTimeStamp = {};
if (isProfilingEnabled() && eventBuilder.getEvent()) {
this->getDevice().getOSTime()->getCpuGpuTime(&submitTimeStamp);
this->getDevice().getOSTime()->getCpuTime(&submitTimeStamp.CPUTimeinNS);
eventBuilder.getEvent()->setSubmitTimeStamp(&submitTimeStamp);
getGpgpuCommandStreamReceiver().makeResident(*eventBuilder.getEvent()->getHwTimeStampNode()->getBaseGraphicsAllocation());
if (isPerfCountersEnabled()) {

View File

@@ -9,16 +9,15 @@
#include "shared/source/os_interface/os_time.h"
namespace NEO {
static int PerfTicks = 0;
class MockOSTime : public OSTime {
public:
bool getCpuGpuTime(TimeStampData *pGpuCpuTime) override {
static int PerfTicks = 0;
pGpuCpuTime->GPUTimeStamp = ++PerfTicks;
pGpuCpuTime->CPUTimeinNS = PerfTicks;
return true;
}
bool getCpuTime(uint64_t *timeStamp) override {
static int PerfTicks = 0;
*timeStamp = ++PerfTicks;
return true;
};

View File

@@ -195,6 +195,35 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfolingWhenWa
clReleaseEvent(event);
}
HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingWhenNonBlockedEnqueueIsExecutedThenSubmittedTimestampDoesntHaveGPUTime) {
MockKernel kernel(program.get(), kernelInfo, *pClDevice);
ASSERT_EQ(CL_SUCCESS, kernel.initialize());
size_t globalOffsets[3] = {0, 0, 0};
size_t workItems[3] = {1, 1, 1};
uint32_t dimensions = 1;
cl_event event;
cl_kernel clKernel = &kernel;
static_cast<CommandQueueHw<FamilyType> *>(pCmdQ)->enqueueKernel(
clKernel,
dimensions,
globalOffsets,
workItems,
nullptr,
0,
nullptr,
&event);
auto mockEvent = static_cast<MockEvent<Event> *>(event);
EXPECT_NE(0u, mockEvent->queueTimeStamp.GPUTimeStamp);
EXPECT_NE(0u, mockEvent->queueTimeStamp.CPUTimeinNS);
EXPECT_LT(mockEvent->queueTimeStamp.CPUTimeinNS, mockEvent->submitTimeStamp.CPUTimeinNS);
EXPECT_EQ(0u, mockEvent->submitTimeStamp.GPUTimeStamp);
clReleaseEvent(event);
}
/*
# One additional MI_STORE_REGISTER_MEM is expected before and after GPGPU_WALKER.
*/