mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-11 08:07:19 +08:00
Optimize profiling calls.
- Do not call the KMD query for the submitted timestamp; only the CPU timestamp is needed here.

Change-Id: Id60c1e367d9430d893fb3a253ffc058f45fc9609
Signed-off-by: Michal Mrozek <michal.mrozek@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
39a930e122
commit
b039f5cc76
@@ -698,9 +698,9 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
DEBUG_BREAK_IF(device->getDeviceInfo().preemptionSupported != false);
|
||||
}
|
||||
|
||||
TimeStampData submitTimeStamp;
|
||||
TimeStampData submitTimeStamp = {};
|
||||
if (isProfilingEnabled() && eventBuilder.getEvent()) {
|
||||
this->getDevice().getOSTime()->getCpuGpuTime(&submitTimeStamp);
|
||||
this->getDevice().getOSTime()->getCpuTime(&submitTimeStamp.CPUTimeinNS);
|
||||
eventBuilder.getEvent()->setSubmitTimeStamp(&submitTimeStamp);
|
||||
getGpgpuCommandStreamReceiver().makeResident(*eventBuilder.getEvent()->getHwTimeStampNode()->getBaseGraphicsAllocation());
|
||||
if (isPerfCountersEnabled()) {
|
||||
|
||||
@@ -9,16 +9,15 @@
|
||||
#include "shared/source/os_interface/os_time.h"
|
||||
|
||||
namespace NEO {
|
||||
static int PerfTicks = 0;
|
||||
class MockOSTime : public OSTime {
|
||||
public:
|
||||
bool getCpuGpuTime(TimeStampData *pGpuCpuTime) override {
|
||||
static int PerfTicks = 0;
|
||||
pGpuCpuTime->GPUTimeStamp = ++PerfTicks;
|
||||
pGpuCpuTime->CPUTimeinNS = PerfTicks;
|
||||
return true;
|
||||
}
|
||||
bool getCpuTime(uint64_t *timeStamp) override {
|
||||
static int PerfTicks = 0;
|
||||
*timeStamp = ++PerfTicks;
|
||||
return true;
|
||||
};
|
||||
|
||||
@@ -195,6 +195,35 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfolingWhenWa
|
||||
clReleaseEvent(event);
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingWhenNonBlockedEnqueueIsExecutedThenSubmittedTimestampDoesntHaveGPUTime) {
|
||||
MockKernel kernel(program.get(), kernelInfo, *pClDevice);
|
||||
ASSERT_EQ(CL_SUCCESS, kernel.initialize());
|
||||
|
||||
size_t globalOffsets[3] = {0, 0, 0};
|
||||
size_t workItems[3] = {1, 1, 1};
|
||||
uint32_t dimensions = 1;
|
||||
cl_event event;
|
||||
cl_kernel clKernel = &kernel;
|
||||
|
||||
static_cast<CommandQueueHw<FamilyType> *>(pCmdQ)->enqueueKernel(
|
||||
clKernel,
|
||||
dimensions,
|
||||
globalOffsets,
|
||||
workItems,
|
||||
nullptr,
|
||||
0,
|
||||
nullptr,
|
||||
&event);
|
||||
|
||||
auto mockEvent = static_cast<MockEvent<Event> *>(event);
|
||||
EXPECT_NE(0u, mockEvent->queueTimeStamp.GPUTimeStamp);
|
||||
EXPECT_NE(0u, mockEvent->queueTimeStamp.CPUTimeinNS);
|
||||
EXPECT_LT(mockEvent->queueTimeStamp.CPUTimeinNS, mockEvent->submitTimeStamp.CPUTimeinNS);
|
||||
EXPECT_EQ(0u, mockEvent->submitTimeStamp.GPUTimeStamp);
|
||||
|
||||
clReleaseEvent(event);
|
||||
}
|
||||
|
||||
/*
|
||||
# One additional MI_STORE_REGISTER_MEM is expected before and after GPGPU_WALKER.
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user