fix: set up both cpu & gpu timestamp when setting timestamp in event

Related-To: NEO-8394
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2023-10-16 15:04:27 +02:00
committed by Compute-Runtime-Automation
parent 5c8cbc80eb
commit c68f7c8fe2
8 changed files with 48 additions and 53 deletions

View File

@@ -227,13 +227,11 @@ void CommandQueueHw<Family>::setupEvent(EventBuilder &eventBuilder, cl_event *ou
if (eventObj->isProfilingEnabled()) {
TimeStampData queueTimeStamp;
getDevice().getOSTime()->getCpuGpuTime(&queueTimeStamp);
eventObj->setQueueTimeStamp(&queueTimeStamp);
eventObj->setQueueTimeStamp(queueTimeStamp);
if (isCommandWithoutKernel(cmdType) && cmdType != CL_COMMAND_MARKER) {
eventObj->setCPUProfilingPath(true);
eventObj->setQueueTimeStamp();
}
}
DBG_LOG(EventsDebugEnable, "enqueueHandler commandType", cmdType, "output Event", eventObj);

View File

@@ -60,7 +60,9 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie
if (eventsRequest.outEvent && !isMarkerRequiredForEventSignal) {
eventBuilder.create<Event>(this, transferProperties.cmdType, CompletionStamp::notReady, CompletionStamp::notReady);
outEventObj = eventBuilder.getEvent();
outEventObj->setQueueTimeStamp();
TimeStampData queueTimeStamp;
getDevice().getOSTime()->getCpuGpuTime(&queueTimeStamp);
outEventObj->setQueueTimeStamp(queueTimeStamp);
outEventObj->setCPUProfilingPath(true);
*eventsRequest.outEvent = outEventObj;
}
@@ -96,7 +98,9 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie
bool modifySimulationFlags = false;
if (outEventObj) {
outEventObj->setSubmitTimeStamp();
TimeStampData submitTimeStamp;
getDevice().getOSTime()->getCpuGpuTime(&submitTimeStamp);
outEventObj->setSubmitTimeStamp(submitTimeStamp);
}
// wait for the completion of previous commands
if (transferProperties.finishRequired) {

View File

@@ -418,7 +418,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
completionStamp.taskLevel = taskLevel;
if (eventBuilder.getEvent() && isProfilingEnabled()) {
eventBuilder.getEvent()->setSubmitTimeStamp();
TimeStampData submitTimeStamp;
getDevice().getOSTime()->getCpuGpuTime(&submitTimeStamp);
eventBuilder.getEvent()->setSubmitTimeStamp(submitTimeStamp);
eventBuilder.getEvent()->setStartTimeStamp();
}
@@ -846,7 +848,9 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
}
if (isProfilingEnabled() && eventBuilder.getEvent()) {
eventBuilder.getEvent()->setSubmitTimeStamp();
TimeStampData submitTimeStamp;
getDevice().getOSTime()->getCpuGpuTime(&submitTimeStamp);
eventBuilder.getEvent()->setSubmitTimeStamp(submitTimeStamp);
auto hwTimestampNode = eventBuilder.getEvent()->getHwTimeStampNode();
if (hwTimestampNode) {
@@ -1111,7 +1115,9 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
}
if (eventBuilder.getEvent() && isProfilingEnabled()) {
eventBuilder.getEvent()->setSubmitTimeStamp();
TimeStampData submitTimeStamp;
getDevice().getOSTime()->getCpuGpuTime(&submitTimeStamp);
eventBuilder.getEvent()->setSubmitTimeStamp(submitTimeStamp);
eventBuilder.getEvent()->setStartTimeStamp();
}

View File

@@ -612,11 +612,10 @@ void Event::submitCommand(bool abortTasks) {
this->cmdQueue->getGpgpuCommandStreamReceiver().makeResident(*timeStampNode->getBaseGraphicsAllocation());
cmdToProcess->timestamp = timeStampNode;
}
this->cmdQueue->getDevice().getOSTime()->getCpuGpuTime(&submitTimeStamp);
if (profilingCpuPath) {
setSubmitTimeStamp();
setStartTimeStamp();
} else {
this->cmdQueue->getDevice().getOSTime()->getCpuGpuTime(&submitTimeStamp);
}
if (perfCountersEnabled && perfCounterNode) {
this->cmdQueue->getGpgpuCommandStreamReceiver().makeResident(*perfCounterNode->getBaseGraphicsAllocation());
@@ -889,18 +888,6 @@ bool Event::tryFlushEvent() {
return true;
}
// Requests the CPU time from the command queue's device OS time source and
// has it written into queueTimeStamp.cpuTimeinNS. Only that field is passed
// to the time source. Does nothing when profiling is disabled or when the
// event has no command queue.
void Event::setQueueTimeStamp() {
    if (!this->profilingEnabled || this->cmdQueue == nullptr) {
        return;
    }
    this->cmdQueue->getDevice().getOSTime()->getCpuTime(&queueTimeStamp.cpuTimeinNS);
}
// Requests the CPU time from the command queue's device OS time source and
// has it written into submitTimeStamp.cpuTimeinNS. Only that field is passed
// to the time source. Does nothing when profiling is disabled or when the
// event has no command queue.
void Event::setSubmitTimeStamp() {
    if (!this->profilingEnabled || this->cmdQueue == nullptr) {
        return;
    }
    this->cmdQueue->getDevice().getOSTime()->getCpuTime(&submitTimeStamp.cpuTimeinNS);
}
void Event::setStartTimeStamp() {
if (this->profilingEnabled && (this->cmdQueue != nullptr)) {
this->cmdQueue->getDevice().getOSTime()->getCpuTime(&startTimeStamp);

View File

@@ -287,12 +287,13 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
return this->taskCount;
}
void setQueueTimeStamp(TimeStampData *queueTimeStamp) {
this->queueTimeStamp = *queueTimeStamp;
// Stores a copy of the supplied timestamp data as this event's queue timestamp.
// The whole TimeStampData value is assigned, not individual fields.
void setQueueTimeStamp(const TimeStampData &queueTimeStamp) {
this->queueTimeStamp = queueTimeStamp;
};
void setQueueTimeStamp();
void setSubmitTimeStamp();
// Stores a copy of the supplied timestamp data as this event's submit timestamp.
// The whole TimeStampData value is assigned, not individual fields.
void setSubmitTimeStamp(const TimeStampData &submitTimeStamp) {
this->submitTimeStamp = submitTimeStamp;
};
void setStartTimeStamp();
void setEndTimeStamp();

View File

@@ -105,7 +105,7 @@ HWTEST_F(EnqueueHandlerTimestampEnabledTest, givenProflingAndTimeStampPacketsEna
eventsRequest, eventBuilder, 0, csrDeps, nullptr, false);
EXPECT_NE(ev->submitTimeStamp.cpuTimeinNS, 0u);
EXPECT_EQ(ev->submitTimeStamp.gpuTimeStamp, 0u);
EXPECT_NE(ev->submitTimeStamp.gpuTimeStamp, 0u);
ev->queueTimeStamp.gpuTimeStamp = 1000;
ev->calculateSubmitTimestampData();
@@ -143,7 +143,7 @@ HWTEST_F(EnqueueHandlerTimestampDisabledTest, givenProflingEnabledTimeStampPacke
eventsRequest, eventBuilder, 0, csrDeps, nullptr, false);
EXPECT_NE(ev->submitTimeStamp.cpuTimeinNS, 0u);
EXPECT_EQ(ev->submitTimeStamp.gpuTimeStamp, 0u);
EXPECT_NE(ev->submitTimeStamp.gpuTimeStamp, 0u);
ev->queueTimeStamp.gpuTimeStamp = 1000;
ev->calculateSubmitTimestampData();
@@ -540,4 +540,4 @@ HWTEST_F(EnqueueHandlerMultiRootSync, givenOutEventInMultiRootContextWhenEnqueue
clReleaseEvent(clEvent);
}
} // namespace NEO
} // namespace NEO

View File

@@ -1308,16 +1308,6 @@ TEST_F(EventTest, WhenSettingCpuTimeStampThenCorrectTimeIsSet) {
MyEvent ev(this->pCmdQ, CL_COMMAND_COPY_BUFFER, 3, 0);
ev.setProfilingEnabled(true);
ev.setQueueTimeStamp();
TimeStampData outtimeStamp = {0, 0};
outtimeStamp = ev.getQueueTimeStamp();
EXPECT_NE(0ULL, outtimeStamp.cpuTimeinNS);
EXPECT_EQ(0ULL, outtimeStamp.gpuTimeStamp);
ev.setSubmitTimeStamp();
outtimeStamp = ev.getSubmitTimeStamp();
EXPECT_NE(0ULL, outtimeStamp.cpuTimeinNS);
EXPECT_EQ(0ULL, outtimeStamp.gpuTimeStamp);
ev.setStartTimeStamp();
uint64_t outCPUtimeStamp = ev.getStartTimeStamp();
@@ -1331,20 +1321,29 @@ TEST_F(EventTest, WhenSettingCpuTimeStampThenCorrectTimeIsSet) {
EXPECT_NE(0ULL, outCPUtimeStamp);
}
// Checks that a queue timestamp handed to the event is returned unchanged by
// getQueueTimeStamp(), for both the GPU and the CPU field.
TEST_F(EventTest, whenSettingQueueTimestampThenCorrectTimestampIsSet) {
    const TimeStampData expectedTimeStamp = {1234, 5678};
    MyEvent event(nullptr, CL_COMMAND_COPY_BUFFER, 3, 0);

    event.setQueueTimeStamp(expectedTimeStamp);

    const auto storedTimeStamp = event.getQueueTimeStamp();
    EXPECT_EQ(1234ULL, storedTimeStamp.gpuTimeStamp);
    EXPECT_EQ(5678ULL, storedTimeStamp.cpuTimeinNS);
}
// Checks that a submit timestamp handed to the event is returned unchanged by
// getSubmitTimeStamp(), for both the GPU and the CPU field.
TEST_F(EventTest, whenSettingSubmitTimestampThenCorrectTimestampIsSet) {
    const TimeStampData expectedTimeStamp = {1234, 5678};
    MyEvent event(nullptr, CL_COMMAND_COPY_BUFFER, 3, 0);

    event.setSubmitTimeStamp(expectedTimeStamp);

    const auto storedTimeStamp = event.getSubmitTimeStamp();
    EXPECT_EQ(1234ULL, storedTimeStamp.gpuTimeStamp);
    EXPECT_EQ(5678ULL, storedTimeStamp.cpuTimeinNS);
}
TEST_F(EventTest, GivenNoQueueWhenSettingCpuTimeStampThenTimesIsNotSet) {
MyEvent ev(nullptr, CL_COMMAND_COPY_BUFFER, 3, 0);
ev.setQueueTimeStamp();
TimeStampData outtimeStamp = {0, 0};
outtimeStamp = ev.getQueueTimeStamp();
EXPECT_EQ(0ULL, outtimeStamp.cpuTimeinNS);
EXPECT_EQ(0ULL, outtimeStamp.gpuTimeStamp);
ev.setSubmitTimeStamp();
outtimeStamp = ev.getSubmitTimeStamp();
EXPECT_EQ(0ULL, outtimeStamp.cpuTimeinNS);
EXPECT_EQ(0ULL, outtimeStamp.gpuTimeStamp);
ev.setStartTimeStamp();
uint64_t outCPUtimeStamp = ev.getStartTimeStamp();
EXPECT_EQ(0ULL, outCPUtimeStamp);
@@ -1937,4 +1936,4 @@ TEST(MultiRootEvent, givenEventWithTagWhenEventGetsNewTagThenNewTagContainerIsNo
EXPECT_NE(containerPtr, nullptr);
event.getMultiRootTimestampSyncNode();
EXPECT_EQ(containerPtr, event.getMultiRootDeviceTimestampPacketNodes());
}
}

View File

@@ -182,7 +182,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfolingWhenWa
clReleaseEvent(event);
}
HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingWhenNonBlockedEnqueueIsExecutedThenSubmittedTimestampDoesntHaveGPUTime) {
HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingWhenNonBlockedEnqueueIsExecutedThenSubmittedTimestampHasGPUTime) {
MockKernel kernel(program.get(), kernelInfo, *pClDevice);
ASSERT_EQ(CL_SUCCESS, kernel.initialize());
@@ -205,7 +205,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingWhenNo
EXPECT_NE(0u, mockEvent->queueTimeStamp.gpuTimeStamp);
EXPECT_NE(0u, mockEvent->queueTimeStamp.cpuTimeinNS);
EXPECT_LT(mockEvent->queueTimeStamp.cpuTimeinNS, mockEvent->submitTimeStamp.cpuTimeinNS);
EXPECT_EQ(0u, mockEvent->submitTimeStamp.gpuTimeStamp);
EXPECT_NE(0u, mockEvent->submitTimeStamp.gpuTimeStamp);
clReleaseEvent(event);
}