mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-20 00:24:58 +08:00
fix: setup both cpu & gpu timestamp when setting timestamp in event
Related-To: NEO-8394 Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
5c8cbc80eb
commit
c68f7c8fe2
@@ -227,13 +227,11 @@ void CommandQueueHw<Family>::setupEvent(EventBuilder &eventBuilder, cl_event *ou
|
||||
|
||||
if (eventObj->isProfilingEnabled()) {
|
||||
TimeStampData queueTimeStamp;
|
||||
|
||||
getDevice().getOSTime()->getCpuGpuTime(&queueTimeStamp);
|
||||
eventObj->setQueueTimeStamp(&queueTimeStamp);
|
||||
eventObj->setQueueTimeStamp(queueTimeStamp);
|
||||
|
||||
if (isCommandWithoutKernel(cmdType) && cmdType != CL_COMMAND_MARKER) {
|
||||
eventObj->setCPUProfilingPath(true);
|
||||
eventObj->setQueueTimeStamp();
|
||||
}
|
||||
}
|
||||
DBG_LOG(EventsDebugEnable, "enqueueHandler commandType", cmdType, "output Event", eventObj);
|
||||
|
||||
@@ -60,7 +60,9 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie
|
||||
if (eventsRequest.outEvent && !isMarkerRequiredForEventSignal) {
|
||||
eventBuilder.create<Event>(this, transferProperties.cmdType, CompletionStamp::notReady, CompletionStamp::notReady);
|
||||
outEventObj = eventBuilder.getEvent();
|
||||
outEventObj->setQueueTimeStamp();
|
||||
TimeStampData queueTimeStamp;
|
||||
getDevice().getOSTime()->getCpuGpuTime(&queueTimeStamp);
|
||||
outEventObj->setQueueTimeStamp(queueTimeStamp);
|
||||
outEventObj->setCPUProfilingPath(true);
|
||||
*eventsRequest.outEvent = outEventObj;
|
||||
}
|
||||
@@ -96,7 +98,9 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie
|
||||
bool modifySimulationFlags = false;
|
||||
|
||||
if (outEventObj) {
|
||||
outEventObj->setSubmitTimeStamp();
|
||||
TimeStampData submitTimeStamp;
|
||||
getDevice().getOSTime()->getCpuGpuTime(&submitTimeStamp);
|
||||
outEventObj->setSubmitTimeStamp(submitTimeStamp);
|
||||
}
|
||||
// wait for the completeness of previous commands
|
||||
if (transferProperties.finishRequired) {
|
||||
|
||||
@@ -418,7 +418,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
completionStamp.taskLevel = taskLevel;
|
||||
|
||||
if (eventBuilder.getEvent() && isProfilingEnabled()) {
|
||||
eventBuilder.getEvent()->setSubmitTimeStamp();
|
||||
TimeStampData submitTimeStamp;
|
||||
getDevice().getOSTime()->getCpuGpuTime(&submitTimeStamp);
|
||||
eventBuilder.getEvent()->setSubmitTimeStamp(submitTimeStamp);
|
||||
eventBuilder.getEvent()->setStartTimeStamp();
|
||||
}
|
||||
|
||||
@@ -846,7 +848,9 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
}
|
||||
|
||||
if (isProfilingEnabled() && eventBuilder.getEvent()) {
|
||||
eventBuilder.getEvent()->setSubmitTimeStamp();
|
||||
TimeStampData submitTimeStamp;
|
||||
getDevice().getOSTime()->getCpuGpuTime(&submitTimeStamp);
|
||||
eventBuilder.getEvent()->setSubmitTimeStamp(submitTimeStamp);
|
||||
|
||||
auto hwTimestampNode = eventBuilder.getEvent()->getHwTimeStampNode();
|
||||
if (hwTimestampNode) {
|
||||
@@ -1111,7 +1115,9 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
|
||||
}
|
||||
|
||||
if (eventBuilder.getEvent() && isProfilingEnabled()) {
|
||||
eventBuilder.getEvent()->setSubmitTimeStamp();
|
||||
TimeStampData submitTimeStamp;
|
||||
getDevice().getOSTime()->getCpuGpuTime(&submitTimeStamp);
|
||||
eventBuilder.getEvent()->setSubmitTimeStamp(submitTimeStamp);
|
||||
eventBuilder.getEvent()->setStartTimeStamp();
|
||||
}
|
||||
|
||||
|
||||
@@ -612,11 +612,10 @@ void Event::submitCommand(bool abortTasks) {
|
||||
this->cmdQueue->getGpgpuCommandStreamReceiver().makeResident(*timeStampNode->getBaseGraphicsAllocation());
|
||||
cmdToProcess->timestamp = timeStampNode;
|
||||
}
|
||||
this->cmdQueue->getDevice().getOSTime()->getCpuGpuTime(&submitTimeStamp);
|
||||
if (profilingCpuPath) {
|
||||
setSubmitTimeStamp();
|
||||
setStartTimeStamp();
|
||||
} else {
|
||||
this->cmdQueue->getDevice().getOSTime()->getCpuGpuTime(&submitTimeStamp);
|
||||
}
|
||||
if (perfCountersEnabled && perfCounterNode) {
|
||||
this->cmdQueue->getGpgpuCommandStreamReceiver().makeResident(*perfCounterNode->getBaseGraphicsAllocation());
|
||||
@@ -889,18 +888,6 @@ bool Event::tryFlushEvent() {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Stamps the event's queue timestamp with the current CPU time.
// Only queueTimeStamp.cpuTimeinNS is written by this function; no other
// field of queueTimeStamp is assigned here.
// The call does nothing unless profiling is enabled and the event has a
// command queue.
void Event::setQueueTimeStamp() {
|
||||
if (this->profilingEnabled && (this->cmdQueue != nullptr)) {
|
||||
// CPU time is read through the OS time object of the queue's device.
this->cmdQueue->getDevice().getOSTime()->getCpuTime(&queueTimeStamp.cpuTimeinNS);
|
||||
}
|
||||
}
|
||||
|
||||
// Stamps the event's submit timestamp with the current CPU time.
// Only submitTimeStamp.cpuTimeinNS is written by this function; no other
// field of submitTimeStamp is assigned here.
// The call does nothing unless profiling is enabled and the event has a
// command queue.
void Event::setSubmitTimeStamp() {
|
||||
if (this->profilingEnabled && (this->cmdQueue != nullptr)) {
|
||||
// CPU time is read through the OS time object of the queue's device.
this->cmdQueue->getDevice().getOSTime()->getCpuTime(&submitTimeStamp.cpuTimeinNS);
|
||||
}
|
||||
}
|
||||
|
||||
void Event::setStartTimeStamp() {
|
||||
if (this->profilingEnabled && (this->cmdQueue != nullptr)) {
|
||||
this->cmdQueue->getDevice().getOSTime()->getCpuTime(&startTimeStamp);
|
||||
|
||||
@@ -287,12 +287,13 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
|
||||
return this->taskCount;
|
||||
}
|
||||
|
||||
void setQueueTimeStamp(TimeStampData *queueTimeStamp) {
|
||||
this->queueTimeStamp = *queueTimeStamp;
|
||||
void setQueueTimeStamp(const TimeStampData &queueTimeStamp) {
|
||||
this->queueTimeStamp = queueTimeStamp;
|
||||
};
|
||||
|
||||
void setQueueTimeStamp();
|
||||
void setSubmitTimeStamp();
|
||||
// Overwrites the event's stored submit timestamp with the value supplied
// by the caller. This overload performs a plain assignment: it does not
// check profilingEnabled or cmdQueue and does not read any clock itself.
void setSubmitTimeStamp(const TimeStampData &submitTimeStamp) {
|
||||
// `this->` is required: the parameter shadows the member of the same name.
this->submitTimeStamp = submitTimeStamp;
|
||||
};
|
||||
|
||||
void setStartTimeStamp();
|
||||
void setEndTimeStamp();
|
||||
|
||||
@@ -105,7 +105,7 @@ HWTEST_F(EnqueueHandlerTimestampEnabledTest, givenProflingAndTimeStampPacketsEna
|
||||
eventsRequest, eventBuilder, 0, csrDeps, nullptr, false);
|
||||
|
||||
EXPECT_NE(ev->submitTimeStamp.cpuTimeinNS, 0u);
|
||||
EXPECT_EQ(ev->submitTimeStamp.gpuTimeStamp, 0u);
|
||||
EXPECT_NE(ev->submitTimeStamp.gpuTimeStamp, 0u);
|
||||
|
||||
ev->queueTimeStamp.gpuTimeStamp = 1000;
|
||||
ev->calculateSubmitTimestampData();
|
||||
@@ -143,7 +143,7 @@ HWTEST_F(EnqueueHandlerTimestampDisabledTest, givenProflingEnabledTimeStampPacke
|
||||
eventsRequest, eventBuilder, 0, csrDeps, nullptr, false);
|
||||
|
||||
EXPECT_NE(ev->submitTimeStamp.cpuTimeinNS, 0u);
|
||||
EXPECT_EQ(ev->submitTimeStamp.gpuTimeStamp, 0u);
|
||||
EXPECT_NE(ev->submitTimeStamp.gpuTimeStamp, 0u);
|
||||
|
||||
ev->queueTimeStamp.gpuTimeStamp = 1000;
|
||||
ev->calculateSubmitTimestampData();
|
||||
@@ -540,4 +540,4 @@ HWTEST_F(EnqueueHandlerMultiRootSync, givenOutEventInMultiRootContextWhenEnqueue
|
||||
clReleaseEvent(clEvent);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
} // namespace NEO
|
||||
|
||||
@@ -1308,16 +1308,6 @@ TEST_F(EventTest, WhenSettingCpuTimeStampThenCorrectTimeIsSet) {
|
||||
MyEvent ev(this->pCmdQ, CL_COMMAND_COPY_BUFFER, 3, 0);
|
||||
|
||||
ev.setProfilingEnabled(true);
|
||||
ev.setQueueTimeStamp();
|
||||
TimeStampData outtimeStamp = {0, 0};
|
||||
outtimeStamp = ev.getQueueTimeStamp();
|
||||
EXPECT_NE(0ULL, outtimeStamp.cpuTimeinNS);
|
||||
EXPECT_EQ(0ULL, outtimeStamp.gpuTimeStamp);
|
||||
|
||||
ev.setSubmitTimeStamp();
|
||||
outtimeStamp = ev.getSubmitTimeStamp();
|
||||
EXPECT_NE(0ULL, outtimeStamp.cpuTimeinNS);
|
||||
EXPECT_EQ(0ULL, outtimeStamp.gpuTimeStamp);
|
||||
|
||||
ev.setStartTimeStamp();
|
||||
uint64_t outCPUtimeStamp = ev.getStartTimeStamp();
|
||||
@@ -1331,20 +1321,29 @@ TEST_F(EventTest, WhenSettingCpuTimeStampThenCorrectTimeIsSet) {
|
||||
EXPECT_NE(0ULL, outCPUtimeStamp);
|
||||
}
|
||||
|
||||
// Checks that setQueueTimeStamp(const TimeStampData &) stores the value it
// is given: both fields read back through getQueueTimeStamp() must match
// the input. The event is built with nullptr as its first constructor
// argument and profiling is never enabled on it in this test.
TEST_F(EventTest, whenSettingQueueTimestampThenCorrectTimestampIsSet) {
|
||||
MyEvent event(nullptr, CL_COMMAND_COPY_BUFFER, 3, 0);
|
||||
|
||||
// Per the expectations below, the first initializer (1234) is the GPU
// timestamp and the second (5678) is the CPU time in nanoseconds.
TimeStampData queueTimeStamp = {1234, 5678};
|
||||
event.setQueueTimeStamp(queueTimeStamp);
|
||||
auto timeStamp = event.getQueueTimeStamp();
|
||||
EXPECT_EQ(1234ULL, timeStamp.gpuTimeStamp);
|
||||
EXPECT_EQ(5678ULL, timeStamp.cpuTimeinNS);
|
||||
}
|
||||
|
||||
// Checks that setSubmitTimeStamp(const TimeStampData &) stores the value it
// is given: both fields read back through getSubmitTimeStamp() must match
// the input. The event is built with nullptr as its first constructor
// argument and profiling is never enabled on it in this test.
TEST_F(EventTest, whenSettingSubmitTimestampThenCorrectTimestampIsSet) {
|
||||
MyEvent event(nullptr, CL_COMMAND_COPY_BUFFER, 3, 0);
|
||||
|
||||
// Per the expectations below, the first initializer (1234) is the GPU
// timestamp and the second (5678) is the CPU time in nanoseconds.
TimeStampData submitTimeStamp = {1234, 5678};
|
||||
event.setSubmitTimeStamp(submitTimeStamp);
|
||||
auto timeStamp = event.getSubmitTimeStamp();
|
||||
EXPECT_EQ(1234ULL, timeStamp.gpuTimeStamp);
|
||||
EXPECT_EQ(5678ULL, timeStamp.cpuTimeinNS);
|
||||
}
|
||||
|
||||
TEST_F(EventTest, GivenNoQueueWhenSettingCpuTimeStampThenTimesIsNotSet) {
|
||||
MyEvent ev(nullptr, CL_COMMAND_COPY_BUFFER, 3, 0);
|
||||
|
||||
ev.setQueueTimeStamp();
|
||||
TimeStampData outtimeStamp = {0, 0};
|
||||
outtimeStamp = ev.getQueueTimeStamp();
|
||||
EXPECT_EQ(0ULL, outtimeStamp.cpuTimeinNS);
|
||||
EXPECT_EQ(0ULL, outtimeStamp.gpuTimeStamp);
|
||||
|
||||
ev.setSubmitTimeStamp();
|
||||
outtimeStamp = ev.getSubmitTimeStamp();
|
||||
EXPECT_EQ(0ULL, outtimeStamp.cpuTimeinNS);
|
||||
EXPECT_EQ(0ULL, outtimeStamp.gpuTimeStamp);
|
||||
|
||||
ev.setStartTimeStamp();
|
||||
uint64_t outCPUtimeStamp = ev.getStartTimeStamp();
|
||||
EXPECT_EQ(0ULL, outCPUtimeStamp);
|
||||
@@ -1937,4 +1936,4 @@ TEST(MultiRootEvent, givenEventWithTagWhenEventGetsNewTagThenNewTagContainerIsNo
|
||||
EXPECT_NE(containerPtr, nullptr);
|
||||
event.getMultiRootTimestampSyncNode();
|
||||
EXPECT_EQ(containerPtr, event.getMultiRootDeviceTimestampPacketNodes());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -182,7 +182,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfolingWhenWa
|
||||
clReleaseEvent(event);
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingWhenNonBlockedEnqueueIsExecutedThenSubmittedTimestampDoesntHaveGPUTime) {
|
||||
HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingWhenNonBlockedEnqueueIsExecutedThenSubmittedTimestampHasGPUTime) {
|
||||
MockKernel kernel(program.get(), kernelInfo, *pClDevice);
|
||||
ASSERT_EQ(CL_SUCCESS, kernel.initialize());
|
||||
|
||||
@@ -205,7 +205,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingWhenNo
|
||||
EXPECT_NE(0u, mockEvent->queueTimeStamp.gpuTimeStamp);
|
||||
EXPECT_NE(0u, mockEvent->queueTimeStamp.cpuTimeinNS);
|
||||
EXPECT_LT(mockEvent->queueTimeStamp.cpuTimeinNS, mockEvent->submitTimeStamp.cpuTimeinNS);
|
||||
EXPECT_EQ(0u, mockEvent->submitTimeStamp.gpuTimeStamp);
|
||||
EXPECT_NE(0u, mockEvent->submitTimeStamp.gpuTimeStamp);
|
||||
|
||||
clReleaseEvent(event);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user