Implement timestamp wait for events

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2022-03-04 08:39:42 +00:00
committed by Compute-Runtime-Automation
parent ce85cee7eb
commit a74ae8f6af
6 changed files with 83 additions and 5 deletions

View File

@@ -987,7 +987,7 @@ void CommandQueue::aubCaptureHook(bool &blocking, bool &clearAllDependencies, co
} }
} }
bool CommandQueue::isWaitForTimestampsEnabled() { bool CommandQueue::isWaitForTimestampsEnabled() const {
auto &hwHelper = HwHelper::get(getDevice().getHardwareInfo().platform.eRenderCoreFamily); auto &hwHelper = HwHelper::get(getDevice().getHardwareInfo().platform.eRenderCoreFamily);
auto enabled = CommandQueue::isTimestampWaitEnabled(); auto enabled = CommandQueue::isTimestampWaitEnabled();
enabled &= hwHelper.isTimestampWaitSupported(); enabled &= hwHelper.isTimestampWaitSupported();

View File

@@ -204,7 +204,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
bool isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState) const; bool isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState) const;
bool isWaitForTimestampsEnabled(); bool isWaitForTimestampsEnabled() const;
virtual bool waitForTimestamps(uint32_t taskCount) = 0; virtual bool waitForTimestamps(uint32_t taskCount) = 0;
MOCKABLE_VIRTUAL bool isQueueBlocked(); MOCKABLE_VIRTUAL bool isQueueBlocked();

View File

@@ -161,9 +161,8 @@ bool CommandQueueHw<Family>::waitForTimestamps(uint32_t taskCount) {
if (isWaitForTimestampsEnabled()) { if (isWaitForTimestampsEnabled()) {
waited = waitForTimestampsWithinContainer<TSPacketType>(timestampPacketContainer.get(), getGpgpuCommandStreamReceiver()); waited = waitForTimestampsWithinContainer<TSPacketType>(timestampPacketContainer.get(), getGpgpuCommandStreamReceiver());
if (isOOQEnabled()) { if (isOOQEnabled()) {
waited |= waitForTimestampsWithinContainer<TSPacketType>(deferredTimestampPackets.get(), getGpgpuCommandStreamReceiver()); waitForTimestampsWithinContainer<TSPacketType>(deferredTimestampPackets.get(), getGpgpuCommandStreamReceiver());
} }
} }

View File

@@ -467,7 +467,7 @@ void Event::updateExecutionStatus() {
// Note : Intentional fallthrough (no return) to check for CL_COMPLETE // Note : Intentional fallthrough (no return) to check for CL_COMPLETE
} }
if ((cmdQueue != nullptr) && (cmdQueue->isCompleted(getCompletionStamp(), this->bcsState))) { if ((cmdQueue != nullptr) && this->isCompleted()) {
transitionExecutionStatus(CL_COMPLETE); transitionExecutionStatus(CL_COMPLETE);
executeCallbacks(CL_COMPLETE); executeCallbacks(CL_COMPLETE);
unblockEventsBlockedByThis(CL_COMPLETE); unblockEventsBlockedByThis(CL_COMPLETE);
@@ -676,6 +676,26 @@ inline void Event::setExecutionStatusToAbortedDueToGpuHang(cl_event *first, cl_e
}); });
} }
// Reports whether the work tracked by this event has finished.
//
// The owning command queue is asked first, using this event's completion
// stamp and BCS state. Only when the queue does not report completion are the
// event's timestamp packets consulted via areTimestampsCompleted().
//
// Note: cmdQueue is dereferenced without a null check, so the caller must
// ensure the event is associated with a command queue.
bool Event::isCompleted() {
    if (cmdQueue->isCompleted(getCompletionStamp(), this->bcsState)) {
        return true;
    }
    return this->areTimestampsCompleted();
}
// Checks completion by inspecting the timestamp packets attached to this event.
//
// Returns false when the event has no timestamp packet container or when the
// command queue reports that waiting on timestamps is not enabled. Otherwise
// every used packet of every node is examined: a context-end value of 1 is
// treated as "not finished yet" and yields false; if no packet holds that
// value the result is true (this includes a container with no nodes).
//
// NOTE(review): 1 is presumably the initial value a packet holds before the
// GPU overwrites it - confirm against the timestamp packet definition.
bool Event::areTimestampsCompleted() {
    if (!this->timestampPacketContainer.get()) {
        return false;
    }
    if (!this->cmdQueue->isWaitForTimestampsEnabled()) {
        return false;
    }

    for (const auto &node : this->timestampPacketContainer->peekNodes()) {
        uint32_t packetIndex = 0;
        while (packetIndex < node->getPacketsUsed()) {
            if (node->getContextEndValue(packetIndex) == 1) {
                return false;
            }
            ++packetIndex;
        }
    }
    return true;
}
uint32_t Event::getTaskLevel() { uint32_t Event::getTaskLevel() {
return taskLevel; return taskLevel;
} }

View File

@@ -190,6 +190,7 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
} }
bool updateStatusAndCheckCompletion(); bool updateStatusAndCheckCompletion();
bool isCompleted();
// Note from OCL spec : // Note from OCL spec :
// "A negative integer value causes all enqueued commands that wait on this user event // "A negative integer value causes all enqueued commands that wait on this user event
@@ -354,6 +355,8 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
static void setExecutionStatusToAbortedDueToGpuHang(cl_event *first, cl_event *last); static void setExecutionStatusToAbortedDueToGpuHang(cl_event *first, cl_event *last);
bool areTimestampsCompleted();
bool currentCmdQVirtualEvent; bool currentCmdQVirtualEvent;
std::atomic<Command *> cmdToSubmit; std::atomic<Command *> cmdToSubmit;
std::atomic<Command *> submittedCmd; std::atomic<Command *> submittedCmd;

View File

@@ -775,6 +775,62 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThe
EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size()); EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size());
} }
// Verifies that, with timestamp wait enabled, each event returned from an
// enqueue reports completion based on its own timestamp packets rather than on
// the command stream receiver's tag value.
//
// Flow:
//  1. Two kernels are enqueued with output events; afterwards the queue holds
//     one node in the deferred container and one in the current container.
//  2. Neither event is completed while the tag is 0 and no timestamps are written.
//  3. Writing timestamp data to the deferred node completes only event1.
//  4. Writing timestamp data to the current node completes event2 as well.
//  5. finish() must not fall back to waitForCompletionWithTimeout.
HWTEST_F(TimestampPacketTests, givenTimestampWaitEnabledWhenEnqueueWithEventThenEventHasCorrectTimestampsToCheckForCompletion) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.UpdateTaskCountFromWait.set(3);
    DebugManager.flags.EnableTimestampWait.set(1);

    auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
    csr.timestampPacketWriteEnabled = true;
    csr.callBaseWaitForCompletionWithTimeout = false;
    // Keep the tag at 0 so task-count based completion cannot report success.
    *csr.getTagAddress() = 0u;

    auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context, device.get(), nullptr);

    cl_event clEvent1;
    cl_event clEvent2;

    TimestampPacketContainer *deferredTimestampPackets = cmdQ->deferredTimestampPackets.get();
    TimestampPacketContainer *timestampPacketContainer = cmdQ->timestampPacketContainer.get();

    cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &clEvent1);
    cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &clEvent2);
    cmdQ->flush();

    Event &event1 = static_cast<Event &>(*clEvent1);
    Event &event2 = static_cast<Event &>(*clEvent2);

    EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size());
    EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size());

    EXPECT_FALSE(event1.isCompleted());
    EXPECT_FALSE(event2.isCompleted());

    typename FamilyType::TimestampPacketType timestampData[] = {2, 2, 2, 2};

    // Write timestamp data to the first enqueue's node (now in the deferred container).
    for (uint32_t i = 0; i < deferredTimestampPackets->peekNodes()[0]->getPacketsUsed(); i++) {
        deferredTimestampPackets->peekNodes()[0]->assignDataToAllTimestamps(i, timestampData);
    }

    EXPECT_TRUE(event1.isCompleted());
    EXPECT_FALSE(event2.isCompleted());

    // Write timestamp data to the second enqueue's node. The loop is bounded by
    // the packet count of the node being written, so every used packet of that
    // node is covered regardless of the deferred node's packet count.
    for (uint32_t i = 0; i < timestampPacketContainer->peekNodes()[0]->getPacketsUsed(); i++) {
        timestampPacketContainer->peekNodes()[0]->assignDataToAllTimestamps(i, timestampData);
    }

    EXPECT_TRUE(event1.isCompleted());
    EXPECT_TRUE(event2.isCompleted());

    cmdQ->finish();

    EXPECT_TRUE(event1.isCompleted());
    EXPECT_TRUE(event2.isCompleted());
    // Completion came from timestamps, so the task-count wait path was never taken.
    EXPECT_EQ(csr.waitForCompletionWithTimeoutTaskCountCalled, 0u);

    clReleaseEvent(clEvent1);
    clReleaseEvent(clEvent2);
    // Restore the tag so that teardown sees all submitted work as completed.
    *csr.getTagAddress() = csr.peekTaskCount();
}
HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitWhenFinishWithoutEnqueueThenDoNotWaitOnTimestamp) { HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitWhenFinishWithoutEnqueueThenDoNotWaitOnTimestamp) {
DebugManagerStateRestore restorer; DebugManagerStateRestore restorer;
DebugManager.flags.UpdateTaskCountFromWait.set(3); DebugManager.flags.UpdateTaskCountFromWait.set(3);