performance: check completion alloc only once when waiting for Event
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
parent
ed972bb21c
commit
712e059ace
|
@ -214,7 +214,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
|||
|
||||
volatile TagAddressType *getHwTagAddress() const;
|
||||
|
||||
bool isCompleted(TaskCountType gpgpuTaskCount, CopyEngineState bcsState);
|
||||
MOCKABLE_VIRTUAL bool isCompleted(TaskCountType gpgpuTaskCount, CopyEngineState bcsState);
|
||||
|
||||
bool isWaitForTimestampsEnabled() const;
|
||||
virtual bool waitForTimestamps(Range<CopyEngineState> copyEnginesToWait, TaskCountType taskCount, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) = 0;
|
||||
|
|
|
@ -447,6 +447,9 @@ inline WaitStatus Event::wait(bool blocking, bool useQuickKmdSleep) {
|
|||
if (waitStatus == WaitStatus::GpuHang) {
|
||||
return WaitStatus::GpuHang;
|
||||
}
|
||||
|
||||
this->gpuStateWaited = true;
|
||||
|
||||
updateExecutionStatus();
|
||||
|
||||
DEBUG_BREAK_IF(this->taskLevel == CompletionStamp::notReady && this->executionStatus >= 0);
|
||||
|
@ -704,7 +707,15 @@ inline void Event::setExecutionStatusToAbortedDueToGpuHang(cl_event *first, cl_e
|
|||
}
|
||||
|
||||
// Reports whether this event has completed, remembering a positive answer in gpuStateWaited.
// NOTE(review): this text is a rendered diff. The hunk header (-704,7 +707,15) suggests the
// single-line return directly below is the pre-change body and the statements after it are
// its replacement — confirm against the repository before treating both as live code.
bool Event::isCompleted() {
|
||||
return cmdQueue->isCompleted(getCompletionStamp(), this->bcsState) || this->areTimestampsCompleted();
|
||||
// Fast path: completion was already observed, so the command queue is not queried again.
if (gpuStateWaited) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Slow path: ask the command queue, or fall back to the timestamp check; latch the flag on success.
if (cmdQueue->isCompleted(getCompletionStamp(), this->bcsState) || this->areTimestampsCompleted()) {
|
||||
gpuStateWaited = true;
|
||||
}
|
||||
|
||||
// Still false here when neither check reported completion.
return gpuStateWaited;
|
||||
}
|
||||
|
||||
bool Event::isWaitForTimestampsEnabled() const {
|
||||
|
|
|
@ -393,6 +393,7 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
|
|||
// number of events this event depends on
|
||||
std::unique_ptr<TimestampPacketContainer> multiRootDeviceTimestampPacketContainer;
|
||||
std::atomic<int> parentCount;
|
||||
std::atomic<bool> gpuStateWaited = false;
|
||||
// event parents
|
||||
std::vector<Event *> parentEvents;
|
||||
|
||||
|
|
|
@ -129,6 +129,57 @@ TEST_F(clEnqueueWaitForEventsTests, GivenInvalidEventWhenClEnqueueWaitForEventsI
|
|||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
}
|
||||
|
||||
// Verifies, through the mock queue's isCompletedCalled counter, that the command queue stops
// being queried for completion once an event has been seen as completed or has been waited on.
HWTEST_F(clEnqueueWaitForEventsTests, givenAlreadyCompletedEventWhenWaitForCompletionThenCheckGpuStateOnce) {
|
||||
auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
auto csrTagAddress = ultCsr.getTagAddress();
|
||||
|
||||
TaskCountType eventTaskCount = 5;
|
||||
|
||||
// Start with the CSR tag one below the events' task count.
*csrTagAddress = eventTaskCount - 1;
|
||||
|
||||
MockEvent<Event> event1(pCommandQueue, CL_COMMAND_READ_BUFFER, 0, eventTaskCount);
|
||||
MockEvent<Event> event2(pCommandQueue, CL_COMMAND_READ_BUFFER, 0, eventTaskCount);
|
||||
cl_event hEvent1 = &event1;
|
||||
cl_event hEvent2 = &event2;
|
||||
|
||||
EXPECT_EQ(0u, pCommandQueue->isCompletedCalled);
|
||||
|
||||
// Event 1
// While the tag is below the task count, each status update is expected to query the queue.
|
||||
event1.updateExecutionStatus();
|
||||
EXPECT_EQ(1u, pCommandQueue->isCompletedCalled);
|
||||
|
||||
event1.updateExecutionStatus();
|
||||
EXPECT_EQ(2u, pCommandQueue->isCompletedCalled);
|
||||
|
||||
// Raise the tag to the events' task count.
*csrTagAddress = eventTaskCount;
|
||||
|
||||
// One more query is expected; after it the counter must stay at 3.
event1.updateExecutionStatus();
|
||||
EXPECT_EQ(3u, pCommandQueue->isCompletedCalled);
|
||||
|
||||
event1.updateExecutionStatus();
|
||||
EXPECT_EQ(3u, pCommandQueue->isCompletedCalled);
|
||||
|
||||
auto retVal = clEnqueueWaitForEvents(pCommandQueue, 1, &hEvent1);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
EXPECT_EQ(3u, pCommandQueue->isCompletedCalled);
|
||||
|
||||
// Event 2
// event2 has never been updated before; it is waited on directly.
|
||||
retVal = clEnqueueWaitForEvents(pCommandQueue, 1, &hEvent2);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
// The wait is expected to mark event2 as completed before the queue's isCompleted() would be
// called, so the counter does not grow (presumably via Event::wait — TODO confirm call path).
|
||||
EXPECT_EQ(3u, pCommandQueue->isCompletedCalled);
|
||||
|
||||
retVal = clEnqueueWaitForEvents(pCommandQueue, 1, &hEvent2);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
EXPECT_EQ(3u, pCommandQueue->isCompletedCalled);
|
||||
|
||||
event2.updateExecutionStatus();
|
||||
EXPECT_EQ(3u, pCommandQueue->isCompletedCalled);
|
||||
}
|
||||
|
||||
struct GTPinMockCommandQueue : MockCommandQueue {
|
||||
GTPinMockCommandQueue(Context *context, MockClDevice *device) : MockCommandQueue(context, device, nullptr, false) {}
|
||||
WaitStatus waitUntilComplete(TaskCountType gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
|
||||
|
|
|
@ -224,9 +224,16 @@ class MockCommandQueue : public CommandQueue {
|
|||
return false;
|
||||
};
|
||||
|
||||
// Mock override: counts every invocation in isCompletedCalled, then delegates to
// CommandQueue::isCompleted with the same arguments and returns its result.
bool isCompleted(TaskCountType gpgpuTaskCount, CopyEngineState bcsState) override {
|
||||
isCompletedCalled++;
|
||||
|
||||
return CommandQueue::isCompleted(gpgpuTaskCount, bcsState);
|
||||
}
|
||||
|
||||
bool releaseIndirectHeapCalled = false;
|
||||
bool waitForTimestampsCalled = false;
|
||||
cl_int writeBufferRetValue = CL_SUCCESS;
|
||||
uint32_t isCompletedCalled = 0;
|
||||
uint32_t writeBufferCounter = 0;
|
||||
bool writeBufferBlocking = false;
|
||||
size_t writeBufferOffset = 0;
|
||||
|
|
Loading…
Reference in New Issue