performance: check completion alloc only once when waiting for Event
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
parent
ed972bb21c
commit
712e059ace
|
@ -214,7 +214,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||||
|
|
||||||
volatile TagAddressType *getHwTagAddress() const;
|
volatile TagAddressType *getHwTagAddress() const;
|
||||||
|
|
||||||
bool isCompleted(TaskCountType gpgpuTaskCount, CopyEngineState bcsState);
|
MOCKABLE_VIRTUAL bool isCompleted(TaskCountType gpgpuTaskCount, CopyEngineState bcsState);
|
||||||
|
|
||||||
bool isWaitForTimestampsEnabled() const;
|
bool isWaitForTimestampsEnabled() const;
|
||||||
virtual bool waitForTimestamps(Range<CopyEngineState> copyEnginesToWait, TaskCountType taskCount, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) = 0;
|
virtual bool waitForTimestamps(Range<CopyEngineState> copyEnginesToWait, TaskCountType taskCount, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) = 0;
|
||||||
|
|
|
@ -447,6 +447,9 @@ inline WaitStatus Event::wait(bool blocking, bool useQuickKmdSleep) {
|
||||||
if (waitStatus == WaitStatus::GpuHang) {
|
if (waitStatus == WaitStatus::GpuHang) {
|
||||||
return WaitStatus::GpuHang;
|
return WaitStatus::GpuHang;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
this->gpuStateWaited = true;
|
||||||
|
|
||||||
updateExecutionStatus();
|
updateExecutionStatus();
|
||||||
|
|
||||||
DEBUG_BREAK_IF(this->taskLevel == CompletionStamp::notReady && this->executionStatus >= 0);
|
DEBUG_BREAK_IF(this->taskLevel == CompletionStamp::notReady && this->executionStatus >= 0);
|
||||||
|
@ -704,7 +707,15 @@ inline void Event::setExecutionStatusToAbortedDueToGpuHang(cl_event *first, cl_e
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reports whether this event's work has completed.
// This hunk shows both sides of the change: the previous single-line body
// (the lone `return cmdQueue->isCompleted(...) || ...` row) and the new body
// that caches a positive result in gpuStateWaited.
bool Event::isCompleted() {
|
bool Event::isCompleted() {
|
||||||
return cmdQueue->isCompleted(getCompletionStamp(), this->bcsState) || this->areTimestampsCompleted();
|
if (gpuStateWaited) { // completion was already observed; skip the queue/timestamp queries
|
||||||

return true;
|
||||||

}
|
||||||


||||||

if (cmdQueue->isCompleted(getCompletionStamp(), this->bcsState) || this->areTimestampsCompleted()) { // same condition the old body returned directly
|
||||||

gpuStateWaited = true; // latch the positive result so later calls take the early return above
|
||||||

}
|
||||||


||||||

return gpuStateWaited; // still false here when neither check reported completion
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Event::isWaitForTimestampsEnabled() const {
|
bool Event::isWaitForTimestampsEnabled() const {
|
||||||
|
|
|
@ -393,6 +393,7 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
|
||||||
// number of events this event depends on
|
// number of events this event depends on
|
||||||
std::unique_ptr<TimestampPacketContainer> multiRootDeviceTimestampPacketContainer;
|
std::unique_ptr<TimestampPacketContainer> multiRootDeviceTimestampPacketContainer;
|
||||||
std::atomic<int> parentCount;
|
std::atomic<int> parentCount;
|
||||||
|
std::atomic<bool> gpuStateWaited = false;
|
||||||
// event parents
|
// event parents
|
||||||
std::vector<Event *> parentEvents;
|
std::vector<Event *> parentEvents;
|
||||||
|
|
||||||
|
|
|
@ -129,6 +129,57 @@ TEST_F(clEnqueueWaitForEventsTests, GivenInvalidEventWhenClEnqueueWaitForEventsI
|
||||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks, via the mock queue's isCompletedCalled counter, that the command
// queue's isCompleted() stops being invoked for an event once that event has
// been observed as complete.
HWTEST_F(clEnqueueWaitForEventsTests, givenAlreadyCompletedEventWhenWaitForCompletionThenCheckGpuStateOnce) {
|
||||||

auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||||

auto csrTagAddress = ultCsr.getTagAddress();
|
||||||


||||||

TaskCountType eventTaskCount = 5;
|
||||||


||||||

*csrTagAddress = eventTaskCount - 1; // tag starts one below the task count both events are created with
|
||||||


||||||

MockEvent<Event> event1(pCommandQueue, CL_COMMAND_READ_BUFFER, 0, eventTaskCount);
|
||||||

MockEvent<Event> event2(pCommandQueue, CL_COMMAND_READ_BUFFER, 0, eventTaskCount);
|
||||||

cl_event hEvent1 = &event1;
|
||||||

cl_event hEvent2 = &event2;
|
||||||


||||||

EXPECT_EQ(0u, pCommandQueue->isCompletedCalled);
|
||||||


||||||

// Event 1
|
||||||

event1.updateExecutionStatus();
|
||||||

EXPECT_EQ(1u, pCommandQueue->isCompletedCalled);
|
||||||


||||||

event1.updateExecutionStatus();
|
||||||

EXPECT_EQ(2u, pCommandQueue->isCompletedCalled); // tag still below the task count: each status update queries the queue again
|
||||||


||||||

*csrTagAddress = eventTaskCount; // tag now equals the events' task count
|
||||||


||||||

event1.updateExecutionStatus();
|
||||||

EXPECT_EQ(3u, pCommandQueue->isCompletedCalled); // one more query after the tag was raised
|
||||||


||||||

event1.updateExecutionStatus();
|
||||||

EXPECT_EQ(3u, pCommandQueue->isCompletedCalled); // counter unchanged: no further queue query for event1
|
||||||


||||||

auto retVal = clEnqueueWaitForEvents(pCommandQueue, 1, &hEvent1);
|
||||||

EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||


||||||

EXPECT_EQ(3u, pCommandQueue->isCompletedCalled);
|
||||||


||||||

// Event 2
|
||||||

retVal = clEnqueueWaitForEvents(pCommandQueue, 1, &hEvent2);
|
||||||

EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||


||||||

// The counter stays at 3: waiting on event2 is expected to mark it complete without
// calling CommandQueue::isCompleted(). NOTE(review): presumably because Event::wait
// sets gpuStateWaited before updateExecutionStatus() - confirm against the wait path.
|
||||||

EXPECT_EQ(3u, pCommandQueue->isCompletedCalled);
|
||||||


||||||

retVal = clEnqueueWaitForEvents(pCommandQueue, 1, &hEvent2);
|
||||||

EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||


||||||

EXPECT_EQ(3u, pCommandQueue->isCompletedCalled);
|
||||||


||||||

event2.updateExecutionStatus();
|
||||||

EXPECT_EQ(3u, pCommandQueue->isCompletedCalled); // explicit status update on event2 also leaves the counter unchanged
|
||||||

}
|
||||||
|
|
||||||
struct GTPinMockCommandQueue : MockCommandQueue {
|
struct GTPinMockCommandQueue : MockCommandQueue {
|
||||||
GTPinMockCommandQueue(Context *context, MockClDevice *device) : MockCommandQueue(context, device, nullptr, false) {}
|
GTPinMockCommandQueue(Context *context, MockClDevice *device) : MockCommandQueue(context, device, nullptr, false) {}
|
||||||
WaitStatus waitUntilComplete(TaskCountType gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
|
WaitStatus waitUntilComplete(TaskCountType gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
|
||||||
|
|
|
@ -224,9 +224,16 @@ class MockCommandQueue : public CommandQueue {
|
||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Test override: counts every invocation in isCompletedCalled, then defers
// to CommandQueue::isCompleted with the same arguments and returns its result.
bool isCompleted(TaskCountType gpgpuTaskCount, CopyEngineState bcsState) override {
|
||||||

isCompletedCalled++; // lets tests assert how many times completion was queried
|
||||||


||||||

return CommandQueue::isCompleted(gpgpuTaskCount, bcsState); // base-class result is returned unchanged
|
||||||

}
|
||||||
|
|
||||||
bool releaseIndirectHeapCalled = false;
|
bool releaseIndirectHeapCalled = false;
|
||||||
bool waitForTimestampsCalled = false;
|
bool waitForTimestampsCalled = false;
|
||||||
cl_int writeBufferRetValue = CL_SUCCESS;
|
cl_int writeBufferRetValue = CL_SUCCESS;
|
||||||
|
uint32_t isCompletedCalled = 0;
|
||||||
uint32_t writeBufferCounter = 0;
|
uint32_t writeBufferCounter = 0;
|
||||||
bool writeBufferBlocking = false;
|
bool writeBufferBlocking = false;
|
||||||
size_t writeBufferOffset = 0;
|
size_t writeBufferOffset = 0;
|
||||||
|
|
Loading…
Reference in New Issue