performance: allow waiting for OOQ timestamps in clEnqueueWaitForEvents

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2023-07-13 15:48:26 +00:00
committed by Compute-Runtime-Automation
parent d74bba95c4
commit 815b37bf3a
9 changed files with 59 additions and 14 deletions

View File

@@ -7,6 +7,7 @@
#include "shared/source/command_stream/wait_status.h"
#include "shared/source/helpers/array_count.h"
#include "shared/test/common/mocks/mock_timestamp_container.h"
#include "opencl/source/command_queue/command_queue.h"
#include "opencl/source/context/context.h"
@@ -129,6 +130,30 @@ TEST_F(clEnqueueWaitForEventsTests, GivenInvalidEventWhenClEnqueueWaitForEventsI
ASSERT_EQ(CL_SUCCESS, retVal);
}
// Verifies that clEnqueueWaitForEvents, issued on a queue with OOQ enabled,
// reaches CommandQueueHw::waitForTimestamps and that the wait reports true.
HWTEST_F(clEnqueueWaitForEventsTests, givenOoqWhenWaitingForEventThenCallWaitForTimestamps) {
    using PacketValueT = typename FamilyType::TimestampPacketType;

    MockCommandQueueHw<FamilyType> oooQueue(pContext, pDevice, nullptr);

    DebugManagerStateRestore flagsGuard;
    // NOTE(review): the meaning of value 4 is not visible in this hunk - confirm against the flag's documentation.
    DebugManager.flags.EnableTimestampWaitForQueues.set(4);

    oooQueue.setOoqEnabled();

    // Event owned by the queue, backed by a single freshly allocated timestamp packet.
    MockEvent<Event> readEvent(&oooQueue, CL_COMMAND_READ_BUFFER, 0, 0);
    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    readEvent.timestampPacketContainer = std::make_unique<MockTimestampPacketContainer>(*ultCsr.getTimestampPacketAllocator(), 1);

    // Zero the packet's context-end value.
    // NOTE(review): presumably this makes the packet look completed so the timestamp wait can succeed - confirm.
    auto packetNode = readEvent.timestampPacketContainer->peekNodes()[0];
    auto contextEndAddress = ptrOffset(packetNode->getCpuBase(), packetNode->getContextEndOffset());
    auto *contextEndValue = reinterpret_cast<PacketValueT *>(contextEndAddress);
    *contextEndValue = 0;

    cl_event eventHandle = &readEvent;
    auto waitResult = clEnqueueWaitForEvents(&oooQueue, 1, &eventHandle);

    EXPECT_EQ(CL_SUCCESS, waitResult);
    EXPECT_TRUE(oooQueue.waitForTimestampsCalled);
    EXPECT_TRUE(oooQueue.latestWaitForTimestampsStatus);
}
HWTEST_F(clEnqueueWaitForEventsTests, givenAlreadyCompletedEventWhenWaitForCompletionThenCheckGpuStateOnce) {
auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
auto csrTagAddress = ultCsr.getTagAddress();

View File

@@ -54,7 +54,7 @@ HWTEST_F(CommandQueueHwTest, givenNoTimestampPacketsWhenWaitForTimestampsThenNoW
auto taskCount = device->getUltCommandStreamReceiver<FamilyType>().peekLatestFlushedTaskCount();
auto status = WaitStatus::NotReady;
cmdQ.waitForTimestamps({}, 101u, status, cmdQ.timestampPacketContainer.get(), cmdQ.deferredTimestampPackets.get());
cmdQ.waitForTimestamps({}, status, cmdQ.timestampPacketContainer.get(), cmdQ.deferredTimestampPackets.get());
EXPECT_EQ(device->getUltCommandStreamReceiver<FamilyType>().peekLatestFlushedTaskCount(), taskCount);
}

View File

@@ -835,7 +835,11 @@ HWTEST_F(TimestampPacketTests, givenAllEnginesReadyWhenWaitingForEventThenClearD
cl_event event1, event2;
cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event1);
auto node1 = timestampPacketContainer->peekNodes()[0];
cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event2);
auto node2 = timestampPacketContainer->peekNodes()[0];
cmdQ->flush();
EXPECT_EQ(2u, csr.taskCount);
@@ -846,6 +850,12 @@ HWTEST_F(TimestampPacketTests, givenAllEnginesReadyWhenWaitingForEventThenClearD
auto eventObj1 = castToObjectOrAbort<Event>(event1);
auto eventObj2 = castToObjectOrAbort<Event>(event2);
auto contextEnd1 = ptrOffset(node1->getCpuBase(), node1->getContextEndOffset());
auto contextEnd2 = ptrOffset(node2->getCpuBase(), node2->getContextEndOffset());
*reinterpret_cast<typename FamilyType::TimestampPacketType *>(contextEnd1) = 0;
*reinterpret_cast<typename FamilyType::TimestampPacketType *>(contextEnd2) = 0;
EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size());
EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size());

View File

@@ -219,7 +219,7 @@ class MockCommandQueue : public CommandQueue {
// Mock override: hands the caller's isCacheFlushRequired value straight back.
bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const override {
    return isCacheFlushRequired;
}
// NOTE(review): the two signature lines below are the before/after pair shown by this diff view;
// the first one (taking TaskCountType taskCount) appears to be the removed line - confirm against the real file.
bool waitForTimestamps(Range<CopyEngineState> copyEnginesToWait, TaskCountType taskCount, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) override {
bool waitForTimestamps(Range<CopyEngineState> copyEnginesToWait, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) override {
// Mock override: records that it was invoked and always returns false; none of the arguments are used.
waitForTimestampsCalled = true;
return false;
};
@@ -424,6 +424,14 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
return BaseClass::isGpgpuSubmissionForBcsRequired(queueBlocked, timestampPacketDependencies);
}
// Mock override: flags the invocation, forwards every argument to
// BaseClass::waitForTimestamps, and remembers the returned value.
bool waitForTimestamps(Range<CopyEngineState> copyEnginesToWait, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) override {
    // Set before delegating, so the flag is already true while the base call runs.
    waitForTimestampsCalled = true;

    const bool baseResult = BaseClass::waitForTimestamps(copyEnginesToWait, status, mainContainer, deferredContainer);
    latestWaitForTimestampsStatus = baseResult;
    return baseResult;
};
unsigned int lastCommandType;
std::vector<Kernel *> lastEnqueuedKernels;
MultiDispatchInfo storedMultiDispatchInfo;
@@ -437,6 +445,8 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
bool notifyEnqueueSVMMemcpyCalled = false;
bool cpuDataTransferHandlerCalled = false;
bool useBcsCsrOnNotifyEnabled = false;
// Set to true by the waitForTimestamps override every time it is invoked.
bool waitForTimestampsCalled = false;
// Value returned by the most recent BaseClass::waitForTimestamps call made through the override.
bool latestWaitForTimestampsStatus = false;
int setQueueBlocked = -1;
int forceGpgpuSubmissionForBcsRequired = -1;
mutable bool isBlitEnqueueImageAllowed = false;