mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 05:56:36 +08:00
performance: allow waiting for OOQ timestamps in clEnqueueWaitForEvents
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
d74bba95c4
commit
815b37bf3a
@@ -7,6 +7,7 @@
|
||||
|
||||
#include "shared/source/command_stream/wait_status.h"
|
||||
#include "shared/source/helpers/array_count.h"
|
||||
#include "shared/test/common/mocks/mock_timestamp_container.h"
|
||||
|
||||
#include "opencl/source/command_queue/command_queue.h"
|
||||
#include "opencl/source/context/context.h"
|
||||
@@ -129,6 +130,30 @@ TEST_F(clEnqueueWaitForEventsTests, GivenInvalidEventWhenClEnqueueWaitForEventsI
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
}
|
||||
|
||||
// Checks that clEnqueueWaitForEvents issued on an out-of-order queue reaches
// waitForTimestamps and that the wait reports success.
HWTEST_F(clEnqueueWaitForEventsTests, givenOoqWhenWaitingForEventThenCallWaitForTimestamps) {
    using TimestampPacketType = typename FamilyType::TimestampPacketType;

    MockCommandQueueHw<FamilyType> commandQueueHw(pContext, pDevice, nullptr);

    // Debug flag is overridden only for the duration of this test.
    // NOTE(review): the meaning of value 4 is not visible here - confirm against the flag's definition.
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableTimestampWaitForQueues.set(4);
    commandQueueHw.setOoqEnabled();

    // Event bound to the mock queue, carrying a single timestamp packet node.
    MockEvent<Event> readEvent(&commandQueueHw, CL_COMMAND_READ_BUFFER, 0, 0);
    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    readEvent.timestampPacketContainer = std::make_unique<MockTimestampPacketContainer>(*ultCsr.getTimestampPacketAllocator(), 1);

    // Write 0 into the node's context-end slot.
    // NOTE(review): presumably this makes the packet look completed so the wait does not block - confirm.
    auto packetNode = readEvent.timestampPacketContainer->peekNodes()[0];
    auto contextEndAddress = ptrOffset(packetNode->getCpuBase(), packetNode->getContextEndOffset());
    *reinterpret_cast<TimestampPacketType *>(contextEndAddress) = 0;

    cl_event eventHandle = &readEvent;
    auto enqueueResult = clEnqueueWaitForEvents(&commandQueueHw, 1, &eventHandle);

    EXPECT_EQ(CL_SUCCESS, enqueueResult);
    EXPECT_TRUE(commandQueueHw.waitForTimestampsCalled);
    EXPECT_TRUE(commandQueueHw.latestWaitForTimestampsStatus);
}
|
||||
|
||||
HWTEST_F(clEnqueueWaitForEventsTests, givenAlreadyCompletedEventWhenWaitForCompletionThenCheckGpuStateOnce) {
|
||||
auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
auto csrTagAddress = ultCsr.getTagAddress();
|
||||
|
||||
@@ -54,7 +54,7 @@ HWTEST_F(CommandQueueHwTest, givenNoTimestampPacketsWhenWaitForTimestampsThenNoW
|
||||
auto taskCount = device->getUltCommandStreamReceiver<FamilyType>().peekLatestFlushedTaskCount();
|
||||
auto status = WaitStatus::NotReady;
|
||||
|
||||
cmdQ.waitForTimestamps({}, 101u, status, cmdQ.timestampPacketContainer.get(), cmdQ.deferredTimestampPackets.get());
|
||||
cmdQ.waitForTimestamps({}, status, cmdQ.timestampPacketContainer.get(), cmdQ.deferredTimestampPackets.get());
|
||||
|
||||
EXPECT_EQ(device->getUltCommandStreamReceiver<FamilyType>().peekLatestFlushedTaskCount(), taskCount);
|
||||
}
|
||||
|
||||
@@ -835,7 +835,11 @@ HWTEST_F(TimestampPacketTests, givenAllEnginesReadyWhenWaitingForEventThenClearD
|
||||
cl_event event1, event2;
|
||||
|
||||
cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event1);
|
||||
auto node1 = timestampPacketContainer->peekNodes()[0];
|
||||
|
||||
cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event2);
|
||||
auto node2 = timestampPacketContainer->peekNodes()[0];
|
||||
|
||||
cmdQ->flush();
|
||||
|
||||
EXPECT_EQ(2u, csr.taskCount);
|
||||
@@ -846,6 +850,12 @@ HWTEST_F(TimestampPacketTests, givenAllEnginesReadyWhenWaitingForEventThenClearD
|
||||
auto eventObj1 = castToObjectOrAbort<Event>(event1);
|
||||
auto eventObj2 = castToObjectOrAbort<Event>(event2);
|
||||
|
||||
auto contextEnd1 = ptrOffset(node1->getCpuBase(), node1->getContextEndOffset());
|
||||
auto contextEnd2 = ptrOffset(node2->getCpuBase(), node2->getContextEndOffset());
|
||||
|
||||
*reinterpret_cast<typename FamilyType::TimestampPacketType *>(contextEnd1) = 0;
|
||||
*reinterpret_cast<typename FamilyType::TimestampPacketType *>(contextEnd2) = 0;
|
||||
|
||||
EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size());
|
||||
EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size());
|
||||
|
||||
|
||||
@@ -219,7 +219,7 @@ class MockCommandQueue : public CommandQueue {
|
||||
|
||||
bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const override { return isCacheFlushRequired; }
|
||||
|
||||
// Mock override: records that waitForTimestamps was invoked and returns false;
// the body performs no other work.
// NOTE(review): two signature lines follow. This looks like a rendered diff in
// which the first line (taking TaskCountType taskCount) is the removed version
// and the second is its replacement - confirm against the repository.
bool waitForTimestamps(Range<CopyEngineState> copyEnginesToWait, TaskCountType taskCount, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) override {
|
||||
bool waitForTimestamps(Range<CopyEngineState> copyEnginesToWait, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) override {
|
||||
// Flag inspected by tests to verify the call happened.
waitForTimestampsCalled = true;
|
||||
return false;
|
||||
};
|
||||
@@ -424,6 +424,14 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
||||
return BaseClass::isGpgpuSubmissionForBcsRequired(queueBlocked, timestampPacketDependencies);
|
||||
}
|
||||
|
||||
// Test hook: notes that the wait was requested, forwards the call to the
// BaseClass implementation and keeps its result so tests can inspect it.
bool waitForTimestamps(Range<CopyEngineState> copyEnginesToWait, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) override {
    waitForTimestampsCalled = true;

    const bool baseResult = BaseClass::waitForTimestamps(copyEnginesToWait, status, mainContainer, deferredContainer);
    latestWaitForTimestampsStatus = baseResult;

    return baseResult;
}
|
||||
|
||||
unsigned int lastCommandType;
|
||||
std::vector<Kernel *> lastEnqueuedKernels;
|
||||
MultiDispatchInfo storedMultiDispatchInfo;
|
||||
@@ -437,6 +445,8 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
||||
bool notifyEnqueueSVMMemcpyCalled = false;
|
||||
bool cpuDataTransferHandlerCalled = false;
|
||||
bool useBcsCsrOnNotifyEnabled = false;
|
||||
bool waitForTimestampsCalled = false;
|
||||
bool latestWaitForTimestampsStatus = false;
|
||||
int setQueueBlocked = -1;
|
||||
int forceGpgpuSubmissionForBcsRequired = -1;
|
||||
mutable bool isBlitEnqueueImageAllowed = false;
|
||||
|
||||
Reference in New Issue
Block a user