mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 17:29:14 +08:00
performance: skip queue state check when waiting for latest IOQ TSP
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
73ffc56938
commit
59233d9597
@@ -454,7 +454,10 @@ inline WaitStatus Event::wait(bool blocking, bool useQuickKmdSleep) {
|
||||
|
||||
DEBUG_BREAK_IF(this->taskLevel == CompletionStamp::notReady && this->executionStatus >= 0);
|
||||
|
||||
cmdQueue->handlePostCompletionOperations(true);
|
||||
bool checkQueueCompletionForPostSyncOperations = !(waitedOnTimestamps && !cmdQueue->isOOQEnabled() &&
|
||||
(this->timestampPacketContainer->peekNodes() == cmdQueue->getTimestampPacketContainer()->peekNodes()));
|
||||
|
||||
cmdQueue->handlePostCompletionOperations(checkQueueCompletionForPostSyncOperations);
|
||||
|
||||
auto *allocationStorage = cmdQueue->getGpgpuCommandStreamReceiver().getInternalAllocationStorage();
|
||||
allocationStorage->cleanAllocationList(this->taskCount, TEMPORARY_ALLOCATION);
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
#include "opencl/source/event/user_event.h"
|
||||
#include "opencl/source/gtpin/gtpin_defs.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_event.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||
|
||||
#include "cl_api_tests.h"
|
||||
|
||||
@@ -154,6 +155,60 @@ HWTEST_F(clEnqueueWaitForEventsTests, givenOoqWhenWaitingForEventThenCallWaitFor
|
||||
EXPECT_TRUE(commandQueueHw.latestWaitForTimestampsStatus);
|
||||
}
|
||||
|
||||
struct clEnqueueWaitForTimestampsTests : public clEnqueueWaitForEventsTests {
|
||||
void SetUp() override {
|
||||
DebugManager.flags.EnableTimestampWaitForQueues.set(4);
|
||||
DebugManager.flags.EnableTimestampWaitForEvents.set(4);
|
||||
DebugManager.flags.EnableTimestampPacket.set(1);
|
||||
|
||||
clEnqueueWaitForEventsTests::SetUp();
|
||||
}
|
||||
|
||||
DebugManagerStateRestore restore;
|
||||
};
|
||||
|
||||
// Enqueues two kernels on one queue and verifies how often the queue's
// isCompleted() is invoked while waiting on each of the resulting events,
// first with the queue in its default mode and then after setOoqEnabled().
HWTEST_F(clEnqueueWaitForTimestampsTests, givenIoqWhenWaitingForLatestEventThenDontCheckQueueCompletion) {
    MockCommandQueueHw<FamilyType> mockQueue(pContext, pDevice, nullptr);
    MockKernelWithInternals kernelWithInternals(*pDevice);

    const size_t globalWorkSize[] = {1, 1, 1};

    cl_event olderEvent;
    cl_event latestEvent;
    mockQueue.enqueueKernel(kernelWithInternals.mockKernel, 1, nullptr, globalWorkSize, nullptr, 0, nullptr, &olderEvent);
    mockQueue.enqueueKernel(kernelWithInternals.mockKernel, 1, nullptr, globalWorkSize, nullptr, 0, nullptr, &latestEvent);

    // Writes 0 into the context-end slot of the first timestamp packet node of the event.
    // NOTE(review): presumably this makes the node look finished so the timestamp wait
    // returns immediately — confirm against the timestamp packet implementation.
    auto zeroContextEnd = [](cl_event clEvent) {
        auto eventObject = castToObjectOrAbort<Event>(clEvent);
        auto firstNode = eventObject->getTimestampPacketNodes()->peekNodes()[0];
        auto contextEndAddress = ptrOffset(firstNode->getCpuBase(), firstNode->getContextEndOffset());
        *reinterpret_cast<typename FamilyType::TimestampPacketType *>(contextEndAddress) = 0;
    };
    zeroContextEnd(olderEvent);
    zeroContextEnd(latestEvent);

    EXPECT_EQ(0u, mockQueue.isCompletedCalled);

    // Waiting on the older event queries queue completion once.
    EXPECT_EQ(CL_SUCCESS, clEnqueueWaitForEvents(&mockQueue, 1, &olderEvent));
    EXPECT_EQ(1u, mockQueue.isCompletedCalled);

    // Waiting on the latest event must not add another completion query.
    EXPECT_EQ(CL_SUCCESS, clEnqueueWaitForEvents(&mockQueue, 1, &latestEvent));
    EXPECT_EQ(1u, mockQueue.isCompletedCalled);

    // After setOoqEnabled(), every wait queries queue completion again.
    mockQueue.setOoqEnabled();
    EXPECT_EQ(CL_SUCCESS, clEnqueueWaitForEvents(&mockQueue, 1, &olderEvent));
    EXPECT_EQ(2u, mockQueue.isCompletedCalled);

    EXPECT_EQ(CL_SUCCESS, clEnqueueWaitForEvents(&mockQueue, 1, &latestEvent));
    EXPECT_EQ(3u, mockQueue.isCompletedCalled);

    clReleaseEvent(olderEvent);
    clReleaseEvent(latestEvent);
}
|
||||
|
||||
HWTEST_F(clEnqueueWaitForEventsTests, givenAlreadyCompletedEventWhenWaitForCompletionThenCheckGpuStateOnce) {
|
||||
auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
auto csrTagAddress = ultCsr.getTagAddress();
|
||||
|
||||
@@ -433,7 +433,13 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
||||
latestWaitForTimestampsStatus = BaseClass::waitForTimestamps(copyEnginesToWait, status, mainContainer, deferredContainer);
|
||||
|
||||
return latestWaitForTimestampsStatus;
|
||||
};
|
||||
}
|
||||
|
||||
// Mock override: bumps isCompletedCalled on every call, then forwards the
// query unchanged to CommandQueue::isCompleted and returns its result.
bool isCompleted(TaskCountType gpgpuTaskCount, const Range<CopyEngineState> &bcsStates) override {
    ++isCompletedCalled;

    const bool completed = CommandQueue::isCompleted(gpgpuTaskCount, bcsStates);
    return completed;
}
|
||||
|
||||
unsigned int lastCommandType;
|
||||
std::vector<Kernel *> lastEnqueuedKernels;
|
||||
@@ -459,6 +465,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
||||
} overrideIsCacheFlushForBcsRequired;
|
||||
BuiltinOpParams kernelParams;
|
||||
std::atomic<TaskCountType> latestTaskCountWaited{std::numeric_limits<uint32_t>::max()};
|
||||
std::atomic<uint32_t> isCompletedCalled = 0;
|
||||
bool flushCalled = false;
|
||||
std::optional<WaitStatus> waitForAllEnginesReturnValue{};
|
||||
std::optional<WaitStatus> waitUntilCompleteReturnValue{};
|
||||
|
||||
Reference in New Issue
Block a user