mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 17:29:14 +08:00
performance: skip queue state check when waiting for latest IOQ TSP
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
73ffc56938
commit
59233d9597
@@ -454,7 +454,10 @@ inline WaitStatus Event::wait(bool blocking, bool useQuickKmdSleep) {
|
|||||||
|
|
||||||
DEBUG_BREAK_IF(this->taskLevel == CompletionStamp::notReady && this->executionStatus >= 0);
|
DEBUG_BREAK_IF(this->taskLevel == CompletionStamp::notReady && this->executionStatus >= 0);
|
||||||
|
|
||||||
cmdQueue->handlePostCompletionOperations(true);
|
bool checkQueueCompletionForPostSyncOperations = !(waitedOnTimestamps && !cmdQueue->isOOQEnabled() &&
|
||||||
|
(this->timestampPacketContainer->peekNodes() == cmdQueue->getTimestampPacketContainer()->peekNodes()));
|
||||||
|
|
||||||
|
cmdQueue->handlePostCompletionOperations(checkQueueCompletionForPostSyncOperations);
|
||||||
|
|
||||||
auto *allocationStorage = cmdQueue->getGpgpuCommandStreamReceiver().getInternalAllocationStorage();
|
auto *allocationStorage = cmdQueue->getGpgpuCommandStreamReceiver().getInternalAllocationStorage();
|
||||||
allocationStorage->cleanAllocationList(this->taskCount, TEMPORARY_ALLOCATION);
|
allocationStorage->cleanAllocationList(this->taskCount, TEMPORARY_ALLOCATION);
|
||||||
|
|||||||
@@ -15,6 +15,7 @@
|
|||||||
#include "opencl/source/event/user_event.h"
|
#include "opencl/source/event/user_event.h"
|
||||||
#include "opencl/source/gtpin/gtpin_defs.h"
|
#include "opencl/source/gtpin/gtpin_defs.h"
|
||||||
#include "opencl/test/unit_test/mocks/mock_event.h"
|
#include "opencl/test/unit_test/mocks/mock_event.h"
|
||||||
|
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||||
|
|
||||||
#include "cl_api_tests.h"
|
#include "cl_api_tests.h"
|
||||||
|
|
||||||
@@ -154,6 +155,60 @@ HWTEST_F(clEnqueueWaitForEventsTests, givenOoqWhenWaitingForEventThenCallWaitFor
|
|||||||
EXPECT_TRUE(commandQueueHw.latestWaitForTimestampsStatus);
|
EXPECT_TRUE(commandQueueHw.latestWaitForTimestampsStatus);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Test fixture built on clEnqueueWaitForEventsTests that sets the
// timestamp-wait related debug flags before running the base fixture's SetUp.
struct clEnqueueWaitForTimestampsTests : public clEnqueueWaitForEventsTests {
|
||||||
|
// Sets the debug flags first, then delegates to the base fixture's SetUp.
void SetUp() override {
|
||||||
|
// NOTE(review): the meaning of the value 4 for the two flags below is not
// visible in this hunk - confirm against the debug variable definitions.
DebugManager.flags.EnableTimestampWaitForQueues.set(4);
|
||||||
|
DebugManager.flags.EnableTimestampWaitForEvents.set(4);
|
||||||
|
DebugManager.flags.EnableTimestampPacket.set(1);
|
||||||
|
|
||||||
|
// Called after the flags are set; presumably so that objects created by the
// base SetUp observe the overridden flag values - confirm.
clEnqueueWaitForEventsTests::SetUp();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Presumably restores the DebugManager flags modified in SetUp when the
// fixture is destroyed - confirm against DebugManagerStateRestore.
DebugManagerStateRestore restore;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Checks how often the mock queue's isCompleted() is invoked while waiting on
// events: on an in-order queue a wait on the event from the most recent
// enqueue is expected not to bump the counter, while waits on an older event,
// or any wait after switching the queue to out-of-order mode, are expected to
// bump it by one each.
HWTEST_F(clEnqueueWaitForTimestampsTests, givenIoqWhenWaitingForLatestEventThenDontCheckQueueCompletion) {
|
||||||
|
MockCommandQueueHw<FamilyType> commandQueueHw(pContext, pDevice, nullptr);
|
||||||
|
|
||||||
|
MockKernelWithInternals kernel(*pDevice);
|
||||||
|
|
||||||
|
cl_event event0, event1;
|
||||||
|
|
||||||
|
const size_t gws[] = {1, 1, 1};
|
||||||
|
// Enqueue the same kernel twice on the same queue; each call returns its own
// event, so event1 belongs to the most recent enqueue.
commandQueueHw.enqueueKernel(kernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event0);
|
||||||
|
commandQueueHw.enqueueKernel(kernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event1);
|
||||||
|
|
||||||
|
auto eventObj0 = castToObjectOrAbort<Event>(event0);
|
||||||
|
auto eventObj1 = castToObjectOrAbort<Event>(event1);
|
||||||
|
|
||||||
|
// First timestamp packet node attached to each event.
auto node0 = eventObj0->getTimestampPacketNodes()->peekNodes()[0];
|
||||||
|
auto node1 = eventObj1->getTimestampPacketNodes()->peekNodes()[0];
|
||||||
|
|
||||||
|
// CPU address of each node's context-end field (CPU base + context-end offset).
auto contextEnd0 = ptrOffset(node0->getCpuBase(), node0->getContextEndOffset());
|
||||||
|
auto contextEnd1 = ptrOffset(node1->getCpuBase(), node1->getContextEndOffset());
|
||||||
|
|
||||||
|
// Write 0 into both context-end fields. Presumably this marks the packets as
// completed so the timestamp-based wait can finish without GPU work - confirm.
*reinterpret_cast<typename FamilyType::TimestampPacketType *>(contextEnd0) = 0;
|
||||||
|
*reinterpret_cast<typename FamilyType::TimestampPacketType *>(contextEnd1) = 0;
|
||||||
|
|
||||||
|
// Nothing has queried queue completion yet.
EXPECT_EQ(0u, commandQueueHw.isCompletedCalled);
|
||||||
|
|
||||||
|
// In-order queue, waiting on the older event: counter is expected to go to 1.
EXPECT_EQ(CL_SUCCESS, clEnqueueWaitForEvents(&commandQueueHw, 1, &event0));
|
||||||
|
EXPECT_EQ(1u, commandQueueHw.isCompletedCalled);
|
||||||
|
|
||||||
|
// In-order queue, waiting on the event from the most recent enqueue: counter
// is expected to stay at 1 (the scenario named in the test title).
EXPECT_EQ(CL_SUCCESS, clEnqueueWaitForEvents(&commandQueueHw, 1, &event1));
|
||||||
|
EXPECT_EQ(1u, commandQueueHw.isCompletedCalled);
|
||||||
|
|
||||||
|
// After switching the mock queue to out-of-order mode every wait is expected
// to bump the counter again, including the wait on the latest event.
commandQueueHw.setOoqEnabled();
|
||||||
|
EXPECT_EQ(CL_SUCCESS, clEnqueueWaitForEvents(&commandQueueHw, 1, &event0));
|
||||||
|
EXPECT_EQ(2u, commandQueueHw.isCompletedCalled);
|
||||||
|
|
||||||
|
EXPECT_EQ(CL_SUCCESS, clEnqueueWaitForEvents(&commandQueueHw, 1, &event1));
|
||||||
|
EXPECT_EQ(3u, commandQueueHw.isCompletedCalled);
|
||||||
|
|
||||||
|
// Release the events returned by the two enqueues.
clReleaseEvent(event0);
|
||||||
|
clReleaseEvent(event1);
|
||||||
|
}
|
||||||
|
|
||||||
HWTEST_F(clEnqueueWaitForEventsTests, givenAlreadyCompletedEventWhenWaitForCompletionThenCheckGpuStateOnce) {
|
HWTEST_F(clEnqueueWaitForEventsTests, givenAlreadyCompletedEventWhenWaitForCompletionThenCheckGpuStateOnce) {
|
||||||
auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||||
auto csrTagAddress = ultCsr.getTagAddress();
|
auto csrTagAddress = ultCsr.getTagAddress();
|
||||||
|
|||||||
@@ -433,7 +433,13 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
|||||||
latestWaitForTimestampsStatus = BaseClass::waitForTimestamps(copyEnginesToWait, status, mainContainer, deferredContainer);
|
latestWaitForTimestampsStatus = BaseClass::waitForTimestamps(copyEnginesToWait, status, mainContainer, deferredContainer);
|
||||||
|
|
||||||
return latestWaitForTimestampsStatus;
|
return latestWaitForTimestampsStatus;
|
||||||
};
|
}
|
||||||
|
|
||||||
|
// Mock override: counts every invocation in isCompletedCalled and then
// returns the result of CommandQueue::isCompleted for the same arguments.
bool isCompleted(TaskCountType gpgpuTaskCount, const Range<CopyEngineState> &bcsStates) override {
|
||||||
|
// Call counter that tests read to see how often completion was queried.
isCompletedCalled++;
|
||||||
|
|
||||||
|
// NOTE(review): delegates to CommandQueue:: explicitly rather than BaseClass::
// as waitForTimestamps does in this class - confirm this is intentional.
return CommandQueue::isCompleted(gpgpuTaskCount, bcsStates);
|
||||||
|
}
|
||||||
|
|
||||||
unsigned int lastCommandType;
|
unsigned int lastCommandType;
|
||||||
std::vector<Kernel *> lastEnqueuedKernels;
|
std::vector<Kernel *> lastEnqueuedKernels;
|
||||||
@@ -459,6 +465,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
|||||||
} overrideIsCacheFlushForBcsRequired;
|
} overrideIsCacheFlushForBcsRequired;
|
||||||
BuiltinOpParams kernelParams;
|
BuiltinOpParams kernelParams;
|
||||||
std::atomic<TaskCountType> latestTaskCountWaited{std::numeric_limits<uint32_t>::max()};
|
std::atomic<TaskCountType> latestTaskCountWaited{std::numeric_limits<uint32_t>::max()};
|
||||||
|
std::atomic<uint32_t> isCompletedCalled = 0;
|
||||||
bool flushCalled = false;
|
bool flushCalled = false;
|
||||||
std::optional<WaitStatus> waitForAllEnginesReturnValue{};
|
std::optional<WaitStatus> waitForAllEnginesReturnValue{};
|
||||||
std::optional<WaitStatus> waitUntilCompleteReturnValue{};
|
std::optional<WaitStatus> waitUntilCompleteReturnValue{};
|
||||||
|
|||||||
Reference in New Issue
Block a user