Add logic for Events in multi-thread scenario

- Increment the internal refCount of each wait-list event when an enqueue is blocked, and decrement it after flushing.

Change-Id: I9e8f8d226897124a7e51f2473939d53868bef7a2
2025-09-15 13:01:45 +08:00 · 2018-12-04 14:18:17 +01:00
parent 0c2dc1b438
commit 1001f76085
4 changed files with 38 additions and 3 deletions
--- a/runtime/command_queue/enqueue_common.h
+++ b/runtime/command_queue/enqueue_common.h
@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017-2018 Intel Corporation
+ * Copyright (C) 2017-2019 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@ -697,6 +697,10 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
            (uint32_t)multiDispatchInfo.size());

        if (timestampPacketContainer.get()) {
+            for (cl_uint i = 0; i < eventsRequest.numEventsInWaitList; i++) {
+                auto event = castToObjectOrAbort<Event>(eventsRequest.eventWaitList[i]);
+                event->incRefInternal();
+            }
            cmd->setTimestampPacketNode(*timestampPacketContainer, *previousTimestampPacketNodes);
        }
        cmd->setEventsRequest(eventsRequest);
--- a/runtime/helpers/task_information.cpp
+++ b/runtime/helpers/task_information.cpp
@ -111,6 +111,14 @@ CommandComputeKernel::~CommandComputeKernel() {
        kernelOperation->doNotFreeISH = true;
    }
    kernel->decRefInternal();
+
+    auto &commandStreamReceiver = commandQueue.getCommandStreamReceiver();
+    if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
+        for (cl_event eventFromWaitList : eventsWaitlist) {
+            auto event = castToObjectOrAbort<Event>(eventFromWaitList);
+            event->decRefInternal();
+        }
+    }
 }

 CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminated) {
@ -220,6 +228,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
                                                      taskLevel,
                                                      dispatchFlags,
                                                      commandQueue.getDevice());
+
    commandQueue.waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp, false);
    if (printfHandler) {
        printfHandler.get()->printEnqueueOutput();
--- a/unit_tests/helpers/task_information_tests.cpp
+++ b/unit_tests/helpers/task_information_tests.cpp
@ -110,11 +110,16 @@ TEST(CommandTest, givenWaitlistRequestWhenCommandComputeKernelIsCreatedThenMakeL
    std::vector<Surface *> surfaces;
    auto kernelOperation = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream), UniqueIH(ih1), UniqueIH(ih2), UniqueIH(ih3),
                                               *device->getDefaultEngine().commandStreamReceiver->getInternalAllocationStorage());
-    MockCommandComputeKernel command(cmdQ, kernelOperation, surfaces, kernel);

    UserEvent event1, event2, event3;
    cl_event waitlist[] = {&event1, &event2};
    EventsRequest eventsRequest(2, waitlist, nullptr);
+
+    MockCommandComputeKernel command(cmdQ, kernelOperation, surfaces, kernel);
+
+    event1.incRefInternal();
+    event2.incRefInternal();
+
    command.setEventsRequest(eventsRequest);

    waitlist[1] = &event3;
--- a/unit_tests/helpers/timestamp_packet_tests.cpp
+++ b/unit_tests/helpers/timestamp_packet_tests.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018-2019 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@ -812,6 +812,23 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWhenEnqueueingBlockedThenMake
    EXPECT_TRUE(csr.isMadeResident(timestampPacketNode->getGraphicsAllocation()));
 }

+HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingBlockedThenVirtualEventIncrementsRefInternalAndDecrementsAfterCompleteEvent) {
+    auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
+    csr.timestampPacketWriteEnabled = true; // presumably what peekTimestampPacketWriteEnabled() reports; the release in ~CommandComputeKernel is gated on it
+    MockKernelWithInternals mockKernelWithInternals(*device, context.get());
+    auto mockKernel = mockKernelWithInternals.mockKernel;
+    auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
+
+    UserEvent userEvent; // status is not set before the enqueue; per the test name the enqueue is expected to block on it
+    cl_event waitlist = &userEvent;
+
+    auto internalCount = userEvent.getRefInternalCount(); // baseline taken from the wait-list event itself
+    cmdQ->enqueueKernel(mockKernel, 1, nullptr, gws, nullptr, 1, &waitlist, nullptr);
+    EXPECT_EQ(internalCount + 1, userEvent.getRefInternalCount()); // the blocked enqueue took one extra internal reference
+    userEvent.setStatus(CL_COMPLETE);
+    EXPECT_EQ(internalCount, userEvent.getRefInternalCount()); // compare with the same object the baseline came from, not the kernel
+}
+
 TEST_F(TimestampPacketTests, givenDispatchSizeWhenAskingForNewTimestampsThenObtainEnoughTags) {
    size_t dispatchSize = 3;