Pass eventWaitList to blocked command for semaphore programming

Change-Id: I8b56be03a7b89283f5368cf42d6788d70ebecdc7
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
2025-12-30 01:35:20 +08:00 · 2018-09-19 10:34:33 -07:00
parent 26006a8482
commit e06b370697
7 changed files with 91 additions and 82 deletions
--- a/runtime/command_queue/command_queue_hw.h
+++ b/runtime/command_queue/command_queue_hw.h
@@ -322,11 +322,11 @@ class CommandQueueHw : public CommandQueue {
                        bool &blocking,
                        const MultiDispatchInfo &multiDispatchInfo,
                        KernelOperation *blockedCommandsData,
-                        cl_uint numEventsInWaitList,
-                        const cl_event *eventWaitList,
+                        EventsRequest &eventsRequest,
                        bool slmUsed,
                        EventBuilder &externalEventBuilder,
                        std::unique_ptr<PrintfHandler> printfHandler);
+
  protected:
    MOCKABLE_VIRTUAL void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo){};
    MOCKABLE_VIRTUAL bool createAllocationForHostSurface(HostPtrSurface &surface);
--- a/runtime/command_queue/enqueue_common.h
+++ b/runtime/command_queue/enqueue_common.h
@@ -292,6 +292,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
        slmUsed = multiDispatchInfo.usesSlm();
    }

+    EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
    CompletionStamp completionStamp;
    if (!blockQueue) {
        if (parentKernel) {
@@ -340,8 +341,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
        auto submissionRequired = isCommandWithoutKernel(commandType) ? false : true;

        if (submissionRequired) {
-            EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, nullptr);
-
            completionStamp = enqueueNonBlocked<commandType>(
                surfacesForResidency,
                numSurfaceForResidency,
@@ -426,8 +425,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
            blocking,
            multiDispatchInfo,
            blockedCommandsData,
-            numEventsInWaitList,
-            eventWaitList,
+            eventsRequest,
            slmUsed,
            eventBuilder,
            std::move(printfHandler));
@@ -616,8 +614,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
    bool &blocking,
    const MultiDispatchInfo &multiDispatchInfo,
    KernelOperation *blockedCommandsData,
-    cl_uint numEventsInWaitList,
-    const cl_event *eventWaitList,
+    EventsRequest &eventsRequest,
    bool slmUsed,
    EventBuilder &externalEventBuilder,
    std::unique_ptr<PrintfHandler> printfHandler) {
@@ -678,7 +675,6 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
        auto kernelOperation = std::unique_ptr<KernelOperation>(blockedCommandsData); // marking ownership
        auto cmd = std::make_unique<CommandComputeKernel>(
            *this,
-            commandStreamReceiver,
            std::move(kernelOperation),
            allSurfaces,
            shouldFlushDC(commandType, printfHandler.get()),
@@ -692,10 +688,11 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
        if (timestampPacketNode) {
            cmd->setTimestampPacketNode(timestampPacketNode);
        }
+        cmd->setEventsRequest(eventsRequest);
        eventBuilder->getEvent()->setCommand(std::move(cmd));
    }

-    eventBuilder->addParentEvents(ArrayRef<const cl_event>(eventWaitList, numEventsInWaitList));
+    eventBuilder->addParentEvents(ArrayRef<const cl_event>(eventsRequest.eventWaitList, eventsRequest.numEventsInWaitList));
    eventBuilder->addParentEvent(this->virtualEvent);
    eventBuilder->finalize();

--- a/runtime/helpers/task_information.cpp
+++ b/runtime/helpers/task_information.cpp
@@ -89,30 +89,21 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
    return completionStamp;
 }

-CommandComputeKernel::CommandComputeKernel(CommandQueue &commandQueue, CommandStreamReceiver &commandStreamReceiver,
-                                           std::unique_ptr<KernelOperation> kernelOperation, std::vector<Surface *> &surfaces,
+CommandComputeKernel::CommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> kernelOperation, std::vector<Surface *> &surfaces,
                                           bool flushDC, bool usesSLM, bool ndRangeKernel, std::unique_ptr<PrintfHandler> printfHandler,
                                           PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount)
-    : commandQueue(commandQueue),
-      commandStreamReceiver(commandStreamReceiver),
-      kernelOperation(std::move(kernelOperation)),
-      flushDC(flushDC),
-      slmUsed(usesSLM),
-      NDRangeKernel(ndRangeKernel),
-      printfHandler(std::move(printfHandler)),
-      kernel(nullptr),
-      kernelCount(0) {
+    : commandQueue(commandQueue), kernelOperation(std::move(kernelOperation)), flushDC(flushDC), slmUsed(usesSLM),
+      NDRangeKernel(ndRangeKernel), printfHandler(std::move(printfHandler)), kernel(kernel),
+      kernelCount(kernelCount), preemptionMode(preemptionMode) {
    for (auto surface : surfaces) {
        this->surfaces.push_back(surface);
    }
-    this->kernel = kernel;
    UNRECOVERABLE_IF(nullptr == this->kernel);
    kernel->incRefInternal();
-    this->kernelCount = kernelCount;
-    this->preemptionMode = preemptionMode;
 }

 CommandComputeKernel::~CommandComputeKernel() {
+    auto &commandStreamReceiver = commandQueue.getDevice().getCommandStreamReceiver();
    if (timestampPacketNode) {
        auto allocator = commandStreamReceiver.getMemoryManager()->getTimestampPacketAllocator();
        allocator->returnTag(timestampPacketNode);
@@ -131,6 +122,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
    if (terminated) {
        return completionStamp;
    }
+    auto &commandStreamReceiver = commandQueue.getDevice().getCommandStreamReceiver();
    bool executionModelKernel = kernel->isParentKernel;
    auto devQueue = commandQueue.getContext().getDefaultDeviceQueue();

@@ -213,6 +205,9 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
    dispatchFlags.throttle = commandQueue.getThrottle();
    dispatchFlags.preemptionMode = preemptionMode;
    dispatchFlags.mediaSamplerRequired = kernel->isVmeKernel();
+    if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
+        dispatchFlags.outOfDeviceDependencies = &eventsRequest;
+    }

    DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);

--- a/runtime/helpers/task_information.h
+++ b/runtime/helpers/task_information.h
@@ -87,8 +87,7 @@ struct KernelOperation {

 class CommandComputeKernel : public Command {
  public:
-    CommandComputeKernel(CommandQueue &commandQueue, CommandStreamReceiver &commandStreamReceiver,
-                         std::unique_ptr<KernelOperation> kernelResources, std::vector<Surface *> &surfaces,
+    CommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> kernelResources, std::vector<Surface *> &surfaces,
                         bool flushDC, bool usesSLM, bool ndRangeKernel, std::unique_ptr<PrintfHandler> printfHandler,
                         PreemptionMode preemptionMode, Kernel *kernel = nullptr, uint32_t kernelCount = 0);

@@ -99,10 +98,10 @@ class CommandComputeKernel : public Command {
    LinearStream *getCommandStream() override { return kernelOperation->commandStream.get(); }

    void setTimestampPacketNode(TagNode<TimestampPacket> *node);
+    void setEventsRequest(EventsRequest &eventsRequest) { this->eventsRequest = eventsRequest; }

  private:
    CommandQueue &commandQueue;
-    CommandStreamReceiver &commandStreamReceiver;
    std::unique_ptr<KernelOperation> kernelOperation;
    std::vector<Surface *> surfaces;
    bool flushDC;
@@ -113,6 +112,7 @@ class CommandComputeKernel : public Command {
    uint32_t kernelCount;
    PreemptionMode preemptionMode;
    TagNode<TimestampPacket> *timestampPacketNode = nullptr;
+    EventsRequest eventsRequest = {0, nullptr, nullptr};
 };

 class CommandMarker : public Command {