TaskInformation cleanup

Change-Id: If723f477406ca13e40c77a9471de9b8e35beff1d
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
Related-To: NEO-3433
2025-09-15 13:01:45 +08:00 · 2019-07-22 21:28:59 +02:00
parent 1b7e70a817
commit 4dd3292922
7 changed files with 136 additions and 153 deletions
--- a/runtime/command_queue/command_queue.cpp
+++ b/runtime/command_queue/command_queue.cpp
@@ -497,7 +497,7 @@ void CommandQueue::enqueueBlockedMapUnmapOperation(const cl_event *eventWaitList
    }

    //store task data in event
-    auto cmd = std::unique_ptr<Command>(new CommandMapUnmap(opType, *memObj, copySize, copyOffset, readOnly, getGpgpuCommandStreamReceiver(), *this));
+    auto cmd = std::unique_ptr<Command>(new CommandMapUnmap(opType, *memObj, copySize, copyOffset, readOnly, *this));
    eventBuilder->getEvent()->setCommand(std::move(cmd));

    //bind output event with input events
--- a/runtime/command_queue/enqueue_common.h
+++ b/runtime/command_queue/enqueue_common.h
@@ -769,13 +769,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(

    if (multiDispatchInfo.empty()) {
        DEBUG_BREAK_IF(!isCommandWithoutKernel(commandType));
-        auto cmdSize = static_cast<uint32_t>(EnqueueOperation<GfxFamily>::getSizeRequiredCS(commandType,
-                                                                                            isProfilingEnabled(),
-                                                                                            isPerfCountersEnabled(),
-                                                                                            *this,
-                                                                                            nullptr));
-
-        auto cmd = std::make_unique<CommandMarker>(*this, getGpgpuCommandStreamReceiver(), commandType, cmdSize);
+        auto cmd = std::make_unique<CommandMarker>(*this);

        eventBuilder->getEvent()->setCommand(std::move(cmd));
    } else {
--- a/runtime/command_stream/command_stream_receiver.cpp
+++ b/runtime/command_stream/command_stream_receiver.cpp
@@ -216,7 +216,8 @@ bool CommandStreamReceiver::waitForCompletionWithTimeout(bool enableTimeout, int

 void CommandStreamReceiver::setTagAllocation(GraphicsAllocation *allocation) {
    this->tagAllocation = allocation;
-    this->tagAddress = allocation ? reinterpret_cast<uint32_t *>(allocation->getUnderlyingBuffer()) : nullptr;
+    UNRECOVERABLE_IF(allocation == nullptr);
+    this->tagAddress = reinterpret_cast<uint32_t *>(allocation->getUnderlyingBuffer());
 }

 FlushStamp CommandStreamReceiver::obtainCurrentFlushStamp() const {
--- a/runtime/helpers/task_information.cpp
+++ b/runtime/helpers/task_information.cpp
@@ -27,9 +27,9 @@ namespace NEO {
 template void KernelOperation::ResourceCleaner::operator()<LinearStream>(LinearStream *);
 template void KernelOperation::ResourceCleaner::operator()<IndirectHeap>(IndirectHeap *);

-CommandMapUnmap::CommandMapUnmap(MapOperationType op, MemObj &memObj, MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset, bool readOnly,
-                                 CommandStreamReceiver &csr, CommandQueue &cmdQ)
-    : memObj(memObj), copySize(copySize), copyOffset(copyOffset), readOnly(readOnly), csr(csr), cmdQ(cmdQ), op(op) {
+CommandMapUnmap::CommandMapUnmap(MapOperationType operationType, MemObj &memObj, MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset, bool readOnly,
+                                 CommandQueue &commandQueue)
+    : Command(commandQueue), memObj(memObj), copySize(copySize), copyOffset(copyOffset), readOnly(readOnly), operationType(operationType) {
    memObj.incRefInternal();
 }

@@ -39,40 +39,40 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
        return completionStamp;
    }

-    bool blocking = true;
-    auto commandStreamReceiverOwnership = csr.obtainUniqueOwnership();
-    auto &queueCommandStream = cmdQ.getCS(0);
+    auto &commandStreamReceiver = commandQueue.getGpgpuCommandStreamReceiver();
+    auto commandStreamReceiverOwnership = commandStreamReceiver.obtainUniqueOwnership();
+    auto &queueCommandStream = commandQueue.getCS(0);
    size_t offset = queueCommandStream.getUsed();

    DispatchFlags dispatchFlags;
-    dispatchFlags.blocking = blocking;
+    dispatchFlags.blocking = true;
    dispatchFlags.dcFlush = true;
    dispatchFlags.useSLM = true;
    dispatchFlags.guardCommandBufferWithPipeControl = true;
-    dispatchFlags.lowPriority = cmdQ.getPriority() == QueuePriority::LOW;
-    dispatchFlags.throttle = cmdQ.getThrottle();
-    dispatchFlags.preemptionMode = PreemptionHelper::taskPreemptionMode(cmdQ.getDevice(), nullptr);
-    dispatchFlags.multiEngineQueue = cmdQ.isMultiEngineQueue();
+    dispatchFlags.lowPriority = commandQueue.getPriority() == QueuePriority::LOW;
+    dispatchFlags.throttle = commandQueue.getThrottle();
+    dispatchFlags.preemptionMode = PreemptionHelper::taskPreemptionMode(commandQueue.getDevice(), nullptr);
+    dispatchFlags.multiEngineQueue = commandQueue.isMultiEngineQueue();

    DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);

-    gtpinNotifyPreFlushTask(&cmdQ);
+    gtpinNotifyPreFlushTask(&commandQueue);

-    completionStamp = csr.flushTask(queueCommandStream,
-                                    offset,
-                                    cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u),
-                                    cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u),
-                                    cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
-                                    taskLevel,
-                                    dispatchFlags,
-                                    cmdQ.getDevice());
+    completionStamp = commandStreamReceiver.flushTask(queueCommandStream,
+                                                      offset,
+                                                      commandQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u),
+                                                      commandQueue.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u),
+                                                      commandQueue.getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
+                                                      taskLevel,
+                                                      dispatchFlags,
+                                                      commandQueue.getDevice());

    if (!memObj.isMemObjZeroCopy()) {
-        cmdQ.waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp, false);
-        if (op == MAP) {
+        commandQueue.waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp, false);
+        if (operationType == MAP) {
            memObj.transferDataToHostPtr(copySize, copyOffset);
        } else if (!readOnly) {
-            DEBUG_BREAK_IF(op != UNMAP);
+            DEBUG_BREAK_IF(operationType != UNMAP);
            memObj.transferDataFromHostPtr(copySize, copyOffset);
        }
    }
@@ -85,7 +85,7 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
 CommandComputeKernel::CommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> kernelOperation, std::vector<Surface *> &surfaces,
                                           bool flushDC, bool usesSLM, bool ndRangeKernel, std::unique_ptr<PrintfHandler> printfHandler,
                                           PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount)
-    : commandQueue(commandQueue), kernelOperation(std::move(kernelOperation)), flushDC(flushDC), slmUsed(usesSLM),
+    : Command(commandQueue, kernelOperation), flushDC(flushDC), slmUsed(usesSLM),
      NDRangeKernel(ndRangeKernel), printfHandler(std::move(printfHandler)), kernel(kernel),
      kernelCount(kernelCount), preemptionMode(preemptionMode) {
    for (auto surface : surfaces) {
@@ -97,14 +97,6 @@ CommandComputeKernel::CommandComputeKernel(CommandQueue &commandQueue, std::uniq

 CommandComputeKernel::~CommandComputeKernel() {
    kernel->decRefInternal();
-
-    auto &commandStreamReceiver = commandQueue.getGpgpuCommandStreamReceiver();
-    if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
-        for (cl_event eventFromWaitList : eventsWaitlist) {
-            auto event = castToObjectOrAbort<Event>(eventFromWaitList);
-            event->decRefInternal();
-        }
-    }
 }

 CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminated) {
@@ -226,7 +218,20 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
    return completionStamp;
 }

-void CommandComputeKernel::setEventsRequest(EventsRequest &eventsRequest) {
+CompletionStamp &CommandMarker::submit(uint32_t taskLevel, bool terminated) {
+    if (terminated) {
+        return completionStamp;
+    }
+
+    auto &commandStreamReceiver = commandQueue.getGpgpuCommandStreamReceiver();
+    completionStamp.taskCount = commandStreamReceiver.peekTaskCount();
+    completionStamp.taskLevel = commandStreamReceiver.peekTaskLevel();
+    completionStamp.flushStamp = commandStreamReceiver.obtainCurrentFlushStamp();
+
+    return completionStamp;
+}
+
+void Command::setEventsRequest(EventsRequest &eventsRequest) {
    this->eventsRequest = eventsRequest;
    if (eventsRequest.numEventsInWaitList > 0) {
        eventsWaitlist.resize(eventsRequest.numEventsInWaitList);
@@ -236,7 +241,7 @@ void CommandComputeKernel::setEventsRequest(EventsRequest &eventsRequest) {
    }
 }

-void CommandComputeKernel::setTimestampPacketNode(TimestampPacketContainer &current, TimestampPacketContainer &previous) {
+void Command::setTimestampPacketNode(TimestampPacketContainer &current, TimestampPacketContainer &previous) {
    currentTimestampPacketNodes = std::make_unique<TimestampPacketContainer>();
    currentTimestampPacketNodes->assignAndIncrementNodesRefCounts(current);

@@ -244,15 +249,18 @@ void CommandComputeKernel::setTimestampPacketNode(TimestampPacketContainer &curr
    previousTimestampPacketNodes->assignAndIncrementNodesRefCounts(previous);
 }

-CompletionStamp &CommandMarker::submit(uint32_t taskLevel, bool terminated) {
-    if (terminated) {
-        return completionStamp;
+Command::~Command() {
+    auto &commandStreamReceiver = commandQueue.getGpgpuCommandStreamReceiver();
+    if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
+        for (cl_event &eventFromWaitList : eventsWaitlist) {
+            auto event = castToObjectOrAbort<Event>(eventFromWaitList);
+            event->decRefInternal();
+        }
    }
-
-    completionStamp.taskCount = csr.peekTaskCount();
-    completionStamp.taskLevel = csr.peekTaskLevel();
-    completionStamp.flushStamp = csr.obtainCurrentFlushStamp();
-
-    return completionStamp;
 }
+
+Command::Command(CommandQueue &commandQueue) : commandQueue(commandQueue) {}
+
+Command::Command(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation)
+    : commandQueue(commandQueue), kernelOperation(std::move(kernelOperation)) {}
 } // namespace NEO
--- a/runtime/helpers/task_information.h
+++ b/runtime/helpers/task_information.h
@@ -34,37 +34,6 @@ enum MapOperationType {
    UNMAP
 };

-class Command : public IFNode<Command> {
-  public:
-    // returns command's taskCount obtained from completion stamp
-    //   as acquired from command stream receiver
-    virtual CompletionStamp &submit(uint32_t taskLevel, bool terminated) = 0;
-
-    virtual ~Command() = default;
-    virtual LinearStream *getCommandStream() {
-        return nullptr;
-    }
-    TagNode<HwTimeStamps> *timestamp = nullptr;
-    CompletionStamp completionStamp = {};
-};
-
-class CommandMapUnmap : public Command {
-  public:
-    CommandMapUnmap(MapOperationType op, MemObj &memObj, MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset, bool readOnly,
-                    CommandStreamReceiver &csr, CommandQueue &cmdQ);
-    ~CommandMapUnmap() override = default;
-    CompletionStamp &submit(uint32_t taskLevel, bool terminated) override;
-
-  private:
-    MemObj &memObj;
-    MemObjSizeArray copySize;
-    MemObjOffsetArray copyOffset;
-    bool readOnly;
-    CommandStreamReceiver &csr;
-    CommandQueue &cmdQ;
-    MapOperationType op;
-};
-
 struct KernelOperation {
  protected:
    struct ResourceCleaner {
@@ -107,11 +76,55 @@ struct KernelOperation {
    size_t surfaceStateHeapSizeEM = 0;
 };

+class Command : public IFNode<Command> {
+  public:
+    // returns command's taskCount obtained from completion stamp
+    //   as acquired from command stream receiver
+    virtual CompletionStamp &submit(uint32_t taskLevel, bool terminated) = 0;
+
+    Command() = delete;
+    Command(CommandQueue &commandQueue);
+    Command(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation);
+
+    virtual ~Command();
+    virtual LinearStream *getCommandStream() {
+        return nullptr;
+    }
+    void setTimestampPacketNode(TimestampPacketContainer &current, TimestampPacketContainer &previous);
+    void setEventsRequest(EventsRequest &eventsRequest);
+
+    TagNode<HwTimeStamps> *timestamp = nullptr;
+    CompletionStamp completionStamp = {};
+
+  protected:
+    CommandQueue &commandQueue;
+    std::unique_ptr<KernelOperation> kernelOperation;
+    std::unique_ptr<TimestampPacketContainer> currentTimestampPacketNodes;
+    std::unique_ptr<TimestampPacketContainer> previousTimestampPacketNodes;
+    EventsRequest eventsRequest = {0, nullptr, nullptr};
+    std::vector<cl_event> eventsWaitlist;
+};
+
+class CommandMapUnmap : public Command {
+  public:
+    CommandMapUnmap(MapOperationType operationType, MemObj &memObj, MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset, bool readOnly,
+                    CommandQueue &commandQueue);
+    ~CommandMapUnmap() override = default;
+    CompletionStamp &submit(uint32_t taskLevel, bool terminated) override;
+
+  private:
+    MemObj &memObj;
+    MemObjSizeArray copySize;
+    MemObjOffsetArray copyOffset;
+    bool readOnly;
+    MapOperationType operationType;
+};
+
 class CommandComputeKernel : public Command {
  public:
    CommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> kernelResources, std::vector<Surface *> &surfaces,
                         bool flushDC, bool usesSLM, bool ndRangeKernel, std::unique_ptr<PrintfHandler> printfHandler,
-                         PreemptionMode preemptionMode, Kernel *kernel = nullptr, uint32_t kernelCount = 0);
+                         PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount);

    ~CommandComputeKernel() override;

@@ -119,12 +132,7 @@ class CommandComputeKernel : public Command {

    LinearStream *getCommandStream() override { return kernelOperation->commandStream.get(); }

-    void setTimestampPacketNode(TimestampPacketContainer &current, TimestampPacketContainer &previous);
-    void setEventsRequest(EventsRequest &eventsRequest);
-
  protected:
-    CommandQueue &commandQueue;
-    std::unique_ptr<KernelOperation> kernelOperation;
    std::vector<Surface *> surfaces;
    bool flushDC;
    bool slmUsed;
@@ -133,27 +141,12 @@ class CommandComputeKernel : public Command {
    Kernel *kernel;
    uint32_t kernelCount;
    PreemptionMode preemptionMode;
-    std::unique_ptr<TimestampPacketContainer> currentTimestampPacketNodes;
-    std::unique_ptr<TimestampPacketContainer> previousTimestampPacketNodes;
-    EventsRequest eventsRequest = {0, nullptr, nullptr};
-    std::vector<cl_event> eventsWaitlist;
 };

 class CommandMarker : public Command {
  public:
-    CommandMarker(CommandQueue &cmdQ, CommandStreamReceiver &csr, uint32_t clCommandType, uint32_t commandSize)
-        : cmdQ(cmdQ), csr(csr), clCommandType(clCommandType), commandSize(commandSize) {
-        (void)this->cmdQ;
-        (void)this->clCommandType;
-        (void)this->commandSize;
-    }
+    using Command::Command;

    CompletionStamp &submit(uint32_t taskLevel, bool terminated) override;
-
-  private:
-    CommandQueue &cmdQ;
-    CommandStreamReceiver &csr;
-    uint32_t clCommandType;
-    uint32_t commandSize;
 };
 } // namespace NEO
--- a/unit_tests/event/event_tests.cpp
+++ b/unit_tests/event/event_tests.cpp
@@ -595,15 +595,15 @@ TEST_F(InternalsEventTest, givenBlockedKernelWithPrintfWhenSubmittedThenPrintOut
 }

 TEST_F(InternalsEventTest, processBlockedCommandsMapOperation) {
+    auto pCmdQ = make_releaseable<CommandQueue>(mockContext, pDevice, nullptr);
    MockEvent<Event> event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0);
-    CommandQueue *pCmdQ = new CommandQueue(mockContext, pDevice, 0);

    auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
    auto buffer = new MockBuffer;

    MemObjSizeArray size = {{1, 1, 1}};
    MemObjOffsetArray offset = {{0, 0, 0}};
-    event.setCommand(std::unique_ptr<Command>(new CommandMapUnmap(MAP, *buffer, size, offset, false, csr, *pCmdQ)));
+    event.setCommand(std::unique_ptr<Command>(new CommandMapUnmap(MAP, *buffer, size, offset, false, *pCmdQ)));

    auto taskLevelBefore = csr.peekTaskLevel();

@@ -613,19 +613,18 @@ TEST_F(InternalsEventTest, processBlockedCommandsMapOperation) {

    EXPECT_EQ(taskLevelBefore + 1, taskLevelAfter);
    buffer->decRefInternal();
-    delete pCmdQ;
 }

 TEST_F(InternalsEventTest, processBlockedCommandsMapOperationNonZeroCopyBuffer) {
+    auto pCmdQ = make_releaseable<CommandQueue>(mockContext, pDevice, nullptr);
    MockEvent<Event> event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0);
-    CommandQueue *pCmdQ = new CommandQueue(mockContext, pDevice, 0);

    auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
    auto buffer = new UnalignedBuffer;

    MemObjSizeArray size = {{1, 1, 1}};
    MemObjOffsetArray offset = {{0, 0, 0}};
-    event.setCommand(std::unique_ptr<Command>(new CommandMapUnmap(MAP, *buffer, size, offset, false, csr, *pCmdQ)));
+    event.setCommand(std::unique_ptr<Command>(new CommandMapUnmap(MAP, *buffer, size, offset, false, *pCmdQ)));

    auto taskLevelBefore = csr.peekTaskLevel();

@@ -635,7 +634,6 @@ TEST_F(InternalsEventTest, processBlockedCommandsMapOperationNonZeroCopyBuffer)

    EXPECT_EQ(taskLevelBefore + 1, taskLevelAfter);
    buffer->decRefInternal();
-    delete pCmdQ;
 }

 uint32_t commands[] = {
@@ -712,7 +710,7 @@ TEST_F(InternalsEventTest, GIVENProfilingWHENMapOperationTHENTimesSet) {

    MemObjSizeArray size = {{1, 1, 1}};
    MemObjOffsetArray offset = {{0, 0, 0}};
-    event->setCommand(std::unique_ptr<Command>(new CommandMapUnmap(MAP, buffer, size, offset, false, csr, *pCmdQ)));
+    event->setCommand(std::unique_ptr<Command>(new CommandMapUnmap(MAP, buffer, size, offset, false, *pCmdQ)));

    auto taskLevelBefore = csr.peekTaskLevel();

@@ -729,16 +727,16 @@ TEST_F(InternalsEventTest, GIVENProfilingWHENMapOperationTHENTimesSet) {
 }

 TEST_F(InternalsEventTest, processBlockedCommandsUnMapOperation) {
-    MockEvent<Event> event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0);
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
-    CommandQueue *pCmdQ = new CommandQueue(mockContext, pDevice, props);
+    auto pCmdQ = make_releaseable<CommandQueue>(mockContext, pDevice, props);
+    MockEvent<Event> event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0);

    auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
    auto buffer = new UnalignedBuffer;

    MemObjSizeArray size = {{1, 1, 1}};
    MemObjOffsetArray offset = {{0, 0, 0}};
-    event.setCommand(std::unique_ptr<Command>(new CommandMapUnmap(UNMAP, *buffer, size, offset, false, csr, *pCmdQ)));
+    event.setCommand(std::unique_ptr<Command>(new CommandMapUnmap(UNMAP, *buffer, size, offset, false, *pCmdQ)));

    auto taskLevelBefore = csr.peekTaskLevel();

@@ -748,22 +746,20 @@ TEST_F(InternalsEventTest, processBlockedCommandsUnMapOperation) {

    EXPECT_EQ(taskLevelBefore + 1, taskLevelAfter);
    buffer->decRefInternal();
-    delete pCmdQ;
 }

 TEST_F(InternalsEventTest, givenBlockedMapCommandWhenSubmitIsCalledItReleasesMemObjectReference) {
-    MockEvent<Event> event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0);
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
    auto pCmdQ = std::make_unique<CommandQueue>(mockContext, pDevice, props);
+    MockEvent<Event> event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0);

-    auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
    auto buffer = new UnalignedBuffer;

    auto currentBufferRefInternal = buffer->getRefInternalCount();

    MemObjSizeArray size = {{1, 1, 1}};
    MemObjOffsetArray offset = {{0, 0, 0}};
-    event.setCommand(std::unique_ptr<Command>(new CommandMapUnmap(UNMAP, *buffer, size, offset, false, csr, *pCmdQ)));
+    event.setCommand(std::unique_ptr<Command>(new CommandMapUnmap(UNMAP, *buffer, size, offset, false, *pCmdQ)));
    EXPECT_EQ(currentBufferRefInternal + 1, buffer->getRefInternalCount());

    event.submitCommand(false);
@@ -772,16 +768,16 @@ TEST_F(InternalsEventTest, givenBlockedMapCommandWhenSubmitIsCalledItReleasesMem
    buffer->decRefInternal();
 }
 TEST_F(InternalsEventTest, processBlockedCommandsUnMapOperationNonZeroCopyBuffer) {
-    MockEvent<Event> event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0);
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
-    CommandQueue *pCmdQ = new CommandQueue(mockContext, pDevice, props);
+    auto pCmdQ = std::make_unique<CommandQueue>(mockContext, pDevice, props);
+    MockEvent<Event> event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0);

    auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
    auto buffer = new UnalignedBuffer;

    MemObjSizeArray size = {{1, 1, 1}};
    MemObjOffsetArray offset = {{0, 0, 0}};
-    event.setCommand(std::unique_ptr<Command>(new CommandMapUnmap(UNMAP, *buffer, size, offset, false, csr, *pCmdQ)));
+    event.setCommand(std::unique_ptr<Command>(new CommandMapUnmap(UNMAP, *buffer, size, offset, false, *pCmdQ)));

    auto taskLevelBefore = csr.peekTaskLevel();

@@ -791,7 +787,6 @@ TEST_F(InternalsEventTest, processBlockedCommandsUnMapOperationNonZeroCopyBuffer

    EXPECT_EQ(taskLevelBefore + 1, taskLevelAfter);
    buffer->decRefInternal();
-    delete pCmdQ;
 }

 HWTEST_F(InternalsEventTest, givenCpuProfilingPathWhenEnqueuedMarkerThenDontUseTimeStampNode) {
@@ -800,9 +795,7 @@ HWTEST_F(InternalsEventTest, givenCpuProfilingPathWhenEnqueuedMarkerThenDontUseT
    MockEvent<Event> *event = new MockEvent<Event>(pCmdQ, CL_COMMAND_MARKER, 0, 0);
    event->setCPUProfilingPath(true);

-    auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
-
-    event->setCommand(std::unique_ptr<Command>(new CommandMarker(*pCmdQ, csr, CL_COMMAND_MARKER, 4096u)));
+    event->setCommand(std::unique_ptr<Command>(new CommandMarker(*pCmdQ)));

    event->submitCommand(false);

@@ -845,9 +838,7 @@ HWTEST_F(InternalsEventWithPerfCountersTest, givenCpuProfilingPerfCountersPathWh
    MockEvent<Event> *event = new MockEvent<Event>(pCmdQ, CL_COMMAND_MARKER, 0, 0);
    event->setCPUProfilingPath(true);

-    auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
-
-    event->setCommand(std::unique_ptr<Command>(new CommandMarker(*pCmdQ, csr, CL_COMMAND_MARKER, 4096u)));
+    event->setCommand(std::unique_ptr<Command>(new CommandMarker(*pCmdQ)));

    event->submitCommand(false);

@@ -873,9 +864,8 @@ HWTEST_F(InternalsEventWithPerfCountersTest, givenCpuProfilingPerfCountersPathWh
    ASSERT_NE(nullptr, perfCounter);
    HwTimeStamps *timeStamps = event->getHwTimeStampNode()->tagForCpuAccess;
    ASSERT_NE(nullptr, timeStamps);
-    auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();

-    event->setCommand(std::unique_ptr<Command>(new CommandMarker(*pCmdQ, csr, CL_COMMAND_MARKER, 4096u)));
+    event->setCommand(std::unique_ptr<Command>(new CommandMarker(*pCmdQ)));

    event->submitCommand(false);

@@ -984,47 +974,44 @@ HWTEST_F(InternalsEventTest, GivenBufferWithoutZeroCopyOnCommandMapOrUnmapFlushe
    };

    int32_t executionStamp = 0;
+    auto csr = new MockCsr<FamilyType>(executionStamp, *pDevice->executionEnvironment);
+    pDevice->resetCommandStreamReceiver(csr);
+
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
-    CommandQueue *pCmdQ = new CommandQueue(mockContext, pDevice, props);
+    auto pCmdQ = make_releaseable<CommandQueue>(mockContext, pDevice, props);
+
    MockNonZeroCopyBuff buffer(executionStamp);
-    MockCsr<FamilyType> csr(executionStamp, *pDevice->executionEnvironment);
-    csr.setTagAllocation(pDevice->getDefaultEngine().commandStreamReceiver->getTagAllocation());
-    csr.createPreemptionAllocation();
-    csr.setupContext(*pDevice->getDefaultEngine().osContext);

    MemObjSizeArray size = {{4, 1, 1}};
    MemObjOffsetArray offset = {{0, 0, 0}};
-    auto commandMap = std::unique_ptr<Command>(new CommandMapUnmap(MAP, buffer, size, offset, false, csr, *pCmdQ));
+    auto commandMap = std::unique_ptr<Command>(new CommandMapUnmap(MAP, buffer, size, offset, false, *pCmdQ));
    EXPECT_EQ(0, executionStamp);
-    EXPECT_EQ(-1, csr.flushTaskStamp);
+    EXPECT_EQ(-1, csr->flushTaskStamp);
    EXPECT_EQ(-1, buffer.dataTransferedStamp);

-    auto latestSentFlushTaskCount = csr.peekLatestSentTaskCount();
+    auto latestSentFlushTaskCount = csr->peekLatestSentTaskCount();

    commandMap->submit(0, false);
    EXPECT_EQ(1, executionStamp);
-    EXPECT_EQ(0, csr.flushTaskStamp);
+    EXPECT_EQ(0, csr->flushTaskStamp);
    EXPECT_EQ(1, buffer.dataTransferedStamp);
-    auto latestSentFlushTaskCountAfterSubmit = csr.peekLatestSentTaskCount();
+    auto latestSentFlushTaskCountAfterSubmit = csr->peekLatestSentTaskCount();
    EXPECT_GT(latestSentFlushTaskCountAfterSubmit, latestSentFlushTaskCount);

    executionStamp = 0;
-    csr.flushTaskStamp = -1;
+    csr->flushTaskStamp = -1;
    buffer.dataTransferedStamp = -1;
    buffer.swapCopyDirection();

-    auto commandUnMap = std::unique_ptr<Command>(new CommandMapUnmap(UNMAP, buffer, size, offset, false, csr, *pCmdQ));
+    auto commandUnMap = std::unique_ptr<Command>(new CommandMapUnmap(UNMAP, buffer, size, offset, false, *pCmdQ));
    EXPECT_EQ(0, executionStamp);
-    EXPECT_EQ(-1, csr.flushTaskStamp);
+    EXPECT_EQ(-1, csr->flushTaskStamp);
    EXPECT_EQ(-1, buffer.dataTransferedStamp);
    commandUnMap->submit(0, false);
    EXPECT_EQ(1, executionStamp);
-    EXPECT_EQ(0, csr.flushTaskStamp);
+    EXPECT_EQ(0, csr->flushTaskStamp);
    EXPECT_EQ(1, buffer.dataTransferedStamp);
    EXPECT_EQ(nullptr, commandUnMap->getCommandStream());
-
-    pCmdQ->getGpgpuCommandStreamReceiver().setTagAllocation(nullptr);
-    delete pCmdQ;
 }

 TEST(EventCallback, CallbackAfterStatusOverrideUsesNewStatus) {
@ -1466,7 +1453,7 @@ HWTEST_F(InternalsEventTest, givenCommandWhenSubmitCalledThenUpdateFlushStamp) {

    FlushStamp expectedFlushStamp = 0;
    EXPECT_EQ(expectedFlushStamp, event->flushStamp->peekStamp());
-    event->setCommand(std::unique_ptr<Command>(new CommandMarker(*pCmdQ.get(), csr, CL_COMMAND_MARKER, 4096u)));
+    event->setCommand(std::unique_ptr<Command>(new CommandMarker(*pCmdQ)));
    event->submitCommand(false);
    EXPECT_EQ(csr.flushStamp->peekStamp(), event->flushStamp->peekStamp());
    delete event;
--- a/unit_tests/helpers/task_information_tests.cpp
+++ b/unit_tests/helpers/task_information_tests.cpp
@@ -29,7 +29,7 @@ TEST(CommandTest, mapUnmapSubmitWithoutTerminateFlagFlushesCsr) {

    MemObjSizeArray size = {{1, 1, 1}};
    MemObjOffsetArray offset = {{0, 0, 0}};
-    std::unique_ptr<Command> command(new CommandMapUnmap(MapOperationType::MAP, buffer, size, offset, false, csr, *cmdQ.get()));
+    std::unique_ptr<Command> command(new CommandMapUnmap(MapOperationType::MAP, buffer, size, offset, false, *cmdQ.get()));
    CompletionStamp completionStamp = command->submit(20, false);

    auto expectedTaskCount = initialTaskCount + 1;
@@ -46,7 +46,7 @@ TEST(CommandTest, mapUnmapSubmitWithTerminateFlagAbortsFlush) {

    MemObjSizeArray size = {{1, 1, 1}};
    MemObjOffsetArray offset = {{0, 0, 0}};
-    std::unique_ptr<Command> command(new CommandMapUnmap(MapOperationType::MAP, buffer, size, offset, false, csr, *cmdQ.get()));
+    std::unique_ptr<Command> command(new CommandMapUnmap(MapOperationType::MAP, buffer, size, offset, false, *cmdQ.get()));
    CompletionStamp completionStamp = command->submit(20, true);

    auto submitTaskCount = csr.peekTaskCount();
@@ -63,7 +63,7 @@ TEST(CommandTest, markerSubmitWithoutTerminateFlagDosntFlushCsr) {
    MockBuffer buffer;

    auto initialTaskCount = csr.peekTaskCount();
-    std::unique_ptr<Command> command(new CommandMarker(*cmdQ.get(), csr, CL_COMMAND_MARKER, 0));
+    std::unique_ptr<Command> command(new CommandMarker(*cmdQ));
    CompletionStamp completionStamp = command->submit(20, false);

    EXPECT_EQ(initialTaskCount, completionStamp.taskCount);
@@ -77,7 +77,7 @@ TEST(CommandTest, markerSubmitWithTerminateFlagAbortsFlush) {
    MockBuffer buffer;

    auto initialTaskCount = csr.peekTaskCount();
-    std::unique_ptr<Command> command(new CommandMarker(*cmdQ.get(), csr, CL_COMMAND_MARKER, 0));
+    std::unique_ptr<Command> command(new CommandMarker(*cmdQ));
    CompletionStamp completionStamp = command->submit(20, true);

    auto submitTaskCount = csr.peekTaskCount();