From cc1f4bed609e61139bbe7ee5b4f95aaaf076d9e3 Mon Sep 17 00:00:00 2001
From: Pawel Wilma <pawel.wilma@intel.com>
Date: Thu, 20 Dec 2018 16:32:47 +0100
Subject: [PATCH] Revert "Use GPU instead of CPU address in programming
 commands for HwTim(...)"

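This reverts commit 6202b2222bb5ac24104a2f607f861b0a9179a94d
("Use GPU instead of CPU address in programming commands for HwTimeStamps").

The profiling commands are once again programmed with the CPU address of
the HwTimeStamps fields, and the affected interfaces take HwTimeStamps
instead of TagNode<HwTimeStamps>.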

Change-Id: I085382d95538ae41068a21c628d606039bf9cdf0
---
 runtime/command_queue/enqueue_common.h         |  4 ++--
 runtime/command_queue/gpgpu_walker.h           |  4 ++--
 runtime/command_queue/gpgpu_walker.inl         | 10 +++++-----
 runtime/command_queue/hardware_interface.h     |  6 +++---
 runtime/command_queue/hardware_interface.inl   |  2 +-
 .../command_queue/hardware_interface_base.inl  |  4 ++--
 runtime/device_queue/device_queue.cpp          |  4 ++--
 runtime/device_queue/device_queue.h            |  6 ++----
 runtime/device_queue/device_queue_hw.h         |  2 +-
 runtime/device_queue/device_queue_hw.inl       |  5 ++---
 runtime/event/event.cpp                        |  2 +-
 runtime/helpers/task_information.h             |  4 +---
 .../device_queue/device_queue_hw_tests.cpp     | 18 ++++++++----------
 .../submit_blocked_parent_kernel_tests.cpp     | 12 ++++++------
 14 files changed, 38 insertions(+), 45 deletions(-)
diff --git a/runtime/command_queue/enqueue_common.h b/runtime/command_queue/enqueue_common.h
index e211340414..8991964c72 100644
--- a/runtime/command_queue/enqueue_common.h
+++ b/runtime/command_queue/enqueue_common.h
@@ -146,7 +146,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
     auto devQueue = this->getContext().getDefaultDeviceQueue();
     DeviceQueueHw<GfxFamily> *devQueueHw = castToObject<DeviceQueueHw<GfxFamily>>(devQueue);
 
-    TagNode<HwTimeStamps> *hwTimeStamps = nullptr;
+    HwTimeStamps *hwTimeStamps = nullptr;
 
     auto commandStreamRecieverOwnership = getCommandStreamReceiver().obtainUniqueOwnership();
 
@@ -230,7 +230,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
             }
             if (this->isProfilingEnabled()) {
                 // Get allocation for timestamps
-                hwTimeStamps = eventBuilder.getEvent()->getHwTimeStampNode();
+                hwTimeStamps = eventBuilder.getEvent()->getHwTimeStampNode()->tag;
                 if (this->isPerfCountersEnabled()) {
                     hwPerfCounter = eventBuilder.getEvent()->getHwPerfCounterNode()->tag;
                     // PERF COUNTER: copy current configuration from queue to event
diff --git a/runtime/command_queue/gpgpu_walker.h b/runtime/command_queue/gpgpu_walker.h
index 0e02d2b601..8e02d4a4da 100644
--- a/runtime/command_queue/gpgpu_walker.h
+++ b/runtime/command_queue/gpgpu_walker.h
@@ -139,11 +139,11 @@ class GpgpuWalkerHelper {
         const iOpenCL::SPatchThreadPayload &threadPayload);
 
     static void dispatchProfilingCommandsStart(
-        TagNode<HwTimeStamps> &hwTimeStamps,
+        HwTimeStamps &hwTimeStamps,
         OCLRT::LinearStream *commandStream);
 
     static void dispatchProfilingCommandsEnd(
-        TagNode<HwTimeStamps> &hwTimeStamps,
+        HwTimeStamps &hwTimeStamps,
         OCLRT::LinearStream *commandStream);
 
     static void dispatchPerfCountersNoopidRegisterCommands(
diff --git a/runtime/command_queue/gpgpu_walker.inl b/runtime/command_queue/gpgpu_walker.inl
index c16deec6e2..4b6275f10d 100644
--- a/runtime/command_queue/gpgpu_walker.inl
+++ b/runtime/command_queue/gpgpu_walker.inl
@@ -101,17 +101,17 @@ void GpgpuWalkerHelper<GfxFamily>::addAluReadModifyWriteRegister(
 
 template <typename GfxFamily>
 void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsStart(
-    TagNode<HwTimeStamps> &hwTimeStamps,
+    HwTimeStamps &hwTimeStamps,
     OCLRT::LinearStream *commandStream) {
     using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
 
     // PIPE_CONTROL for global timestamp
-    uint64_t TimeStampAddress = hwTimeStamps.getGraphicsAllocation()->getGpuAddress() + ptrDiff(&hwTimeStamps.tag->GlobalStartTS, hwTimeStamps.tag);
+    uint64_t TimeStampAddress = reinterpret_cast<uint64_t>(&(hwTimeStamps.GlobalStartTS));
 
     PipeControlHelper<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(commandStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, TimeStampAddress, 0llu);
 
     //MI_STORE_REGISTER_MEM for context local timestamp
-    TimeStampAddress = hwTimeStamps.getGraphicsAllocation()->getGpuAddress() + ptrDiff(&hwTimeStamps.tag->ContextStartTS, hwTimeStamps.tag);
+    TimeStampAddress = reinterpret_cast<uint64_t>(&(hwTimeStamps.ContextStartTS));
 
     //low part
     auto pMICmdLow = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
@@ -122,7 +122,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsStart(
 
 template <typename GfxFamily>
 void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsEnd(
-    TagNode<HwTimeStamps> &hwTimeStamps,
+    HwTimeStamps &hwTimeStamps,
     OCLRT::LinearStream *commandStream) {
 
     using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
@@ -133,7 +133,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsEnd(
     pPipeControlCmd->setCommandStreamerStallEnable(true);
 
     //MI_STORE_REGISTER_MEM for context local timestamp
-    uint64_t TimeStampAddress = hwTimeStamps.getGraphicsAllocation()->getGpuAddress() + ptrDiff(&hwTimeStamps.tag->ContextEndTS, hwTimeStamps.tag);
+    uint64_t TimeStampAddress = reinterpret_cast<uint64_t>(&(hwTimeStamps.ContextEndTS));
 
     //low part
     auto pMICmdLow = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
diff --git a/runtime/command_queue/hardware_interface.h b/runtime/command_queue/hardware_interface.h
index bc46e4ff82..ceb34d310c 100644
--- a/runtime/command_queue/hardware_interface.h
+++ b/runtime/command_queue/hardware_interface.h
@@ -40,7 +40,7 @@ class HardwareInterface {
         cl_uint numEventsInWaitList,
         const cl_event *eventWaitList,
         KernelOperation **blockedCommandsData,
-        TagNode<HwTimeStamps> *hwTimeStamps,
+        HwTimeStamps *hwTimeStamps,
         HwPerfCounter *hwPerfCounter,
         TimestampPacketContainer *previousTimestampPacketNodes,
         TimestampPacketContainer *currentTimestampPacketNodes,
@@ -69,13 +69,13 @@ class HardwareInterface {
     static void dispatchProfilingPerfStartCommands(
         const DispatchInfo &dispatchInfo,
         const MultiDispatchInfo &multiDispatchInfo,
-        TagNode<HwTimeStamps> *hwTimeStamps,
+        HwTimeStamps *hwTimeStamps,
         HwPerfCounter *hwPerfCounter,
         LinearStream *commandStream,
         CommandQueue &commandQueue);
 
     static void dispatchProfilingPerfEndCommands(
-        TagNode<HwTimeStamps> *hwTimeStamps,
+        HwTimeStamps *hwTimeStamps,
         HwPerfCounter *hwPerfCounter,
         LinearStream *commandStream,
         CommandQueue &commandQueue);
diff --git a/runtime/command_queue/hardware_interface.inl b/runtime/command_queue/hardware_interface.inl
index 021bcb8dec..03aeb30261 100644
--- a/runtime/command_queue/hardware_interface.inl
+++ b/runtime/command_queue/hardware_interface.inl
@@ -19,7 +19,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
     cl_uint numEventsInWaitList,
     const cl_event *eventWaitList,
     KernelOperation **blockedCommandsData,
-    TagNode<HwTimeStamps> *hwTimeStamps,
+    HwTimeStamps *hwTimeStamps,
     HwPerfCounter *hwPerfCounter,
     TimestampPacketContainer *previousTimestampPacketNodes,
     TimestampPacketContainer *currentTimestampPacketNodes,
diff --git a/runtime/command_queue/hardware_interface_base.inl b/runtime/command_queue/hardware_interface_base.inl
index 33c5645794..528a5d36a6 100644
--- a/runtime/command_queue/hardware_interface_base.inl
+++ b/runtime/command_queue/hardware_interface_base.inl
@@ -59,7 +59,7 @@ template <typename GfxFamily>
 inline void HardwareInterface<GfxFamily>::dispatchProfilingPerfStartCommands(
     const DispatchInfo &dispatchInfo,
     const MultiDispatchInfo &multiDispatchInfo,
-    TagNode<HwTimeStamps> *hwTimeStamps,
+    HwTimeStamps *hwTimeStamps,
     HwPerfCounter *hwPerfCounter,
     LinearStream *commandStream,
     CommandQueue &commandQueue) {
@@ -77,7 +77,7 @@ inline void HardwareInterface<GfxFamily>::dispatchProfilingPerfStartCommands(
 
 template <typename GfxFamily>
 inline void HardwareInterface<GfxFamily>::dispatchProfilingPerfEndCommands(
-    TagNode<HwTimeStamps> *hwTimeStamps,
+    HwTimeStamps *hwTimeStamps,
     HwPerfCounter *hwPerfCounter,
     LinearStream *commandStream,
     CommandQueue &commandQueue) {
diff --git a/runtime/device_queue/device_queue.cpp b/runtime/device_queue/device_queue.cpp
index 7065a33f1c..68580259dd 100644
--- a/runtime/device_queue/device_queue.cpp
+++ b/runtime/device_queue/device_queue.cpp
@@ -143,7 +143,7 @@ void DeviceQueue::initDeviceQueue() {
     igilEventPool->m_size = caps.maxOnDeviceEvents;
 }
 
-void DeviceQueue::setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, TagNode<HwTimeStamps> *hwTimeStamp) {
+void DeviceQueue::setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, HwTimeStamps *hwTimeStamp) {
     setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentCount);
     addExecutionModelCleanUpSection(parentKernel, hwTimeStamp, taskCount);
 }
@@ -152,7 +152,7 @@ void DeviceQueue::setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHea
     return;
 }
 
-void DeviceQueue::addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint32_t taskCount) {
+void DeviceQueue::addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount) {
     return;
 }
 
diff --git a/runtime/device_queue/device_queue.h b/runtime/device_queue/device_queue.h
index a8311781d4..de8765bcc8 100644
--- a/runtime/device_queue/device_queue.h
+++ b/runtime/device_queue/device_queue.h
@@ -22,8 +22,6 @@ class Event;
 struct MultiDispatchInfo;
 class SchedulerKernel;
 struct HwTimeStamps;
-template <class T>
-struct TagNode;
 
 template <>
 struct OpenCLObjectMapper<_device_queue> {
@@ -68,10 +66,10 @@ class DeviceQueue : public BaseObject<_device_queue> {
                                size_t paramValueSize, void *paramValue,
                                size_t *paramValueSizeRet);
 
-    void setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, TagNode<HwTimeStamps> *hwTimeStamp);
+    void setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, HwTimeStamps *hwTimeStamp);
 
     virtual void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount);
-    virtual void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint32_t taskCount);
+    virtual void addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount);
 
     MOCKABLE_VIRTUAL bool isEMCriticalSectionFree() {
         auto igilCmdQueue = reinterpret_cast<IGIL_CommandQueue *>(queueBuffer->getUnderlyingBuffer());
diff --git a/runtime/device_queue/device_queue_hw.h b/runtime/device_queue/device_queue_hw.h
index c7921922e0..ea5135fdd4 100644
--- a/runtime/device_queue/device_queue_hw.h
+++ b/runtime/device_queue/device_queue_hw.h
@@ -55,7 +55,7 @@ class DeviceQueueHw : public DeviceQueue {
 
     void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount) override;
 
-    void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint32_t taskCount) override;
+    void addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount) override;
     void resetDeviceQueue() override;
     void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) override;
 
diff --git a/runtime/device_queue/device_queue_hw.inl b/runtime/device_queue/device_queue_hw.inl
index 52e27d3596..8e51a656ed 100644
--- a/runtime/device_queue/device_queue_hw.inl
+++ b/runtime/device_queue/device_queue_hw.inl
@@ -12,7 +12,6 @@
 #include "runtime/helpers/preamble.h"
 #include "runtime/helpers/string.h"
 #include "runtime/memory_manager/memory_manager.h"
-#include "runtime/utilities/tag_allocator.h"
 
 namespace OCLRT {
 template <typename GfxFamily>
@@ -202,7 +201,7 @@ void DeviceQueueHw<GfxFamily>::buildSlbDummyCommands() {
 }
 
 template <typename GfxFamily>
-void DeviceQueueHw<GfxFamily>::addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint32_t taskCount) {
+void DeviceQueueHw<GfxFamily>::addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount) {
     // CleanUp Section
     auto offset = slbCS.getUsed();
     auto alignmentSize = alignUp(offset, MemoryConstants::pageSize) - offset;
@@ -216,7 +215,7 @@ void DeviceQueueHw<GfxFamily>::addExecutionModelCleanUpSection(Kernel *parentKer
     using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
 
     if (hwTimeStamp != nullptr) {
-        uint64_t TimeStampAddress = hwTimeStamp->getGraphicsAllocation()->getGpuAddress() + ptrDiff(&hwTimeStamp->tag->ContextCompleteTS, hwTimeStamp->tag);
+        uint64_t TimeStampAddress = (uint64_t)((uintptr_t) & (hwTimeStamp->ContextCompleteTS));
         igilQueue->m_controls.m_EventTimestampAddress = TimeStampAddress;
 
         addProfilingEndCmds(TimeStampAddress);
diff --git a/runtime/event/event.cpp b/runtime/event/event.cpp
index 4bf0702aee..2d25a502a4 100644
--- a/runtime/event/event.cpp
+++ b/runtime/event/event.cpp
@@ -451,7 +451,7 @@ void Event::submitCommand(bool abortTasks) {
         if ((this->isProfilingEnabled()) && (this->cmdQueue != nullptr)) {
             if (timeStampNode) {
                 this->cmdQueue->getCommandStreamReceiver().makeResident(*timeStampNode->getGraphicsAllocation());
-                cmdToProcess->timestamp = timeStampNode;
+                cmdToProcess->timestamp = timeStampNode->tag;
             }
             if (profilingCpuPath) {
                 setSubmitTimeStamp();
diff --git a/runtime/helpers/task_information.h b/runtime/helpers/task_information.h
index 16ef736700..abde814543 100644
--- a/runtime/helpers/task_information.h
+++ b/runtime/helpers/task_information.h
@@ -26,8 +26,6 @@ class Surface;
 class PrintfHandler;
 struct HwTimeStamps;
 class TimestampPacketContainer;
-template <class T>
-struct TagNode;
 
 enum MapOperationType {
     MAP,
@@ -44,7 +42,7 @@ class Command : public IFNode<Command> {
     virtual LinearStream *getCommandStream() {
         return nullptr;
     }
-    TagNode<HwTimeStamps> *timestamp = nullptr;
+    HwTimeStamps *timestamp = nullptr;
     CompletionStamp completionStamp = {};
 };
 
diff --git a/unit_tests/device_queue/device_queue_hw_tests.cpp b/unit_tests/device_queue/device_queue_hw_tests.cpp
index 0bf90281d7..ec180e9b27 100644
--- a/unit_tests/device_queue/device_queue_hw_tests.cpp
+++ b/unit_tests/device_queue/device_queue_hw_tests.cpp
@@ -7,7 +7,6 @@
 
 #include "hw_cmds.h"
 #include "runtime/helpers/options.h"
-#include "runtime/utilities/tag_allocator.h"
 #include "unit_tests/fixtures/device_host_queue_fixture.h"
 #include "unit_tests/fixtures/execution_model_fixture.h"
 #include "unit_tests/helpers/hw_parse.h"
@@ -353,19 +352,18 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSlb, AddEMCleanupSectionWithProfiling) {
     MockParentKernel *mockParentKernel = MockParentKernel::create(*pContext);
     uint32_t taskCount = 7;
 
-    auto hwTimeStamp = pCommandQueue->getCommandStreamReceiver().getEventTsAllocator()->getTag();
+    HwTimeStamps hwTimeStamp;
     mockDeviceQueueHw->buildSlbDummyCommands();
-    mockDeviceQueueHw->addExecutionModelCleanUpSection(mockParentKernel, hwTimeStamp, taskCount);
+    mockDeviceQueueHw->addExecutionModelCleanUpSection(mockParentKernel, &hwTimeStamp, taskCount);
 
-    uint32_t eventTimestampAddrLow = static_cast<uint32_t>(igilCmdQueue->m_controls.m_EventTimestampAddress & 0xFFFFFFFF);
-    uint32_t eventTimestampAddrHigh = static_cast<uint32_t>((igilCmdQueue->m_controls.m_EventTimestampAddress & 0xFFFFFFFF00000000) >> 32);
+    uint32_t eventTimestampLow = (uint32_t)(igilCmdQueue->m_controls.m_EventTimestampAddress & 0xFFFFFFFF);
+    uint32_t eventTimestampHigh = (uint32_t)((igilCmdQueue->m_controls.m_EventTimestampAddress & 0xFFFFFFFF00000000) >> 32);
 
-    uint64_t contextCompleteAddr = hwTimeStamp->getGraphicsAllocation()->getGpuAddress() + ptrDiff(&hwTimeStamp->tag->ContextCompleteTS, hwTimeStamp->tag);
-    uint32_t contextCompleteAddrLow = static_cast<uint32_t>(contextCompleteAddr & 0xFFFFFFFF);
-    uint32_t contextCompleteAddrHigh = static_cast<uint32_t>((contextCompleteAddr & 0xFFFFFFFF00000000) >> 32);
+    uint32_t contextCompleteLow = (uint32_t)((uint64_t)((uintptr_t)(&hwTimeStamp.ContextCompleteTS)) & 0xFFFFFFFF);
+    uint32_t contextCompleteHigh = (uint32_t)(((uint64_t)((uintptr_t)(&hwTimeStamp.ContextCompleteTS)) & 0xFFFFFFFF00000000) >> 32);
 
-    EXPECT_EQ(contextCompleteAddrLow, eventTimestampAddrLow);
-    EXPECT_EQ(contextCompleteAddrHigh, eventTimestampAddrHigh);
+    EXPECT_EQ(contextCompleteLow, eventTimestampLow);
+    EXPECT_EQ(contextCompleteHigh, eventTimestampHigh);
 
     HardwareParse hwParser;
     auto *slbCS = mockDeviceQueueHw->getSlbCS();
diff --git a/unit_tests/execution_model/submit_blocked_parent_kernel_tests.cpp b/unit_tests/execution_model/submit_blocked_parent_kernel_tests.cpp
index 4122645e2d..e60c2f16a1 100644
--- a/unit_tests/execution_model/submit_blocked_parent_kernel_tests.cpp
+++ b/unit_tests/execution_model/submit_blocked_parent_kernel_tests.cpp
@@ -8,7 +8,6 @@
 #include "runtime/command_queue/gpgpu_walker.h"
 #include "runtime/command_queue/hardware_interface.h"
 #include "runtime/event/hw_timestamps.h"
-#include "runtime/utilities/tag_allocator.h"
 #include "runtime/helpers/kernel_commands.h"
 #include "runtime/helpers/task_information.h"
 #include "unit_tests/mocks/mock_command_queue.h"
@@ -57,9 +56,9 @@ class MockDeviceQueueHwWithCriticalSectionRelease : public DeviceQueueHw<GfxFami
         indirectStateSetup = true;
         return BaseClass::setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentIDCount);
     }
-    void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint32_t taskCount) override {
+    void addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount) override {
         cleanupSectionAdded = true;
-        timestampAddedInCleanupSection = hwTimeStamp ? hwTimeStamp->tag : nullptr;
+        timestampAddedInCleanupSection = hwTimeStamp;
         return BaseClass::addExecutionModelCleanUpSection(parentKernel, hwTimeStamp, taskCount);
     }
     void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) override {
@@ -250,12 +249,13 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenBlockedParentKernelWithProfilingW
         std::vector<Surface *> surfaces;
         auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, std::unique_ptr<KernelOperation>(blockedCommandData), surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);
 
-        auto timestamp = pCmdQ->getCommandStreamReceiver().getEventTsAllocator()->getTag();
-        cmdComputeKernel->timestamp = timestamp;
+        HwTimeStamps timestamp;
+
+        cmdComputeKernel->timestamp = &timestamp;
         cmdComputeKernel->submit(0, false);
 
         EXPECT_TRUE(mockDevQueue.cleanupSectionAdded);
-        EXPECT_EQ(mockDevQueue.timestampAddedInCleanupSection, timestamp->tag);
+        EXPECT_EQ(mockDevQueue.timestampAddedInCleanupSection, &timestamp);
 
         delete cmdComputeKernel;
         delete parentKernel;