diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl
index 85a1e43315..292a92768f 100644
--- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl
+++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl
@@ -983,7 +983,7 @@ void CommandQueueHw<gfxCoreFamily>::dispatchTaskCountPostSyncByMiFlushDw(
     }
 
     uint64_t postSyncAddress = this->csr->getTagAllocation()->getGpuAddress();
-    uint32_t postSyncData = this->csr->peekTaskCount() + 1;
+    TaskCountType postSyncData = this->csr->peekTaskCount() + 1;
     const auto &hwInfo = this->device->getHwInfo();
 
     NEO::MiFlushArgs args;
@@ -1003,7 +1003,7 @@ void CommandQueueHw<gfxCoreFamily>::dispatchTaskCountPostSyncRegular(
     }
 
     uint64_t postSyncAddress = this->csr->getTagAllocation()->getGpuAddress();
-    uint32_t postSyncData = this->csr->peekTaskCount() + 1;
+    TaskCountType postSyncData = this->csr->peekTaskCount() + 1;
     const auto &hwInfo = this->device->getHwInfo();
 
     NEO::PipeControlArgs args;
diff --git a/level_zero/core/source/cmdqueue/cmdqueue_imp.h b/level_zero/core/source/cmdqueue/cmdqueue_imp.h
index cb20b3f1d0..484ec077a2 100644
--- a/level_zero/core/source/cmdqueue/cmdqueue_imp.h
+++ b/level_zero/core/source/cmdqueue/cmdqueue_imp.h
@@ -10,6 +10,7 @@
 #include "shared/source/command_container/cmdcontainer.h"
 #include "shared/source/command_stream/linear_stream.h"
 #include "shared/source/command_stream/submission_status.h"
+#include "shared/source/command_stream/task_count_helper.h"
 #include "shared/source/command_stream/wait_status.h"
 #include "shared/source/helpers/completion_stamp.h"
 
@@ -46,16 +47,16 @@ struct CommandQueueImp : public CommandQueue {
             return buffers[bufferUse];
         }
 
-        void setCurrentFlushStamp(uint32_t taskCount, NEO::FlushStamp flushStamp) {
+        void setCurrentFlushStamp(TaskCountType taskCount, NEO::FlushStamp flushStamp) { // stores the (taskCount, flushStamp) pair in the flushId slot selected by bufferUse
             flushId[bufferUse] = std::make_pair(taskCount, flushStamp);
         }
-        std::pair<uint32_t, NEO::FlushStamp> &getCurrentFlushStamp() {
+        std::pair<TaskCountType, NEO::FlushStamp> &getCurrentFlushStamp() { // mutable reference to the flushId slot selected by bufferUse
             return flushId[bufferUse];
         }
 
       private:
         NEO::GraphicsAllocation *buffers[BUFFER_ALLOCATION::COUNT];
-        std::pair<uint32_t, NEO::FlushStamp> flushId[BUFFER_ALLOCATION::COUNT];
+        std::pair<TaskCountType, NEO::FlushStamp> flushId[BUFFER_ALLOCATION::COUNT];
         BUFFER_ALLOCATION bufferUse = BUFFER_ALLOCATION::FIRST;
     };
     static constexpr size_t defaultQueueCmdBufferSize = 128 * MemoryConstants::kiloByte;
@@ -76,7 +77,7 @@ struct CommandQueueImp : public CommandQueue {
 
     Device *getDevice() { return device; }
 
-    uint32_t getTaskCount() { return taskCount; }
+    TaskCountType getTaskCount() { return taskCount; } // returns the current value of the atomic taskCount member
 
     NEO::CommandStreamReceiver *getCsr() { return csr; }
 
@@ -105,7 +106,7 @@ struct CommandQueueImp : public CommandQueue {
     NEO::CommandStreamReceiver *csr = nullptr;
     NEO::LinearStream commandStream{};
 
-    std::atomic<uint32_t> taskCount{0};
+    std::atomic<TaskCountType> taskCount{0};
 
     bool useKmdWaitFunction = false;
 };
diff --git a/level_zero/core/source/fence/fence.h b/level_zero/core/source/fence/fence.h
index 9ac34370f5..d4e6fbd155 100644
--- a/level_zero/core/source/fence/fence.h
+++ b/level_zero/core/source/fence/fence.h
@@ -7,6 +7,8 @@
 
 #pragma once
 
+#include "shared/source/command_stream/task_count_helper.h"
+
 #include <level_zero/ze_api.h>
 
 #include <chrono>
@@ -39,7 +41,7 @@ struct Fence : _ze_fence_handle_t {
 
     std::chrono::microseconds gpuHangCheckPeriod{500'000};
     CommandQueueImp *cmdQueue;
-    uint32_t taskCount = 0;
+    TaskCountType taskCount = 0;
 };
 
 } // namespace L0
diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp
index b7dbf3a14b..fdf7989956 100644
--- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp
+++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp
@@ -109,7 +109,7 @@ HWTEST_F(CommandQueueCreate, givenGpuHangOnSecondReserveWhenReservingLinearStrea
     auto firstAllocation = commandQueue->commandStream.getGraphicsAllocation();
     EXPECT_EQ(firstAllocation, commandQueue->buffers.getCurrentBufferAllocation());
 
-    uint32_t currentTaskCount = 33u;
+    TaskCountType currentTaskCount = 33u;
     auto &csr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
     csr.latestWaitForCompletionWithTimeoutTaskCount = currentTaskCount;
     csr.waitForTaskCountWithKmdNotifyFallbackReturnValue = WaitStatus::Ready;
@@ -150,7 +150,7 @@ HWTEST_F(CommandQueueCreate, whenReserveLinearStreamThenBufferAllocationSwitched
     auto firstAllocation = commandQueue->commandStream.getGraphicsAllocation();
     EXPECT_EQ(firstAllocation, commandQueue->buffers.getCurrentBufferAllocation());
 
-    uint32_t currentTaskCount = 33u;
+    TaskCountType currentTaskCount = 33u;
     auto &csr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
     csr.latestWaitForCompletionWithTimeoutTaskCount = currentTaskCount;
 
@@ -624,8 +624,8 @@ HWTEST_F(CommandQueueCreate, givenContainerWithAllocationsWhenResidencyContainer
                                                           false,
                                                           returnValue));
     ResidencyContainer container;
-    uint32_t peekTaskCountBefore = commandQueue->csr->peekTaskCount();
-    uint32_t flushedTaskCountBefore = commandQueue->csr->peekLatestFlushedTaskCount();
+    TaskCountType peekTaskCountBefore = commandQueue->csr->peekTaskCount();
+    TaskCountType flushedTaskCountBefore = commandQueue->csr->peekLatestFlushedTaskCount();
     NEO::SubmissionStatus ret = commandQueue->submitBatchBuffer(0, container, nullptr, false);
     EXPECT_EQ(csr->makeResidentCalledTimes, 0u);
     EXPECT_EQ(ret, NEO::SubmissionStatus::SUCCESS);
@@ -649,8 +649,8 @@ HWTEST_F(CommandQueueCreate, givenCommandStreamReceiverFailsThenSubmitBatchBuffe
                                                           false,
                                                           returnValue));
     ResidencyContainer container;
-    uint32_t peekTaskCountBefore = commandQueue->csr->peekTaskCount();
-    uint32_t flushedTaskCountBefore = commandQueue->csr->peekLatestFlushedTaskCount();
+    TaskCountType peekTaskCountBefore = commandQueue->csr->peekTaskCount();
+    TaskCountType flushedTaskCountBefore = commandQueue->csr->peekLatestFlushedTaskCount();
     NEO::SubmissionStatus ret = commandQueue->submitBatchBuffer(0, container, nullptr, false);
     EXPECT_EQ(ret, NEO::SubmissionStatus::FAILED);
     EXPECT_EQ(peekTaskCountBefore, commandQueue->csr->peekTaskCount());
@@ -1263,7 +1263,7 @@ HWTEST2_F(ExecuteCommandListTests, givenFailingSubmitBatchBufferThenWaitForCompl
     auto commandList = new CommandListCoreFamily<gfxCoreFamily>();
     commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
     auto commandListHandle = commandList->toHandle();
-    uint32_t flushedTaskCountPrior = csr->peekTaskCount();
+    TaskCountType flushedTaskCountPrior = csr->peekTaskCount();
     csr->setLatestFlushedTaskCount(flushedTaskCountPrior);
     auto res = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
     EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, res);
@@ -1902,7 +1902,7 @@ TEST_F(CommandQueueCreate, givenCreatedCommandQueueWhenGettingTrackingFlagsThenD
 struct SVMAllocsManagerMock : public NEO::SVMAllocsManager {
     using SVMAllocsManager::mtxForIndirectAccess;
     SVMAllocsManagerMock(MemoryManager *memoryManager) : NEO::SVMAllocsManager(memoryManager, false) {}
-    void makeIndirectAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t taskCount) override {
+    void makeIndirectAllocationsResident(CommandStreamReceiver &commandStreamReceiver, TaskCountType taskCount) override { // mock: only counts calls; both arguments are ignored
         makeIndirectAllocationsResidentCalledTimes++;
     }
     void addInternalAllocationsToResidencyContainer(uint32_t rootDeviceIndex,
diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp
index de55f2ba67..56f54665ec 100644
--- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp
+++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp
@@ -143,23 +143,23 @@ struct SynchronizeCsr : public NEO::UltCommandStreamReceiver<GfxFamily> {
     SynchronizeCsr(const NEO::ExecutionEnvironment &executionEnvironment, const DeviceBitfield deviceBitfield)
         : NEO::UltCommandStreamReceiver<GfxFamily>(const_cast<NEO::ExecutionEnvironment &>(executionEnvironment), 0, deviceBitfield) {
         CommandStreamReceiver::tagAddress = &tagAddressData[0];
-        memset(const_cast<uint32_t *>(CommandStreamReceiver::tagAddress), 0xFFFFFFFF, tagSize * sizeof(uint32_t));
+        memset(const_cast<TagAddressType *>(CommandStreamReceiver::tagAddress), 0xFFFFFFFF, tagSize * sizeof(TagAddressType)); // size by the array's element type so every byte of tagAddressData is set to 0xFF
     }
 
-    WaitStatus waitForCompletionWithTimeout(const WaitParams &params, uint32_t taskCountToWait) override {
+    WaitStatus waitForCompletionWithTimeout(const WaitParams &params, TaskCountType taskCountToWait) override { // test stub: records params.enableTimeout and activePartitions, counts the call, returns the preset result
         enableTimeoutSet = params.enableTimeout;
         waitForComplitionCalledTimes++;
         partitionCountSet = this->activePartitions;
         return waitForCompletionWithTimeoutResult;
     }
 
-    WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, NEO::QueueThrottle throttle) override {
+    WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, NEO::QueueThrottle throttle) override { // counts the call, then forwards all arguments unchanged to the UltCommandStreamReceiver implementation
         waitForTaskCountWithKmdNotifyFallbackCalled++;
         return NEO::UltCommandStreamReceiver<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, quickKmdSleep, throttle);
     }
 
     static constexpr size_t tagSize = 128;
-    static volatile uint32_t tagAddressData[tagSize];
+    static volatile TagAddressType tagAddressData[tagSize];
     uint32_t waitForComplitionCalledTimes = 0;
     uint32_t waitForTaskCountWithKmdNotifyFallbackCalled = 0;
     uint32_t partitionCountSet = 0;
@@ -168,7 +168,7 @@ struct SynchronizeCsr : public NEO::UltCommandStreamReceiver<GfxFamily> {
 };
 
 template <typename GfxFamily>
-volatile uint32_t SynchronizeCsr<GfxFamily>::tagAddressData[SynchronizeCsr<GfxFamily>::tagSize];
+volatile TagAddressType SynchronizeCsr<GfxFamily>::tagAddressData[SynchronizeCsr<GfxFamily>::tagSize];
 
 HWTEST_F(CommandQueueSynchronizeTest, givenCallToSynchronizeThenCorrectEnableTimeoutAndTimeoutValuesAreUsed) {
     auto csr = std::unique_ptr<SynchronizeCsr<FamilyType>>(new SynchronizeCsr<FamilyType>(*device->getNEODevice()->getExecutionEnvironment(),
@@ -301,7 +301,7 @@ HWTEST2_F(MultiTileCommandQueueSynchronizeTest, givenMultiplePartitionCountWhenC
         csr->createPreemptionAllocation();
     }
     EXPECT_NE(0u, csr->getPostSyncWriteOffset());
-    volatile uint32_t *tagAddress = csr->getTagAddress();
+    volatile TagAddressType *tagAddress = csr->getTagAddress();
     for (uint32_t i = 0; i < 2; i++) {
         *tagAddress = 0xFF;
         tagAddress = ptrOffset(tagAddress, csr->getPostSyncWriteOffset());
@@ -341,7 +341,7 @@ HWTEST2_F(MultiTileCommandQueueSynchronizeTest, givenCsrHasMultipleActivePartiti
         csr->createPreemptionAllocation();
     }
     EXPECT_NE(0u, csr->getPostSyncWriteOffset());
-    volatile uint32_t *tagAddress = csr->getTagAddress();
+    volatile TagAddressType *tagAddress = csr->getTagAddress();
     for (uint32_t i = 0; i < 2; i++) {
         *tagAddress = 0xFF;
         tagAddress = ptrOffset(tagAddress, csr->getPostSyncWriteOffset());
@@ -402,7 +402,7 @@ struct TestCmdQueueCsr : public NEO::UltCommandStreamReceiver<GfxFamily> {
         : NEO::UltCommandStreamReceiver<GfxFamily>(const_cast<NEO::ExecutionEnvironment &>(executionEnvironment), 0, deviceBitfield) {
     }
 
-    ADDMETHOD_NOBASE(waitForCompletionWithTimeout, NEO::WaitStatus, NEO::WaitStatus::NotReady, (const WaitParams &params, uint32_t taskCountToWait));
+    ADDMETHOD_NOBASE(waitForCompletionWithTimeout, NEO::WaitStatus, NEO::WaitStatus::NotReady, (const WaitParams &params, TaskCountType taskCountToWait));
 };
 
 HWTEST_F(CommandQueueSynchronizeTest, givenSinglePartitionCountWhenWaitFunctionFailsThenReturnNotReady) {
@@ -490,7 +490,7 @@ HWTEST_F(CommandQueueSynchronizeTest, givenSynchronousCommandQueueWhenTagUpdateF
 
     auto pipeControls = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
     size_t pipeControlsPostSyncNumber = 0u;
-    uint32_t expectedData = commandQueue->getCsr()->peekTaskCount();
+    TaskCountType expectedData = commandQueue->getCsr()->peekTaskCount();
     for (size_t i = 0; i < pipeControls.size(); i++) {
         auto pipeControl = reinterpret_cast<PIPE_CONTROL *>(*pipeControls[i]);
         if (pipeControl->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
@@ -792,7 +792,7 @@ HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenHandleScratchSpaceThenP
                           uint32_t scratchSlot,
                           uint32_t requiredPerThreadScratchSize,
                           uint32_t requiredPerThreadPrivateScratchSize,
-                          uint32_t currentTaskCount,
+                          TaskCountType currentTaskCount,
                           OsContext &osContext,
                           bool &stateBaseAddressDirty,
                           bool &vfeStateDirty) override {
@@ -855,7 +855,7 @@ HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenHandleScratchSpaceAndHe
                           uint32_t scratchSlot,
                           uint32_t requiredPerThreadScratchSize,
                           uint32_t requiredPerThreadPrivateScratchSize,
-                          uint32_t currentTaskCount,
+                          TaskCountType currentTaskCount,
                           OsContext &osContext,
                           bool &stateBaseAddressDirty,
                           bool &vfeStateDirty) override {
@@ -908,7 +908,7 @@ HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenBindlessEnabledThenHand
         void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
                                                    uint32_t requiredPerThreadScratchSize,
                                                    uint32_t requiredPerThreadPrivateScratchSize,
-                                                   uint32_t currentTaskCount,
+                                                   TaskCountType currentTaskCount,
                                                    OsContext &osContext,
                                                    bool &stateBaseAddressDirty,
                                                    bool &vfeStateDirty,
diff --git a/level_zero/core/test/unit_tests/sources/event/test_event.cpp b/level_zero/core/test/unit_tests/sources/event/test_event.cpp
index 8641b3b789..1ca5cc7470 100644
--- a/level_zero/core/test/unit_tests/sources/event/test_event.cpp
+++ b/level_zero/core/test/unit_tests/sources/event/test_event.cpp
@@ -34,8 +34,8 @@ using namespace std::chrono_literals;
 
 namespace CpuIntrinsicsTests {
 extern std::atomic<uint32_t> pauseCounter;
-extern volatile uint32_t *pauseAddress;
-extern uint32_t pauseValue;
+extern volatile TagAddressType *pauseAddress;
+extern TaskCountType pauseValue;
 extern uint32_t pauseOffset;
 extern std::function<void()> setupPauseAddress;
 } // namespace CpuIntrinsicsTests
@@ -902,12 +902,12 @@ TEST_F(EventUsedPacketSignalSynchronizeTest, givenInfiniteTimeoutWhenWaitingForN
     const size_t eventPacketSize = event->getSinglePacketSize();
     const size_t eventCompletionOffset = event->getContextStartOffset();
 
-    VariableBackup<volatile uint32_t *> backupPauseAddress(&CpuIntrinsicsTests::pauseAddress);
-    VariableBackup<uint32_t> backupPauseValue(&CpuIntrinsicsTests::pauseValue, Event::STATE_CLEARED);
+    VariableBackup<volatile TagAddressType *> backupPauseAddress(&CpuIntrinsicsTests::pauseAddress);
+    VariableBackup<TaskCountType> backupPauseValue(&CpuIntrinsicsTests::pauseValue, Event::STATE_CLEARED);
     VariableBackup<uint32_t> backupPauseOffset(&CpuIntrinsicsTests::pauseOffset);
     VariableBackup<std::function<void()>> backupSetupPauseAddress(&CpuIntrinsicsTests::setupPauseAddress);
     CpuIntrinsicsTests::pauseCounter = 0u;
-    CpuIntrinsicsTests::pauseAddress = static_cast<uint32_t *>(ptrOffset(event->getHostAddress(), eventCompletionOffset));
+    CpuIntrinsicsTests::pauseAddress = static_cast<TagAddressType *>(ptrOffset(event->getHostAddress(), eventCompletionOffset));
 
     uint32_t *hostAddr = static_cast<uint32_t *>(ptrOffset(event->getHostAddress(), eventCompletionOffset));
     for (uint32_t i = 0; i < packetsInUse; i++) {
@@ -917,7 +917,7 @@ TEST_F(EventUsedPacketSignalSynchronizeTest, givenInfiniteTimeoutWhenWaitingForN
 
     CpuIntrinsicsTests::setupPauseAddress = [&]() {
         if (CpuIntrinsicsTests::pauseCounter > 10) {
-            volatile uint32_t *nextPacket = CpuIntrinsicsTests::pauseAddress;
+            volatile TagAddressType *nextPacket = CpuIntrinsicsTests::pauseAddress;
             for (uint32_t i = 0; i < packetsInUse; i++) {
                 *nextPacket = Event::STATE_SIGNALED;
                 nextPacket = ptrOffset(nextPacket, eventPacketSize);
@@ -938,12 +938,12 @@ TEST_F(EventUsedPacketSignalSynchronizeTest, givenInfiniteTimeoutWhenWaitingForO
     const size_t eventPacketSize = event->getSinglePacketSize();
     const size_t eventCompletionOffset = event->getContextEndOffset();
 
-    VariableBackup<volatile uint32_t *> backupPauseAddress(&CpuIntrinsicsTests::pauseAddress);
-    VariableBackup<uint32_t> backupPauseValue(&CpuIntrinsicsTests::pauseValue, Event::STATE_CLEARED);
+    VariableBackup<volatile TagAddressType *> backupPauseAddress(&CpuIntrinsicsTests::pauseAddress);
+    VariableBackup<TaskCountType> backupPauseValue(&CpuIntrinsicsTests::pauseValue, Event::STATE_CLEARED);
     VariableBackup<uint32_t> backupPauseOffset(&CpuIntrinsicsTests::pauseOffset);
     VariableBackup<std::function<void()>> backupSetupPauseAddress(&CpuIntrinsicsTests::setupPauseAddress);
     CpuIntrinsicsTests::pauseCounter = 0u;
-    CpuIntrinsicsTests::pauseAddress = static_cast<uint32_t *>(ptrOffset(event->getHostAddress(), eventCompletionOffset));
+    CpuIntrinsicsTests::pauseAddress = static_cast<TagAddressType *>(ptrOffset(event->getHostAddress(), eventCompletionOffset));
 
     uint32_t *hostAddr = static_cast<uint32_t *>(ptrOffset(event->getHostAddress(), eventCompletionOffset));
     for (uint32_t i = 0; i < packetsInUse; i++) {
@@ -953,7 +953,7 @@ TEST_F(EventUsedPacketSignalSynchronizeTest, givenInfiniteTimeoutWhenWaitingForO
 
     CpuIntrinsicsTests::setupPauseAddress = [&]() {
         if (CpuIntrinsicsTests::pauseCounter > 10) {
-            volatile uint32_t *nextPacket = CpuIntrinsicsTests::pauseAddress;
+            volatile TagAddressType *nextPacket = CpuIntrinsicsTests::pauseAddress;
             for (uint32_t i = 0; i < packetsInUse; i++) {
                 *nextPacket = Event::STATE_SIGNALED;
                 nextPacket = ptrOffset(nextPacket, eventPacketSize);
@@ -974,12 +974,12 @@ TEST_F(EventUsedPacketSignalSynchronizeTest, givenInfiniteTimeoutWhenWaitingForT
     const size_t eventPacketSize = event->getSinglePacketSize();
     const size_t eventCompletionOffset = event->getContextEndOffset();
 
-    VariableBackup<volatile uint32_t *> backupPauseAddress(&CpuIntrinsicsTests::pauseAddress);
-    VariableBackup<uint32_t> backupPauseValue(&CpuIntrinsicsTests::pauseValue, Event::STATE_CLEARED);
+    VariableBackup<volatile TagAddressType *> backupPauseAddress(&CpuIntrinsicsTests::pauseAddress);
+    VariableBackup<TaskCountType> backupPauseValue(&CpuIntrinsicsTests::pauseValue, Event::STATE_CLEARED);
     VariableBackup<uint32_t> backupPauseOffset(&CpuIntrinsicsTests::pauseOffset);
     VariableBackup<std::function<void()>> backupSetupPauseAddress(&CpuIntrinsicsTests::setupPauseAddress);
     CpuIntrinsicsTests::pauseCounter = 0u;
-    CpuIntrinsicsTests::pauseAddress = static_cast<uint32_t *>(ptrOffset(event->getHostAddress(), eventCompletionOffset));
+    CpuIntrinsicsTests::pauseAddress = static_cast<TagAddressType *>(ptrOffset(event->getHostAddress(), eventCompletionOffset));
 
     uint32_t *hostAddr = static_cast<uint32_t *>(ptrOffset(event->getHostAddress(), eventCompletionOffset));
     for (uint32_t i = 0; i < packetsInUse; i++) {
@@ -989,7 +989,7 @@ TEST_F(EventUsedPacketSignalSynchronizeTest, givenInfiniteTimeoutWhenWaitingForT
 
     CpuIntrinsicsTests::setupPauseAddress = [&]() {
         if (CpuIntrinsicsTests::pauseCounter > 10) {
-            volatile uint32_t *nextPacket = CpuIntrinsicsTests::pauseAddress;
+            volatile TagAddressType *nextPacket = CpuIntrinsicsTests::pauseAddress;
             for (uint32_t i = 0; i < packetsInUse; i++) {
                 *nextPacket = Event::STATE_SIGNALED;
                 nextPacket = ptrOffset(nextPacket, eventPacketSize);
@@ -2108,8 +2108,8 @@ HWTEST_F(EventTests,
 
     constexpr uint32_t iterations = 5;
 
-    VariableBackup<volatile uint32_t *> backupPauseAddress(&CpuIntrinsicsTests::pauseAddress);
-    VariableBackup<uint32_t> backupPauseValue(&CpuIntrinsicsTests::pauseValue, Event::STATE_CLEARED);
+    VariableBackup<volatile TagAddressType *> backupPauseAddress(&CpuIntrinsicsTests::pauseAddress);
+    VariableBackup<TaskCountType> backupPauseValue(&CpuIntrinsicsTests::pauseValue, Event::STATE_CLEARED);
     VariableBackup<uint32_t> backupPauseOffset(&CpuIntrinsicsTests::pauseOffset);
     VariableBackup<std::function<void()>> backupSetupPauseAddress(&CpuIntrinsicsTests::setupPauseAddress);
     neoDevice->getUltCommandStreamReceiver<FamilyType>().commandStreamReceiverType = CommandStreamReceiverType::CSR_TBX;
@@ -2124,7 +2124,7 @@ HWTEST_F(EventTests,
     if (event->isUsingContextEndOffset()) {
         eventCompletionOffset = event->getContextEndOffset();
     }
-    uint32_t *eventAddress = static_cast<uint32_t *>(ptrOffset(event->getHostAddress(), eventCompletionOffset));
+    TagAddressType *eventAddress = static_cast<TagAddressType *>(ptrOffset(event->getHostAddress(), eventCompletionOffset));
     *eventAddress = Event::STATE_INITIAL;
 
     CpuIntrinsicsTests::pauseCounter = 0u;
@@ -2132,7 +2132,7 @@ HWTEST_F(EventTests,
 
     CpuIntrinsicsTests::setupPauseAddress = [&]() {
         if (CpuIntrinsicsTests::pauseCounter >= iterations) {
-            volatile uint32_t *packet = CpuIntrinsicsTests::pauseAddress;
+            volatile TagAddressType *packet = CpuIntrinsicsTests::pauseAddress;
             *packet = Event::STATE_SIGNALED;
         }
     };
diff --git a/level_zero/core/test/unit_tests/sources/fence/test_fence.cpp b/level_zero/core/test/unit_tests/sources/fence/test_fence.cpp
index 73e3fbc4f7..e68310176c 100644
--- a/level_zero/core/test/unit_tests/sources/fence/test_fence.cpp
+++ b/level_zero/core/test/unit_tests/sources/fence/test_fence.cpp
@@ -24,8 +24,8 @@ using namespace std::chrono_literals;
 
 namespace CpuIntrinsicsTests {
 extern std::atomic<uint32_t> pauseCounter;
-extern volatile uint32_t *pauseAddress;
-extern uint32_t pauseValue;
+extern volatile TagAddressType *pauseAddress;
+extern TaskCountType pauseValue;
 extern uint32_t pauseOffset;
 extern std::function<void()> setupPauseAddress;
 } // namespace CpuIntrinsicsTests
@@ -247,14 +247,14 @@ TEST_F(FenceSynchronizeTest, givenInfiniteTimeoutWhenWaitingForFenceCompletionTh
 
     fence->taskCount = 1;
 
-    VariableBackup<volatile uint32_t *> backupPauseAddress(&CpuIntrinsicsTests::pauseAddress);
-    VariableBackup<uint32_t> backupPauseValue(&CpuIntrinsicsTests::pauseValue, 0);
+    VariableBackup<volatile TagAddressType *> backupPauseAddress(&CpuIntrinsicsTests::pauseAddress);
+    VariableBackup<TaskCountType> backupPauseValue(&CpuIntrinsicsTests::pauseValue, 0);
     VariableBackup<uint32_t> backupPauseOffset(&CpuIntrinsicsTests::pauseOffset);
     VariableBackup<std::function<void()>> backupSetupPauseAddress(&CpuIntrinsicsTests::setupPauseAddress);
     CpuIntrinsicsTests::pauseCounter = 0u;
     CpuIntrinsicsTests::pauseAddress = csr->getTagAddress();
 
-    volatile uint32_t *hostAddr = csr->getTagAddress();
+    volatile TagAddressType *hostAddr = csr->getTagAddress();
     for (uint32_t i = 0; i < activePartitions; i++) {
         *hostAddr = 0;
         hostAddr = ptrOffset(hostAddr, postSyncOffset);
@@ -262,7 +262,7 @@ TEST_F(FenceSynchronizeTest, givenInfiniteTimeoutWhenWaitingForFenceCompletionTh
 
     CpuIntrinsicsTests::setupPauseAddress = [&]() {
         if (CpuIntrinsicsTests::pauseCounter > 10) {
-            volatile uint32_t *nextPacket = CpuIntrinsicsTests::pauseAddress;
+            volatile TagAddressType *nextPacket = CpuIntrinsicsTests::pauseAddress;
             for (uint32_t i = 0; i < activePartitions; i++) {
                 *nextPacket = 1;
                 nextPacket = ptrOffset(nextPacket, postSyncOffset);
diff --git a/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp b/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp
index 675f9ba5bb..6f9245cb2a 100644
--- a/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp
+++ b/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp
@@ -2335,9 +2335,9 @@ HWTEST2_F(MultipleDevicePeerAllocationTest,
     EXPECT_NE(nullptr, ptr1);
 
     auto allocationData1 = svmManager->getSVMAlloc(ptr1);
-    uint32_t prevPeekTaskCount1 = allocationData1->gpuAllocations.getGraphicsAllocation(1u)->getTaskCount(csr0->getOsContext().getContextId());
+    TaskCountType prevPeekTaskCount1 = allocationData1->gpuAllocations.getGraphicsAllocation(1u)->getTaskCount(csr0->getOsContext().getContextId());
     svmManager->prepareIndirectAllocationForDestruction(allocationData1);
-    uint32_t postPeekTaskCount1 = allocationData1->gpuAllocations.getGraphicsAllocation(1u)->getTaskCount(csr0->getOsContext().getContextId());
+    TaskCountType postPeekTaskCount1 = allocationData1->gpuAllocations.getGraphicsAllocation(1u)->getTaskCount(csr0->getOsContext().getContextId());
 
     EXPECT_EQ(postPeekTaskCount1, prevPeekTaskCount1);
 
diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp
index e129989e30..d5e344de14 100644
--- a/opencl/source/command_queue/command_queue.cpp
+++ b/opencl/source/command_queue/command_queue.cpp
@@ -60,7 +60,7 @@ CommandQueue *CommandQueue::create(Context *context,
     return funcCreate(context, device, properties, internalUsage);
 }
 
-cl_int CommandQueue::getErrorCodeFromTaskCount(uint32_t taskCount) {
+cl_int CommandQueue::getErrorCodeFromTaskCount(TaskCountType taskCount) {
     switch (taskCount) {
     case CompletionStamp::gpuHang:
     case CompletionStamp::outOfDeviceMemory:
@@ -378,16 +378,16 @@ Device &CommandQueue::getDevice() const noexcept {
     return device->getDevice();
 }
 
-uint32_t CommandQueue::getHwTag() const {
-    uint32_t tag = *getHwTagAddress();
+TagAddressType CommandQueue::getHwTag() const { // reads the value currently stored at getHwTagAddress()
+    TagAddressType tag = *getHwTagAddress();
     return tag;
 }
 
-volatile uint32_t *CommandQueue::getHwTagAddress() const {
+volatile TagAddressType *CommandQueue::getHwTagAddress() const { // forwards to the GPGPU command stream receiver's getTagAddress()
     return getGpgpuCommandStreamReceiver().getTagAddress();
 }
 
-bool CommandQueue::isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState) {
+bool CommandQueue::isCompleted(TaskCountType gpgpuTaskCount, CopyEngineState bcsState) {
     DEBUG_BREAK_IF(getHwTag() == CompletionStamp::notReady);
 
     if (getGpgpuCommandStreamReceiver().testTaskCountReady(getHwTagAddress(), gpgpuTaskCount)) {
@@ -401,7 +401,7 @@ bool CommandQueue::isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState
     return false;
 }
 
-WaitStatus CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) {
+WaitStatus CommandQueue::waitUntilComplete(TaskCountType gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) {
     WAIT_ENTER()
 
     WaitStatus waitStatus{WaitStatus::Ready};
@@ -496,12 +496,12 @@ cl_int CommandQueue::getCommandQueueInfo(cl_command_queue_info paramName,
     return getQueueInfo(this, paramName, paramValueSize, paramValue, paramValueSizeRet);
 }
 
-uint32_t CommandQueue::getTaskLevelFromWaitList(uint32_t taskLevel,
-                                                cl_uint numEventsInWaitList,
-                                                const cl_event *eventWaitList) {
+TaskCountType CommandQueue::getTaskLevelFromWaitList(TaskCountType taskLevel,
+                                                     cl_uint numEventsInWaitList,
+                                                     const cl_event *eventWaitList) { // returns the max of taskLevel and each listed event's peekTaskLevel()
     for (auto iEvent = 0u; iEvent < numEventsInWaitList; ++iEvent) {
         auto pEvent = (Event *)(eventWaitList[iEvent]);
-        uint32_t eventTaskLevel = pEvent->peekTaskLevel();
+        TaskCountType eventTaskLevel = pEvent->peekTaskLevel(); // same type as taskLevel so std::max deduces a single template argument
         taskLevel = std::max(taskLevel, eventTaskLevel);
     }
     return taskLevel;
 }
@@ -862,13 +862,13 @@ cl_uint CommandQueue::getQueueFamilyIndex() const {
     }
 }
 
-void CommandQueue::updateBcsTaskCount(aub_stream::EngineType bcsEngineType, uint32_t newBcsTaskCount) {
+void CommandQueue::updateBcsTaskCount(aub_stream::EngineType bcsEngineType, TaskCountType newBcsTaskCount) { // writes the engine type and new task count into the bcsStates slot indexed by getBcsIndex(bcsEngineType)
     CopyEngineState &state = bcsStates[EngineHelpers::getBcsIndex(bcsEngineType)];
     state.engineType = bcsEngineType;
     state.taskCount = newBcsTaskCount;
 }
 
-uint32_t CommandQueue::peekBcsTaskCount(aub_stream::EngineType bcsEngineType) const {
+TaskCountType CommandQueue::peekBcsTaskCount(aub_stream::EngineType bcsEngineType) const { // returns the task count stored in the bcsStates slot indexed by getBcsIndex(bcsEngineType)
     const CopyEngineState &state = bcsStates[EngineHelpers::getBcsIndex(bcsEngineType)];
     return state.taskCount;
 }
diff --git a/opencl/source/command_queue/command_queue.h b/opencl/source/command_queue/command_queue.h
index 14824963b0..8dbf68c9f7 100644
--- a/opencl/source/command_queue/command_queue.h
+++ b/opencl/source/command_queue/command_queue.h
@@ -58,7 +58,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
                                 bool internalUsage,
                                 cl_int &errcodeRet);
 
-    static cl_int getErrorCodeFromTaskCount(uint32_t taskCount);
+    static cl_int getErrorCodeFromTaskCount(TaskCountType taskCount);
 
     CommandQueue() = delete;
 
@@ -199,19 +199,19 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
                                size_t paramValueSize, void *paramValue,
                                size_t *paramValueSizeRet);
 
-    uint32_t getHwTag() const;
+    TagAddressType getHwTag() const;
 
-    volatile uint32_t *getHwTagAddress() const;
+    volatile TagAddressType *getHwTagAddress() const;
 
-    bool isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState);
+    bool isCompleted(TaskCountType gpgpuTaskCount, CopyEngineState bcsState);
 
     bool isWaitForTimestampsEnabled() const;
-    virtual bool waitForTimestamps(Range<CopyEngineState> copyEnginesToWait, uint32_t taskCount, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) = 0;
+    virtual bool waitForTimestamps(Range<CopyEngineState> copyEnginesToWait, TaskCountType taskCount, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) = 0;
 
     MOCKABLE_VIRTUAL bool isQueueBlocked();
 
-    MOCKABLE_VIRTUAL WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait);
-    MOCKABLE_VIRTUAL WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) {
+    MOCKABLE_VIRTUAL WaitStatus waitUntilComplete(TaskCountType gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait);
+    MOCKABLE_VIRTUAL WaitStatus waitUntilComplete(TaskCountType gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) { // convenience overload: calls the six-argument form with cleanTemporaryAllocationList=true, skipWait=false
         return this->waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, true, false);
     }
     MOCKABLE_VIRTUAL WaitStatus waitForAllEngines(bool blockedQueue, PrintfHandler *printfHandler, bool cleanTemporaryAllocationsList);
@@ -219,9 +219,9 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
         return this->waitForAllEngines(blockedQueue, printfHandler, true);
     }
 
-    static uint32_t getTaskLevelFromWaitList(uint32_t taskLevel,
-                                             cl_uint numEventsInWaitList,
-                                             const cl_event *eventWaitList);
+    static TaskCountType getTaskLevelFromWaitList(TaskCountType taskLevel,
+                                                  cl_uint numEventsInWaitList,
+                                                  const cl_event *eventWaitList);
 
     void initializeGpgpu() const;
     void initializeGpgpuInternals() const;
@@ -330,8 +330,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
     template <typename PtrType>
     static PtrType convertAddressWithOffsetToGpuVa(PtrType ptr, InternalMemoryType memoryType, GraphicsAllocation &allocation);
 
-    void updateBcsTaskCount(aub_stream::EngineType bcsEngineType, uint32_t newBcsTaskCount);
-    uint32_t peekBcsTaskCount(aub_stream::EngineType bcsEngineType) const;
+    void updateBcsTaskCount(aub_stream::EngineType bcsEngineType, TaskCountType newBcsTaskCount);
+    TaskCountType peekBcsTaskCount(aub_stream::EngineType bcsEngineType) const;
 
     void updateLatestSentEnqueueType(EnqueueProperties::Operation newEnqueueType) { this->latestSentEnqueueType = newEnqueueType; }
     EnqueueProperties::Operation peekLatestSentEnqueueOperation() { return this->latestSentEnqueueType; }
@@ -343,10 +343,10 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
     void clearLastBcsPackets();
 
     // taskCount of last task
-    uint32_t taskCount = 0;
+    TaskCountType taskCount = 0;
 
     // current taskLevel. Used for determining if a PIPE_CONTROL is needed.
-    uint32_t taskLevel = 0;
+    TaskCountType taskLevel = 0;
 
     std::unique_ptr<FlushStampTracker> flushStamp;
 
@@ -370,7 +370,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
     void *enqueueMapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
     cl_int enqueueUnmapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest);
 
-    virtual void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType){};
+    virtual void obtainTaskLevelAndBlockedStatus(TaskCountType &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType){};
     bool isBlockedCommandStreamRequired(uint32_t commandType, const EventsRequest &eventsRequest, bool blockedQueue, bool isMarkerWithProfiling) const;
 
     MOCKABLE_VIRTUAL void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies, CommandStreamReceiver &csr);
diff --git a/opencl/source/command_queue/command_queue_hw.h b/opencl/source/command_queue/command_queue_hw.h
index 216ce6de07..00a3205603 100644
--- a/opencl/source/command_queue/command_queue_hw.h
+++ b/opencl/source/command_queue/command_queue_hw.h
@@ -375,7 +375,7 @@ class CommandQueueHw : public CommandQueue {
                                       TimestampPacketDependencies &timestampPacketDependencies,
                                       EventsRequest &eventsRequest,
                                       EventBuilder &eventBuilder,
-                                      uint32_t taskLevel,
+                                      TaskCountType taskLevel,
                                       PrintfHandler *printfHandler);
 
     void enqueueBlocked(uint32_t commandType,
@@ -399,7 +399,7 @@ class CommandQueueHw : public CommandQueue {
                                                 TimestampPacketDependencies &timestampPacketDependencies,
                                                 EventsRequest &eventsRequest,
                                                 EventBuilder &eventBuilder,
-                                                uint32_t taskLevel,
+                                                TaskCountType taskLevel,
                                                 CsrDependencies &csrDeps,
                                                 CommandStreamReceiver *bcsCsr);
     void processDispatchForCacheFlush(Surface **surfaces,
@@ -427,7 +427,7 @@ class CommandQueueHw : public CommandQueue {
 
     bool isCacheFlushCommand(uint32_t commandType) const override;
 
-    bool waitForTimestamps(Range<CopyEngineState> copyEnginesToWait, uint32_t taskCount, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) override;
+    bool waitForTimestamps(Range<CopyEngineState> copyEnginesToWait, TaskCountType taskCount, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) override;
 
     MOCKABLE_VIRTUAL bool isCacheFlushForBcsRequired() const;
 
@@ -481,8 +481,8 @@ class CommandQueueHw : public CommandQueue {
 
     bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const override;
 
-    bool isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType);
-    void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) override;
+    bool isTaskLevelUpdateRequired(const TaskCountType &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType);
+    void obtainTaskLevelAndBlockedStatus(TaskCountType &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) override;
     static void computeOffsetsValueForRectCommands(size_t *bufferOffset,
                                                    size_t *hostOffset,
                                                    const size_t *bufferOrigin,
diff --git a/opencl/source/command_queue/command_queue_hw_base.inl b/opencl/source/command_queue/command_queue_hw_base.inl
index 8040661212..26159e66d6 100644
--- a/opencl/source/command_queue/command_queue_hw_base.inl
+++ b/opencl/source/command_queue/command_queue_hw_base.inl
@@ -167,7 +167,7 @@ inline bool waitForTimestampsWithinContainer(TimestampPacketContainer *container
 }
 
 template <typename Family>
-bool CommandQueueHw<Family>::waitForTimestamps(Range<CopyEngineState> copyEnginesToWait, uint32_t taskCount, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) {
+bool CommandQueueHw<Family>::waitForTimestamps(Range<CopyEngineState> copyEnginesToWait, TaskCountType taskCount, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) {
     using TSPacketType = typename Family::TimestampPacketType;
     bool waited = false;
 
diff --git a/opencl/source/command_queue/copy_engine_state.h b/opencl/source/command_queue/copy_engine_state.h
index ee9dafd16b..b87e42acc7 100644
--- a/opencl/source/command_queue/copy_engine_state.h
+++ b/opencl/source/command_queue/copy_engine_state.h
@@ -7,12 +7,14 @@
 
 #pragma once
 
+#include "shared/source/command_stream/task_count_helper.h"
+
 #include "aubstream/engine_node.h"
 
 namespace NEO {
 struct CopyEngineState {
     aub_stream::EngineType engineType = aub_stream::EngineType::NUM_ENGINES;
-    uint32_t taskCount = 0;
+    TaskCountType taskCount = 0;
 
     bool isValid() const {
         return engineType != aub_stream::EngineType::NUM_ENGINES;
diff --git a/opencl/source/command_queue/cpu_data_transfer_handler.cpp b/opencl/source/command_queue/cpu_data_transfer_handler.cpp
index dd3e8fff95..165fd33286 100644
--- a/opencl/source/command_queue/cpu_data_transfer_handler.cpp
+++ b/opencl/source/command_queue/cpu_data_transfer_handler.cpp
@@ -56,7 +56,7 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie
     auto commandStreamReceiverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership();
 
     auto blockQueue = false;
-    auto taskLevel = 0u;
+    TaskCountType taskLevel = 0u;
     obtainTaskLevelAndBlockedStatus(taskLevel, eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, blockQueue, transferProperties.cmdType);
 
     DBG_LOG(LogTaskCounts, __FUNCTION__, "taskLevel", taskLevel);
diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h
index 169b5a90d4..dcd88b4ee0 100644
--- a/opencl/source/command_queue/enqueue_common.h
+++ b/opencl/source/command_queue/enqueue_common.h
@@ -156,7 +156,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
     auto commandStreamReceiverOwnership = computeCommandStreamReceiver.obtainUniqueOwnership();
 
     auto blockQueue = false;
-    auto taskLevel = 0u;
+    TaskCountType taskLevel = 0u;
     obtainTaskLevelAndBlockedStatus(taskLevel, numEventsInWaitList, eventWaitList, blockQueue, commandType);
 
     enqueueHandlerHook(commandType, multiDispatchInfo);
@@ -623,7 +623,7 @@ void CommandQueueHw<GfxFamily>::processDispatchForMarkerWithTimestampPacket(Comm
 }
 
 template <typename GfxFamily>
-void CommandQueueHw<GfxFamily>::obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) {
+void CommandQueueHw<GfxFamily>::obtainTaskLevelAndBlockedStatus(TaskCountType &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) {
     auto isQueueBlockedStatus = isQueueBlocked();
     taskLevel = getTaskLevelFromWaitList(this->taskLevel, numEventsInWaitList, eventWaitList);
     blockQueueStatus = (taskLevel == CompletionStamp::notReady) || isQueueBlockedStatus;
@@ -638,7 +638,7 @@ void CommandQueueHw<GfxFamily>::obtainTaskLevelAndBlockedStatus(unsigned int &ta
 }
 
 template <typename GfxFamily>
-bool CommandQueueHw<GfxFamily>::isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType) {
+bool CommandQueueHw<GfxFamily>::isTaskLevelUpdateRequired(const TaskCountType &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType) {
     bool updateTaskLevel = true;
     // if we are blocked by user event then no update
     if (taskLevel == CompletionStamp::notReady) {
@@ -681,7 +681,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
     TimestampPacketDependencies &timestampPacketDependencies,
     EventsRequest &eventsRequest,
     EventBuilder &eventBuilder,
-    uint32_t taskLevel,
+    TaskCountType taskLevel,
     PrintfHandler *printfHandler) {
 
     UNRECOVERABLE_IF(multiDispatchInfo.empty());
@@ -991,7 +991,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
     TimestampPacketDependencies &timestampPacketDependencies,
     EventsRequest &eventsRequest,
     EventBuilder &eventBuilder,
-    uint32_t taskLevel,
+    TaskCountType taskLevel,
     CsrDependencies &csrDeps,
     CommandStreamReceiver *bcsCsr) {
 
@@ -1234,7 +1234,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
     }
 
     auto blockQueue = false;
-    auto taskLevel = 0u;
+    TaskCountType taskLevel = 0u;
     obtainTaskLevelAndBlockedStatus(taskLevel, eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, blockQueue, cmdType);
     auto clearAllDependencies = queueDependenciesClearRequired();
 
diff --git a/opencl/source/event/async_events_handler.cpp b/opencl/source/event/async_events_handler.cpp
index 7619721e7b..374ba05bb7 100644
--- a/opencl/source/event/async_events_handler.cpp
+++ b/opencl/source/event/async_events_handler.cpp
@@ -38,7 +38,7 @@ void AsyncEventsHandler::registerEvent(Event *event) {
 }
 
 Event *AsyncEventsHandler::processList() {
-    uint32_t lowestTaskCount = CompletionStamp::notReady;
+    TaskCountType lowestTaskCount = CompletionStamp::notReady;
     Event *sleepCandidate = nullptr;
     pendingList.clear();
 
diff --git a/opencl/source/event/event.cpp b/opencl/source/event/event.cpp
index 84141fe10c..067856194b 100644
--- a/opencl/source/event/event.cpp
+++ b/opencl/source/event/event.cpp
@@ -8,6 +8,7 @@
 #include "opencl/source/event/event.h"
 
 #include "shared/source/command_stream/command_stream_receiver.h"
+#include "shared/source/command_stream/task_count_helper.h"
 #include "shared/source/device/device.h"
 #include "shared/source/helpers/aligned_memory.h"
 #include "shared/source/helpers/get_info.h"
@@ -30,13 +31,12 @@
 #include <algorithm>
 
 namespace NEO {
-
 Event::Event(
     Context *ctx,
     CommandQueue *cmdQueue,
     cl_command_type cmdType,
-    uint32_t taskLevel,
-    uint32_t taskCount)
+    TaskCountType taskLevel,
+    TaskCountType taskCount)
     : taskLevel(taskLevel),
       currentCmdQVirtualEvent(false),
       cmdToSubmit(nullptr),
@@ -90,8 +90,8 @@ Event::Event(
 Event::Event(
     CommandQueue *cmdQueue,
     cl_command_type cmdType,
-    uint32_t taskLevel,
-    uint32_t taskCount)
+    TaskCountType taskLevel,
+    TaskCountType taskCount)
     : Event(nullptr, cmdQueue, cmdType, taskLevel, taskCount) {
 }
 
@@ -224,7 +224,7 @@ void Event::setupBcs(aub_stream::EngineType bcsEngineType) {
     this->bcsState.engineType = bcsEngineType;
 }
 
-uint32_t Event::peekBcsTaskCountFromCommandQueue() {
+TaskCountType Event::peekBcsTaskCountFromCommandQueue() {
     if (bcsState.isValid()) {
         return this->cmdQueue->peekBcsTaskCount(bcsState.engineType);
     } else {
@@ -232,11 +232,11 @@ uint32_t Event::peekBcsTaskCountFromCommandQueue() {
     }
 }
 
-uint32_t Event::getCompletionStamp() const {
+TaskCountType Event::getCompletionStamp() const {
     return this->taskCount;
 }
 
-void Event::updateCompletionStamp(uint32_t gpgpuTaskCount, uint32_t bcsTaskCount, uint32_t tasklevel, FlushStamp flushStamp) {
+void Event::updateCompletionStamp(TaskCountType gpgpuTaskCount, TaskCountType bcsTaskCount, TaskCountType tasklevel, FlushStamp flushStamp) {
     this->taskCount = gpgpuTaskCount;
     this->bcsState.taskCount = bcsTaskCount;
     this->taskLevel = tasklevel;
@@ -500,7 +500,7 @@ void Event::unblockEventsBlockedByThis(int32_t transitionStatus) {
     (void)status;
     DEBUG_BREAK_IF(!(isStatusCompleted(status) || (peekIsSubmitted(status))));
 
-    uint32_t taskLevelToPropagate = CompletionStamp::notReady;
+    TaskCountType taskLevelToPropagate = CompletionStamp::notReady;
 
     if (isStatusCompletedByTermination(transitionStatus) == false) {
         // if we are event on top of the tree , obtain taskLevel from CSR
@@ -728,11 +728,11 @@ bool Event::areTimestampsCompleted() {
     return false;
 }
 
-uint32_t Event::getTaskLevel() {
+TaskCountType Event::getTaskLevel() {
     return taskLevel;
 }
 
-inline void Event::unblockEventBy(Event &event, uint32_t taskLevel, int32_t transitionStatus) {
+inline void Event::unblockEventBy(Event &event, TaskCountType taskLevel, int32_t transitionStatus) {
     int32_t numEventsBlockingThis = --parentCount;
     DEBUG_BREAK_IF(numEventsBlockingThis < 0);
 
@@ -901,7 +901,7 @@ bool Event::checkUserEventDependencies(cl_uint numEventsInWaitList, const cl_eve
     return userEventsDependencies;
 }
 
-uint32_t Event::peekTaskLevel() const {
+TaskCountType Event::peekTaskLevel() const {
     return taskLevel;
 }
 
diff --git a/opencl/source/event/event.h b/opencl/source/event/event.h
index 1111e484c4..7468754286 100644
--- a/opencl/source/event/event.h
+++ b/opencl/source/event/event.h
@@ -82,7 +82,7 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
     static constexpr cl_int executionAbortedDueToGpuHang = -777;
 
     Event(CommandQueue *cmdQueue, cl_command_type cmdType,
-          uint32_t taskLevel, uint32_t taskCount);
+          TaskCountType taskLevel, TaskCountType taskCount);
 
     Event(const Event &) = delete;
     Event &operator=(const Event &) = delete;
@@ -90,10 +90,10 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
     ~Event() override;
 
     void setupBcs(aub_stream::EngineType bcsEngineType);
-    uint32_t peekBcsTaskCountFromCommandQueue();
+    TaskCountType peekBcsTaskCountFromCommandQueue();
 
-    uint32_t getCompletionStamp() const;
-    void updateCompletionStamp(uint32_t taskCount, uint32_t bcsTaskCount, uint32_t tasklevel, FlushStamp flushStamp);
+    TaskCountType getCompletionStamp() const;
+    void updateCompletionStamp(TaskCountType taskCount, TaskCountType bcsTaskCount, TaskCountType tasklevel, FlushStamp flushStamp);
     cl_ulong getDelta(cl_ulong startTime,
                       cl_ulong endTime);
     void setCPUProfilingPath(bool isCPUPath) { this->profilingCpuPath = isCPUPath; }
@@ -130,9 +130,9 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
     TagNodeBase *getHwPerfCounterNode();
 
     std::unique_ptr<FlushStampTracker> flushStamp;
-    std::atomic<uint32_t> taskLevel;
+    std::atomic<TaskCountType> taskLevel;
 
-    uint32_t peekTaskLevel() const;
+    TaskCountType peekTaskLevel() const;
     void addChild(Event &e);
 
     virtual bool setStatus(cl_int status);
@@ -239,7 +239,7 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
         return cmdType;
     }
 
-    virtual uint32_t getTaskLevel();
+    virtual TaskCountType getTaskLevel();
 
     cl_int peekExecutionStatus() const {
         return executionStatus;
@@ -254,16 +254,16 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
         return (peekNumEventsBlockingThis() > 0);
     }
 
-    virtual void unblockEventBy(Event &event, uint32_t taskLevel, int32_t transitionStatus);
+    virtual void unblockEventBy(Event &event, TaskCountType taskLevel, int32_t transitionStatus);
 
-    void updateTaskCount(uint32_t gpgpuTaskCount, uint32_t bcsTaskCount) {
+    void updateTaskCount(TaskCountType gpgpuTaskCount, TaskCountType bcsTaskCount) {
         if (gpgpuTaskCount == CompletionStamp::notReady) {
             DEBUG_BREAK_IF(true);
             return;
         }
 
         this->bcsState.taskCount = bcsTaskCount;
-        uint32_t prevTaskCount = this->taskCount.exchange(gpgpuTaskCount);
+        TaskCountType prevTaskCount = this->taskCount.exchange(gpgpuTaskCount);
         if ((prevTaskCount != CompletionStamp::notReady) && (prevTaskCount > gpgpuTaskCount)) {
             this->taskCount = prevTaskCount;
             DEBUG_BREAK_IF(true);
@@ -281,7 +281,7 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
     virtual void updateExecutionStatus();
     void tryFlushEvent();
 
-    uint32_t peekTaskCount() const {
+    TaskCountType peekTaskCount() const {
         return this->taskCount;
     }
 
@@ -311,7 +311,7 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
 
   protected:
     Event(Context *ctx, CommandQueue *cmdQueue, cl_command_type cmdType,
-          uint32_t taskLevel, uint32_t taskCount);
+          TaskCountType taskLevel, TaskCountType taskCount);
 
     ECallbackTarget translateToCallbackTarget(cl_int execStatus) {
         switch (execStatus) {
@@ -394,6 +394,6 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
 
   private:
     // can be accessed only with updateTaskCount
-    std::atomic<uint32_t> taskCount;
+    std::atomic<TaskCountType> taskCount;
 };
 } // namespace NEO
diff --git a/opencl/source/event/user_event.cpp b/opencl/source/event/user_event.cpp
index 1631f46228..1d7b88ff4f 100644
--- a/opencl/source/event/user_event.cpp
+++ b/opencl/source/event/user_event.cpp
@@ -33,7 +33,7 @@ WaitStatus UserEvent::wait(bool blocking, bool useQuickKmdSleep) {
     return WaitStatus::Ready;
 }
 
-uint32_t UserEvent::getTaskLevel() {
+TaskCountType UserEvent::getTaskLevel() {
     if (peekExecutionStatus() == CL_COMPLETE) {
         return 0;
     }
@@ -64,8 +64,8 @@ WaitStatus VirtualEvent::wait(bool blocking, bool useQuickKmdSleep) {
     return WaitStatus::Ready;
 }
 
-uint32_t VirtualEvent::getTaskLevel() {
-    uint32_t taskLevel = 0;
+TaskCountType VirtualEvent::getTaskLevel() {
+    TaskCountType taskLevel = 0;
     if (cmdQueue != nullptr) {
         auto &csr = cmdQueue->getGpgpuCommandStreamReceiver();
         taskLevel = csr.peekTaskLevel();
diff --git a/opencl/source/event/user_event.h b/opencl/source/event/user_event.h
index e5c69027f3..3271599d9c 100644
--- a/opencl/source/event/user_event.h
+++ b/opencl/source/event/user_event.h
@@ -22,7 +22,7 @@ class UserEvent : public Event {
 
     void updateExecutionStatus() override;
 
-    uint32_t getTaskLevel() override;
+    TaskCountType getTaskLevel() override;
 
     bool isInitialEventStatus() const;
 };
@@ -39,6 +39,6 @@ class VirtualEvent : public Event {
 
     void updateExecutionStatus() override;
 
-    uint32_t getTaskLevel() override;
+    TaskCountType getTaskLevel() override;
 };
 } // namespace NEO
diff --git a/opencl/source/gtpin/gtpin_callbacks.cpp b/opencl/source/gtpin/gtpin_callbacks.cpp
index 5651bfa6ad..e2b1febec6 100644
--- a/opencl/source/gtpin/gtpin_callbacks.cpp
+++ b/opencl/source/gtpin/gtpin_callbacks.cpp
@@ -169,7 +169,7 @@ void gtpinNotifyPreFlushTask(void *pCmdQueue) {
     }
 }
 
-void gtpinNotifyFlushTask(uint32_t flushedTaskCount) {
+void gtpinNotifyFlushTask(TaskCountType flushedTaskCount) {
     if (isGTPinInitialized) {
         std::unique_lock<GTPinLockType> lock{kernelExecQueueLock};
         size_t numElems = kernelExecQueue.size();
@@ -185,7 +185,7 @@ void gtpinNotifyFlushTask(uint32_t flushedTaskCount) {
     }
 }
 
-void gtpinNotifyTaskCompletion(uint32_t completedTaskCount) {
+void gtpinNotifyTaskCompletion(TaskCountType completedTaskCount) {
     std::unique_lock<GTPinLockType> lock{kernelExecQueueLock};
     size_t numElems = kernelExecQueue.size();
     for (size_t n = 0; n < numElems;) {
diff --git a/opencl/source/gtpin/gtpin_defs.h b/opencl/source/gtpin/gtpin_defs.h
index e2f1766bb0..1f5584c303 100644
--- a/opencl/source/gtpin/gtpin_defs.h
+++ b/opencl/source/gtpin/gtpin_defs.h
@@ -20,7 +20,7 @@ struct GTPinKernelExec {
     cl_mem gtpinResource;
     CommandQueue *pCommandQueue;
     gtpin::command_buffer_handle_t commandBuffer;
-    uint32_t taskCount;
+    TaskCountType taskCount;
     bool isTaskCountValid;
     bool isResourceResident;
 
diff --git a/opencl/source/gtpin/gtpin_notify.h b/opencl/source/gtpin/gtpin_notify.h
index 5bcba6a907..86e87bf313 100644
--- a/opencl/source/gtpin/gtpin_notify.h
+++ b/opencl/source/gtpin/gtpin_notify.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -7,6 +7,8 @@
 
 #pragma once
 
+#include "shared/source/command_stream/task_count_helper.h"
+
 #include "CL/cl.h"
 
 #include <cstdint>
@@ -19,8 +21,8 @@ void gtpinNotifyContextDestroy(cl_context context);
 void gtpinNotifyKernelCreate(cl_kernel kernel);
 void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue);
 void gtpinNotifyPreFlushTask(void *pCmdQueue);
-void gtpinNotifyFlushTask(uint32_t flushedTaskCount);
-void gtpinNotifyTaskCompletion(uint32_t completedTaskCount);
+void gtpinNotifyFlushTask(TaskCountType flushedTaskCount);
+void gtpinNotifyTaskCompletion(TaskCountType completedTaskCount);
 void gtpinNotifyMakeResident(void *pKernel, void *pCommandStreamReceiver);
 void gtpinNotifyUpdateResidencyList(void *pKernel, void *pResidencyVector);
 void gtpinNotifyPlatformShutdown();
diff --git a/opencl/source/helpers/task_information.cpp b/opencl/source/helpers/task_information.cpp
index 31b4783774..aca6eda809 100644
--- a/opencl/source/helpers/task_information.cpp
+++ b/opencl/source/helpers/task_information.cpp
@@ -35,7 +35,7 @@ CommandMapUnmap::CommandMapUnmap(MapOperationType operationType, MemObj &memObj,
     memObj.incRefInternal();
 }
 
-CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
+CompletionStamp &CommandMapUnmap::submit(TaskCountType taskLevel, bool terminated) {
     DecRefInternalAtScopeEnd decRefInternalAtScopeEnd{memObj};
 
     if (terminated) {
@@ -130,7 +130,7 @@ CommandComputeKernel::~CommandComputeKernel() {
     kernel->decRefInternal();
 }
 
-CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminated) {
+CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool terminated) {
     if (terminated) {
         this->terminated = true;
         for (auto surface : surfaces) {
@@ -295,7 +295,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
     return completionStamp;
 }
 
-uint32_t CommandWithoutKernel::dispatchBlitOperation() {
+TaskCountType CommandWithoutKernel::dispatchBlitOperation() {
     auto bcsCsr = kernelOperation->bcsCsr;
     UNRECOVERABLE_IF(bcsCsr == nullptr);
 
@@ -322,7 +322,7 @@ uint32_t CommandWithoutKernel::dispatchBlitOperation() {
     return newTaskCount;
 }
 
-CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminated) {
+CompletionStamp &CommandWithoutKernel::submit(TaskCountType taskLevel, bool terminated) {
     if (terminated) {
         this->terminated = true;
         return completionStamp;
diff --git a/opencl/source/helpers/task_information.h b/opencl/source/helpers/task_information.h
index be87b0000d..decbf2e112 100644
--- a/opencl/source/helpers/task_information.h
+++ b/opencl/source/helpers/task_information.h
@@ -85,7 +85,7 @@ class Command : public IFNode<Command> {
   public:
     // returns command's taskCount obtained from completion stamp
     //   as acquired from command stream receiver
-    virtual CompletionStamp &submit(uint32_t taskLevel, bool terminated) = 0;
+    virtual CompletionStamp &submit(TaskCountType taskLevel, bool terminated) = 0;
 
     Command() = delete;
     Command(CommandQueue &commandQueue);
@@ -117,7 +117,7 @@ class CommandMapUnmap : public Command {
     CommandMapUnmap(MapOperationType operationType, MemObj &memObj, MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset, bool readOnly,
                     CommandQueue &commandQueue);
     ~CommandMapUnmap() override = default;
-    CompletionStamp &submit(uint32_t taskLevel, bool terminated) override;
+    CompletionStamp &submit(TaskCountType taskLevel, bool terminated) override;
 
   private:
     MemObj &memObj;
@@ -135,7 +135,7 @@ class CommandComputeKernel : public Command {
 
     ~CommandComputeKernel() override;
 
-    CompletionStamp &submit(uint32_t taskLevel, bool terminated) override;
+    CompletionStamp &submit(TaskCountType taskLevel, bool terminated) override;
 
     LinearStream *getCommandStream() override { return kernelOperation->commandStream.get(); }
     Kernel *peekKernel() const { return kernel; }
@@ -155,7 +155,7 @@ class CommandComputeKernel : public Command {
 class CommandWithoutKernel : public Command {
   public:
     using Command::Command;
-    CompletionStamp &submit(uint32_t taskLevel, bool terminated) override;
-    uint32_t dispatchBlitOperation();
+    CompletionStamp &submit(TaskCountType taskLevel, bool terminated) override;
+    TaskCountType dispatchBlitOperation();
 };
 } // namespace NEO
diff --git a/opencl/source/sharings/gl/gl_arb_sync_event.h b/opencl/source/sharings/gl/gl_arb_sync_event.h
index afc2dba36b..5770498a65 100644
--- a/opencl/source/sharings/gl/gl_arb_sync_event.h
+++ b/opencl/source/sharings/gl/gl_arb_sync_event.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -30,7 +30,7 @@ class GlArbSyncEvent : public Event {
     GlArbSyncEvent() = delete;
     ~GlArbSyncEvent() override;
 
-    void unblockEventBy(Event &event, uint32_t taskLevel, int32_t transitionStatus) override;
+    void unblockEventBy(Event &event, TaskCountType taskLevel, int32_t transitionStatus) override;
 
     static GlArbSyncEvent *create(Event &baseEvent);
 
diff --git a/opencl/source/sharings/gl/gl_sync_event.h b/opencl/source/sharings/gl/gl_sync_event.h
index 9ff8e0aa25..a5f8ff4481 100644
--- a/opencl/source/sharings/gl/gl_sync_event.h
+++ b/opencl/source/sharings/gl/gl_sync_event.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,7 +24,7 @@ class GlSyncEvent : public Event {
 
     static GlSyncEvent *create(Context &context, cl_GLsync sync, cl_int *errCode);
     void updateExecutionStatus() override;
-    uint32_t getTaskLevel() override;
+    TaskCountType getTaskLevel() override;
 
     bool isExternallySynchronized() const override { return true; }
 
diff --git a/opencl/source/sharings/gl/windows/gl_arb_sync_event_windows.cpp b/opencl/source/sharings/gl/windows/gl_arb_sync_event_windows.cpp
index 309d85d7fe..eb244c499b 100644
--- a/opencl/source/sharings/gl/windows/gl_arb_sync_event_windows.cpp
+++ b/opencl/source/sharings/gl/windows/gl_arb_sync_event_windows.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2020-2021 Intel Corporation
+ * Copyright (C) 2020-2022 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -201,7 +201,7 @@ GlArbSyncEvent *GlArbSyncEvent::create(Event &baseEvent) {
     return arbSyncEvent;
 }
 
-void GlArbSyncEvent::unblockEventBy(Event &event, uint32_t taskLevel, int32_t transitionStatus) {
+void GlArbSyncEvent::unblockEventBy(Event &event, TaskCountType taskLevel, int32_t transitionStatus) {
     DEBUG_BREAK_IF(&event != this->baseEvent);
     if ((transitionStatus > CL_SUBMITTED) || (transitionStatus < 0)) {
         return;
diff --git a/opencl/source/sharings/gl/windows/gl_sync_event_windows.cpp b/opencl/source/sharings/gl/windows/gl_sync_event_windows.cpp
index 25c0209b7f..c261898acc 100644
--- a/opencl/source/sharings/gl/windows/gl_sync_event_windows.cpp
+++ b/opencl/source/sharings/gl/windows/gl_sync_event_windows.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2020-2021 Intel Corporation
+ * Copyright (C) 2020-2022 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -54,7 +54,7 @@ void GlSyncEvent::updateExecutionStatus() {
     }
 }
 
-uint32_t GlSyncEvent::getTaskLevel() {
+TaskCountType GlSyncEvent::getTaskLevel() {
     if (peekExecutionStatus() == CL_COMPLETE) {
         return 0;
     }
diff --git a/opencl/test/unit_test/api/cl_enqueue_wait_for_events_tests.inl b/opencl/test/unit_test/api/cl_enqueue_wait_for_events_tests.inl
index 4ae2e2d141..478b68857b 100644
--- a/opencl/test/unit_test/api/cl_enqueue_wait_for_events_tests.inl
+++ b/opencl/test/unit_test/api/cl_enqueue_wait_for_events_tests.inl
@@ -131,7 +131,7 @@ TEST_F(clEnqueueWaitForEventsTests, GivenInvalidEventWhenClEnqueueWaitForEventsI
 
 struct GTPinMockCommandQueue : MockCommandQueue {
     GTPinMockCommandQueue(Context *context, MockClDevice *device) : MockCommandQueue(context, device, nullptr, false) {}
-    WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
+    WaitStatus waitUntilComplete(TaskCountType gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
         return MockCommandQueue::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, true, true);
     }
 
diff --git a/opencl/test/unit_test/aub_tests/command_stream/aub_command_stream_fixture.h b/opencl/test/unit_test/aub_tests/command_stream/aub_command_stream_fixture.h
index a11a131eff..c0e1b3f83e 100644
--- a/opencl/test/unit_test/aub_tests/command_stream/aub_command_stream_fixture.h
+++ b/opencl/test/unit_test/aub_tests/command_stream/aub_command_stream_fixture.h
@@ -99,7 +99,7 @@ class AUBCommandStreamFixture : public CommandStreamFixture {
         return graphicsAllocation;
     }
     CommandStreamReceiver *pCommandStreamReceiver = nullptr;
-    volatile uint32_t *pTagMemory = nullptr;
+    volatile TagAddressType *pTagMemory = nullptr;
 
   private:
     CommandQueue *commandQueue = nullptr;
diff --git a/opencl/test/unit_test/command_queue/command_queue_hw_2_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_hw_2_tests.cpp
index 85575e3a3e..52c0bcee7e 100644
--- a/opencl/test/unit_test/command_queue/command_queue_hw_2_tests.cpp
+++ b/opencl/test/unit_test/command_queue/command_queue_hw_2_tests.cpp
@@ -254,7 +254,7 @@ HWTEST_F(OOQueueHwTest, givenBlockedOutOfOrderCmdQueueAndAsynchronouslyCompleted
     class MockEventWithSetCompleteOnUpdate : public Event {
       public:
         MockEventWithSetCompleteOnUpdate(CommandQueue *cmdQueue, cl_command_type cmdType,
-                                         uint32_t taskLevel, uint32_t taskCount) : Event(cmdQueue, cmdType, taskLevel, taskCount) {
+                                         TaskCountType taskLevel, TaskCountType taskCount) : Event(cmdQueue, cmdType, taskLevel, taskCount) {
         }
         void updateExecutionStatus() override {
             setStatus(CL_COMPLETE);
diff --git a/opencl/test/unit_test/command_queue/command_queue_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_tests.cpp
index aae41c882c..b87838d4f2 100644
--- a/opencl/test/unit_test/command_queue/command_queue_tests.cpp
+++ b/opencl/test/unit_test/command_queue/command_queue_tests.cpp
@@ -1012,7 +1012,7 @@ struct WaitForQueueCompletionTests : public ::testing::Test {
     template <typename Family>
     struct MyCmdQueue : public CommandQueueHw<Family> {
         MyCmdQueue(Context *context, ClDevice *device) : CommandQueueHw<Family>(context, device, nullptr, false){};
-        WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override {
+        WaitStatus waitUntilComplete(TaskCountType gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override {
             requestedUseQuickKmdSleep = useQuickKmdSleep;
             waitUntilCompleteCounter++;
 
@@ -1069,17 +1069,17 @@ class CommandStreamReceiverHwMock : public CommandStreamReceiverHw<GfxFamily> {
                                 const DeviceBitfield deviceBitfield)
         : CommandStreamReceiverHw<GfxFamily>(executionEnvironment, rootDeviceIndex, deviceBitfield) {}
 
-    WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override {
+    WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override {
         waitForTaskCountWithKmdNotifyFallbackCounter++;
         return waitForTaskCountWithKmdNotifyFallbackReturnValue;
     }
 
-    WaitStatus waitForTaskCount(uint32_t requiredTaskCount) override {
+    WaitStatus waitForTaskCount(TaskCountType requiredTaskCount) override {
         waitForTaskCountCalledCounter++;
         return waitForTaskCountReturnValue;
     }
 
-    WaitStatus waitForTaskCountAndCleanTemporaryAllocationList(uint32_t requiredTaskCount) override {
+    WaitStatus waitForTaskCountAndCleanTemporaryAllocationList(TaskCountType requiredTaskCount) override {
         waitForTaskCountAndCleanTemporaryAllocationListCalledCounter++;
         return waitForTaskCountAndCleanTemporaryAllocationListReturnValue;
     }
@@ -1126,7 +1126,7 @@ HWTEST_F(WaitUntilCompletionTests, givenCleanTemporaryAllocationListEqualsFalseW
     CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ->getGpgpuCommandStreamReceiver();
     cmdQ->gpgpuEngine->commandStreamReceiver = cmdStream.get();
 
-    constexpr uint32_t taskCount = 0u;
+    constexpr TaskCountType taskCount = 0u;
     constexpr bool cleanTemporaryAllocationList = false;
     StackVec<CopyEngineState, bcsInfoMaskSize> activeBcsStates{};
 
@@ -1146,7 +1146,7 @@ HWTEST_F(WaitUntilCompletionTests, givenGpuHangAndCleanTemporaryAllocationListEq
     CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ->getGpgpuCommandStreamReceiver();
     cmdQ->gpgpuEngine->commandStreamReceiver = cmdStream.get();
 
-    constexpr uint32_t taskCount = 0u;
+    constexpr TaskCountType taskCount = 0u;
     constexpr bool cleanTemporaryAllocationList = true;
     StackVec<CopyEngineState, bcsInfoMaskSize> activeBcsStates{};
 
@@ -1165,7 +1165,7 @@ HWTEST_F(WaitUntilCompletionTests, givenEmptyBcsStatesAndSkipWaitEqualsTrueWhenW
     CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ->getGpgpuCommandStreamReceiver();
     cmdQ->gpgpuEngine->commandStreamReceiver = cmdStream.get();
 
-    constexpr uint32_t taskCount = 0u;
+    constexpr TaskCountType taskCount = 0u;
     constexpr bool skipWait = true;
     StackVec<CopyEngineState, bcsInfoMaskSize> activeBcsStates{};
 
@@ -1184,7 +1184,7 @@ HWTEST_F(WaitUntilCompletionTests, givenGpuHangAndSkipWaitEqualsFalseWhenWaiting
     CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ->getGpgpuCommandStreamReceiver();
     cmdQ->gpgpuEngine->commandStreamReceiver = cmdStream.get();
 
-    constexpr uint32_t taskCount = 0u;
+    constexpr TaskCountType taskCount = 0u;
     constexpr bool skipWait = false;
     StackVec<CopyEngineState, bcsInfoMaskSize> activeBcsStates{};
 
@@ -1212,7 +1212,7 @@ HWTEST_F(WaitUntilCompletionTests, givenGpuHangOnBcsCsrWhenWaitingUntilCompleteT
     cmdQ->gpgpuEngine->commandStreamReceiver = gpgpuCmdStream.get();
     cmdQ->bcsCsrToReturn = bcsCmdStream.get();
 
-    constexpr uint32_t taskCount = 0u;
+    constexpr TaskCountType taskCount = 0u;
     constexpr bool skipWait = false;
     StackVec<CopyEngineState, bcsInfoMaskSize> activeBcsStates{CopyEngineState{}};
 
@@ -1245,7 +1245,7 @@ HWTEST_F(WaitUntilCompletionTests, givenGpuHangOnBcsCsrWhenWaitingUntilCompleteT
     cmdQ->gpgpuEngine->commandStreamReceiver = gpgpuCmdStream.get();
     cmdQ->bcsCsrToReturn = bcsCmdStream.get();
 
-    constexpr uint32_t taskCount = 0u;
+    constexpr TaskCountType taskCount = 0u;
     constexpr bool skipWait = false;
     StackVec<CopyEngineState, bcsInfoMaskSize> activeBcsStates{CopyEngineState{}};
 
@@ -1279,7 +1279,7 @@ HWTEST_F(WaitUntilCompletionTests, givenSuccessOnBcsCsrWhenWaitingUntilCompleteT
     cmdQ->gpgpuEngine->commandStreamReceiver = gpgpuCmdStream.get();
     cmdQ->bcsCsrToReturn = bcsCmdStream.get();
 
-    constexpr uint32_t taskCount = 0u;
+    constexpr TaskCountType taskCount = 0u;
     constexpr bool skipWait = false;
     StackVec<CopyEngineState, bcsInfoMaskSize> activeBcsStates{CopyEngineState{}};
 
diff --git a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp
index 91e2955cbc..c29355a9ad 100644
--- a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp
+++ b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp
@@ -112,7 +112,7 @@ struct EnqueueHandlerWithAubSubCaptureTests : public EnqueueHandlerTest {
       public:
         MockCmdQWithAubSubCapture(Context *context, ClDevice *device) : CommandQueueHw<FamilyType>(context, device, nullptr, false) {}
 
-        WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override {
+        WaitStatus waitUntilComplete(TaskCountType gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override {
             waitUntilCompleteCalled = true;
             return CommandQueueHw<FamilyType>::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList, skipWait);
         }
diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp
index 507b998332..12cd927074 100644
--- a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp
+++ b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp
@@ -810,7 +810,7 @@ class MyCmdQ : public MockCommandQueueHw<FamilyType> {
                                                   auxTranslationDirection);
     }
 
-    WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override {
+    WaitStatus waitUntilComplete(TaskCountType gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override {
         waitCalled++;
         return MockCommandQueueHw<FamilyType>::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList, skipWait);
     }
diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_event_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_event_tests.cpp
index 69b465cdf0..a849716586 100644
--- a/opencl/test/unit_test/command_queue/enqueue_kernel_event_tests.cpp
+++ b/opencl/test/unit_test/command_queue/enqueue_kernel_event_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -83,7 +83,7 @@ TEST_F(EventTests, WhenWaitingForEventThenPipeControlIsNotInserted) {
     retVal = Event::waitForEvents(1, &event);
     EXPECT_EQ(CL_SUCCESS, retVal);
     //we expect event is completed
-    uint32_t taskCountOfEvent = pEvent->peekTaskCount();
+    TaskCountType taskCountOfEvent = pEvent->peekTaskCount();
     EXPECT_LE(taskCountOfEvent, pCmdQ->getHwTag());
     // no more tasks after WFE, no need to write PC
     EXPECT_EQ(pEvent->taskLevel + 1, csr.peekTaskLevel());
diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_mt_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_mt_tests.cpp
index ee9550f462..992beba26f 100644
--- a/opencl/test/unit_test/command_queue/enqueue_kernel_mt_tests.cpp
+++ b/opencl/test/unit_test/command_queue/enqueue_kernel_mt_tests.cpp
@@ -47,7 +47,7 @@ HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenFinishIsCalledThenBatchesS
         threads.push_back(std::thread(function));
     }
 
-    auto currentTaskCount = 0;
+    int64_t currentTaskCount = 0;
 
     startEnqueueProcess = true;
 
diff --git a/opencl/test/unit_test/command_queue/enqueue_map_buffer_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_map_buffer_tests.cpp
index 65c7071568..7c53dce283 100644
--- a/opencl/test/unit_test/command_queue/enqueue_map_buffer_tests.cpp
+++ b/opencl/test/unit_test/command_queue/enqueue_map_buffer_tests.cpp
@@ -291,7 +291,7 @@ HWTEST_F(EnqueueMapBufferTest, givenNonBlockingReadOnlyMapBufferOnZeroCopyBuffer
     MockCommandQueueHw<FamilyType> mockCmdQueue(context, pClDevice, nullptr);
 
     auto &commandStreamReceiver = mockCmdQueue.getGpgpuCommandStreamReceiver();
-    uint32_t taskCount = commandStreamReceiver.peekTaskCount();
+    TaskCountType taskCount = commandStreamReceiver.peekTaskCount();
     EXPECT_EQ(0u, taskCount);
 
     // enqueue something that can be finished...
@@ -478,7 +478,7 @@ TEST_F(EnqueueMapBufferTest, givenNonBlockingMapBufferAfterL3IsAlreadyFlushedThe
     EXPECT_NE(nullptr, buffer);
 
     auto &commandStreamReceiver = pCmdQ->getGpgpuCommandStreamReceiver();
-    uint32_t taskCount = commandStreamReceiver.peekTaskCount();
+    TaskCountType taskCount = commandStreamReceiver.peekTaskCount();
     EXPECT_EQ(0u, taskCount);
 
     // enqueue something that map buffer needs to wait for
@@ -561,7 +561,7 @@ HWTEST_F(EnqueueMapBufferTest, GivenBufferThatIsNotZeroCopyWhenNonBlockingMapIsC
     EXPECT_EQ(retVal, CL_SUCCESS);
 
     auto &commandStreamReceiver = mockCmdQueue.getGpgpuCommandStreamReceiver();
-    uint32_t taskCount = commandStreamReceiver.peekTaskCount();
+    TaskCountType taskCount = commandStreamReceiver.peekTaskCount();
     EXPECT_EQ(1u, taskCount);
 
     auto ptrResult = clEnqueueMapBuffer(
diff --git a/opencl/test/unit_test/command_queue/enqueue_map_image_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_map_image_tests.cpp
index 267b6b537a..ff50efbde5 100644
--- a/opencl/test/unit_test/command_queue/enqueue_map_image_tests.cpp
+++ b/opencl/test/unit_test/command_queue/enqueue_map_image_tests.cpp
@@ -342,7 +342,7 @@ HWTEST_F(EnqueueMapImageTest, givenNonReadOnlyMapWithOutEventWhenMappedThenSetEv
         }
     };
 
-    uint32_t taskCount = commandStreamReceiver.peekTaskCount();
+    TaskCountType taskCount = commandStreamReceiver.peekTaskCount();
     EXPECT_EQ(1u, taskCount);
 
     // enqueue something that can be finished...
@@ -711,7 +711,7 @@ TEST_F(EnqueueMapImageTest, givenBlockedCommandQueueWhenBlockingMapWith2DImageIs
     class MockEventWithSetCompleteOnUpdate : public Event {
       public:
         MockEventWithSetCompleteOnUpdate(CommandQueue *cmdQueue, cl_command_type cmdType,
-                                         uint32_t taskLevel, uint32_t taskCount) : Event(cmdQueue, cmdType, taskLevel, taskCount) {
+                                         TaskCountType taskLevel, TaskCountType taskCount) : Event(cmdQueue, cmdType, taskLevel, taskCount) {
         }
         void updateExecutionStatus() override {
             setStatus(CL_COMPLETE);
@@ -753,7 +753,7 @@ TEST_F(EnqueueMapImageTest, givenBlockedCommandQueueWhenBlockingMapWith1DImageIs
     class MockEventWithSetCompleteOnUpdate : public Event {
       public:
         MockEventWithSetCompleteOnUpdate(CommandQueue *cmdQueue, cl_command_type cmdType,
-                                         uint32_t taskLevel, uint32_t taskCount) : Event(cmdQueue, cmdType, taskLevel, taskCount) {
+                                         TaskCountType taskLevel, TaskCountType taskCount) : Event(cmdQueue, cmdType, taskLevel, taskCount) {
         }
         void updateExecutionStatus() override {
             setStatus(CL_COMPLETE);
diff --git a/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp
index 33dacdf184..2004a521b2 100644
--- a/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp
+++ b/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp
@@ -165,7 +165,7 @@ struct CreateAllocationForHostSurfaceCsr : public CommandStreamReceiverHw<GfxFam
 
     CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart,
                               const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh,
-                              uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override {
+                              TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override {
         return CompletionStamp{0u, 0u, static_cast<FlushStamp>(0u)};
     }
 };
diff --git a/opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp b/opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp
index ea16ebcc46..c664cc31c3 100644
--- a/opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp
+++ b/opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp
@@ -52,7 +52,7 @@ class SyncBufferEnqueueHandlerTest : public EnqueueHandlerTest {
 
         auto &commandStreamReceiver = pDevice->getGpgpuCommandStreamReceiver();
         pTagMemory = commandStreamReceiver.getTagAddress();
-        ASSERT_NE(nullptr, const_cast<uint32_t *>(pTagMemory));
+        ASSERT_NE(nullptr, const_cast<TagAddressType *>(pTagMemory));
 
         context = new NEO::MockContext(pClDevice);
     }
diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp
index 2cf09607ec..a35a1e178d 100644
--- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp
+++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp
@@ -164,7 +164,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEmptyQueueWhenFinishingThenTa
     MockContext ctx(pClDevice);
     MockCommandQueueHw<FamilyType> mockCmdQueue(&ctx, pClDevice, nullptr);
     auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
-    uint32_t taskCount = 0;
+    TaskCountType taskCount = 0;
     taskLevel = taskCount;
     mockCmdQueue.taskCount = taskCount;
     mockCmdQueue.taskLevel = taskCount;
@@ -203,7 +203,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenNonDcFlushWithInitialTaskCoun
     auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
     size_t gws = 1;
 
-    uint32_t taskCount = 0;
+    TaskCountType taskCount = 0;
     taskLevel = taskCount;
     mockCmdQueue.taskCount = taskCount;
     mockCmdQueue.taskLevel = taskCount;
@@ -237,7 +237,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenDcFlushWhenFinishingThenTaskC
     auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal);
     EXPECT_EQ(retVal, CL_SUCCESS);
 
-    uint32_t taskCount = 0;
+    TaskCountType taskCount = 0;
     taskLevel = taskCount;
     mockCmdQueue.taskCount = taskCount;
     mockCmdQueue.taskLevel = taskCount;
@@ -311,7 +311,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEventIsQueriedWhenEnqueuingTh
     auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal);
     EXPECT_EQ(retVal, CL_SUCCESS);
 
-    uint32_t taskCount = 0;
+    TaskCountType taskCount = 0;
     taskLevel = taskCount;
     commandQueue.taskCount = taskCount;
     commandQueue.taskLevel = taskCount;
@@ -346,7 +346,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenNonBlockingMapEnqueueWhenFini
     MockGraphicsAllocation allocation{cpuAllocation.get(), MemoryConstants::pageSize};
     AlignedBuffer mockBuffer{&ctx, &allocation};
 
-    uint32_t taskCount = 0;
+    TaskCountType taskCount = 0;
     taskLevel = taskCount;
     commandQueue.taskCount = taskCount;
     commandQueue.taskLevel = taskCount;
@@ -588,7 +588,7 @@ struct MockScratchController : public ScratchSpaceController {
                                  uint32_t scratchSlot,
                                  uint32_t requiredPerThreadScratchSize,
                                  uint32_t requiredPerThreadPrivateScratchSize,
-                                 uint32_t currentTaskCount,
+                                 TaskCountType currentTaskCount,
                                  OsContext &osContext,
                                  bool &stateBaseAddressDirty,
                                  bool &vfeStateDirty) override {
@@ -607,7 +607,7 @@ struct MockScratchController : public ScratchSpaceController {
                       uint32_t scratchSlot,
                       uint32_t requiredPerThreadScratchSize,
                       uint32_t requiredPerThreadPrivateScratchSize,
-                      uint32_t currentTaskCount,
+                      TaskCountType currentTaskCount,
                       OsContext &osContext,
                       bool &stateBaseAddressDirty,
                       bool &vfeStateDirty) override {
@@ -615,7 +615,7 @@ struct MockScratchController : public ScratchSpaceController {
     void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
                                                uint32_t requiredPerThreadScratchSize,
                                                uint32_t requiredPerThreadPrivateScratchSize,
-                                               uint32_t currentTaskCount,
+                                               TaskCountType currentTaskCount,
                                                OsContext &osContext,
                                                bool &stateBaseAddressDirty,
                                                bool &vfeStateDirty,
diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp
index f36762dc3f..27c8a6043a 100644
--- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp
+++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp
@@ -1867,7 +1867,7 @@ class UltCommandStreamReceiverForDispatchFlags : public UltCommandStreamReceiver
 
     CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart,
                               const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh,
-                              uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override {
+                              TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override {
         savedDispatchFlags = dispatchFlags;
         return BaseClass::flushTask(commandStream, commandStreamStart,
                                     dsh, ioh, ssh, taskLevel, dispatchFlags, device);
@@ -2043,7 +2043,7 @@ class MockCsrWithFailingFlush : public CommandStreamReceiverHw<GfxFamily> {
     SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override {
         return SubmissionStatus::FAILED;
     }
-    uint32_t tag = 0;
+    TagAddressType tag = 0;
 };
 
 HWTEST_F(CommandStreamReceiverFlushTaskTests, givenWaitForCompletionWithTimeoutIsCalledWhenFlushBatchedSubmissionsReturnsFailureThenItIsPropagated) {
diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp
index ae3398c719..bc38049ba0 100644
--- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp
+++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp
@@ -765,18 +765,18 @@ HWTEST2_F(CommandStreamReceiverFlushTaskTests, givenPolicyValueChangedWhenFlushi
 }
 
 namespace CpuIntrinsicsTests {
-extern volatile uint32_t *pauseAddress;
-extern uint32_t pauseValue;
+extern volatile TagAddressType *pauseAddress;
+extern TaskCountType pauseValue;
 } // namespace CpuIntrinsicsTests
 
 HWTEST_F(CommandStreamReceiverFlushTaskTests, givenTagValueNotMeetingTaskCountToWaitWhenTagValueSwitchesThenWaitFunctionReturnsTrue) {
-    VariableBackup<volatile uint32_t *> backupPauseAddress(&CpuIntrinsicsTests::pauseAddress);
-    VariableBackup<uint32_t> backupPauseValue(&CpuIntrinsicsTests::pauseValue);
+    VariableBackup<volatile TagAddressType *> backupPauseAddress(&CpuIntrinsicsTests::pauseAddress);
+    VariableBackup<TaskCountType> backupPauseValue(&CpuIntrinsicsTests::pauseValue);
 
     auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
     pDevice->resetCommandStreamReceiver(mockCsr);
 
-    uint32_t taskCountToWait = 2u;
+    TaskCountType taskCountToWait = 2u;
 
     *mockCsr->tagAddress = 1u;
 
@@ -788,13 +788,13 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenTagValueNotMeetingTaskCountTo
 }
 
 HWTEST_F(CommandStreamReceiverFlushTaskTests, givenTagValueNotMeetingTaskCountToWaitAndIndefinitelyPollWhenWaitForCompletionThenDoNotCallWaitUtils) {
-    VariableBackup<volatile uint32_t *> backupPauseAddress(&CpuIntrinsicsTests::pauseAddress);
-    VariableBackup<uint32_t> backupPauseValue(&CpuIntrinsicsTests::pauseValue);
+    VariableBackup<volatile TagAddressType *> backupPauseAddress(&CpuIntrinsicsTests::pauseAddress);
+    VariableBackup<TaskCountType> backupPauseValue(&CpuIntrinsicsTests::pauseValue);
 
     auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
     pDevice->resetCommandStreamReceiver(mockCsr);
 
-    uint32_t taskCountToWait = 2u;
+    TaskCountType taskCountToWait = 2u;
 
     *mockCsr->tagAddress = 1u;
 
diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp
index 249e9b557d..1e98bdc454 100644
--- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp
+++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp
@@ -637,7 +637,7 @@ class MyMockCsr : public UltCommandStreamReceiver<FamilyType> {
   public:
     using UltCommandStreamReceiver<FamilyType>::UltCommandStreamReceiver;
 
-    WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
+    WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait,
                                                      bool useQuickKmdSleep, QueueThrottle throttle) override {
         waitForTaskCountWithKmdNotifyFallbackCalled++;
         taskCountToWaitPassed = taskCountToWait;
@@ -648,7 +648,7 @@ class MyMockCsr : public UltCommandStreamReceiver<FamilyType> {
     }
 
     FlushStamp flushStampToWaitPassed = 0;
-    uint32_t taskCountToWaitPassed = 0;
+    TaskCountType taskCountToWaitPassed = 0;
     uint32_t waitForTaskCountWithKmdNotifyFallbackCalled = 0;
     bool useQuickKmdSleepPassed = false;
     QueueThrottle throttlePassed = QueueThrottle::MEDIUM;
diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_fixture.h b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_fixture.h
index c91d21e5c8..46ff9c6c33 100644
--- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_fixture.h
+++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_fixture.h
@@ -22,7 +22,7 @@ struct BcsTests : public Test<ClDeviceFixture> {
         Test<ClDeviceFixture>::TearDown();
     }
 
-    uint32_t flushBcsTask(CommandStreamReceiver *bcsCsr, const BlitProperties &blitProperties, bool blocking, Device &device) {
+    TaskCountType flushBcsTask(CommandStreamReceiver *bcsCsr, const BlitProperties &blitProperties, bool blocking, Device &device) {
         BlitPropertiesContainer container;
         container.push_back(blitProperties);
 
diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xehp_and_later.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xehp_and_later.cpp
index 8bcaaab82e..769b667d14 100644
--- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xehp_and_later.cpp
+++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xehp_and_later.cpp
@@ -345,7 +345,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat
                                      uint32_t scratchSlot,
                                      uint32_t requiredPerThreadScratchSize,
                                      uint32_t requiredPerThreadPrivateScratchSize,
-                                     uint32_t currentTaskCount,
+                                     TaskCountType currentTaskCount,
                                      OsContext &osContext,
                                      bool &stateBaseAddressDirty,
                                      bool &vfeStateDirty) override {
diff --git a/opencl/test/unit_test/device/device_tests.cpp b/opencl/test/unit_test/device/device_tests.cpp
index 0c85230703..a0b7d5dc28 100644
--- a/opencl/test/unit_test/device/device_tests.cpp
+++ b/opencl/test/unit_test/device/device_tests.cpp
@@ -58,7 +58,7 @@ TEST_F(DeviceTest, WhenDeviceIsCreatedThenEnabledClVersionMatchesHardwareInfo) {
 TEST_F(DeviceTest, givenDeviceWhenEngineIsCreatedThenSetInitialValueForTag) {
     for (auto &engine : pDevice->allEngines) {
         auto tagAddress = engine.commandStreamReceiver->getTagAddress();
-        ASSERT_NE(nullptr, const_cast<uint32_t *>(tagAddress));
+        ASSERT_NE(nullptr, const_cast<TagAddressType *>(tagAddress));
         EXPECT_EQ(initialHardwareTag, *tagAddress);
     }
 }
diff --git a/opencl/test/unit_test/event/async_events_handler_tests.cpp b/opencl/test/unit_test/event/async_events_handler_tests.cpp
index d74d326f27..9cea668c1b 100644
--- a/opencl/test/unit_test/event/async_events_handler_tests.cpp
+++ b/opencl/test/unit_test/event/async_events_handler_tests.cpp
@@ -26,7 +26,7 @@ class AsyncEventsHandlerTests : public ::testing::Test {
   public:
     class MyEvent : public Event {
       public:
-        MyEvent(Context *ctx, CommandQueue *cmdQueue, cl_command_type cmdType, uint32_t taskLevel, uint32_t taskCount)
+        MyEvent(Context *ctx, CommandQueue *cmdQueue, cl_command_type cmdType, TaskCountType taskLevel, TaskCountType taskCount)
             : Event(ctx, cmdQueue, cmdType, taskLevel, taskCount) {
             handler.reset(new MockHandler());
         }
@@ -34,7 +34,7 @@ class AsyncEventsHandlerTests : public ::testing::Test {
             //return execution status without updating
             return executionStatus.load();
         }
-        void setTaskStamp(uint32_t taskLevel, uint32_t taskCount) {
+        void setTaskStamp(TaskCountType taskLevel, TaskCountType taskCount) {
             this->taskLevel.store(taskLevel);
             this->updateTaskCount(taskCount, 0);
         }
diff --git a/opencl/test/unit_test/event/event_fixture.h b/opencl/test/unit_test/event/event_fixture.h
index 1e5d589d05..2e2d9b045d 100644
--- a/opencl/test/unit_test/event/event_fixture.h
+++ b/opencl/test/unit_test/event/event_fixture.h
@@ -69,13 +69,13 @@ struct MyUserEvent : public VirtualEvent {
     WaitStatus wait(bool blocking, bool quickKmdSleep) override {
         return VirtualEvent::wait(blocking, quickKmdSleep);
     };
-    uint32_t getTaskLevel() override {
+    TaskCountType getTaskLevel() override {
         return VirtualEvent::getTaskLevel();
     };
 };
 
 struct MyEvent : public Event {
-    MyEvent(CommandQueue *cmdQueue, cl_command_type cmdType, uint32_t taskLevel, uint32_t taskCount)
+    MyEvent(CommandQueue *cmdQueue, cl_command_type cmdType, TaskCountType taskLevel, TaskCountType taskCount)
         : Event(cmdQueue, cmdType, taskLevel, taskCount) {
     }
     TimeStampData getQueueTimeStamp() {
diff --git a/opencl/test/unit_test/event/event_tests.cpp b/opencl/test/unit_test/event/event_tests.cpp
index 884aa7a123..1fc3e4b475 100644
--- a/opencl/test/unit_test/event/event_tests.cpp
+++ b/opencl/test/unit_test/event/event_tests.cpp
@@ -102,7 +102,7 @@ TEST(Event, WhenGettingTaskLevelThenCorrectTaskLevelIsReturned) {
       public:
         TempEvent() : Event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 5, 7){};
 
-        uint32_t getTaskLevel() override {
+        TaskCountType getTaskLevel() override {
             return Event::getTaskLevel();
         }
     };
@@ -272,7 +272,7 @@ TEST_F(EventTest, WhenGettingClEventCommandExecutionStatusThenCorrectSizeIsRetur
 
 TEST_F(EventTest, GivenTagCsLessThanTaskCountWhenGettingClEventCommandExecutionStatusThenClSubmittedIsReturned) {
     uint32_t tagHW = 4;
-    uint32_t taskCount = 5;
+    TaskCountType taskCount = 5;
     *pTagMemory = tagHW;
 
     Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 3, taskCount);
@@ -288,7 +288,7 @@ TEST_F(EventTest, GivenTagCsLessThanTaskCountWhenGettingClEventCommandExecutionS
 
 TEST_F(EventTest, GivenTagCsEqualTaskCountWhenGettingClEventCommandExecutionStatusThenClCompleteIsReturned) {
     uint32_t tagHW = 5;
-    uint32_t taskCount = 5;
+    TaskCountType taskCount = 5;
     *pTagMemory = tagHW;
 
     Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 3, taskCount);
@@ -304,7 +304,7 @@ TEST_F(EventTest, GivenTagCsEqualTaskCountWhenGettingClEventCommandExecutionStat
 
 TEST_F(EventTest, GivenTagCsGreaterThanTaskCountWhenGettingClEventCommandExecutionStatusThenClCompleteIsReturned) {
     uint32_t tagHW = 6;
-    uint32_t taskCount = 5;
+    TaskCountType taskCount = 5;
     *pTagMemory = tagHW;
 
     Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 3, taskCount);
@@ -1032,7 +1032,7 @@ class MockCommand : public Command {
   public:
     using Command::Command;
 
-    CompletionStamp &submit(uint32_t taskLevel, bool terminated) override {
+    CompletionStamp &submit(TaskCountType taskLevel, bool terminated) override {
         return completionStamp;
     }
 };
@@ -1173,8 +1173,8 @@ HWTEST_F(EventTest, givenVirtualEventWhenCommandSubmittedThenLockCsrOccurs) {
       public:
         using Event::submitCommand;
         MockEvent(CommandQueue *cmdQueue, cl_command_type cmdType,
-                  uint32_t taskLevel, uint32_t taskCount) : Event(cmdQueue, cmdType,
-                                                                  taskLevel, taskCount) {}
+                  TaskCountType taskLevel, TaskCountType taskCount) : Event(cmdQueue, cmdType,
+                                                                            taskLevel, taskCount) {}
     };
 
     MockKernelWithInternals kernel(*pClDevice);
@@ -1207,8 +1207,8 @@ HWTEST_F(EventTest, givenVirtualEventWhenSubmitCommandEventNotReadyAndEventWitho
       public:
         using Event::submitCommand;
         MockEvent(CommandQueue *cmdQueue, cl_command_type cmdType,
-                  uint32_t taskLevel, uint32_t taskCount) : Event(cmdQueue, cmdType,
-                                                                  taskLevel, taskCount) {}
+                  TaskCountType taskLevel, TaskCountType taskCount) : Event(cmdQueue, cmdType,
+                                                                            taskLevel, taskCount) {}
     };
 
     auto virtualEvent = makeReleaseable<MockEvent>(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::notReady, CompletionStamp::notReady);
@@ -1642,7 +1642,7 @@ struct TestEventCsr : public UltCommandStreamReceiver<GfxFamily> {
     TestEventCsr(const ExecutionEnvironment &executionEnvironment, const DeviceBitfield deviceBitfield)
         : UltCommandStreamReceiver<GfxFamily>(const_cast<ExecutionEnvironment &>(executionEnvironment), 0, deviceBitfield) {}
 
-    WaitStatus waitForCompletionWithTimeout(const WaitParams &params, uint32_t taskCountToWait) override {
+    WaitStatus waitForCompletionWithTimeout(const WaitParams &params, TaskCountType taskCountToWait) override {
         waitForCompletionWithTimeoutCalled++;
         waitForCompletionWithTimeoutParamsPassed.push_back({params.enableTimeout, params.waitTimeout, taskCountToWait});
         return waitForCompletionWithTimeoutResult;
@@ -1651,7 +1651,7 @@ struct TestEventCsr : public UltCommandStreamReceiver<GfxFamily> {
     struct WaitForCompletionWithTimeoutParams {
         bool enableTimeout = false;
         int64_t timeoutMs{};
-        uint32_t taskCountToWait{};
+        TaskCountType taskCountToWait{};
     };
 
     uint32_t waitForCompletionWithTimeoutCalled = 0u;
diff --git a/opencl/test/unit_test/fixtures/cl_device_fixture.cpp b/opencl/test/unit_test/fixtures/cl_device_fixture.cpp
index e4daa6fed7..a5cb0fc73d 100644
--- a/opencl/test/unit_test/fixtures/cl_device_fixture.cpp
+++ b/opencl/test/unit_test/fixtures/cl_device_fixture.cpp
@@ -28,7 +28,7 @@ void ClDeviceFixture::setUpImpl(const NEO::HardwareInfo *hardwareInfo) {
 
     auto &commandStreamReceiver = pDevice->getGpgpuCommandStreamReceiver();
     pTagMemory = commandStreamReceiver.getTagAddress();
-    ASSERT_NE(nullptr, const_cast<uint32_t *>(pTagMemory));
+    ASSERT_NE(nullptr, const_cast<TagAddressType *>(pTagMemory));
     this->osContext = pDevice->getDefaultEngine().osContext;
 }
 
diff --git a/opencl/test/unit_test/fixtures/cl_device_fixture.h b/opencl/test/unit_test/fixtures/cl_device_fixture.h
index 74ffd756ea..c3014e3587 100644
--- a/opencl/test/unit_test/fixtures/cl_device_fixture.h
+++ b/opencl/test/unit_test/fixtures/cl_device_fixture.h
@@ -25,7 +25,7 @@ struct ClDeviceFixture {
 
     MockDevice *pDevice = nullptr;
     MockClDevice *pClDevice = nullptr;
-    volatile uint32_t *pTagMemory = nullptr;
+    volatile TagAddressType *pTagMemory = nullptr;
     HardwareInfo hardwareInfo = {};
     PLATFORM platformHelper = {};
     OsContext *osContext = nullptr;
diff --git a/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h b/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h
index 1b9f08cfc2..ce187f18c1 100644
--- a/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h
+++ b/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h
@@ -173,7 +173,7 @@ struct UltCommandStreamReceiverTest
     }
 
     DispatchFlags flushTaskFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
-    uint32_t taskLevel = 42;
+    TaskCountType taskLevel = 42;
     LinearStream commandStream;
     IndirectHeap dsh = {nullptr};
     IndirectHeap ioh = {nullptr};
diff --git a/opencl/test/unit_test/gtpin/gtpin_tests.cpp b/opencl/test/unit_test/gtpin/gtpin_tests.cpp
index dd789bceea..4963575206 100644
--- a/opencl/test/unit_test/gtpin/gtpin_tests.cpp
+++ b/opencl/test/unit_test/gtpin/gtpin_tests.cpp
@@ -1782,7 +1782,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelIsCreatedThenAllKerne
     EXPECT_EQ(CL_SUCCESS, retVal);
 
     // Verify that if flush occurs on another queue then our kernel is not flushed to CSR
-    uint32_t taskCount = 11;
+    TaskCountType taskCount = 11;
     gtpinNotifyPreFlushTask(nullptr);
     EXPECT_EQ(1u, kernelExecQueue.size());
     EXPECT_FALSE(kernelExecQueue[0].isTaskCountValid);
@@ -1800,7 +1800,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelIsCreatedThenAllKerne
     EXPECT_EQ(taskCount, kernelExecQueue[0].taskCount);
 
     // Verify that if previous task was completed then it does not affect our kernel
-    uint32_t taskCompleted = taskCount - 1;
+    TaskCountType taskCompleted = taskCount - 1;
     int prevCount4 = CommandBufferCompleteCallbackCount;
     gtpinNotifyTaskCompletion(taskCompleted);
     EXPECT_EQ(1u, kernelExecQueue.size());
diff --git a/opencl/test/unit_test/helpers/kmd_notify_tests.cpp b/opencl/test/unit_test/helpers/kmd_notify_tests.cpp
index 69a13f0e83..8fcc6dce7f 100644
--- a/opencl/test/unit_test/helpers/kmd_notify_tests.cpp
+++ b/opencl/test/unit_test/helpers/kmd_notify_tests.cpp
@@ -89,7 +89,7 @@ struct KmdNotifyTests : public ::testing::Test {
         bool waitForFlushStampResult = true;
         StackVec<WaitForFlushStampParams, 1> waitForFlushStampParamsPassed{};
 
-        WaitStatus waitForCompletionWithTimeout(const WaitParams &params, uint32_t taskCountToWait) override {
+        WaitStatus waitForCompletionWithTimeout(const WaitParams &params, TaskCountType taskCountToWait) override {
             waitForCompletionWithTimeoutCalled++;
             waitForCompletionWithTimeoutParamsPassed.push_back({params.enableTimeout, params.waitTimeout, taskCountToWait});
             return waitForCompletionWithTimeoutResult;
@@ -98,7 +98,7 @@ struct KmdNotifyTests : public ::testing::Test {
         struct WaitForCompletionWithTimeoutParams {
             bool enableTimeout{};
             int64_t timeoutMs{};
-            uint32_t taskCountToWait{};
+            TaskCountType taskCountToWait{};
         };
 
         uint32_t waitForCompletionWithTimeoutCalled = 0u;
@@ -123,7 +123,7 @@ struct KmdNotifyTests : public ::testing::Test {
     std::unique_ptr<MockClDevice> device;
     std::unique_ptr<MockCommandQueue> cmdQ;
     FlushStamp flushStampToWait = 1000;
-    uint32_t taskCountToWait = 5;
+    TaskCountType taskCountToWait = 5;
 };
 
 HWTEST_F(KmdNotifyTests, givenTaskCountWhenWaitUntilCompletionCalledThenAlwaysTryCpuPolling) {
@@ -362,7 +362,7 @@ TEST_F(KmdNotifyTests, givenTaskCountDiffLowerThanMinimumToCheckAcLineWhenObtain
     MockKmdNotifyHelper helper(&(hwInfo->capabilityTable.kmdNotifyProperties));
 
     uint32_t hwTag = 9;
-    uint32_t taskCountToWait = 10;
+    TaskCountType taskCountToWait = 10;
     EXPECT_TRUE(taskCountToWait - hwTag < KmdNotifyConstants::minimumTaskCountDiffToCheckAcLine);
     EXPECT_EQ(10u, KmdNotifyConstants::minimumTaskCountDiffToCheckAcLine);
 
@@ -376,7 +376,7 @@ TEST_F(KmdNotifyTests, givenTaskCountDiffGreaterThanMinimumToCheckAcLineAndDisab
     MockKmdNotifyHelper helper(&(hwInfo->capabilityTable.kmdNotifyProperties));
 
     uint32_t hwTag = 10;
-    uint32_t taskCountToWait = 21;
+    TaskCountType taskCountToWait = 21;
     EXPECT_TRUE(taskCountToWait - hwTag > KmdNotifyConstants::minimumTaskCountDiffToCheckAcLine);
     EXPECT_EQ(10u, KmdNotifyConstants::minimumTaskCountDiffToCheckAcLine);
 
@@ -409,7 +409,7 @@ TEST_F(KmdNotifyTests, givenTaskCountDiffGreaterThanMinimumToCheckAcLineAndEnabl
     MockKmdNotifyHelper helper(&(hwInfo->capabilityTable.kmdNotifyProperties));
 
     uint32_t hwTag = 10;
-    uint32_t taskCountToWait = 21;
+    TaskCountType taskCountToWait = 21;
     EXPECT_TRUE(taskCountToWait - hwTag > KmdNotifyConstants::minimumTaskCountDiffToCheckAcLine);
     EXPECT_EQ(10u, KmdNotifyConstants::minimumTaskCountDiffToCheckAcLine);
 
diff --git a/opencl/test/unit_test/helpers/task_information_tests.cpp b/opencl/test/unit_test/helpers/task_information_tests.cpp
index c77dffdf4f..ead89cf244 100644
--- a/opencl/test/unit_test/helpers/task_information_tests.cpp
+++ b/opencl/test/unit_test/helpers/task_information_tests.cpp
@@ -221,7 +221,7 @@ class MockCsr1 : public CommandStreamReceiverHw<GfxFamily> {
   public:
     CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart,
                               const IndirectHeap *dsh, const IndirectHeap *ioh,
-                              const IndirectHeap *ssh, uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override {
+                              const IndirectHeap *ssh, TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override {
         passedDispatchFlags = dispatchFlags;
         return CompletionStamp();
     }
diff --git a/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp b/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp
index 4f8dfaf40f..541b61d9ee 100644
--- a/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp
+++ b/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp
@@ -770,8 +770,8 @@ HWTEST_F(TimestampPacketTests, givenOOQAndEnableTimestampWaitForQueuesWhenFinish
 
 namespace CpuIntrinsicsTests {
 extern std::atomic<uint32_t> pauseCounter;
-extern volatile uint32_t *pauseAddress;
-extern uint32_t pauseValue;
+extern volatile TagAddressType *pauseAddress;
+extern TaskCountType pauseValue;
 extern uint32_t pauseOffset;
 extern std::function<void()> setupPauseAddress;
 } // namespace CpuIntrinsicsTests
@@ -795,18 +795,18 @@ HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitForQueuesWhenFinishThenCa
     EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size());
     EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size());
 
-    VariableBackup<volatile uint32_t *> backupPauseAddress(&CpuIntrinsicsTests::pauseAddress);
-    VariableBackup<uint32_t> backupPauseValue(&CpuIntrinsicsTests::pauseValue);
+    VariableBackup<volatile TagAddressType *> backupPauseAddress(&CpuIntrinsicsTests::pauseAddress);
+    VariableBackup<TaskCountType> backupPauseValue(&CpuIntrinsicsTests::pauseValue);
     VariableBackup<uint32_t> backupPauseOffset(&CpuIntrinsicsTests::pauseOffset);
     VariableBackup<std::function<void()>> backupSetupPauseAddress(&CpuIntrinsicsTests::setupPauseAddress);
 
     deferredTimestampPackets->peekNodes()[0]->setPacketsUsed(1u);
     timestampPacketContainer->peekNodes()[0]->setPacketsUsed(1u);
 
-    CpuIntrinsicsTests::pauseAddress = reinterpret_cast<volatile uint32_t *>(const_cast<void *>(timestampPacketContainer->peekNodes()[0]->getContextEndAddress(0u)));
+    CpuIntrinsicsTests::pauseAddress = reinterpret_cast<volatile TagAddressType *>(const_cast<void *>(timestampPacketContainer->peekNodes()[0]->getContextEndAddress(0u)));
     CpuIntrinsicsTests::pauseValue = 2u;
     CpuIntrinsicsTests::setupPauseAddress = [&]() {
-        CpuIntrinsicsTests::pauseAddress = reinterpret_cast<volatile uint32_t *>(const_cast<void *>(deferredTimestampPackets->peekNodes()[0]->getContextEndAddress(0u)));
+        CpuIntrinsicsTests::pauseAddress = reinterpret_cast<volatile TagAddressType *>(const_cast<void *>(deferredTimestampPackets->peekNodes()[0]->getContextEndAddress(0u)));
     };
     CpuIntrinsicsTests::pauseCounter = 0u;
     EXPECT_FALSE(device->getUltCommandStreamReceiver<FamilyType>().downloadAllocationCalled);
diff --git a/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp b/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp
index bab58544ea..1c98625825 100644
--- a/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp
+++ b/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp
@@ -55,7 +55,7 @@ class MockCommandStreamReceiverHW : public UltCommandStreamReceiver<FamilyType>
         const IndirectHeap *dsh,
         const IndirectHeap *ioh,
         const IndirectHeap *ssh,
-        uint32_t taskLevel,
+        TaskCountType taskLevel,
         DispatchFlags &dispatchFlags,
         Device &device) override {
         stream = &commandStream;
diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp
index 1ed57ce4d6..02050c3358 100644
--- a/opencl/test/unit_test/kernel/kernel_tests.cpp
+++ b/opencl/test/unit_test/kernel/kernel_tests.cpp
@@ -540,10 +540,10 @@ class CommandStreamReceiverMock : public CommandStreamReceiver {
         return NEO::SubmissionStatus::SUCCESS;
     }
 
-    WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, QueueThrottle throttle) override {
+    WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, QueueThrottle throttle) override {
         return WaitStatus::Ready;
     }
-    uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; };
+    TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; };
 
     CompletionStamp flushTask(
         LinearStream &commandStream,
@@ -551,7 +551,7 @@ class CommandStreamReceiverMock : public CommandStreamReceiver {
         const IndirectHeap *dsh,
         const IndirectHeap *ioh,
         const IndirectHeap *ssh,
-        uint32_t taskLevel,
+        TaskCountType taskLevel,
         DispatchFlags &dispatchFlags,
         Device &device) override {
         CompletionStamp cs = {};
diff --git a/opencl/test/unit_test/kernel/substitute_kernel_heap_tests.cpp b/opencl/test/unit_test/kernel/substitute_kernel_heap_tests.cpp
index e3f66b768a..46bc3ae07b 100644
--- a/opencl/test/unit_test/kernel/substitute_kernel_heap_tests.cpp
+++ b/opencl/test/unit_test/kernel/substitute_kernel_heap_tests.cpp
@@ -122,7 +122,7 @@ TEST_F(KernelSubstituteTest, givenKernelWithUsedKernelAllocationWhenSubstituteKe
     kernel.kernelInfo.createKernelAllocation(*pDevice, false);
     auto firstAllocation = kernel.kernelInfo.kernelAllocation;
 
-    uint32_t notReadyTaskCount = *commandStreamReceiver.getTagAddress() + 1u;
+    TaskCountType notReadyTaskCount = *commandStreamReceiver.getTagAddress() + 1u;
 
     firstAllocation->updateTaskCount(notReadyTaskCount, commandStreamReceiver.getOsContext().getContextId());
 
diff --git a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp
index 12213cab63..f594fa76ae 100644
--- a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp
+++ b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp
@@ -37,7 +37,7 @@ struct BcsBufferTests : public ::testing::Test {
       public:
         using UltCommandStreamReceiver<FamilyType>::UltCommandStreamReceiver;
 
-        WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
+        WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait,
                                                          bool useQuickKmdSleep, QueueThrottle throttle) override {
             EXPECT_EQ(this->latestFlushedTaskCount, taskCountToWait);
             EXPECT_EQ(0u, flushStampToWait);
@@ -49,7 +49,7 @@ struct BcsBufferTests : public ::testing::Test {
             return WaitStatus::Ready;
         }
 
-        WaitStatus waitForTaskCountAndCleanTemporaryAllocationList(uint32_t requiredTaskCount) override {
+        WaitStatus waitForTaskCountAndCleanTemporaryAllocationList(TaskCountType requiredTaskCount) override {
             EXPECT_EQ(1u, waitForTaskCountWithKmdNotifyFallbackCalled);
             EXPECT_EQ(this->latestFlushedTaskCount, requiredTaskCount);
             waitForTaskCountAndCleanAllocationListCalled++;
diff --git a/opencl/test/unit_test/mem_obj/mem_obj_destruction_tests.cpp b/opencl/test/unit_test/mem_obj/mem_obj_destruction_tests.cpp
index 02ff027377..5f83948a6a 100644
--- a/opencl/test/unit_test/mem_obj/mem_obj_destruction_tests.cpp
+++ b/opencl/test/unit_test/mem_obj/mem_obj_destruction_tests.cpp
@@ -32,7 +32,7 @@ class MyCsr : public UltCommandStreamReceiver<Family> {
     MyCsr(const ExecutionEnvironment &executionEnvironment, const DeviceBitfield deviceBitfield)
         : UltCommandStreamReceiver<Family>(const_cast<ExecutionEnvironment &>(executionEnvironment), 0, deviceBitfield) {}
 
-    WaitStatus waitForCompletionWithTimeout(const WaitParams &params, uint32_t taskCountToWait) override {
+    WaitStatus waitForCompletionWithTimeout(const WaitParams &params, TaskCountType taskCountToWait) override {
         waitForCompletionWithTimeoutCalled++;
         waitForCompletionWithTimeoutParamsPassed.push_back({params.enableTimeout, params.waitTimeout, taskCountToWait});
         *this->getTagAddress() = getTagAddressValue;
@@ -42,7 +42,7 @@ class MyCsr : public UltCommandStreamReceiver<Family> {
     struct WaitForCompletionWithTimeoutParams {
         bool enableTimeout;
         int64_t timeoutMs;
-        uint32_t taskCountToWait;
+        TaskCountType taskCountToWait;
     };
 
     uint32_t waitForCompletionWithTimeoutCalled = 0u;
@@ -100,7 +100,7 @@ class MemObjDestructionTest : public ::testing::TestWithParam<bool> {
         *device->getDefaultEngine().commandStreamReceiver->getTagAddress() = taskCountReady;
     }
 
-    constexpr static uint32_t taskCountReady = 3u;
+    constexpr static TaskCountType taskCountReady = 3u;
     ExecutionEnvironment *executionEnvironment = nullptr;
     std::unique_ptr<MockClDevice> device;
     uint32_t contextId = 0;
@@ -223,8 +223,8 @@ HWTEST_P(MemObjAsyncDestructionTest, givenUsedMemObjWithAsyncDestructionsEnabled
     memObj->getGraphicsAllocation(rootDeviceIndex)->updateTaskCount(taskCountReady, osContextId0);
     memObj->getGraphicsAllocation(rootDeviceIndex)->updateTaskCount(taskCountReady, osContextId1);
 
-    uint32_t expectedTaskCount0{};
-    uint32_t expectedTaskCount1{};
+    TaskCountType expectedTaskCount0{};
+    TaskCountType expectedTaskCount1{};
 
     if (hasCallbacks) {
         expectedTaskCount0 = allocation->getTaskCount(osContextId0);
@@ -266,7 +266,7 @@ HWTEST_P(MemObjAsyncDestructionTest, givenUsedMemObjWithAsyncDestructionsEnabled
     *mockCsr->getTagAddress() = 0;
     auto osContextId = mockCsr->getOsContext().getContextId();
 
-    uint32_t expectedTaskCount{};
+    TaskCountType expectedTaskCount{};
 
     if (hasAllocatedMappedPtr) {
         expectedTaskCount = allocation->getTaskCount(osContextId);
@@ -310,7 +310,7 @@ HWTEST_P(MemObjAsyncDestructionTest, givenUsedMemObjWithAsyncDestructionsEnabled
 
     auto osContextId = mockCsr->getOsContext().getContextId();
 
-    uint32_t expectedTaskCount{};
+    TaskCountType expectedTaskCount{};
 
     if (hasAllocatedMappedPtr) {
         expectedTaskCount = allocation->getTaskCount(osContextId);
@@ -346,7 +346,7 @@ HWTEST_P(MemObjSyncDestructionTest, givenMemObjWithDestructableAllocationWhenAsy
 
     auto osContextId = mockCsr->getOsContext().getContextId();
 
-    uint32_t expectedTaskCount = allocation->getTaskCount(osContextId);
+    TaskCountType expectedTaskCount = allocation->getTaskCount(osContextId);
 
     delete memObj;
 
@@ -396,7 +396,7 @@ HWTEST_P(MemObjSyncDestructionTest, givenMemObjWithMapAllocationWhenAsyncDestruc
 
     auto osContextId = mockCsr->getOsContext().getContextId();
 
-    uint32_t expectedTaskCount{};
+    TaskCountType expectedTaskCount{};
 
     if (isMapAllocationUsed) {
         expectedTaskCount = mapAllocation->getTaskCount(osContextId);
diff --git a/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp b/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp
index 6b367f06ad..884a7d08e2 100644
--- a/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp
+++ b/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp
@@ -245,7 +245,7 @@ TEST_F(MemoryAllocatorTest, WhenAllocatingGraphicsMemoryThenAllocationHasCorrect
 
     ASSERT_NE(nullptr, allocation);
     // initial taskCount must be -1. if not, we may kill allocation before it will be used
-    EXPECT_EQ((uint32_t)-1, allocation->getTaskCount(csr->getOsContext().getContextId()));
+    EXPECT_EQ(std::numeric_limits<TaskCountType>::max(), allocation->getTaskCount(csr->getOsContext().getContextId()));
     // We know we want graphics memory to be page aligned
     EXPECT_EQ(0u, reinterpret_cast<uintptr_t>(allocation->getUnderlyingBuffer()) & (alignment - 1));
     EXPECT_EQ(Sharing::nonSharedResource, allocation->peekSharedHandle());
@@ -1902,7 +1902,7 @@ TEST_F(MemoryManagerWithCsrTest, GivenAllocationsInHostPtrManagerReadyForCleanin
     auto fragment4 = hostPtrManager->getFragment({alignUp(cpuPtr2, MemoryConstants::pageSize), csr->getRootDeviceIndex()});
     EXPECT_NE(nullptr, fragment4);
 
-    uint32_t taskCountReady = 1;
+    TaskCountType taskCountReady = 1;
     auto storage = csr->getInternalAllocationStorage();
     storage->storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation>(graphicsAllocation1), TEMPORARY_ALLOCATION, taskCountReady);
     storage->storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation>(graphicsAllocation2), TEMPORARY_ALLOCATION, taskCountReady);
diff --git a/opencl/test/unit_test/mocks/mock_command_queue.h b/opencl/test/unit_test/mocks/mock_command_queue.h
index 533edb0ce5..1fe5fc2f00 100644
--- a/opencl/test/unit_test/mocks/mock_command_queue.h
+++ b/opencl/test/unit_test/mocks/mock_command_queue.h
@@ -94,7 +94,7 @@ class MockCommandQueue : public CommandQueue {
         return writeBufferRetValue;
     }
 
-    WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override {
+    WaitStatus waitUntilComplete(TaskCountType gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override {
         latestTaskCountWaited = gpgpuTaskCountToWait;
 
         waitUntilCompleteCalledCount++;
@@ -105,7 +105,7 @@ class MockCommandQueue : public CommandQueue {
         return CommandQueue::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList, skipWait);
     }
 
-    WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
+    WaitStatus waitUntilComplete(TaskCountType gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
         latestTaskCountWaited = gpgpuTaskCountToWait;
         return CommandQueue::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep);
     }
@@ -212,7 +212,7 @@ class MockCommandQueue : public CommandQueue {
 
     bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const override { return isCacheFlushRequired; }
 
-    bool waitForTimestamps(Range<CopyEngineState> copyEnginesToWait, uint32_t taskCount, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) override {
+    bool waitForTimestamps(Range<CopyEngineState> copyEnginesToWait, TaskCountType taskCount, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) override {
         waitForTimestampsCalled = true;
         return false;
     };
@@ -227,7 +227,7 @@ class MockCommandQueue : public CommandQueue {
     void *writeBufferPtr = nullptr;
     size_t requestedCmdStreamSize = 0;
     GraphicsAllocation *writeMapAllocation = nullptr;
-    std::atomic<uint32_t> latestTaskCountWaited{std::numeric_limits<uint32_t>::max()};
+    std::atomic<TaskCountType> latestTaskCountWaited{std::numeric_limits<TaskCountType>::max()};
     std::optional<WaitStatus> waitUntilCompleteReturnValue{};
     int waitUntilCompleteCalledCount{0};
 };
@@ -353,7 +353,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
         useBcsCsrOnNotifyEnabled = notifyBcsCsr;
     }
 
-    WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override {
+    WaitStatus waitUntilComplete(TaskCountType gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override {
         latestTaskCountWaited = gpgpuTaskCountToWait;
         if (waitUntilCompleteReturnValue.has_value()) {
             return *waitUntilCompleteReturnValue;
@@ -417,7 +417,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
         bool returnValue = false;
     } overrideIsCacheFlushForBcsRequired;
     BuiltinOpParams kernelParams;
-    std::atomic<uint32_t> latestTaskCountWaited{std::numeric_limits<uint32_t>::max()};
+    std::atomic<TaskCountType> latestTaskCountWaited{std::numeric_limits<TaskCountType>::max()};
     bool flushCalled = false;
     std::optional<WaitStatus> waitForAllEnginesReturnValue{};
     std::optional<WaitStatus> waitUntilCompleteReturnValue{};
diff --git a/opencl/test/unit_test/os_interface/linux/drm_command_stream_tests_2.cpp b/opencl/test/unit_test/os_interface/linux/drm_command_stream_tests_2.cpp
index 03acd5577c..dd2964ae12 100644
--- a/opencl/test/unit_test/os_interface/linux/drm_command_stream_tests_2.cpp
+++ b/opencl/test/unit_test/os_interface/linux/drm_command_stream_tests_2.cpp
@@ -1140,7 +1140,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest,
 
     EXPECT_NE(0u, mock->waitUserFenceCall.ctxId);
     EXPECT_EQ(-1, mock->waitUserFenceCall.timeout);
-    EXPECT_EQ(Drm::ValueWidth::U32, mock->waitUserFenceCall.dataWidth);
+    EXPECT_EQ(Drm::ValueWidth::U64, mock->waitUserFenceCall.dataWidth);
 }
 
 HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest,
@@ -1234,7 +1234,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest,
 
     EXPECT_EQ(0u, mock->waitUserFenceCall.ctxId);
     EXPECT_EQ(1000, mock->waitUserFenceCall.timeout);
-    EXPECT_EQ(Drm::ValueWidth::U32, mock->waitUserFenceCall.dataWidth);
+    EXPECT_EQ(Drm::ValueWidth::U64, mock->waitUserFenceCall.dataWidth);
 }
 
 HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest,
diff --git a/opencl/test/unit_test/xe_hp_core/copy_engine_tests_xe_hp_core.cpp b/opencl/test/unit_test/xe_hp_core/copy_engine_tests_xe_hp_core.cpp
index 3a9fd4b0d9..8dae5a78ea 100644
--- a/opencl/test/unit_test/xe_hp_core/copy_engine_tests_xe_hp_core.cpp
+++ b/opencl/test/unit_test/xe_hp_core/copy_engine_tests_xe_hp_core.cpp
@@ -40,7 +40,7 @@ struct BlitXE_HP_CORETests : public ::testing::Test {
         clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
     }
 
-    std::optional<uint32_t> flushBcsTask(CommandStreamReceiver *csr, const BlitProperties &blitProperties, bool blocking, Device &device) {
+    std::optional<TaskCountType> flushBcsTask(CommandStreamReceiver *csr, const BlitProperties &blitProperties, bool blocking, Device &device) {
         BlitPropertiesContainer blitPropertiesContainer;
         blitPropertiesContainer.push_back(blitProperties);
 
diff --git a/opencl/test/unit_test/xe_hpc_core/copy_engine_tests_xe_hpc_core.cpp b/opencl/test/unit_test/xe_hpc_core/copy_engine_tests_xe_hpc_core.cpp
index 315ff8b982..ea6bc1e9a1 100644
--- a/opencl/test/unit_test/xe_hpc_core/copy_engine_tests_xe_hpc_core.cpp
+++ b/opencl/test/unit_test/xe_hpc_core/copy_engine_tests_xe_hpc_core.cpp
@@ -40,7 +40,7 @@ struct BlitXeHpcCoreTests : public ::testing::Test {
         clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
     }
 
-    std::optional<uint32_t> flushBcsTask(CommandStreamReceiver *csr, const BlitProperties &blitProperties, bool blocking, Device &device) {
+    std::optional<TaskCountType> flushBcsTask(CommandStreamReceiver *csr, const BlitProperties &blitProperties, bool blocking, Device &device) {
         BlitPropertiesContainer blitPropertiesContainer;
         blitPropertiesContainer.push_back(blitProperties);
 
diff --git a/opencl/test/unit_test/xe_hpg_core/copy_engine_tests_xe_hpg_core.cpp b/opencl/test/unit_test/xe_hpg_core/copy_engine_tests_xe_hpg_core.cpp
index 39d8f411ab..a34a085986 100644
--- a/opencl/test/unit_test/xe_hpg_core/copy_engine_tests_xe_hpg_core.cpp
+++ b/opencl/test/unit_test/xe_hpg_core/copy_engine_tests_xe_hpg_core.cpp
@@ -41,7 +41,7 @@ struct BlitXeHpgCoreTests : public ::testing::Test {
         clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
     }
 
-    std::optional<uint32_t> flushBcsTask(CommandStreamReceiver *csr, const BlitProperties &blitProperties, bool blocking, Device &device) {
+    std::optional<TaskCountType> flushBcsTask(CommandStreamReceiver *csr, const BlitProperties &blitProperties, bool blocking, Device &device) {
         BlitPropertiesContainer blitPropertiesContainer;
         blitPropertiesContainer.push_back(blitProperties);
 
diff --git a/shared/source/command_stream/CMakeLists.txt b/shared/source/command_stream/CMakeLists.txt
index 001632f641..9ecf2b794d 100644
--- a/shared/source/command_stream/CMakeLists.txt
+++ b/shared/source/command_stream/CMakeLists.txt
@@ -52,6 +52,7 @@ set(NEO_CORE_COMMAND_STREAM
     ${CMAKE_CURRENT_SOURCE_DIR}/submission_status.h
     ${CMAKE_CURRENT_SOURCE_DIR}/submissions_aggregator.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/submissions_aggregator.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/task_count_helper.h
     ${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver.h
     ${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver_hw.h
diff --git a/shared/source/command_stream/aub_command_stream_receiver_hw.h b/shared/source/command_stream/aub_command_stream_receiver_hw.h
index 49e33951fe..6f426c75d9 100644
--- a/shared/source/command_stream/aub_command_stream_receiver_hw.h
+++ b/shared/source/command_stream/aub_command_stream_receiver_hw.h
@@ -56,7 +56,7 @@ class AUBCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw<GfxFa
     MOCKABLE_VIRTUAL void submitBatchBufferAub(uint64_t batchBufferGpuAddress, const void *batchBuffer, size_t batchBufferSize, uint32_t memoryBank, uint64_t entryBits);
     void pollForCompletion() override;
     void pollForCompletionImpl() override;
-    WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override;
+    WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override;
 
     uint32_t getDumpHandle();
     MOCKABLE_VIRTUAL void addContextToken(uint32_t dumpHandle);
@@ -112,7 +112,7 @@ class AUBCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw<GfxFa
     bool isEngineInitialized = false;
     ExternalAllocationsContainer externalAllocations;
 
-    uint32_t pollForCompletionTaskCount = 0u;
+    TaskCountType pollForCompletionTaskCount = 0u;
     SpinLock pollForCompletionLock;
 };
 } // namespace NEO
diff --git a/shared/source/command_stream/aub_command_stream_receiver_hw_base.inl b/shared/source/command_stream/aub_command_stream_receiver_hw_base.inl
index 771cdbab2d..c5e2dfd2de 100644
--- a/shared/source/command_stream/aub_command_stream_receiver_hw_base.inl
+++ b/shared/source/command_stream/aub_command_stream_receiver_hw_base.inl
@@ -312,7 +312,7 @@ SubmissionStatus AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batch
     if (subCaptureManager->isSubCaptureMode()) {
         if (!subCaptureManager->isSubCaptureEnabled()) {
             if (this->standalone) {
-                volatile uint32_t *pollAddress = this->tagAddress;
+                volatile TagAddressType *pollAddress = this->tagAddress;
                 for (uint32_t i = 0; i < this->activePartitions; i++) {
                     *pollAddress = this->peekLatestSentTaskCount();
                     pollAddress = ptrOffset(pollAddress, this->postSyncWriteOffset);
@@ -353,7 +353,7 @@ SubmissionStatus AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batch
     submitBatchBufferAub(batchBufferGpuAddress, pBatchBuffer, sizeBatchBuffer, this->getMemoryBank(batchBuffer.commandBufferAllocation), this->getPPGTTAdditionalBits(batchBuffer.commandBufferAllocation));
 
     if (this->standalone) {
-        volatile uint32_t *pollAddress = this->tagAddress;
+        volatile TagAddressType *pollAddress = this->tagAddress;
         for (uint32_t i = 0; i < this->activePartitions; i++) {
             *pollAddress = this->peekLatestSentTaskCount();
             pollAddress = ptrOffset(pollAddress, this->postSyncWriteOffset);
@@ -614,7 +614,7 @@ void AUBCommandStreamReceiverHw<GfxFamily>::pollForCompletionImpl() {
 }
 
 template <typename GfxFamily>
-inline WaitStatus AUBCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) {
+inline WaitStatus AUBCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) {
     const auto result = CommandStreamReceiverSimulatedHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, throttle);
     pollForCompletion();
 
diff --git a/shared/source/command_stream/command_stream_receiver.cpp b/shared/source/command_stream/command_stream_receiver.cpp
index 989325bb0f..97936e4d2c 100644
--- a/shared/source/command_stream/command_stream_receiver.cpp
+++ b/shared/source/command_stream/command_stream_receiver.cpp
@@ -180,7 +180,7 @@ void CommandStreamReceiver::makeResidentHostPtrAllocation(GraphicsAllocation *gf
     makeResident(*gfxAllocation);
 }
 
-WaitStatus CommandStreamReceiver::waitForTaskCount(uint32_t requiredTaskCount) {
+WaitStatus CommandStreamReceiver::waitForTaskCount(TaskCountType requiredTaskCount) {
     auto address = getTagAddress();
     if (!skipResourceCleanup() && address) {
         this->downloadTagAllocation(requiredTaskCount);
@@ -190,7 +190,7 @@ WaitStatus CommandStreamReceiver::waitForTaskCount(uint32_t requiredTaskCount) {
     return WaitStatus::Ready;
 }
 
-WaitStatus CommandStreamReceiver::waitForTaskCountAndCleanAllocationList(uint32_t requiredTaskCount, uint32_t allocationUsage) {
+WaitStatus CommandStreamReceiver::waitForTaskCountAndCleanAllocationList(TaskCountType requiredTaskCount, uint32_t allocationUsage) {
     WaitStatus waitStatus{WaitStatus::Ready};
     auto &list = allocationUsage == TEMPORARY_ALLOCATION ? internalAllocationStorage->getTemporaryAllocations() : internalAllocationStorage->getAllocationsForReuse();
     if (!list.peekIsEmpty()) {
@@ -201,7 +201,7 @@ WaitStatus CommandStreamReceiver::waitForTaskCountAndCleanAllocationList(uint32_
     return waitStatus;
 }
 
-WaitStatus CommandStreamReceiver::waitForTaskCountAndCleanTemporaryAllocationList(uint32_t requiredTaskCount) {
+WaitStatus CommandStreamReceiver::waitForTaskCountAndCleanTemporaryAllocationList(TaskCountType requiredTaskCount) {
     return waitForTaskCountAndCleanAllocationList(requiredTaskCount, TEMPORARY_ALLOCATION);
 }
 
@@ -358,13 +358,13 @@ void CommandStreamReceiver::cleanupResources() {
     }
 }
 
-WaitStatus CommandStreamReceiver::waitForCompletionWithTimeout(const WaitParams &params, uint32_t taskCountToWait) {
+WaitStatus CommandStreamReceiver::waitForCompletionWithTimeout(const WaitParams &params, TaskCountType taskCountToWait) {
     bool printWaitForCompletion = DebugManager.flags.LogWaitingForCompletion.get();
     if (printWaitForCompletion) {
         printTagAddressContent(taskCountToWait, params.waitTimeout, true);
     }
 
-    uint32_t latestSentTaskCount = this->latestFlushedTaskCount;
+    TaskCountType latestSentTaskCount = this->latestFlushedTaskCount;
     if (latestSentTaskCount < taskCountToWait) {
         if (!this->flushBatchedSubmissions()) {
             const auto isGpuHang{isGpuHangDetected()};
@@ -391,15 +391,15 @@ bool CommandStreamReceiver::checkGpuHangDetected(TimeType currentTime, TimeType
     return false;
 }
 
-WaitStatus CommandStreamReceiver::baseWaitFunction(volatile uint32_t *pollAddress, const WaitParams &params, uint32_t taskCountToWait) {
+WaitStatus CommandStreamReceiver::baseWaitFunction(volatile TagAddressType *pollAddress, const WaitParams &params, TaskCountType taskCountToWait) {
     std::chrono::high_resolution_clock::time_point waitStartTime, lastHangCheckTime, currentTime;
     int64_t timeDiff = 0;
 
-    uint32_t latestSentTaskCount = this->latestFlushedTaskCount;
+    TaskCountType latestSentTaskCount = this->latestFlushedTaskCount;
     if (latestSentTaskCount < taskCountToWait) {
         this->flushTagUpdate();
     }
-    volatile uint32_t *partitionAddress = pollAddress;
+    volatile TagAddressType *partitionAddress = pollAddress;
 
     waitStartTime = std::chrono::high_resolution_clock::now();
     lastHangCheckTime = waitStartTime;
@@ -438,7 +438,7 @@ WaitStatus CommandStreamReceiver::baseWaitFunction(volatile uint32_t *pollAddres
 void CommandStreamReceiver::setTagAllocation(GraphicsAllocation *allocation) {
     this->tagAllocation = allocation;
     UNRECOVERABLE_IF(allocation == nullptr);
-    this->tagAddress = reinterpret_cast<uint32_t *>(allocation->getUnderlyingBuffer());
+    this->tagAddress = reinterpret_cast<TagAddressType *>(allocation->getUnderlyingBuffer());
     this->debugPauseStateAddress = reinterpret_cast<DebugPauseState *>(
         reinterpret_cast<uint8_t *>(allocation->getUnderlyingBuffer()) + debugPauseStateAddressOffset);
 }
@@ -855,7 +855,7 @@ void CommandStreamReceiver::printDeviceIndex() {
     }
 }
 
-void CommandStreamReceiver::checkForNewResources(uint32_t submittedTaskCount, uint32_t allocationTaskCount, GraphicsAllocation &gfxAllocation) {
+void CommandStreamReceiver::checkForNewResources(TaskCountType submittedTaskCount, TaskCountType allocationTaskCount, GraphicsAllocation &gfxAllocation) {
     if (useNewResourceImplicitFlush) {
         if (allocationTaskCount == GraphicsAllocation::objectNotUsed && !GraphicsAllocation::isIsaAllocationType(gfxAllocation.getAllocationType())) {
             newResources = true;
@@ -875,7 +875,7 @@ bool CommandStreamReceiver::checkImplicitFlushForGpuIdle() {
     return false;
 }
 
-void CommandStreamReceiver::downloadTagAllocation(uint32_t taskCountToWait) {
+void CommandStreamReceiver::downloadTagAllocation(TaskCountType taskCountToWait) {
     if (this->getTagAllocation()) {
         if (taskCountToWait && taskCountToWait <= this->peekLatestFlushedTaskCount()) {
             this->downloadAllocation(*this->getTagAllocation());
@@ -883,7 +883,7 @@ void CommandStreamReceiver::downloadTagAllocation(uint32_t taskCountToWait) {
     }
 }
 
-bool CommandStreamReceiver::testTaskCountReady(volatile uint32_t *pollAddress, uint32_t taskCountToWait) {
+bool CommandStreamReceiver::testTaskCountReady(volatile TagAddressType *pollAddress, TaskCountType taskCountToWait) {
     this->downloadTagAllocation(taskCountToWait);
     for (uint32_t i = 0; i < activePartitions; i++) {
         if (!WaitUtils::waitFunction(pollAddress, taskCountToWait)) {
@@ -903,7 +903,7 @@ const RootDeviceEnvironment &CommandStreamReceiver::peekRootDeviceEnvironment()
     return *executionEnvironment.rootDeviceEnvironments[rootDeviceIndex];
 }
 
-uint32_t CommandStreamReceiver::getCompletionValue(const GraphicsAllocation &gfxAllocation) {
+TaskCountType CommandStreamReceiver::getCompletionValue(const GraphicsAllocation &gfxAllocation) {
     if (completionFenceValuePointer) {
         return *completionFenceValuePointer;
     }
@@ -920,7 +920,7 @@ bool CommandStreamReceiver::createPerDssBackedBuffer(Device &device) {
     return perDssBackedBuffer != nullptr;
 }
 
-void CommandStreamReceiver::printTagAddressContent(uint32_t taskCountToWait, int64_t waitTimeout, bool start) {
+void CommandStreamReceiver::printTagAddressContent(TaskCountType taskCountToWait, int64_t waitTimeout, bool start) {
     auto postSyncAddress = getTagAddress();
     if (start) {
         PRINT_DEBUG_STRING(true, stdout,
@@ -941,7 +941,7 @@ LogicalStateHelper *CommandStreamReceiver::getLogicalStateHelper() const {
     return logicalStateHelper.get();
 }
 
-uint32_t CompletionStamp::getTaskCountFromSubmissionStatusError(SubmissionStatus status) {
+TaskCountType CompletionStamp::getTaskCountFromSubmissionStatusError(SubmissionStatus status) {
     switch (status) {
     case SubmissionStatus::OUT_OF_HOST_MEMORY:
         return CompletionStamp::outOfHostMemory;
diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h
index 344635b656..8224711d70 100644
--- a/shared/source/command_stream/command_stream_receiver.h
+++ b/shared/source/command_stream/command_stream_receiver.h
@@ -13,6 +13,7 @@
 #include "shared/source/command_stream/stream_properties.h"
 #include "shared/source/command_stream/submission_status.h"
 #include "shared/source/command_stream/submissions_aggregator.h"
+#include "shared/source/command_stream/task_count_helper.h"
 #include "shared/source/command_stream/wait_status.h"
 #include "shared/source/helpers/aligned_memory.h"
 #include "shared/source/helpers/blit_commands_helper.h"
@@ -86,7 +87,7 @@ class CommandStreamReceiver {
 
     virtual CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart,
                                       const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh,
-                                      uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) = 0;
+                                      TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) = 0;
 
     virtual bool flushBatchedSubmissions() = 0;
     MOCKABLE_VIRTUAL SubmissionStatus submitBatchBuffer(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency);
@@ -112,9 +113,9 @@ class CommandStreamReceiver {
     virtual GmmPageTableMngr *createPageTableManager() { return nullptr; }
     bool needsPageTableManager() const;
 
-    MOCKABLE_VIRTUAL WaitStatus waitForTaskCount(uint32_t requiredTaskCount);
-    WaitStatus waitForTaskCountAndCleanAllocationList(uint32_t requiredTaskCount, uint32_t allocationUsage);
-    MOCKABLE_VIRTUAL WaitStatus waitForTaskCountAndCleanTemporaryAllocationList(uint32_t requiredTaskCount);
+    MOCKABLE_VIRTUAL WaitStatus waitForTaskCount(TaskCountType requiredTaskCount);
+    WaitStatus waitForTaskCountAndCleanAllocationList(TaskCountType requiredTaskCount, uint32_t allocationUsage);
+    MOCKABLE_VIRTUAL WaitStatus waitForTaskCountAndCleanTemporaryAllocationList(TaskCountType requiredTaskCount);
 
     LinearStream &getCS(size_t minRequiredSize = 1024u);
     OSInterface *getOSInterface() const;
@@ -129,19 +130,19 @@ class CommandStreamReceiver {
         return tagsMultiAllocation;
     }
     MultiGraphicsAllocation &createTagsMultiAllocation();
-    volatile uint32_t *getTagAddress() const { return tagAddress; }
+    volatile TagAddressType *getTagAddress() const { return tagAddress; }
     uint64_t getDebugPauseStateGPUAddress() const { return tagAllocation->getGpuAddress() + debugPauseStateAddressOffset; }
 
     virtual bool waitForFlushStamp(FlushStamp &flushStampToWait) { return true; }
 
-    uint32_t peekTaskCount() const { return taskCount; }
+    TaskCountType peekTaskCount() const { return taskCount; }
 
-    uint32_t peekTaskLevel() const { return taskLevel; }
+    TaskCountType peekTaskLevel() const { return taskLevel; }
     FlushStamp obtainCurrentFlushStamp() const;
 
-    uint32_t peekLatestSentTaskCount() const { return latestSentTaskCount; }
+    TaskCountType peekLatestSentTaskCount() const { return latestSentTaskCount; }
 
-    uint32_t peekLatestFlushedTaskCount() const { return latestFlushedTaskCount; }
+    TaskCountType peekLatestFlushedTaskCount() const { return latestFlushedTaskCount; }
 
     void enableNTo1SubmissionModel() { this->nTo1SubmissionModelEnabled = true; }
     bool isNTo1SubmissionModelEnabled() const { return this->nTo1SubmissionModelEnabled; }
@@ -165,10 +166,10 @@ class CommandStreamReceiver {
     void requestStallingCommandsOnNextFlush() { stallingCommandsOnNextFlushRequired = true; }
     bool isStallingCommandsOnNextFlushRequired() const { return stallingCommandsOnNextFlushRequired; }
 
-    virtual WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) = 0;
-    virtual WaitStatus waitForCompletionWithTimeout(const WaitParams &params, uint32_t taskCountToWait);
-    WaitStatus baseWaitFunction(volatile uint32_t *pollAddress, const WaitParams &params, uint32_t taskCountToWait);
-    MOCKABLE_VIRTUAL bool testTaskCountReady(volatile uint32_t *pollAddress, uint32_t taskCountToWait);
+    virtual WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) = 0;
+    virtual WaitStatus waitForCompletionWithTimeout(const WaitParams &params, TaskCountType taskCountToWait);
+    WaitStatus baseWaitFunction(volatile TagAddressType *pollAddress, const WaitParams &params, TaskCountType taskCountToWait);
+    MOCKABLE_VIRTUAL bool testTaskCountReady(volatile TagAddressType *pollAddress, TaskCountType taskCountToWait);
     virtual void downloadAllocations(){};
 
     void setSamplerCacheFlushRequired(SamplerCacheFlushState value) { this->samplerCacheFlushRequired = value; }
@@ -225,14 +226,14 @@ class CommandStreamReceiver {
 
     virtual MemoryCompressionState getMemoryCompressionState(bool auxTranslationRequired, const HardwareInfo &hwInfo) const = 0;
 
-    void setLatestSentTaskCount(uint32_t latestSentTaskCount) {
+    void setLatestSentTaskCount(TaskCountType latestSentTaskCount) {
         this->latestSentTaskCount = latestSentTaskCount;
     }
-    void setLatestFlushedTaskCount(uint32_t latestFlushedTaskCount) {
+    void setLatestFlushedTaskCount(TaskCountType latestFlushedTaskCount) {
         this->latestFlushedTaskCount = latestFlushedTaskCount;
     }
 
-    virtual uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) = 0;
+    virtual TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) = 0;
 
     virtual SubmissionStatus flushTagUpdate() = 0;
     virtual void updateTagFromWait() = 0;
@@ -333,7 +334,7 @@ class CommandStreamReceiver {
     MOCKABLE_VIRTUAL bool checkGpuHangDetected(TimeType currentTime, TimeType &lastHangCheckTime) const;
 
     uint64_t getCompletionAddress() const {
-        uint64_t completionFenceAddress = castToUint64(const_cast<uint32_t *>(getTagAddress()));
+        uint64_t completionFenceAddress = castToUint64(const_cast<TagAddressType *>(tagAddress));
         if (completionFenceAddress == 0) {
             return 0;
         }
@@ -341,7 +342,7 @@ class CommandStreamReceiver {
         return completionFenceAddress;
     }
 
-    uint32_t getCompletionValue(const GraphicsAllocation &gfxAllocation);
+    TaskCountType getCompletionValue(const GraphicsAllocation &gfxAllocation);
     DispatchMode getDispatchMode() const {
         return this->dispatchMode;
     }
@@ -387,10 +388,10 @@ class CommandStreamReceiver {
   protected:
     void cleanupResources();
     void printDeviceIndex();
-    void checkForNewResources(uint32_t submittedTaskCount, uint32_t allocationTaskCount, GraphicsAllocation &gfxAllocation);
+    void checkForNewResources(TaskCountType submittedTaskCount, TaskCountType allocationTaskCount, GraphicsAllocation &gfxAllocation);
     bool checkImplicitFlushForGpuIdle();
-    void downloadTagAllocation(uint32_t taskCountToWait);
-    void printTagAddressContent(uint32_t taskCountToWait, int64_t waitTimeout, bool start);
+    void downloadTagAllocation(TaskCountType taskCountToWait);
+    void printTagAddressContent(TaskCountType taskCountToWait, int64_t waitTimeout, bool start);
     [[nodiscard]] MOCKABLE_VIRTUAL std::unique_lock<MutexType> obtainHostPtrSurfaceCreationLock();
 
     std::unique_ptr<FlushStampTracker> flushStamp;
@@ -421,7 +422,7 @@ class CommandStreamReceiver {
     const uint64_t debugPauseStateAddressOffset = MemoryConstants::kiloByte;
     uint64_t totalMemoryUsed = 0u;
 
-    volatile uint32_t *tagAddress = nullptr;
+    volatile TagAddressType *tagAddress = nullptr;
     volatile DebugPauseState *debugPauseStateAddress = nullptr;
     SpinLock debugPauseStateLock;
     static void *asyncDebugBreakConfirmation(void *arg);
@@ -441,14 +442,14 @@ class CommandStreamReceiver {
 
     IndirectHeap *indirectHeap[IndirectHeap::Type::NUM_TYPES];
     OsContext *osContext = nullptr;
-    uint32_t *completionFenceValuePointer = nullptr;
+    TaskCountType *completionFenceValuePointer = nullptr;
 
     // current taskLevel.  Used for determining if a PIPE_CONTROL is needed.
-    std::atomic<uint32_t> taskLevel{0};
-    std::atomic<uint32_t> latestSentTaskCount{0};
-    std::atomic<uint32_t> latestFlushedTaskCount{0};
+    std::atomic<TaskCountType> taskLevel{0};
+    std::atomic<TaskCountType> latestSentTaskCount{0};
+    std::atomic<TaskCountType> latestFlushedTaskCount{0};
     // taskCount - # of tasks submitted
-    std::atomic<uint32_t> taskCount{0};
+    std::atomic<TaskCountType> taskCount{0};
 
     std::atomic<uint32_t> numClients = 0u;
 
@@ -470,7 +471,7 @@ class CommandStreamReceiver {
     uint32_t activePartitionsConfig = 1;
     uint32_t postSyncWriteOffset = 0;
     uint32_t completionFenceOffset = 0;
-    uint32_t completionFenceValue = 0;
+    TaskCountType completionFenceValue = 0;
 
     const uint32_t rootDeviceIndex;
     const DeviceBitfield deviceBitfield;
diff --git a/shared/source/command_stream/command_stream_receiver_hw.h b/shared/source/command_stream/command_stream_receiver_hw.h
index 1aa566c358..ffb0d1df72 100644
--- a/shared/source/command_stream/command_stream_receiver_hw.h
+++ b/shared/source/command_stream/command_stream_receiver_hw.h
@@ -43,7 +43,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
 
     CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart,
                               const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh,
-                              uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override;
+                              TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override;
 
     void forcePipeControl(NEO::LinearStream &commandStreamCSR);
 
@@ -77,7 +77,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
     bool isPipelineSelectAlreadyProgrammed() const;
     void programComputeMode(LinearStream &csr, DispatchFlags &dispatchFlags, const HardwareInfo &hwInfo);
 
-    WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override;
+    WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override;
 
     void collectStateBaseAddresPatchInfo(
         uint64_t commandBufferAddress,
@@ -96,7 +96,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
         return CommandStreamReceiverType::CSR_HW;
     }
 
-    uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override;
+    TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override;
 
     SubmissionStatus flushTagUpdate() override;
     SubmissionStatus flushMiFlushDW();
diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl
index c061b852ef..4ed0e9e8e8 100644
--- a/shared/source/command_stream/command_stream_receiver_hw_base.inl
+++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl
@@ -178,7 +178,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
     const IndirectHeap *dsh,
     const IndirectHeap *ioh,
     const IndirectHeap *ssh,
-    uint32_t taskLevel,
+    TaskCountType taskLevel,
     DispatchFlags &dispatchFlags,
     Device &device) {
     typedef typename GfxFamily::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START;
@@ -926,7 +926,7 @@ inline size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForPipelineSelect()
 }
 
 template <typename GfxFamily>
-inline WaitStatus CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) {
+inline WaitStatus CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) {
     const auto params = kmdNotifyHelper->obtainTimeoutParams(useQuickKmdSleep, *getTagAddress(), taskCountToWait, flushStampToWait, throttle, this->isKmdWaitModeActive(),
                                                              this->isAnyDirectSubmissionEnabled());
 
@@ -1075,7 +1075,7 @@ inline void CommandStreamReceiverHw<GfxFamily>::unregisterDirectSubmissionFromCo
 }
 
 template <typename GfxFamily>
-uint32_t CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) {
+TaskCountType CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) {
     using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END;
     using MI_FLUSH_DW = typename GfxFamily::MI_FLUSH_DW;
 
diff --git a/shared/source/command_stream/command_stream_receiver_with_aub_dump.h b/shared/source/command_stream/command_stream_receiver_with_aub_dump.h
index ed0853a643..e0d9b8e59f 100644
--- a/shared/source/command_stream/command_stream_receiver_with_aub_dump.h
+++ b/shared/source/command_stream/command_stream_receiver_with_aub_dump.h
@@ -40,7 +40,7 @@ class CommandStreamReceiverWithAUBDump : public BaseCSR {
         return CommandStreamReceiverType::CSR_HW_WITH_AUB;
     }
 
-    WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
+    WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait,
                                                      bool useQuickKmdSleep, QueueThrottle throttle) override;
 
     size_t getPreferredTagPoolSize() const override { return 1; }
diff --git a/shared/source/command_stream/command_stream_receiver_with_aub_dump.inl b/shared/source/command_stream/command_stream_receiver_with_aub_dump.inl
index 36a408d754..12ee85f55c 100644
--- a/shared/source/command_stream/command_stream_receiver_with_aub_dump.inl
+++ b/shared/source/command_stream/command_stream_receiver_with_aub_dump.inl
@@ -78,7 +78,7 @@ void CommandStreamReceiverWithAUBDump<BaseCSR>::setupContext(OsContext &osContex
 }
 
 template <typename BaseCSR>
-WaitStatus CommandStreamReceiverWithAUBDump<BaseCSR>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
+WaitStatus CommandStreamReceiverWithAUBDump<BaseCSR>::waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait,
                                                                                             bool useQuickKmdSleep, QueueThrottle throttle) {
     if (aubCSR) {
         aubCSR->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, throttle);
diff --git a/shared/source/command_stream/csr_deps.h b/shared/source/command_stream/csr_deps.h
index 3003db5f22..a601179e88 100644
--- a/shared/source/command_stream/csr_deps.h
+++ b/shared/source/command_stream/csr_deps.h
@@ -6,6 +6,7 @@
  */
 
 #pragma once
+#include "shared/source/command_stream/task_count_helper.h"
 #include "shared/source/utilities/stackvec.h"
 
 namespace NEO {
@@ -21,7 +22,7 @@ class CsrDependencies {
         All
     };
 
-    StackVec<std::pair<uint32_t, uint64_t>, 32> taskCountContainer;
+    StackVec<std::pair<TaskCountType, uint64_t>, 32> taskCountContainer;
     StackVec<TimestampPacketContainer *, 32> timestampPacketContainer;
 
     void makeResident(CommandStreamReceiver &commandStreamReceiver) const;
diff --git a/shared/source/command_stream/scratch_space_controller.h b/shared/source/command_stream/scratch_space_controller.h
index c684dbd1ea..56ae9e21c3 100644
--- a/shared/source/command_stream/scratch_space_controller.h
+++ b/shared/source/command_stream/scratch_space_controller.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,7 +43,7 @@ class ScratchSpaceController {
                                          uint32_t scratchSlot,
                                          uint32_t requiredPerThreadScratchSize,
                                          uint32_t requiredPerThreadPrivateScratchSize,
-                                         uint32_t currentTaskCount,
+                                         TaskCountType currentTaskCount,
                                          OsContext &osContext,
                                          bool &stateBaseAddressDirty,
                                          bool &vfeStateDirty) = 0;
@@ -62,14 +62,14 @@ class ScratchSpaceController {
                               uint32_t scratchSlot,
                               uint32_t requiredPerThreadScratchSize,
                               uint32_t requiredPerThreadPrivateScratchSize,
-                              uint32_t currentTaskCount,
+                              TaskCountType currentTaskCount,
                               OsContext &osContext,
                               bool &stateBaseAddressDirty,
                               bool &vfeStateDirty) = 0;
     virtual void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
                                                        uint32_t requiredPerThreadScratchSize,
                                                        uint32_t requiredPerThreadPrivateScratchSize,
-                                                       uint32_t currentTaskCount,
+                                                       TaskCountType currentTaskCount,
                                                        OsContext &osContext,
                                                        bool &stateBaseAddressDirty,
                                                        bool &vfeStateDirty,
diff --git a/shared/source/command_stream/scratch_space_controller_base.cpp b/shared/source/command_stream/scratch_space_controller_base.cpp
index d77d20de3e..6d1c05658b 100644
--- a/shared/source/command_stream/scratch_space_controller_base.cpp
+++ b/shared/source/command_stream/scratch_space_controller_base.cpp
@@ -26,7 +26,7 @@ void ScratchSpaceControllerBase::setRequiredScratchSpace(void *sshBaseAddress,
                                                          uint32_t scratchSlot,
                                                          uint32_t requiredPerThreadScratchSize,
                                                          uint32_t requiredPerThreadPrivateScratchSize,
-                                                         uint32_t currentTaskCount,
+                                                         TaskCountType currentTaskCount,
                                                          OsContext &osContext,
                                                          bool &stateBaseAddressDirty,
                                                          bool &vfeStateDirty) {
@@ -85,7 +85,7 @@ void ScratchSpaceControllerBase::programHeaps(HeapContainer &heapContainer,
                                               uint32_t offset,
                                               uint32_t requiredPerThreadScratchSize,
                                               uint32_t requiredPerThreadPrivateScratchSize,
-                                              uint32_t currentTaskCount,
+                                              TaskCountType currentTaskCount,
                                               OsContext &osContext,
                                               bool &stateBaseAddressDirty,
                                               bool &vfeStateDirty) {
@@ -94,7 +94,7 @@ void ScratchSpaceControllerBase::programHeaps(HeapContainer &heapContainer,
 void ScratchSpaceControllerBase::programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
                                                                        uint32_t requiredPerThreadScratchSize,
                                                                        uint32_t requiredPerThreadPrivateScratchSize,
-                                                                       uint32_t currentTaskCount,
+                                                                       TaskCountType currentTaskCount,
                                                                        OsContext &osContext,
                                                                        bool &stateBaseAddressDirty,
                                                                        bool &vfeStateDirty,
diff --git a/shared/source/command_stream/scratch_space_controller_base.h b/shared/source/command_stream/scratch_space_controller_base.h
index 26a548830a..f459c08fed 100644
--- a/shared/source/command_stream/scratch_space_controller_base.h
+++ b/shared/source/command_stream/scratch_space_controller_base.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -18,7 +18,7 @@ class ScratchSpaceControllerBase : public ScratchSpaceController {
                                  uint32_t scratchSlot,
                                  uint32_t requiredPerThreadScratchSize,
                                  uint32_t requiredPerThreadPrivateScratchSize,
-                                 uint32_t currentTaskCount,
+                                 TaskCountType currentTaskCount,
                                  OsContext &osContext,
                                  bool &stateBaseAddressDirty,
                                  bool &vfeStateDirty) override;
@@ -31,14 +31,14 @@ class ScratchSpaceControllerBase : public ScratchSpaceController {
                       uint32_t scratchSlot,
                       uint32_t requiredPerThreadScratchSize,
                       uint32_t requiredPerThreadPrivateScratchSize,
-                      uint32_t currentTaskCount,
+                      TaskCountType currentTaskCount,
                       OsContext &osContext,
                       bool &stateBaseAddressDirty,
                       bool &vfeStateDirty) override;
     void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
                                                uint32_t requiredPerThreadScratchSize,
                                                uint32_t requiredPerThreadPrivateScratchSize,
-                                               uint32_t currentTaskCount,
+                                               TaskCountType currentTaskCount,
                                                OsContext &osContext,
                                                bool &stateBaseAddressDirty,
                                                bool &vfeStateDirty,
diff --git a/shared/source/command_stream/scratch_space_controller_xehp_and_later.cpp b/shared/source/command_stream/scratch_space_controller_xehp_and_later.cpp
index afe8329e4d..4b82201468 100644
--- a/shared/source/command_stream/scratch_space_controller_xehp_and_later.cpp
+++ b/shared/source/command_stream/scratch_space_controller_xehp_and_later.cpp
@@ -54,7 +54,7 @@ void ScratchSpaceControllerXeHPAndLater::setRequiredScratchSpace(void *sshBaseAd
                                                                  uint32_t offset,
                                                                  uint32_t requiredPerThreadScratchSize,
                                                                  uint32_t requiredPerThreadPrivateScratchSize,
-                                                                 uint32_t currentTaskCount,
+                                                                 TaskCountType currentTaskCount,
                                                                  OsContext &osContext,
                                                                  bool &stateBaseAddressDirty,
                                                                  bool &vfeStateDirty) {
@@ -135,7 +135,7 @@ void ScratchSpaceControllerXeHPAndLater::reserveHeap(IndirectHeap::Type heapType
 void ScratchSpaceControllerXeHPAndLater::programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
                                                                                uint32_t requiredPerThreadScratchSize,
                                                                                uint32_t requiredPerThreadPrivateScratchSize,
-                                                                               uint32_t currentTaskCount,
+                                                                               TaskCountType currentTaskCount,
                                                                                OsContext &osContext,
                                                                                bool &stateBaseAddressDirty,
                                                                                bool &vfeStateDirty,
@@ -152,7 +152,7 @@ void ScratchSpaceControllerXeHPAndLater::programBindlessSurfaceStateForScratch(B
 
 void ScratchSpaceControllerXeHPAndLater::prepareScratchAllocation(uint32_t requiredPerThreadScratchSize,
                                                                   uint32_t requiredPerThreadPrivateScratchSize,
-                                                                  uint32_t currentTaskCount,
+                                                                  TaskCountType currentTaskCount,
                                                                   OsContext &osContext,
                                                                   bool &stateBaseAddressDirty,
                                                                   bool &scratchSurfaceDirty,
@@ -193,7 +193,7 @@ void ScratchSpaceControllerXeHPAndLater::programHeaps(HeapContainer &heapContain
                                                       uint32_t scratchSlot,
                                                       uint32_t requiredPerThreadScratchSize,
                                                       uint32_t requiredPerThreadPrivateScratchSize,
-                                                      uint32_t currentTaskCount,
+                                                      TaskCountType currentTaskCount,
                                                       OsContext &osContext,
                                                       bool &stateBaseAddressDirty,
                                                       bool &vfeStateDirty) {
diff --git a/shared/source/command_stream/scratch_space_controller_xehp_and_later.h b/shared/source/command_stream/scratch_space_controller_xehp_and_later.h
index c0f169cc0a..1fd2060c66 100644
--- a/shared/source/command_stream/scratch_space_controller_xehp_and_later.h
+++ b/shared/source/command_stream/scratch_space_controller_xehp_and_later.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2021 Intel Corporation
+ * Copyright (C) 2021-2022 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,7 +24,7 @@ class ScratchSpaceControllerXeHPAndLater : public ScratchSpaceController {
                                  uint32_t scratchSlot,
                                  uint32_t requiredPerThreadScratchSize,
                                  uint32_t requiredPerThreadPrivateScratchSize,
-                                 uint32_t currentTaskCount,
+                                 TaskCountType currentTaskCount,
                                  OsContext &osContext,
                                  bool &stateBaseAddressDirty,
                                  bool &vfeStateDirty) override;
@@ -38,14 +38,14 @@ class ScratchSpaceControllerXeHPAndLater : public ScratchSpaceController {
                       uint32_t scratchSlot,
                       uint32_t requiredPerThreadScratchSize,
                       uint32_t requiredPerThreadPrivateScratchSize,
-                      uint32_t currentTaskCount,
+                      TaskCountType currentTaskCount,
                       OsContext &osContext,
                       bool &stateBaseAddressDirty,
                       bool &vfeStateDirty) override;
     void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
                                                uint32_t requiredPerThreadScratchSize,
                                                uint32_t requiredPerThreadPrivateScratchSize,
-                                               uint32_t currentTaskCount,
+                                               TaskCountType currentTaskCount,
                                                OsContext &osContext,
                                                bool &stateBaseAddressDirty,
                                                bool &vfeStateDirty,
@@ -56,7 +56,7 @@ class ScratchSpaceControllerXeHPAndLater : public ScratchSpaceController {
     MOCKABLE_VIRTUAL void programSurfaceStateAtPtr(void *surfaceStateForScratchAllocation);
     MOCKABLE_VIRTUAL void prepareScratchAllocation(uint32_t requiredPerThreadScratchSize,
                                                    uint32_t requiredPerThreadPrivateScratchSize,
-                                                   uint32_t currentTaskCount,
+                                                   TaskCountType currentTaskCount,
                                                    OsContext &osContext,
                                                    bool &stateBaseAddressDirty,
                                                    bool &scratchSurfaceDirty,
diff --git a/shared/source/command_stream/submissions_aggregator.h b/shared/source/command_stream/submissions_aggregator.h
index fcc59aaf9d..6119f34454 100644
--- a/shared/source/command_stream/submissions_aggregator.h
+++ b/shared/source/command_stream/submissions_aggregator.h
@@ -63,7 +63,7 @@ struct CommandBuffer : public IDNode<CommandBuffer> {
     BatchBuffer batchBuffer;
     void *batchBufferEndLocation = nullptr;
     uint32_t inspectionId = 0;
-    uint32_t taskCount = 0u;
+    TaskCountType taskCount = 0u;
     void *pipeControlThatMayBeErasedLocation = nullptr;
     void *epiloguePipeControlLocation = nullptr;
     PipeControlArgs epiloguePipeControlArgs;
diff --git a/shared/source/command_stream/task_count_helper.h b/shared/source/command_stream/task_count_helper.h
new file mode 100644
index 0000000000..5fe0423ec5
--- /dev/null
+++ b/shared/source/command_stream/task_count_helper.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2022 Intel Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ */
+
+#pragma once
+#include <stdint.h>
+
+// Integer type used for command stream task counts and for the sentinel
+// values derived from its maximum; aliased so the width is set in one place.
+using TaskCountType = uint32_t;
+
+// Integer type of the value read through a tag address when polling for
+// completion.
+using TagAddressType = uint32_t;
diff --git a/shared/source/command_stream/tbx_command_stream_receiver_hw.h b/shared/source/command_stream/tbx_command_stream_receiver_hw.h
index 3f369bd9c7..bcfdac3ca1 100644
--- a/shared/source/command_stream/tbx_command_stream_receiver_hw.h
+++ b/shared/source/command_stream/tbx_command_stream_receiver_hw.h
@@ -33,7 +33,7 @@ class TbxCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw<GfxFa
 
     uint32_t getMaskAndValueForPollForCompletion() const;
     bool getpollNotEqualValueForPollForCompletion() const;
-    void flushSubmissionsAndDownloadAllocations(uint32_t taskCount);
+    void flushSubmissionsAndDownloadAllocations(TaskCountType taskCount);
 
   public:
     using CommandStreamReceiverSimulatedCommonHw<GfxFamily>::initAdditionalMMIO;
@@ -45,8 +45,8 @@ class TbxCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw<GfxFa
 
     SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override;
 
-    WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override;
-    WaitStatus waitForCompletionWithTimeout(const WaitParams &params, uint32_t taskCountToWait) override;
+    WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override;
+    WaitStatus waitForCompletionWithTimeout(const WaitParams &params, TaskCountType taskCountToWait) override;
     void downloadAllocations() override;
     void downloadAllocationTbx(GraphicsAllocation &gfxAllocation);
 
diff --git a/shared/source/command_stream/tbx_command_stream_receiver_hw.inl b/shared/source/command_stream/tbx_command_stream_receiver_hw.inl
index ad12edad9a..c61b35457b 100644
--- a/shared/source/command_stream/tbx_command_stream_receiver_hw.inl
+++ b/shared/source/command_stream/tbx_command_stream_receiver_hw.inl
@@ -474,14 +474,14 @@ bool TbxCommandStreamReceiverHw<GfxFamily>::expectMemory(const void *gfxAddress,
 }
 
 template <typename GfxFamily>
-void TbxCommandStreamReceiverHw<GfxFamily>::flushSubmissionsAndDownloadAllocations(uint32_t taskCountToWait) {
+void TbxCommandStreamReceiverHw<GfxFamily>::flushSubmissionsAndDownloadAllocations(TaskCountType taskCountToWait) {
     this->flushBatchedSubmissions();
 
     if (this->latestFlushedTaskCount < taskCountToWait) {
         this->flushTagUpdate();
     }
 
-    volatile uint32_t *pollAddress = this->getTagAddress();
+    volatile TagAddressType *pollAddress = this->getTagAddress();
     for (uint32_t i = 0; i < this->activePartitions; i++) {
         while (*pollAddress < this->latestFlushedTaskCount) {
             this->downloadAllocation(*this->getTagAllocation());
@@ -497,13 +497,13 @@ void TbxCommandStreamReceiverHw<GfxFamily>::flushSubmissionsAndDownloadAllocatio
 }
 
 template <typename GfxFamily>
-WaitStatus TbxCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) {
+WaitStatus TbxCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) {
     flushSubmissionsAndDownloadAllocations(taskCountToWait);
     return BaseClass::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, throttle);
 }
 
 template <typename GfxFamily>
-WaitStatus TbxCommandStreamReceiverHw<GfxFamily>::waitForCompletionWithTimeout(const WaitParams &params, uint32_t taskCountToWait) {
+WaitStatus TbxCommandStreamReceiverHw<GfxFamily>::waitForCompletionWithTimeout(const WaitParams &params, TaskCountType taskCountToWait) {
     flushSubmissionsAndDownloadAllocations(taskCountToWait);
     return BaseClass::waitForCompletionWithTimeout(params, taskCountToWait);
 }
@@ -554,7 +554,7 @@ void TbxCommandStreamReceiverHw<GfxFamily>::downloadAllocationTbx(GraphicsAlloca
 
 template <typename GfxFamily>
 void TbxCommandStreamReceiverHw<GfxFamily>::downloadAllocations() {
-    volatile uint32_t *pollAddress = this->getTagAddress();
+    volatile TagAddressType *pollAddress = this->getTagAddress();
     for (uint32_t i = 0; i < this->activePartitions; i++) {
         while (*pollAddress < this->latestFlushedTaskCount) {
             this->downloadAllocation(*this->getTagAllocation());
diff --git a/shared/source/direct_submission/direct_submission_controller.h b/shared/source/direct_submission/direct_submission_controller.h
index 1ae8a3241f..a596c4fd67 100644
--- a/shared/source/direct_submission/direct_submission_controller.h
+++ b/shared/source/direct_submission/direct_submission_controller.h
@@ -7,6 +7,7 @@
 
 #pragma once
 
+#include "shared/source/command_stream/task_count_helper.h"
 #include "shared/source/helpers/common_types.h"
 
 #include <array>
@@ -35,7 +36,7 @@ class DirectSubmissionController {
   protected:
     struct DirectSubmissionState {
         bool isStopped = true;
-        uint32_t taskCount = 0u;
+        TaskCountType taskCount = 0u;
     };
 
     static void *controlDirectSubmissionsState(void *self);
diff --git a/shared/source/direct_submission/direct_submission_hw.h b/shared/source/direct_submission/direct_submission_hw.h
index 163bdd07cf..022af122bf 100644
--- a/shared/source/direct_submission/direct_submission_hw.h
+++ b/shared/source/direct_submission/direct_submission_hw.h
@@ -87,7 +87,7 @@ class DirectSubmissionHw {
 
     static std::unique_ptr<DirectSubmissionHw<GfxFamily, Dispatcher>> create(const DirectSubmissionInputParams &inputParams);
 
-    virtual uint32_t *getCompletionValuePointer() { return nullptr; }
+    virtual TaskCountType *getCompletionValuePointer() { return nullptr; }
 
     bool isRelaxedOrderingEnabled() const {
         return relaxedOrderingEnabled;
diff --git a/shared/source/direct_submission/linux/drm_direct_submission.h b/shared/source/direct_submission/linux/drm_direct_submission.h
index 9e865fb6cf..cb224db37c 100644
--- a/shared/source/direct_submission/linux/drm_direct_submission.h
+++ b/shared/source/direct_submission/linux/drm_direct_submission.h
@@ -20,7 +20,7 @@ class DrmDirectSubmission : public DirectSubmissionHw<GfxFamily, Dispatcher> {
 
     ~DrmDirectSubmission() override;
 
-    uint32_t *getCompletionValuePointer() override;
+    TaskCountType *getCompletionValuePointer() override;
 
   protected:
     bool allocateOsResources() override;
@@ -37,10 +37,10 @@ class DrmDirectSubmission : public DirectSubmissionHw<GfxFamily, Dispatcher> {
     bool isCompleted(uint32_t ringBufferIndex) override;
     bool isCompletionFenceSupported();
 
-    MOCKABLE_VIRTUAL void wait(uint32_t taskCountToWait);
+    MOCKABLE_VIRTUAL void wait(TaskCountType taskCountToWait);
 
     TagData currentTagData{};
-    volatile uint32_t *tagAddress;
-    uint32_t completionFenceValue{};
+    volatile TagAddressType *tagAddress;
+    TaskCountType completionFenceValue{};
 };
 } // namespace NEO
diff --git a/shared/source/direct_submission/linux/drm_direct_submission.inl b/shared/source/direct_submission/linux/drm_direct_submission.inl
index 77c2b77f15..3305f57f86 100644
--- a/shared/source/direct_submission/linux/drm_direct_submission.inl
+++ b/shared/source/direct_submission/linux/drm_direct_submission.inl
@@ -82,7 +82,7 @@ inline DrmDirectSubmission<GfxFamily, Dispatcher>::~DrmDirectSubmission() {
 }
 
 template <typename GfxFamily, typename Dispatcher>
-uint32_t *DrmDirectSubmission<GfxFamily, Dispatcher>::getCompletionValuePointer() {
+TaskCountType *DrmDirectSubmission<GfxFamily, Dispatcher>::getCompletionValuePointer() {
     if (this->isCompletionFenceSupported()) {
         return &this->completionFenceValue;
     }
@@ -93,7 +93,7 @@ template <typename GfxFamily, typename Dispatcher>
 bool DrmDirectSubmission<GfxFamily, Dispatcher>::allocateOsResources() {
     this->currentTagData.tagAddress = this->semaphoreGpuVa + offsetof(RingSemaphoreData, tagAllocation);
     this->currentTagData.tagValue = 0u;
-    this->tagAddress = reinterpret_cast<volatile uint32_t *>(reinterpret_cast<uint8_t *>(this->semaphorePtr) + offsetof(RingSemaphoreData, tagAllocation));
+    this->tagAddress = reinterpret_cast<volatile TagAddressType *>(reinterpret_cast<uint8_t *>(this->semaphorePtr) + offsetof(RingSemaphoreData, tagAllocation));
     return true;
 }
 
@@ -116,7 +116,7 @@ bool DrmDirectSubmission<GfxFamily, Dispatcher>::submit(uint64_t gpuAddress, siz
     bool ret = false;
     uint32_t drmContextId = 0u;
 
-    uint32_t completionValue = 0u;
+    TaskCountType completionValue = 0u;
     uint64_t completionFenceGpuAddress = 0u;
     if (this->isCompletionFenceSupported()) {
         completionValue = ++completionFenceValue;
@@ -246,7 +246,7 @@ bool DrmDirectSubmission<GfxFamily, Dispatcher>::isCompletionFenceSupported() {
 }
 
 template <typename GfxFamily, typename Dispatcher>
-void DrmDirectSubmission<GfxFamily, Dispatcher>::wait(uint32_t taskCountToWait) {
+void DrmDirectSubmission<GfxFamily, Dispatcher>::wait(TaskCountType taskCountToWait) {
     auto pollAddress = this->tagAddress;
     for (uint32_t i = 0; i < this->activeTiles; i++) {
         while (!WaitUtils::waitFunction(pollAddress, taskCountToWait)) {
diff --git a/shared/source/helpers/completion_stamp.h b/shared/source/helpers/completion_stamp.h
index d14a317b9e..1a9e6c15cf 100644
--- a/shared/source/helpers/completion_stamp.h
+++ b/shared/source/helpers/completion_stamp.h
@@ -7,22 +7,25 @@
 
 #pragma once
 
+#include "shared/source/command_stream/task_count_helper.h"
+
 #include <cstdint>
+#include <limits>
 
 namespace NEO {
 using FlushStamp = uint64_t;
 enum class SubmissionStatus : uint32_t;
 struct CompletionStamp {
-    static uint32_t getTaskCountFromSubmissionStatusError(SubmissionStatus submissionStatus);
+    static TaskCountType getTaskCountFromSubmissionStatusError(SubmissionStatus submissionStatus);
 
-    uint32_t taskCount;
-    uint32_t taskLevel;
+    TaskCountType taskCount;
+    TaskCountType taskLevel;
     FlushStamp flushStamp;
 
-    static constexpr uint32_t notReady = 0xFFFFFFF0;
-    static constexpr uint32_t gpuHang = 0xFFFFFFFA;
-    static constexpr uint32_t outOfDeviceMemory = 0xFFFFFFFB;
-    static constexpr uint32_t outOfHostMemory = 0xFFFFFFFC;
+    static constexpr TaskCountType notReady = std::numeric_limits<TaskCountType>::max() - 0xF;
+    static constexpr TaskCountType gpuHang = std::numeric_limits<TaskCountType>::max() - 0x5;
+    static constexpr TaskCountType outOfDeviceMemory = std::numeric_limits<TaskCountType>::max() - 0x4;
+    static constexpr TaskCountType outOfHostMemory = std::numeric_limits<TaskCountType>::max() - 0x3;
 };
 
 } // namespace NEO
diff --git a/shared/source/helpers/kmd_notify_properties.cpp b/shared/source/helpers/kmd_notify_properties.cpp
index 926474d95d..9b605d1c69 100644
--- a/shared/source/helpers/kmd_notify_properties.cpp
+++ b/shared/source/helpers/kmd_notify_properties.cpp
@@ -8,6 +8,7 @@
 #include "shared/source/helpers/kmd_notify_properties.h"
 
 #include "shared/source/command_stream/queue_throttle.h"
+#include "shared/source/command_stream/task_count_helper.h"
 #include "shared/source/debug_settings/debug_settings_manager.h"
 
 #include <chrono>
@@ -16,8 +17,8 @@
 using namespace NEO;
 
 WaitParams KmdNotifyHelper::obtainTimeoutParams(bool quickKmdSleepRequest,
-                                                uint32_t currentHwTag,
-                                                uint32_t taskCountToWait,
+                                                TagAddressType currentHwTag,
+                                                TaskCountType taskCountToWait,
                                                 FlushStamp flushStampToWait,
                                                 QueueThrottle throttle,
                                                 bool kmdWaitModeActive,
diff --git a/shared/source/helpers/kmd_notify_properties.h b/shared/source/helpers/kmd_notify_properties.h
index e64f3bc84f..ecebfb54fa 100644
--- a/shared/source/helpers/kmd_notify_properties.h
+++ b/shared/source/helpers/kmd_notify_properties.h
@@ -6,6 +6,7 @@
  */
 
 #pragma once
+#include "shared/source/command_stream/task_count_helper.h"
 #include "shared/source/command_stream/wait_status.h"
 
 #include <atomic>
@@ -42,8 +43,8 @@ class KmdNotifyHelper {
     MOCKABLE_VIRTUAL ~KmdNotifyHelper() = default;
 
     WaitParams obtainTimeoutParams(bool quickKmdSleepRequest,
-                                   uint32_t currentHwTag,
-                                   uint32_t taskCountToWait,
+                                   TagAddressType currentHwTag,
+                                   TaskCountType taskCountToWait,
                                    FlushStamp flushStampToWait,
                                    QueueThrottle throttle,
                                    bool kmdWaitModeActive,
diff --git a/shared/source/helpers/pause_on_gpu_properties.h b/shared/source/helpers/pause_on_gpu_properties.h
index e1f1c5cefe..77a0c14eaf 100644
--- a/shared/source/helpers/pause_on_gpu_properties.h
+++ b/shared/source/helpers/pause_on_gpu_properties.h
@@ -7,6 +7,7 @@
 
 #pragma once
 
+#include "shared/source/command_stream/task_count_helper.h"
 #include "shared/source/debug_settings/debug_settings_manager.h"
 
 #include <cstdint>
@@ -28,7 +29,7 @@ inline bool featureEnabled(int32_t debugFlagValue) {
     return (debugFlagValue != DebugFlagValues::Disabled);
 }
 
-inline bool pauseModeAllowed(int32_t debugFlagValue, uint32_t taskCount, PauseMode pauseMode) {
+inline bool pauseModeAllowed(int32_t debugFlagValue, TaskCountType taskCount, PauseMode pauseMode) {
     if (!featureEnabled(debugFlagValue)) {
         // feature disabled
         return false;
@@ -44,16 +45,16 @@ inline bool pauseModeAllowed(int32_t debugFlagValue, uint32_t taskCount, PauseMo
         return true;
     }
 
-    return (debugFlagValue == static_cast<int32_t>(taskCount));
+    return (debugFlagValue == static_cast<int64_t>(taskCount));
 }
 
-inline bool gpuScratchRegWriteAllowed(int32_t debugFlagValue, uint32_t taskCount) {
+inline bool gpuScratchRegWriteAllowed(int32_t debugFlagValue, TaskCountType taskCount) {
     if (!featureEnabled(debugFlagValue)) {
         // feature disabled
         return false;
     }
 
-    return (debugFlagValue == static_cast<int32_t>(taskCount));
+    return (debugFlagValue == static_cast<int64_t>(taskCount));
 }
 } // namespace PauseOnGpuProperties
 } // namespace NEO
diff --git a/shared/source/helpers/timestamp_packet.h b/shared/source/helpers/timestamp_packet.h
index 60d1082647..c42aa3d5c3 100644
--- a/shared/source/helpers/timestamp_packet.h
+++ b/shared/source/helpers/timestamp_packet.h
@@ -154,7 +154,7 @@ struct TimestampPacketHelper {
 
             EncodeSempahore<GfxFamily>::addMiSemaphoreWaitCommand(cmdStream,
                                                                   static_cast<uint64_t>(tagAddressPreviousRootDevice),
-                                                                  taskCountPreviousRootDevice,
+                                                                  static_cast<uint32_t>(taskCountPreviousRootDevice),
                                                                   COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD);
         }
     }
diff --git a/shared/source/memory_manager/allocations_list.cpp b/shared/source/memory_manager/allocations_list.cpp
index 8de163eb49..3a01c61021 100644
--- a/shared/source/memory_manager/allocations_list.cpp
+++ b/shared/source/memory_manager/allocations_list.cpp
@@ -8,12 +8,13 @@
 #include "shared/source/memory_manager/allocations_list.h"
 
 #include "shared/source/command_stream/command_stream_receiver.h"
+#include "shared/source/command_stream/task_count_helper.h"
 
 namespace {
 struct ReusableAllocationRequirements {
     const void *requiredPtr;
     size_t requiredMinimalSize;
-    volatile uint32_t *csrTagAddress;
+    volatile TagAddressType *csrTagAddress;
     NEO::AllocationType allocationType;
     uint32_t contextId;
     uint32_t activeTileCount;
diff --git a/shared/source/memory_manager/graphics_allocation.cpp b/shared/source/memory_manager/graphics_allocation.cpp
index f261994b1b..f9c443e8ba 100644
--- a/shared/source/memory_manager/graphics_allocation.cpp
+++ b/shared/source/memory_manager/graphics_allocation.cpp
@@ -50,7 +50,7 @@ GraphicsAllocation::GraphicsAllocation(uint32_t rootDeviceIndex, size_t numGmms,
 
 GraphicsAllocation::~GraphicsAllocation() = default;
 
-void GraphicsAllocation::updateTaskCount(uint32_t newTaskCount, uint32_t contextId) {
+void GraphicsAllocation::updateTaskCount(TaskCountType newTaskCount, uint32_t contextId) {
     if (usageInfos[contextId].taskCount == objectNotUsed) {
         registeredContextsNum++;
     }
@@ -119,8 +119,7 @@ void GraphicsAllocation::prepareHostPtrForResidency(CommandStreamReceiver *csr)
 uint32_t GraphicsAllocation::getNumHandlesForKmdSharedAllocation(uint32_t numBanks) {
     return (numBanks > 1) && (DebugManager.flags.CreateKmdMigratedSharedAllocationWithMultipleBOs.get() != 0) ? numBanks : 1u;
 }
-
-constexpr uint32_t GraphicsAllocation::objectNotUsed;
-constexpr uint32_t GraphicsAllocation::objectNotResident;
-constexpr uint32_t GraphicsAllocation::objectAlwaysResident;
+constexpr TaskCountType GraphicsAllocation::objectNotUsed;
+constexpr TaskCountType GraphicsAllocation::objectNotResident;
+constexpr TaskCountType GraphicsAllocation::objectAlwaysResident;
 } // namespace NEO
diff --git a/shared/source/memory_manager/graphics_allocation.h b/shared/source/memory_manager/graphics_allocation.h
index 0eb242f989..874e62ad64 100644
--- a/shared/source/memory_manager/graphics_allocation.h
+++ b/shared/source/memory_manager/graphics_allocation.h
@@ -7,6 +7,7 @@
 
 #pragma once
 
+#include "shared/source/command_stream/task_count_helper.h"
 #include "shared/source/helpers/debug_helpers.h"
 #include "shared/source/helpers/ptr_math.h"
 #include "shared/source/memory_manager/allocation_type.h"
@@ -151,22 +152,22 @@ class GraphicsAllocation : public IDNode<GraphicsAllocation> {
     bool isUsed() const { return registeredContextsNum > 0; }
     bool isUsedByManyOsContexts() const { return registeredContextsNum > 1u; }
     bool isUsedByOsContext(uint32_t contextId) const { return objectNotUsed != getTaskCount(contextId); }
-    MOCKABLE_VIRTUAL void updateTaskCount(uint32_t newTaskCount, uint32_t contextId);
-    MOCKABLE_VIRTUAL uint32_t getTaskCount(uint32_t contextId) const { return usageInfos[contextId].taskCount; }
+    MOCKABLE_VIRTUAL void updateTaskCount(TaskCountType newTaskCount, uint32_t contextId);
+    MOCKABLE_VIRTUAL TaskCountType getTaskCount(uint32_t contextId) const { return usageInfos[contextId].taskCount; }
     void releaseUsageInOsContext(uint32_t contextId) { updateTaskCount(objectNotUsed, contextId); }
     uint32_t getInspectionId(uint32_t contextId) const { return usageInfos[contextId].inspectionId; }
     void setInspectionId(uint32_t newInspectionId, uint32_t contextId) { usageInfos[contextId].inspectionId = newInspectionId; }
 
     MOCKABLE_VIRTUAL bool isResident(uint32_t contextId) const { return GraphicsAllocation::objectNotResident != getResidencyTaskCount(contextId); }
     bool isAlwaysResident(uint32_t contextId) const { return GraphicsAllocation::objectAlwaysResident == getResidencyTaskCount(contextId); }
-    void updateResidencyTaskCount(uint32_t newTaskCount, uint32_t contextId) {
+    void updateResidencyTaskCount(TaskCountType newTaskCount, uint32_t contextId) {
         if (usageInfos[contextId].residencyTaskCount != GraphicsAllocation::objectAlwaysResident || newTaskCount == GraphicsAllocation::objectNotResident) {
             usageInfos[contextId].residencyTaskCount = newTaskCount;
         }
     }
-    uint32_t getResidencyTaskCount(uint32_t contextId) const { return usageInfos[contextId].residencyTaskCount; }
+    TaskCountType getResidencyTaskCount(uint32_t contextId) const { return usageInfos[contextId].residencyTaskCount; }
     void releaseResidencyInOsContext(uint32_t contextId) { updateResidencyTaskCount(objectNotResident, contextId); }
-    bool isResidencyTaskCountBelow(uint32_t taskCount, uint32_t contextId) const { return !isResident(contextId) || getResidencyTaskCount(contextId) < taskCount; }
+    bool isResidencyTaskCountBelow(TaskCountType taskCount, uint32_t contextId) const { return !isResident(contextId) || getResidencyTaskCount(contextId) < taskCount; }
 
     virtual std::string getAllocationInfoString() const;
     virtual uint64_t peekInternalHandle(MemoryManager *memoryManager) { return 0llu; }
@@ -269,16 +270,16 @@ class GraphicsAllocation : public IDNode<GraphicsAllocation> {
 
     static constexpr uint32_t defaultBank = 0b1u;
     static constexpr uint32_t allBanks = 0xffffffff;
-    constexpr static uint32_t objectNotResident = std::numeric_limits<uint32_t>::max();
-    constexpr static uint32_t objectNotUsed = std::numeric_limits<uint32_t>::max();
-    constexpr static uint32_t objectAlwaysResident = std::numeric_limits<uint32_t>::max() - 1;
+    constexpr static TaskCountType objectNotResident = std::numeric_limits<TaskCountType>::max();
+    constexpr static TaskCountType objectNotUsed = std::numeric_limits<TaskCountType>::max();
+    constexpr static TaskCountType objectAlwaysResident = std::numeric_limits<TaskCountType>::max() - 1;
     std::atomic<uint32_t> hostPtrTaskCountAssignment{0};
     bool isShareableHostMemory = false;
 
   protected:
     struct UsageInfo {
-        uint32_t taskCount = objectNotUsed;
-        uint32_t residencyTaskCount = objectNotResident;
+        TaskCountType taskCount = objectNotUsed;
+        TaskCountType residencyTaskCount = objectNotResident;
         uint32_t inspectionId = 0u;
     };
 
diff --git a/shared/source/memory_manager/internal_allocation_storage.cpp b/shared/source/memory_manager/internal_allocation_storage.cpp
index 9ea10ae9c7..af811ecd0d 100644
--- a/shared/source/memory_manager/internal_allocation_storage.cpp
+++ b/shared/source/memory_manager/internal_allocation_storage.cpp
@@ -17,7 +17,7 @@ InternalAllocationStorage::InternalAllocationStorage(CommandStreamReceiver &comm
     : commandStreamReceiver(commandStreamReceiver){};
 
 void InternalAllocationStorage::storeAllocation(std::unique_ptr<GraphicsAllocation> &&gfxAllocation, uint32_t allocationUsage) {
-    uint32_t taskCount = gfxAllocation->getTaskCount(commandStreamReceiver.getOsContext().getContextId());
+    TaskCountType taskCount = gfxAllocation->getTaskCount(commandStreamReceiver.getOsContext().getContextId());
 
     if (allocationUsage == REUSABLE_ALLOCATION) {
         taskCount = commandStreamReceiver.peekTaskCount();
@@ -25,7 +25,7 @@ void InternalAllocationStorage::storeAllocation(std::unique_ptr<GraphicsAllocati
 
     storeAllocationWithTaskCount(std::move(gfxAllocation), allocationUsage, taskCount);
 }
-void InternalAllocationStorage::storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation> &&gfxAllocation, uint32_t allocationUsage, uint32_t taskCount) {
+void InternalAllocationStorage::storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation> &&gfxAllocation, uint32_t allocationUsage, TaskCountType taskCount) {
     if (allocationUsage == REUSABLE_ALLOCATION) {
         if (DebugManager.flags.DisableResourceRecycling.get()) {
             commandStreamReceiver.getMemoryManager()->freeGraphicsMemory(gfxAllocation.release());
@@ -37,7 +37,7 @@ void InternalAllocationStorage::storeAllocationWithTaskCount(std::unique_ptr<Gra
     allocationsList.pushTailOne(*gfxAllocation.release());
 }
 
-void InternalAllocationStorage::cleanAllocationList(uint32_t waitTaskCount, uint32_t allocationUsage) {
+void InternalAllocationStorage::cleanAllocationList(TaskCountType waitTaskCount, uint32_t allocationUsage) {
     freeAllocationsList(waitTaskCount, allocationLists[allocationUsage]);
 
     if (allocationUsage == TEMPORARY_ALLOCATION) {
@@ -45,7 +45,7 @@ void InternalAllocationStorage::cleanAllocationList(uint32_t waitTaskCount, uint
     }
 }
 
-void InternalAllocationStorage::freeAllocationsList(uint32_t waitTaskCount, AllocationsList &allocationsList) {
+void InternalAllocationStorage::freeAllocationsList(TaskCountType waitTaskCount, AllocationsList &allocationsList) {
     auto memoryManager = commandStreamReceiver.getMemoryManager();
     auto lock = memoryManager->getHostPtrManager()->obtainOwnership();
 
diff --git a/shared/source/memory_manager/internal_allocation_storage.h b/shared/source/memory_manager/internal_allocation_storage.h
index cad4f5d8f9..f2464ea697 100644
--- a/shared/source/memory_manager/internal_allocation_storage.h
+++ b/shared/source/memory_manager/internal_allocation_storage.h
@@ -17,9 +17,9 @@ class InternalAllocationStorage {
   public:
     MOCKABLE_VIRTUAL ~InternalAllocationStorage() = default;
     InternalAllocationStorage(CommandStreamReceiver &commandStreamReceiver);
-    MOCKABLE_VIRTUAL void cleanAllocationList(uint32_t waitTaskCount, uint32_t allocationUsage);
+    MOCKABLE_VIRTUAL void cleanAllocationList(TaskCountType waitTaskCount, uint32_t allocationUsage);
     void storeAllocation(std::unique_ptr<GraphicsAllocation> &&gfxAllocation, uint32_t allocationUsage);
-    void storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation> &&gfxAllocation, uint32_t allocationUsage, uint32_t taskCount);
+    void storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation> &&gfxAllocation, uint32_t allocationUsage, TaskCountType taskCount);
     std::unique_ptr<GraphicsAllocation> obtainReusableAllocation(size_t requiredSize, AllocationType allocationType);
     std::unique_ptr<GraphicsAllocation> obtainTemporaryAllocationWithPtr(size_t requiredSize, const void *requiredPtr, AllocationType allocationType);
     AllocationsList &getTemporaryAllocations() { return allocationLists[TEMPORARY_ALLOCATION]; }
@@ -28,7 +28,7 @@ class InternalAllocationStorage {
     DeviceBitfield getDeviceBitfield() const;
 
   protected:
-    void freeAllocationsList(uint32_t waitTaskCount, AllocationsList &allocationsList);
+    void freeAllocationsList(TaskCountType waitTaskCount, AllocationsList &allocationsList);
     CommandStreamReceiver &commandStreamReceiver;
 
     std::array<AllocationsList, 3> allocationLists = {AllocationsList(TEMPORARY_ALLOCATION), AllocationsList(REUSABLE_ALLOCATION), AllocationsList(DEFERRED_DEALLOCATION)};
diff --git a/shared/source/memory_manager/migration_sync_data.cpp b/shared/source/memory_manager/migration_sync_data.cpp
index 5c33559dd6..3432431286 100644
--- a/shared/source/memory_manager/migration_sync_data.cpp
+++ b/shared/source/memory_manager/migration_sync_data.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2021 Intel Corporation
+ * Copyright (C) 2021-2022 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -7,6 +7,7 @@
 
 #include "shared/source/memory_manager/migration_sync_data.h"
 
+#include "shared/source/command_stream/task_count_helper.h"
 #include "shared/source/helpers/aligned_memory.h"
 #include "shared/source/helpers/constants.h"
 
@@ -20,14 +21,14 @@ MigrationSyncData::~MigrationSyncData() {
 }
 
 uint32_t MigrationSyncData::getCurrentLocation() const { return currentLocation; }
-bool MigrationSyncData::isUsedByTheSameContext(volatile uint32_t *tagAddress) const { return this->tagAddress == tagAddress; }
+bool MigrationSyncData::isUsedByTheSameContext(volatile TagAddressType *tagAddress) const { return this->tagAddress == tagAddress; }
 
 void MigrationSyncData::setCurrentLocation(uint32_t rootDeviceIndex) {
     currentLocation = rootDeviceIndex;
     migrationInProgress = false;
 }
 
-void MigrationSyncData::signalUsage(volatile uint32_t *tagAddress, uint32_t taskCount) {
+void MigrationSyncData::signalUsage(volatile TagAddressType *tagAddress, TaskCountType taskCount) {
     this->tagAddress = tagAddress;
     latestTaskCountUsed = taskCount;
 }
diff --git a/shared/source/memory_manager/migration_sync_data.h b/shared/source/memory_manager/migration_sync_data.h
index 34a72d93c8..a0c10edb24 100644
--- a/shared/source/memory_manager/migration_sync_data.h
+++ b/shared/source/memory_manager/migration_sync_data.h
@@ -6,6 +6,7 @@
  */
 
 #pragma once
+#include "shared/source/command_stream/task_count_helper.h"
 #include "shared/source/utilities/reference_tracked_object.h"
 
 #include <cstdint>
@@ -21,17 +22,17 @@ class MigrationSyncData : public ReferenceTrackedObject<MigrationSyncData> {
     uint32_t getCurrentLocation() const;
     void startMigration();
     void setCurrentLocation(uint32_t rootDeviceIndex);
-    MOCKABLE_VIRTUAL void signalUsage(volatile uint32_t *tagAddress, uint32_t taskCount);
-    bool isUsedByTheSameContext(volatile uint32_t *tagAddress) const;
+    MOCKABLE_VIRTUAL void signalUsage(volatile TagAddressType *tagAddress, TaskCountType taskCount);
+    bool isUsedByTheSameContext(volatile TagAddressType *tagAddress) const;
     MOCKABLE_VIRTUAL void waitOnCpu();
     bool isMigrationInProgress() const { return migrationInProgress; }
     void *getHostPtr() const { return hostPtr; }
 
   protected:
     MOCKABLE_VIRTUAL void yield() const;
-    volatile uint32_t *tagAddress = nullptr;
+    volatile TagAddressType *tagAddress = nullptr;
     void *hostPtr = nullptr;
-    uint32_t latestTaskCountUsed = 0u;
+    TaskCountType latestTaskCountUsed = 0u;
     uint32_t currentLocation = locationUndefined;
     bool migrationInProgress = false;
 };
diff --git a/shared/source/memory_manager/unified_memory_manager.cpp b/shared/source/memory_manager/unified_memory_manager.cpp
index 57a476cbc5..2b550777fd 100644
--- a/shared/source/memory_manager/unified_memory_manager.cpp
+++ b/shared/source/memory_manager/unified_memory_manager.cpp
@@ -578,7 +578,7 @@ bool SVMAllocsManager::hasHostAllocations() {
     return false;
 }
 
-void SVMAllocsManager::makeIndirectAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t taskCount) {
+void SVMAllocsManager::makeIndirectAllocationsResident(CommandStreamReceiver &commandStreamReceiver, TaskCountType taskCount) {
     std::unique_lock<std::shared_mutex> lock(mtx);
     bool parseAllAllocations = false;
     auto entry = indirectAllocationsResidency.find(&commandStreamReceiver);
diff --git a/shared/source/memory_manager/unified_memory_manager.h b/shared/source/memory_manager/unified_memory_manager.h
index 7d27778eb3..867f14173d 100644
--- a/shared/source/memory_manager/unified_memory_manager.h
+++ b/shared/source/memory_manager/unified_memory_manager.h
@@ -6,6 +6,7 @@
  */
 
 #pragma once
+#include "shared/source/command_stream/task_count_helper.h"
 #include "shared/source/helpers/common_types.h"
 #include "shared/source/memory_manager/multi_graphics_allocation.h"
 #include "shared/source/memory_manager/residency_container.h"
@@ -108,8 +109,8 @@ class SVMAllocsManager {
     };
 
     struct InternalAllocationsTracker {
-        uint32_t latestSentTaskCount = 0lu;
-        uint32_t latestResidentObjectId = 0lu;
+        TaskCountType latestSentTaskCount = 0lu;
+        TaskCountType latestResidentObjectId = 0lu;
     };
 
     struct UnifiedMemoryProperties {
@@ -183,7 +184,7 @@ class SVMAllocsManager {
     void freeSvmAllocationWithDeviceStorage(SvmAllocationData *svmData);
     bool hasHostAllocations();
     std::atomic<uint32_t> allocationsCounter = 0;
-    MOCKABLE_VIRTUAL void makeIndirectAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t taskCount);
+    MOCKABLE_VIRTUAL void makeIndirectAllocationsResident(CommandStreamReceiver &commandStreamReceiver, TaskCountType taskCount);
     void prepareIndirectAllocationForDestruction(SvmAllocationData *);
     void prefetchMemory(Device &device, SvmAllocationData &svmData);
     std::unique_lock<std::mutex> obtainOwnership();
diff --git a/shared/source/os_interface/linux/drm_buffer_object.cpp b/shared/source/os_interface/linux/drm_buffer_object.cpp
index e6328bffca..043dc5cfe8 100644
--- a/shared/source/os_interface/linux/drm_buffer_object.cpp
+++ b/shared/source/os_interface/linux/drm_buffer_object.cpp
@@ -7,6 +7,7 @@
 
 #include "shared/source/os_interface/linux/drm_buffer_object.h"
 
+#include "shared/source/command_stream/task_count_helper.h"
 #include "shared/source/gmm_helper/gmm_helper.h"
 #include "shared/source/helpers/aligned_memory.h"
 #include "shared/source/helpers/debug_helpers.h"
@@ -118,7 +119,7 @@ void BufferObject::fillExecObject(ExecObject &execObject, OsContext *osContext,
 }
 
 int BufferObject::exec(uint32_t used, size_t startOffset, unsigned int flags, bool requiresCoherency, OsContext *osContext, uint32_t vmHandleId, uint32_t drmContextId,
-                       BufferObject *const residency[], size_t residencyCount, ExecObject *execObjectsStorage, uint64_t completionGpuAddress, uint32_t completionValue) {
+                       BufferObject *const residency[], size_t residencyCount, ExecObject *execObjectsStorage, uint64_t completionGpuAddress, TaskCountType completionValue) {
     for (size_t i = 0; i < residencyCount; i++) {
         residency[i]->fillExecObject(execObjectsStorage[i], osContext, vmHandleId, drmContextId);
     }
diff --git a/shared/source/os_interface/linux/drm_buffer_object.h b/shared/source/os_interface/linux/drm_buffer_object.h
index 1ebc0f02f0..04c8edae41 100644
--- a/shared/source/os_interface/linux/drm_buffer_object.h
+++ b/shared/source/os_interface/linux/drm_buffer_object.h
@@ -7,6 +7,7 @@
 
 #pragma once
 
+#include "shared/source/command_stream/task_count_helper.h"
 #include "shared/source/helpers/common_types.h"
 #include "shared/source/helpers/constants.h"
 #include "shared/source/memory_manager/definitions/engine_limits.h"
@@ -46,7 +47,7 @@ class BufferObject {
     MOCKABLE_VIRTUAL int validateHostPtr(BufferObject *const boToPin[], size_t numberOfBos, OsContext *osContext, uint32_t vmHandleId, uint32_t drmContextId);
 
     MOCKABLE_VIRTUAL int exec(uint32_t used, size_t startOffset, unsigned int flags, bool requiresCoherency, OsContext *osContext, uint32_t vmHandleId, uint32_t drmContextId,
-                              BufferObject *const residency[], size_t residencyCount, ExecObject *execObjectsStorage, uint64_t completionGpuAddress, uint32_t completionValue);
+                              BufferObject *const residency[], size_t residencyCount, ExecObject *execObjectsStorage, uint64_t completionGpuAddress, TaskCountType completionValue);
 
     int bind(OsContext *osContext, uint32_t vmHandleId);
     int unbind(OsContext *osContext, uint32_t vmHandleId);
diff --git a/shared/source/os_interface/linux/drm_command_stream.h b/shared/source/os_interface/linux/drm_command_stream.h
index 235f1e29f7..60701b9a03 100644
--- a/shared/source/os_interface/linux/drm_command_stream.h
+++ b/shared/source/os_interface/linux/drm_command_stream.h
@@ -68,7 +68,7 @@ class DrmCommandStreamReceiver : public DeviceCommandStreamReceiver<GfxFamily> {
   protected:
     MOCKABLE_VIRTUAL SubmissionStatus flushInternal(const BatchBuffer &batchBuffer, const ResidencyContainer &allocationsForResidency);
     MOCKABLE_VIRTUAL int exec(const BatchBuffer &batchBuffer, uint32_t vmHandleId, uint32_t drmContextId, uint32_t index);
-    MOCKABLE_VIRTUAL int waitUserFence(uint32_t waitValue);
+    MOCKABLE_VIRTUAL int waitUserFence(TaskCountType waitValue);
     MOCKABLE_VIRTUAL void readBackAllocation(void *source);
     bool isUserFenceWaitActive();
 
diff --git a/shared/source/os_interface/linux/drm_command_stream.inl b/shared/source/os_interface/linux/drm_command_stream.inl
index ad9aea6d3c..cc8217ae84 100644
--- a/shared/source/os_interface/linux/drm_command_stream.inl
+++ b/shared/source/os_interface/linux/drm_command_stream.inl
@@ -224,7 +224,7 @@ int DrmCommandStreamReceiver<GfxFamily>::exec(const BatchBuffer &batchBuffer, ui
     }
 
     uint64_t completionGpuAddress = 0;
-    uint32_t completionValue = 0;
+    TaskCountType completionValue = 0;
     if (this->drm->isVmBindAvailable() && this->drm->completionFenceSupport()) {
         completionGpuAddress = getTagAllocation()->getGpuAddress() + (index * this->postSyncWriteOffset) + Drm::completionFenceOffset;
         completionValue = this->latestSentTaskCount;
diff --git a/shared/source/os_interface/linux/drm_command_stream_bdw_and_later.inl b/shared/source/os_interface/linux/drm_command_stream_bdw_and_later.inl
index 8f1982fd3f..1bf5cb0887 100644
--- a/shared/source/os_interface/linux/drm_command_stream_bdw_and_later.inl
+++ b/shared/source/os_interface/linux/drm_command_stream_bdw_and_later.inl
@@ -23,13 +23,13 @@ SubmissionStatus DrmCommandStreamReceiver<GfxFamily>::flushInternal(const BatchB
 }
 
 template <typename GfxFamily>
-int DrmCommandStreamReceiver<GfxFamily>::waitUserFence(uint32_t waitValue) {
+int DrmCommandStreamReceiver<GfxFamily>::waitUserFence(TaskCountType waitValue) {
     uint32_t ctxId = 0u;
-    uint64_t tagAddress = castToUint64(const_cast<uint32_t *>(getTagAddress()));
+    uint64_t tagAddress = castToUint64(const_cast<TagAddressType *>(getTagAddress()));
     if (useContextForUserFenceWait) {
         ctxId = static_cast<const OsContextLinux *>(osContext)->getDrmContextIds()[0];
     }
-    return this->drm->waitUserFence(ctxId, tagAddress, waitValue, Drm::ValueWidth::U32, kmdWaitTimeout, 0u);
+    return this->drm->waitUserFence(ctxId, tagAddress, waitValue, Drm::ValueWidth::U64, kmdWaitTimeout, 0u);
 }
 
 } // namespace NEO
diff --git a/shared/source/os_interface/linux/drm_command_stream_xehp_and_later.inl b/shared/source/os_interface/linux/drm_command_stream_xehp_and_later.inl
index 812ecb736e..9d1441896a 100644
--- a/shared/source/os_interface/linux/drm_command_stream_xehp_and_later.inl
+++ b/shared/source/os_interface/linux/drm_command_stream_xehp_and_later.inl
@@ -54,10 +54,10 @@ SubmissionStatus DrmCommandStreamReceiver<GfxFamily>::flushInternal(const BatchB
 }
 
 template <typename GfxFamily>
-int DrmCommandStreamReceiver<GfxFamily>::waitUserFence(uint32_t waitValue) {
+int DrmCommandStreamReceiver<GfxFamily>::waitUserFence(TaskCountType waitValue) {
     int ret = 0;
     StackVec<uint32_t, 32> ctxIds;
-    uint64_t tagAddress = castToUint64(const_cast<uint32_t *>(getTagAddress()));
+    uint64_t tagAddress = castToUint64(const_cast<TagAddressType *>(getTagAddress()));
     if (useContextForUserFenceWait) {
         for (auto tileIterator = 0u; tileIterator < this->osContext->getDeviceBitfield().size(); tileIterator++) {
             uint32_t ctxId = 0u;
@@ -68,12 +68,12 @@ int DrmCommandStreamReceiver<GfxFamily>::waitUserFence(uint32_t waitValue) {
         }
         UNRECOVERABLE_IF(ctxIds.size() != this->activePartitions);
         for (uint32_t i = 0; i < this->activePartitions; i++) {
-            ret |= this->drm->waitUserFence(ctxIds[i], tagAddress, waitValue, Drm::ValueWidth::U32, kmdWaitTimeout, 0u);
+            ret |= this->drm->waitUserFence(ctxIds[i], tagAddress, waitValue, Drm::ValueWidth::U64, kmdWaitTimeout, 0u);
             tagAddress += this->postSyncWriteOffset;
         }
     } else {
         for (uint32_t i = 0; i < this->activePartitions; i++) {
-            ret |= this->drm->waitUserFence(0u, tagAddress, waitValue, Drm::ValueWidth::U32, kmdWaitTimeout, 0u);
+            ret |= this->drm->waitUserFence(0u, tagAddress, waitValue, Drm::ValueWidth::U64, kmdWaitTimeout, 0u);
             tagAddress += this->postSyncWriteOffset;
         }
     }
diff --git a/shared/source/os_interface/linux/drm_neo.cpp b/shared/source/os_interface/linux/drm_neo.cpp
index 7e481fec6a..d93e19f934 100644
--- a/shared/source/os_interface/linux/drm_neo.cpp
+++ b/shared/source/os_interface/linux/drm_neo.cpp
@@ -1613,7 +1613,7 @@ void Drm::waitOnUserFences(const OsContextLinux &osContext, uint64_t address, ui
         if (*reinterpret_cast<uint32_t *>(completionFenceCpuAddress) < value) {
             constexpr int64_t timeout = -1;
             constexpr uint16_t flags = 0;
-            int retVal = waitUserFence(drmContextIds[drmIterator], completionFenceCpuAddress, value, Drm::ValueWidth::U32, timeout, flags);
+            int retVal = waitUserFence(drmContextIds[drmIterator], completionFenceCpuAddress, value, Drm::ValueWidth::U64, timeout, flags);
 
             if (DebugManager.flags.PrintCompletionFenceUsage.get()) {
                 std::cout << "Completion fence waited."
diff --git a/shared/source/os_interface/linux/ioctl_helper.h b/shared/source/os_interface/linux/ioctl_helper.h
index d557470450..892b9e66f1 100644
--- a/shared/source/os_interface/linux/ioctl_helper.h
+++ b/shared/source/os_interface/linux/ioctl_helper.h
@@ -6,6 +6,7 @@
  */
 
 #pragma once
+#include "shared/source/command_stream/task_count_helper.h"
 #include "shared/source/os_interface/linux/drm_wrappers.h"
 #include "shared/source/utilities/stackvec.h"
 
@@ -87,7 +88,7 @@ class IoctlHelper {
     virtual uint64_t getFlagsForVmBind(bool bindCapture, bool bindImmediate, bool bindMakeResident) = 0;
     virtual uint32_t queryDistances(std::vector<QueryItem> &queryItems, std::vector<DistanceInfo> &distanceInfos) = 0;
     virtual uint16_t getWaitUserFenceSoftFlag() = 0;
-    virtual int execBuffer(ExecBuffer *execBuffer, uint64_t completionGpuAddress, uint32_t counterValue) = 0;
+    virtual int execBuffer(ExecBuffer *execBuffer, uint64_t completionGpuAddress, TaskCountType counterValue) = 0;
     virtual bool completionFenceExtensionSupported(const bool isVmBindAvailable) = 0;
     virtual std::optional<DrmParam> getHasPageFaultParamId() = 0;
     virtual std::unique_ptr<uint8_t[]> createVmControlExtRegion(const std::optional<MemoryClassInstance> &regionInstanceClass) = 0;
@@ -165,7 +166,7 @@ class IoctlHelperUpstream : public IoctlHelper {
     uint64_t getFlagsForVmBind(bool bindCapture, bool bindImmediate, bool bindMakeResident) override;
     uint32_t queryDistances(std::vector<QueryItem> &queryItems, std::vector<DistanceInfo> &distanceInfos) override;
     uint16_t getWaitUserFenceSoftFlag() override;
-    int execBuffer(ExecBuffer *execBuffer, uint64_t completionGpuAddress, uint32_t counterValue) override;
+    int execBuffer(ExecBuffer *execBuffer, uint64_t completionGpuAddress, TaskCountType counterValue) override;
     bool completionFenceExtensionSupported(const bool isVmBindAvailable) override;
     std::optional<DrmParam> getHasPageFaultParamId() override;
     std::unique_ptr<uint8_t[]> createVmControlExtRegion(const std::optional<MemoryClassInstance> &regionInstanceClass) override;
@@ -231,7 +232,7 @@ class IoctlHelperPrelim20 : public IoctlHelper {
     uint64_t getFlagsForVmBind(bool bindCapture, bool bindImmediate, bool bindMakeResident) override;
     uint32_t queryDistances(std::vector<QueryItem> &queryItems, std::vector<DistanceInfo> &distanceInfos) override;
     uint16_t getWaitUserFenceSoftFlag() override;
-    int execBuffer(ExecBuffer *execBuffer, uint64_t completionGpuAddress, uint32_t counterValue) override;
+    int execBuffer(ExecBuffer *execBuffer, uint64_t completionGpuAddress, TaskCountType counterValue) override;
     bool completionFenceExtensionSupported(const bool isVmBindAvailable) override;
     std::optional<DrmParam> getHasPageFaultParamId() override;
     std::unique_ptr<uint8_t[]> createVmControlExtRegion(const std::optional<MemoryClassInstance> &regionInstanceClass) override;
diff --git a/shared/source/os_interface/linux/ioctl_helper_prelim.cpp b/shared/source/os_interface/linux/ioctl_helper_prelim.cpp
index 63022677ed..259aa501ae 100644
--- a/shared/source/os_interface/linux/ioctl_helper_prelim.cpp
+++ b/shared/source/os_interface/linux/ioctl_helper_prelim.cpp
@@ -256,7 +256,7 @@ uint16_t IoctlHelperPrelim20::getWaitUserFenceSoftFlag() {
     return PRELIM_I915_UFENCE_WAIT_SOFT;
 };
 
-int IoctlHelperPrelim20::execBuffer(ExecBuffer *execBuffer, uint64_t completionGpuAddress, uint32_t counterValue) {
+int IoctlHelperPrelim20::execBuffer(ExecBuffer *execBuffer, uint64_t completionGpuAddress, TaskCountType counterValue) {
     prelim_drm_i915_gem_execbuffer_ext_user_fence fenceObject = {};
     if (completionGpuAddress != 0) {
         fenceObject.base.name = PRELIM_DRM_I915_GEM_EXECBUFFER_EXT_USER_FENCE;
diff --git a/shared/source/os_interface/linux/ioctl_helper_upstream.cpp b/shared/source/os_interface/linux/ioctl_helper_upstream.cpp
index 9efcf54629..a0b107e90c 100644
--- a/shared/source/os_interface/linux/ioctl_helper_upstream.cpp
+++ b/shared/source/os_interface/linux/ioctl_helper_upstream.cpp
@@ -117,7 +117,7 @@ uint16_t IoctlHelperUpstream::getWaitUserFenceSoftFlag() {
     return 0;
 }
 
-int IoctlHelperUpstream::execBuffer(ExecBuffer *execBuffer, uint64_t completionGpuAddress, uint32_t counterValue) {
+int IoctlHelperUpstream::execBuffer(ExecBuffer *execBuffer, uint64_t completionGpuAddress, TaskCountType counterValue) {
     return ioctl(DrmIoctl::GemExecbuffer2, execBuffer);
 }
 
diff --git a/shared/source/utilities/wait_util.h b/shared/source/utilities/wait_util.h
index b3f5903518..6d7491885f 100644
--- a/shared/source/utilities/wait_util.h
+++ b/shared/source/utilities/wait_util.h
@@ -1,11 +1,12 @@
 /*
- * Copyright (C) 2021 Intel Corporation
+ * Copyright (C) 2021-2022 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
  */
 
 #pragma once
+#include "shared/source/command_stream/task_count_helper.h"
 #include "shared/source/utilities/cpuintrinsics.h"
 
 #include <cstdint>
@@ -33,8 +34,8 @@ inline bool waitFunctionWithPredicate(volatile T const *pollAddress, T expectedV
     return false;
 }
 
-inline bool waitFunction(volatile uint32_t *pollAddress, uint32_t expectedValue) {
-    return waitFunctionWithPredicate<uint32_t>(pollAddress, expectedValue, std::greater_equal<uint32_t>());
+inline bool waitFunction(volatile TagAddressType *pollAddress, TaskCountType expectedValue) {
+    return waitFunctionWithPredicate<TaskCountType>(pollAddress, expectedValue, std::greater_equal<TaskCountType>());
 }
 
 void init();
diff --git a/shared/test/common/fixtures/device_fixture.cpp b/shared/test/common/fixtures/device_fixture.cpp
index bbe22f05cb..aebaa90c2f 100644
--- a/shared/test/common/fixtures/device_fixture.cpp
+++ b/shared/test/common/fixtures/device_fixture.cpp
@@ -24,7 +24,7 @@ void DeviceFixture::setUpImpl(const NEO::HardwareInfo *hardwareInfo) {
 
     auto &commandStreamReceiver = pDevice->getGpgpuCommandStreamReceiver();
     pTagMemory = commandStreamReceiver.getTagAddress();
-    ASSERT_NE(nullptr, const_cast<uint32_t *>(pTagMemory));
+    ASSERT_NE(nullptr, const_cast<TagAddressType *>(pTagMemory));
 }
 
 void DeviceFixture::tearDown() {
diff --git a/shared/test/common/fixtures/device_fixture.h b/shared/test/common/fixtures/device_fixture.h
index 359e4dfe9e..a03238c300 100644
--- a/shared/test/common/fixtures/device_fixture.h
+++ b/shared/test/common/fixtures/device_fixture.h
@@ -19,7 +19,7 @@ struct DeviceFixture {
     MockDevice *createWithUsDeviceIdRevId(unsigned short usDeviceId, unsigned short usRevId);
 
     MockDevice *pDevice = nullptr;
-    volatile uint32_t *pTagMemory = nullptr;
+    volatile TagAddressType *pTagMemory = nullptr;
     HardwareInfo hardwareInfo = {};
     PLATFORM platformHelper = {};
     const uint32_t rootDeviceIndex = 0u;
diff --git a/shared/test/common/fixtures/memory_manager_fixture.h b/shared/test/common/fixtures/memory_manager_fixture.h
index 981cb89475..531070a63c 100644
--- a/shared/test/common/fixtures/memory_manager_fixture.h
+++ b/shared/test/common/fixtures/memory_manager_fixture.h
@@ -6,6 +6,7 @@
  */
 
 #pragma once
+#include "shared/source/command_stream/task_count_helper.h"
 #include "shared/source/execution_environment/execution_environment.h"
 #include "shared/source/helpers/options.h"
 
@@ -21,8 +22,8 @@ class MemoryManagerWithCsrFixture {
     MockMemoryManager *memoryManager;
     ExecutionEnvironment executionEnvironment;
     std::unique_ptr<MockCommandStreamReceiver> csr;
-    uint32_t taskCount = 0;
-    uint32_t currentGpuTag = initialHardwareTag;
+    TaskCountType taskCount = 0;
+    TagAddressType currentGpuTag = initialHardwareTag;
 
     ~MemoryManagerWithCsrFixture() = default;
 
diff --git a/shared/test/common/libult/ult_aub_command_stream_receiver.h b/shared/test/common/libult/ult_aub_command_stream_receiver.h
index 9a2c6376d0..20f1193bdd 100644
--- a/shared/test/common/libult/ult_aub_command_stream_receiver.h
+++ b/shared/test/common/libult/ult_aub_command_stream_receiver.h
@@ -50,7 +50,7 @@ class UltAubCommandStreamReceiver : public AUBCommandStreamReceiverHw<GfxFamily>
         return csr;
     }
 
-    uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override {
+    TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override {
         blitBufferCalled++;
         return BaseClass::flushBcsTask(blitPropertiesContainer, blocking, profilingEnabled, device);
     }
diff --git a/shared/test/common/libult/ult_command_stream_receiver.h b/shared/test/common/libult/ult_command_stream_receiver.h
index e106a4307d..66a854ccde 100644
--- a/shared/test/common/libult/ult_command_stream_receiver.h
+++ b/shared/test/common/libult/ult_command_stream_receiver.h
@@ -176,7 +176,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
 
     CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart,
                               const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh,
-                              uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override {
+                              TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override {
         recordedDispatchFlags = dispatchFlags;
         this->lastFlushedCommandStream = &commandStream;
         return BaseClass::flushTask(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device);
@@ -196,7 +196,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
         downloadAllocationCalled = true;
     }
 
-    WaitStatus waitForCompletionWithTimeout(const WaitParams &params, uint32_t taskCountToWait) override {
+    WaitStatus waitForCompletionWithTimeout(const WaitParams &params, TaskCountType taskCountToWait) override {
         latestWaitForCompletionWithTimeoutTaskCount.store(taskCountToWait);
         waitForCompletionWithTimeoutTaskCountCalled++;
         if (callBaseWaitForCompletionWithTimeout) {
@@ -212,11 +212,11 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
         }
     }
 
-    WaitStatus waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) {
+    WaitStatus waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, TaskCountType taskCountToWait) {
         return waitForCompletionWithTimeout(WaitParams{false, enableTimeout, timeoutMicroseconds}, taskCountToWait);
     }
 
-    WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override {
+    WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override {
         if (waitForTaskCountWithKmdNotifyFallbackReturnValue.has_value()) {
             return *waitForTaskCountWithKmdNotifyFallbackReturnValue;
         }
@@ -245,7 +245,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
         return makeResidentAllocations.find(graphicsAllocation) != makeResidentAllocations.end();
     }
 
-    bool isMadeResident(GraphicsAllocation *graphicsAllocation, uint32_t taskCount) const {
+    bool isMadeResident(GraphicsAllocation *graphicsAllocation, TaskCountType taskCount) const {
         auto it = makeResidentAllocations.find(graphicsAllocation);
         if (it == makeResidentAllocations.end()) {
             return false;
@@ -289,7 +289,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
         return CommandStreamReceiverHw<GfxFamily>::obtainUniqueOwnership();
     }
 
-    uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override {
+    TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override {
         blitBufferCalled++;
         receivedBlitProperties = blitPropertiesContainer;
 
@@ -363,13 +363,13 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
     BatchBuffer latestFlushedBatchBuffer = {};
 
     std::atomic<uint32_t> recursiveLockCounter;
-    std::atomic<uint32_t> latestWaitForCompletionWithTimeoutTaskCount{0};
+    std::atomic<TaskCountType> latestWaitForCompletionWithTimeoutTaskCount{0};
     std::atomic<uint32_t> waitForCompletionWithTimeoutTaskCountCalled{0};
 
     LinearStream *lastFlushedCommandStream = nullptr;
 
     uint32_t makeSurfacePackNonResidentCalled = false;
-    uint32_t latestSentTaskCountValueDuringFlush = 0;
+    TaskCountType latestSentTaskCountValueDuringFlush = 0;
     uint32_t blitBufferCalled = 0;
     uint32_t createPerDssBackedBufferCalled = 0;
     uint32_t initDirectSubmissionCalled = 0;
@@ -400,7 +400,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
     WaitStatus returnWaitForCompletionWithTimeout = WaitStatus::Ready;
     std::optional<WaitStatus> waitForTaskCountWithKmdNotifyFallbackReturnValue{};
     bool callBaseFlushBcsTask{true};
-    uint32_t flushBcsTaskReturnValue{};
+    TaskCountType flushBcsTaskReturnValue{};
     std::optional<SubmissionStatus> flushReturnValue{};
     CommandStreamReceiverType commandStreamReceiverType = CommandStreamReceiverType::CSR_HW;
 };
diff --git a/shared/test/common/mocks/linux/mock_drm_allocation.h b/shared/test/common/mocks/linux/mock_drm_allocation.h
index 7a4c3e21fb..456a8c6f16 100644
--- a/shared/test/common/mocks/linux/mock_drm_allocation.h
+++ b/shared/test/common/mocks/linux/mock_drm_allocation.h
@@ -21,14 +21,14 @@ class MockBufferObject : public BufferObject {
 
     struct ExecParams {
         uint64_t completionGpuAddress = 0;
-        uint32_t completionValue = 0;
+        TaskCountType completionValue = 0;
     };
 
     std::vector<ExecParams> passedExecParams{};
     MockBufferObject(Drm *drm) : BufferObject(drm, CommonConstants::unsupportedPatIndex, 0, 0, 1) {
     }
     int exec(uint32_t used, size_t startOffset, unsigned int flags, bool requiresCoherency, OsContext *osContext, uint32_t vmHandleId, uint32_t drmContextId,
-             BufferObject *const residency[], size_t residencyCount, ExecObject *execObjectsStorage, uint64_t completionGpuAddress, uint32_t completionValue) override {
+             BufferObject *const residency[], size_t residencyCount, ExecObject *execObjectsStorage, uint64_t completionGpuAddress, TaskCountType completionValue) override {
         passedExecParams.push_back({completionGpuAddress, completionValue});
         return BufferObject::exec(used, startOffset, flags, requiresCoherency, osContext, vmHandleId, drmContextId,
                                   residency, residencyCount, execObjectsStorage, completionGpuAddress, completionValue);
diff --git a/shared/test/common/mocks/linux/mock_drm_command_stream_receiver.h b/shared/test/common/mocks/linux/mock_drm_command_stream_receiver.h
index cea5f667e5..d61dada8ac 100644
--- a/shared/test/common/mocks/linux/mock_drm_command_stream_receiver.h
+++ b/shared/test/common/mocks/linux/mock_drm_command_stream_receiver.h
@@ -99,14 +99,14 @@ class TestedDrmCommandStreamReceiver : public DrmCommandStreamReceiver<GfxFamily
 
     struct WaitUserFenceResult {
         uint32_t called = 0u;
-        uint32_t waitValue = 0u;
+        TaskCountType waitValue = 0u;
         int returnValue = 0;
         bool callParent = true;
     };
 
     WaitUserFenceResult waitUserFenceResult;
 
-    int waitUserFence(uint32_t waitValue) override {
+    int waitUserFence(TaskCountType waitValue) override {
         waitUserFenceResult.called++;
         waitUserFenceResult.waitValue = waitValue;
 
diff --git a/shared/test/common/mocks/mock_aub_csr.h b/shared/test/common/mocks/mock_aub_csr.h
index 0aab83616c..a6bf4f47f8 100644
--- a/shared/test/common/mocks/mock_aub_csr.h
+++ b/shared/test/common/mocks/mock_aub_csr.h
@@ -64,7 +64,7 @@ struct MockAubCsr : public AUBCommandStreamReceiverHw<GfxFamily> {
 
     CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart,
                               const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh,
-                              uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override {
+                              TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override {
         recordedDispatchFlags = dispatchFlags;
 
         return AUBCommandStreamReceiverHw<GfxFamily>::flushTask(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device);
@@ -123,7 +123,7 @@ struct MockAubCsr : public AUBCommandStreamReceiverHw<GfxFamily> {
         expectMemoryCompressedCalled = true;
         return AUBCommandStreamReceiverHw<GfxFamily>::expectMemoryCompressed(gfxAddress, srcAddress, length);
     }
-    WaitStatus waitForCompletionWithTimeout(const WaitParams &params, uint32_t taskCountToWait) override {
+    WaitStatus waitForCompletionWithTimeout(const WaitParams &params, TaskCountType taskCountToWait) override {
         return NEO::WaitStatus::Ready;
     }
     void addAubComment(const char *message) override {
diff --git a/shared/test/common/mocks/mock_command_stream_receiver.cpp b/shared/test/common/mocks/mock_command_stream_receiver.cpp
index f59dc2a1e6..836bdd30a9 100644
--- a/shared/test/common/mocks/mock_command_stream_receiver.cpp
+++ b/shared/test/common/mocks/mock_command_stream_receiver.cpp
@@ -7,7 +7,7 @@
 
 #include "shared/test/common/mocks/mock_command_stream_receiver.h"
 
-volatile uint32_t MockCommandStreamReceiver::mockTagAddress[MockCommandStreamReceiver::tagSize];
+volatile TagAddressType MockCommandStreamReceiver::mockTagAddress[MockCommandStreamReceiver::tagSize];
 
 SubmissionStatus MockCommandStreamReceiver::flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) {
     return SubmissionStatus::SUCCESS;
@@ -19,7 +19,7 @@ CompletionStamp MockCommandStreamReceiver::flushTask(
     const IndirectHeap *dsh,
     const IndirectHeap *ioh,
     const IndirectHeap *ssh,
-    uint32_t taskLevel,
+    TaskCountType taskLevel,
     DispatchFlags &dispatchFlags,
     Device &device) {
     ++taskCount;
diff --git a/shared/test/common/mocks/mock_command_stream_receiver.h b/shared/test/common/mocks/mock_command_stream_receiver.h
index 24006dfecc..e99c469e55 100644
--- a/shared/test/common/mocks/mock_command_stream_receiver.h
+++ b/shared/test/common/mocks/mock_command_stream_receiver.h
@@ -50,10 +50,10 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
     MockCommandStreamReceiver(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield)
         : CommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield) {
         CommandStreamReceiver::tagAddress = &mockTagAddress[0];
-        memset(const_cast<uint32_t *>(CommandStreamReceiver::tagAddress), 0xFFFFFFFF, tagSize * sizeof(uint32_t));
+        memset(const_cast<TagAddressType *>(CommandStreamReceiver::tagAddress), 0xFFFFFFFF, tagSize * sizeof(TagAddressType));
     }
 
-    WaitStatus waitForCompletionWithTimeout(const WaitParams &params, uint32_t taskCountToWait) override {
+    WaitStatus waitForCompletionWithTimeout(const WaitParams &params, TaskCountType taskCountToWait) override {
         waitForCompletionWithTimeoutCalled++;
         return waitForCompletionWithTimeoutReturnValue;
     }
@@ -80,7 +80,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
         }
     }
 
-    bool testTaskCountReady(volatile uint32_t *pollAddress, uint32_t taskCountToWait) override {
+    bool testTaskCountReady(volatile TagAddressType *pollAddress, TaskCountType taskCountToWait) override {
         if (testTaskCountReadyReturnValue.has_value()) {
             return *testTaskCountReadyReturnValue;
         } else {
@@ -100,7 +100,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
         const IndirectHeap *dsh,
         const IndirectHeap *ioh,
         const IndirectHeap *ssh,
-        uint32_t taskLevel,
+        TaskCountType taskLevel,
         DispatchFlags &dispatchFlags,
         Device &device) override;
 
@@ -111,15 +111,15 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
         return true;
     }
 
-    WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, QueueThrottle throttle) override {
+    WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, QueueThrottle throttle) override {
         return WaitStatus::Ready;
     }
 
-    WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) {
+    WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) {
         return WaitStatus::Ready;
     }
 
-    uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; };
+    TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; };
 
     CommandStreamReceiverType getType() override {
         return commandStreamReceiverType;
@@ -176,7 +176,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
     SubmissionStatus initializeDeviceWithFirstSubmission() override { return SubmissionStatus::SUCCESS; }
 
     static constexpr size_t tagSize = 256;
-    static volatile uint32_t mockTagAddress[tagSize];
+    static volatile TagAddressType mockTagAddress[tagSize];
     std::vector<char> instructionHeapReserveredData;
     int *flushBatchedSubmissionsCallCounter = nullptr;
     uint32_t waitForCompletionWithTimeoutCalled = 0;
@@ -286,7 +286,7 @@ class MockCsrHw2 : public CommandStreamReceiverHw<GfxFamily> {
 
     CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart,
                               const IndirectHeap *dsh, const IndirectHeap *ioh,
-                              const IndirectHeap *ssh, uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override {
+                              const IndirectHeap *ssh, TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override {
         passedDispatchFlags = dispatchFlags;
 
         recordedCommandBuffer = std::unique_ptr<CommandBuffer>(new CommandBuffer(device));
@@ -306,7 +306,7 @@ class MockCsrHw2 : public CommandStreamReceiverHw<GfxFamily> {
         return completionStamp;
     }
 
-    uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override {
+    TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override {
         if (!skipBlitCalls) {
             return CommandStreamReceiverHw<GfxFamily>::flushBcsTask(blitPropertiesContainer, blocking, profilingEnabled, device);
         }
diff --git a/shared/test/common/mocks/mock_csr.h b/shared/test/common/mocks/mock_csr.h
index b6e7305023..1c17805c55 100644
--- a/shared/test/common/mocks/mock_csr.h
+++ b/shared/test/common/mocks/mock_csr.h
@@ -117,7 +117,7 @@ class MockCsr : public MockCsrBase<GfxFamily> {
         const IndirectHeap *dsh,
         const IndirectHeap *ioh,
         const IndirectHeap *ssh,
-        uint32_t taskLevel,
+        TaskCountType taskLevel,
         DispatchFlags &dispatchFlags,
         Device &device) override {
         this->flushTaskStamp = *this->executionStamp;
@@ -140,7 +140,7 @@ class MockCsr : public MockCsrBase<GfxFamily> {
     bool peekMediaVfeStateDirty() const { return mediaVfeStateDirty; }
 
     bool slmUsedInLastFlushTask = false;
-    uint32_t lastTaskLevelToFlushTask = 0;
+    TaskCountType lastTaskLevelToFlushTask = 0;
 };
 
 template <typename GfxFamily>
diff --git a/shared/test/common/mocks/mock_graphics_allocation.h b/shared/test/common/mocks/mock_graphics_allocation.h
index fad91e2d31..498f9c7c9b 100644
--- a/shared/test/common/mocks/mock_graphics_allocation.h
+++ b/shared/test/common/mocks/mock_graphics_allocation.h
@@ -54,11 +54,11 @@ class MockGraphicsAllocation : public MemoryAllocation {
 
 class MockGraphicsAllocationTaskCount : public MockGraphicsAllocation {
   public:
-    uint32_t getTaskCount(uint32_t contextId) const override {
+    TaskCountType getTaskCount(uint32_t contextId) const override {
         getTaskCountCalleedTimes++;
         return MockGraphicsAllocation::getTaskCount(contextId);
     }
-    void updateTaskCount(uint32_t newTaskCount, uint32_t contextId) override {
+    void updateTaskCount(TaskCountType newTaskCount, uint32_t contextId) override {
         updateTaskCountCalleedTimes++;
         MockGraphicsAllocation::updateTaskCount(newTaskCount, contextId);
     }
diff --git a/shared/test/common/mocks/mock_internal_allocation_storage.h b/shared/test/common/mocks/mock_internal_allocation_storage.h
index 0335ddf234..d20619bfe7 100644
--- a/shared/test/common/mocks/mock_internal_allocation_storage.h
+++ b/shared/test/common/mocks/mock_internal_allocation_storage.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -13,7 +13,7 @@ namespace NEO {
 class MockInternalAllocationStorage : public InternalAllocationStorage {
   public:
     using InternalAllocationStorage::InternalAllocationStorage;
-    void cleanAllocationList(uint32_t waitTaskCount, uint32_t allocationUsage) override {
+    void cleanAllocationList(TaskCountType waitTaskCount, uint32_t allocationUsage) override {
         cleanAllocationsCalled++;
         lastCleanAllocationsTaskCount = waitTaskCount;
         lastCleanAllocationUsage = allocationUsage;
@@ -23,14 +23,14 @@ class MockInternalAllocationStorage : public InternalAllocationStorage {
             doUpdateCompletion = false;
         }
     }
-    void updateCompletionAfterCleaningList(uint32_t newValue) {
+    void updateCompletionAfterCleaningList(TaskCountType newValue) {
         doUpdateCompletion = true;
         valueToUpdateCompletion = newValue;
     }
     bool doUpdateCompletion = false;
-    uint32_t valueToUpdateCompletion;
+    TaskCountType valueToUpdateCompletion;
     uint32_t lastCleanAllocationUsage = 0;
-    uint32_t lastCleanAllocationsTaskCount = 0;
+    TaskCountType lastCleanAllocationsTaskCount = 0;
     uint32_t cleanAllocationsCalled = 0;
 };
 } // namespace NEO
diff --git a/shared/test/common/mocks/mock_migration_sync_data.h b/shared/test/common/mocks/mock_migration_sync_data.h
index 4d4a425c93..6dc2a3e1f3 100644
--- a/shared/test/common/mocks/mock_migration_sync_data.h
+++ b/shared/test/common/mocks/mock_migration_sync_data.h
@@ -21,7 +21,7 @@ struct MockMigrationSyncData : public MigrationSyncData {
     using MigrationSyncData::latestTaskCountUsed;
     using MigrationSyncData::MigrationSyncData;
     using MigrationSyncData::tagAddress;
-    void signalUsage(volatile uint32_t *tagAddress, uint32_t taskCount) override {
+    void signalUsage(volatile TagAddressType *tagAddress, TaskCountType taskCount) override {
         signalUsageCalled++;
         MigrationSyncData::signalUsage(tagAddress, taskCount);
     }
diff --git a/shared/test/common/mocks/mock_tbx_csr.h b/shared/test/common/mocks/mock_tbx_csr.h
index c0c2d215e4..f3f8bc5e3f 100644
--- a/shared/test/common/mocks/mock_tbx_csr.h
+++ b/shared/test/common/mocks/mock_tbx_csr.h
@@ -91,7 +91,7 @@ struct MockTbxCsrRegisterDownloadedAllocations : TbxCommandStreamReceiverHw<GfxF
         this->downloadAllocationImpl = nullptr;
     }
     void downloadAllocationTbxMock(GraphicsAllocation &gfxAllocation) {
-        *reinterpret_cast<uint32_t *>(CommandStreamReceiver::getTagAllocation()->getUnderlyingBuffer()) = this->latestFlushedTaskCount;
+        *reinterpret_cast<TagAddressType *>(CommandStreamReceiver::getTagAllocation()->getUnderlyingBuffer()) = this->latestFlushedTaskCount;
         downloadedAllocations.insert(&gfxAllocation);
     }
     bool flushBatchedSubmissions() override {
diff --git a/shared/test/common/os_interface/linux/drm_buffer_object_fixture.h b/shared/test/common/os_interface/linux/drm_buffer_object_fixture.h
index 3e091bbc0e..bd26929427 100644
--- a/shared/test/common/os_interface/linux/drm_buffer_object_fixture.h
+++ b/shared/test/common/os_interface/linux/drm_buffer_object_fixture.h
@@ -40,7 +40,7 @@ class TestedBufferObject : public BufferObject {
     }
 
     int exec(uint32_t used, size_t startOffset, unsigned int flags, bool requiresCoherency, OsContext *osContext, uint32_t vmHandleId, uint32_t drmContextId,
-             BufferObject *const residency[], size_t residencyCount, ExecObject *execObjectsStorage, uint64_t completionGpuAddress, uint32_t completionValue) override {
+             BufferObject *const residency[], size_t residencyCount, ExecObject *execObjectsStorage, uint64_t completionGpuAddress, TaskCountType completionValue) override {
         this->receivedCompletionGpuAddress = completionGpuAddress;
         this->receivedCompletionValue = completionValue;
         this->execCalled++;
@@ -61,7 +61,7 @@ class TestedBufferObject : public BufferObject {
 
     uint64_t receivedCompletionGpuAddress = 0;
     ExecObject *execObjectPointerFilled = nullptr;
-    uint32_t receivedCompletionValue = 0;
+    TaskCountType receivedCompletionValue = 0;
     uint32_t execCalled = 0;
     bool callBaseEvictUnusedAllocations{true};
 };
diff --git a/shared/test/common/utilities/cpuintrinsics.cpp b/shared/test/common/utilities/cpuintrinsics.cpp
index 1b34ea7965..7dc9348aa2 100644
--- a/shared/test/common/utilities/cpuintrinsics.cpp
+++ b/shared/test/common/utilities/cpuintrinsics.cpp
@@ -7,6 +7,7 @@
 
 #include "shared/source/utilities/cpuintrinsics.h"
 
+#include "shared/source/command_stream/task_count_helper.h"
 #include "shared/source/helpers/ptr_math.h"
 
 #include <atomic>
@@ -14,14 +15,14 @@
 #include <functional>
 
 namespace CpuIntrinsicsTests {
-//std::atomic is used for sake of sanitation in MT tests
+// std::atomic is used for the sake of sanitization in MT tests
 std::atomic<uintptr_t> lastClFlushedPtr(0u);
 std::atomic<uint32_t> clFlushCounter(0u);
 std::atomic<uint32_t> pauseCounter(0u);
 std::atomic<uint32_t> sfenceCounter(0u);
 
-volatile uint32_t *pauseAddress = nullptr;
-uint32_t pauseValue = 0u;
+volatile TagAddressType *pauseAddress = nullptr;
+TaskCountType pauseValue = 0u;
 uint32_t pauseOffset = 0u;
 
 std::function<void()> setupPauseAddress;
diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp
index 3a5f9bea3e..81aeb77359 100644
--- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp
+++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp
@@ -78,7 +78,7 @@ TEST_F(CommandStreamReceiverTest, givenOsAgnosticCsrWhenGettingCompletionValueTh
 }
 
 TEST_F(CommandStreamReceiverTest, givenOsAgnosticCsrWhenGettingCompletionAddressThenProperAddressIsReturned) {
-    auto expectedAddress = castToUint64(const_cast<uint32_t *>(commandStreamReceiver->getTagAddress()));
+    auto expectedAddress = castToUint64(const_cast<TagAddressType *>(commandStreamReceiver->getTagAddress()));
     EXPECT_EQ(expectedAddress, commandStreamReceiver->getCompletionAddress());
 }
 
@@ -186,7 +186,7 @@ TEST_F(CommandStreamReceiverTest, givenBaseDownloadAllocationCalledThenDoesNotCh
 }
 
 TEST_F(CommandStreamReceiverTest, WhenCommandStreamReceiverIsCreatedThenItHasATagValue) {
-    EXPECT_NE(nullptr, const_cast<uint32_t *>(commandStreamReceiver->getTagAddress()));
+    EXPECT_NE(nullptr, const_cast<TagAddressType *>(commandStreamReceiver->getTagAddress()));
 }
 
 TEST_F(CommandStreamReceiverTest, WhenGettingCommandStreamerThenValidPointerIsReturned) {
@@ -275,7 +275,7 @@ HWTEST_F(CommandStreamReceiverTest, givenGpuHangWhenWaititingForCompletionWithTi
     csr.activePartitions = 1;
     csr.gpuHangCheckPeriod = 0us;
 
-    volatile std::uint32_t tasksCount[16] = {};
+    volatile TagAddressType tasksCount[16] = {};
     csr.tagAddress = tasksCount;
 
     constexpr auto enableTimeout = false;
@@ -290,7 +290,7 @@ HWTEST_F(CommandStreamReceiverTest, givenNoGpuHangWhenWaititingForCompletionWith
     auto driverModelMock = std::make_unique<MockDriverModel>();
     driverModelMock->isGpuHangDetectedToReturn = false;
 
-    volatile std::uint32_t tasksCount[16] = {};
+    volatile TagAddressType tasksCount[16] = {};
     driverModelMock->isGpuHangDetectedSideEffect = [&tasksCount] {
         tasksCount[0]++;
     };
@@ -367,7 +367,7 @@ HWTEST_F(CommandStreamReceiverTest, givenGpuHangWhenWaititingForTaskCountThenGpu
     csr.activePartitions = 1;
     csr.gpuHangCheckPeriod = 0us;
 
-    volatile std::uint32_t tasksCount[16] = {};
+    volatile TagAddressType tasksCount[16] = {};
     csr.tagAddress = tasksCount;
 
     constexpr auto taskCountToWait = 1;
@@ -413,8 +413,8 @@ HWTEST_F(CommandStreamReceiverTest, givenGpuHangAndNonEmptyAllocationsListWhenCa
     csr.activePartitions = 1;
     csr.gpuHangCheckPeriod = 0us;
 
-    volatile std::uint32_t tasksCount[16] = {};
-    VariableBackup<volatile std::uint32_t *> csrTagAddressBackup(&csr.tagAddress);
+    volatile TagAddressType tasksCount[16] = {};
+    VariableBackup<volatile TagAddressType *> csrTagAddressBackup(&csr.tagAddress);
     csr.tagAddress = tasksCount;
 
     auto hostPtr = reinterpret_cast<void *>(0x1234);
@@ -1473,8 +1473,8 @@ TEST(CommandStreamReceiverSimpleTest, givenGpuNotIdleImplicitFlushCheckEnabledWh
 
 namespace CpuIntrinsicsTests {
 extern std::atomic<uint32_t> pauseCounter;
-extern volatile uint32_t *pauseAddress;
-extern uint32_t pauseValue;
+extern volatile TagAddressType *pauseAddress;
+extern TaskCountType pauseValue;
 extern uint32_t pauseOffset;
 } // namespace CpuIntrinsicsTests
 
@@ -1505,8 +1505,8 @@ TEST(CommandStreamReceiverSimpleTest, givenMultipleActivePartitionsWhenWaitingFo
     csr.taskCount = 3u;
     csr.activePartitions = 2;
 
-    VariableBackup<volatile uint32_t *> backupPauseAddress(&CpuIntrinsicsTests::pauseAddress);
-    VariableBackup<uint32_t> backupPauseValue(&CpuIntrinsicsTests::pauseValue);
+    VariableBackup<volatile TagAddressType *> backupPauseAddress(&CpuIntrinsicsTests::pauseAddress);
+    VariableBackup<TaskCountType> backupPauseValue(&CpuIntrinsicsTests::pauseValue);
     VariableBackup<uint32_t> backupPauseOffset(&CpuIntrinsicsTests::pauseOffset);
 
     CpuIntrinsicsTests::pauseAddress = &csr.mockTagAddress[0];
@@ -1532,8 +1532,8 @@ TEST(CommandStreamReceiverSimpleTest, givenEmptyTemporaryAllocationListWhenWaiti
     csr.mockTagAddress[0] = 0u;
     csr.taskCount = 3u;
 
-    VariableBackup<volatile uint32_t *> backupPauseAddress(&CpuIntrinsicsTests::pauseAddress);
-    VariableBackup<uint32_t> backupPauseValue(&CpuIntrinsicsTests::pauseValue);
+    VariableBackup<volatile TagAddressType *> backupPauseAddress(&CpuIntrinsicsTests::pauseAddress);
+    VariableBackup<TaskCountType> backupPauseValue(&CpuIntrinsicsTests::pauseValue);
     VariableBackup<uint32_t> backupPauseOffset(&CpuIntrinsicsTests::pauseOffset);
 
     CpuIntrinsicsTests::pauseAddress = &csr.mockTagAddress[0];
@@ -2145,15 +2145,15 @@ HWTEST_F(CommandStreamReceiverTest, givenMultipleActivePartitionsWhenWaitLogIsEn
     auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
     csr.activePartitions = 2;
 
-    volatile uint32_t *tagAddress = csr.tagAddress;
-    constexpr uint32_t tagValue = 2;
+    volatile TagAddressType *tagAddress = csr.tagAddress;
+    constexpr TagAddressType tagValue = 2;
     *tagAddress = tagValue;
     tagAddress = ptrOffset(tagAddress, csr.postSyncWriteOffset);
     *tagAddress = tagValue;
 
     WaitParams waitParams;
     waitParams.waitTimeout = std::numeric_limits<int64_t>::max();
-    constexpr uint32_t taskCount = 1;
+    constexpr TaskCountType taskCount = 1;
 
     testing::internal::CaptureStdout();
 
@@ -2166,7 +2166,7 @@ HWTEST_F(CommandStreamReceiverTest, givenMultipleActivePartitionsWhenWaitLogIsEn
 
     expectedOutput << std::endl
                    << "Waiting for task count " << taskCount
-                   << " at location " << const_cast<uint32_t *>(csr.tagAddress)
+                   << " at location " << const_cast<TagAddressType *>(csr.tagAddress)
                    << " with timeout " << std::hex << waitParams.waitTimeout
                    << ". Current value: " << std::dec << tagValue
                    << " " << tagValue
@@ -2298,7 +2298,7 @@ struct MockRequiredScratchSpaceController : public ScratchSpaceControllerBase {
                                  uint32_t scratchSlot,
                                  uint32_t requiredPerThreadScratchSize,
                                  uint32_t requiredPerThreadPrivateScratchSize,
-                                 uint32_t currentTaskCount,
+                                 TaskCountType currentTaskCount,
                                  OsContext &osContext,
                                  bool &stateBaseAddressDirty,
                                  bool &vfeStateDirty) override {
diff --git a/shared/test/unit_test/memory_manager/deferrable_allocation_deletion_tests.cpp b/shared/test/unit_test/memory_manager/deferrable_allocation_deletion_tests.cpp
index acdb492df6..45d09e268a 100644
--- a/shared/test/unit_test/memory_manager/deferrable_allocation_deletion_tests.cpp
+++ b/shared/test/unit_test/memory_manager/deferrable_allocation_deletion_tests.cpp
@@ -56,7 +56,7 @@ struct DeferrableAllocationDeletionTest : ::testing::Test {
     MockMemoryManager *memoryManager = nullptr;
     std::unique_ptr<MockDevice> device;
     uint32_t defaultOsContextId = 0;
-    volatile uint32_t *hwTag = nullptr;
+    volatile TagAddressType *hwTag = nullptr;
 };
 
 TEST_F(DeferrableAllocationDeletionTest, givenDeferrableAllocationWhenApplyThenWaitForEachTaskCount) {
diff --git a/shared/test/unit_test/memory_manager/host_ptr_manager_tests.cpp b/shared/test/unit_test/memory_manager/host_ptr_manager_tests.cpp
index a35044f201..179124512b 100644
--- a/shared/test/unit_test/memory_manager/host_ptr_manager_tests.cpp
+++ b/shared/test/unit_test/memory_manager/host_ptr_manager_tests.cpp
@@ -876,7 +876,7 @@ TEST_F(HostPtrAllocationTest, whenOverlappedFragmentIsBiggerThenStoredAndStoredF
     auto fragment2 = hostPtrManager->getFragment({alignUp(cpuPtr1, MemoryConstants::pageSize), csr->getRootDeviceIndex()});
     EXPECT_NE(nullptr, fragment2);
 
-    uint32_t taskCountReady = 2;
+    TaskCountType taskCountReady = 2;
     auto storage = new MockInternalAllocationStorage(*csr);
     csr->internalAllocationStorage.reset(storage);
     storage->storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation>(graphicsAllocation1), TEMPORARY_ALLOCATION, taskCountReady);
@@ -901,16 +901,16 @@ TEST_F(HostPtrAllocationTest, whenOverlappedFragmentIsBiggerThenStoredAndStoredF
 }
 
 HWTEST_F(HostPtrAllocationTest, givenOverlappingFragmentsWhenCheckIsCalledThenWaitAndCleanOnAllEngines) {
-    uint32_t taskCountReady = 2;
-    uint32_t taskCountNotReady = 1;
+    TaskCountType taskCountReady = 2;
+    TaskCountType taskCountNotReady = 1;
 
     auto &engines = memoryManager->getRegisteredEngines();
     EXPECT_EQ(1u, engines.size());
 
     auto csr0 = static_cast<MockCommandStreamReceiver *>(engines[0].commandStreamReceiver);
     auto csr1 = std::make_unique<MockCommandStreamReceiver>(executionEnvironment, 0, 1);
-    uint32_t csr0GpuTag = taskCountNotReady;
-    uint32_t csr1GpuTag = taskCountNotReady;
+    TagAddressType csr0GpuTag = taskCountNotReady;
+    TagAddressType csr1GpuTag = taskCountNotReady;
     csr0->tagAddress = &csr0GpuTag;
     csr1->tagAddress = &csr1GpuTag;
     auto osContext = memoryManager->createAndRegisterOsContext(csr1.get(), EngineDescriptorHelper::getDefaultDescriptor({aub_stream::EngineType::ENGINE_RCS, EngineUsage::LowPriority}));
@@ -969,7 +969,7 @@ TEST_F(HostPtrAllocationTest, whenOverlappedFragmentIsBiggerThenStoredAndStoredF
     auto fragment2 = hostPtrManager->getFragment({alignUp(cpuPtr1, MemoryConstants::pageSize), csr->getRootDeviceIndex()});
     EXPECT_NE(nullptr, fragment2);
 
-    uint32_t taskCountReady = 2;
+    TaskCountType taskCountReady = 2;
     auto storage = csr->getInternalAllocationStorage();
     storage->storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation>(graphicsAllocation1), TEMPORARY_ALLOCATION, taskCountReady);
 
@@ -1052,7 +1052,7 @@ TEST_F(HostPtrAllocationTest, GivenAllocationsWithBiggerOverlapWhenChckingForOve
     auto fragment2 = hostPtrManager->getFragment({alignUp(cpuPtr1, MemoryConstants::pageSize), csr->getRootDeviceIndex()});
     EXPECT_NE(nullptr, fragment2);
 
-    uint32_t taskCountReady = 1;
+    TaskCountType taskCountReady = 1;
     auto storage = csr->getInternalAllocationStorage();
     storage->storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation>(graphicsAllocation1), TEMPORARY_ALLOCATION, taskCountReady);
 
diff --git a/shared/test/unit_test/memory_manager/multi_graphics_allocation_tests.cpp b/shared/test/unit_test/memory_manager/multi_graphics_allocation_tests.cpp
index 66f06455bf..7e423683be 100644
--- a/shared/test/unit_test/memory_manager/multi_graphics_allocation_tests.cpp
+++ b/shared/test/unit_test/memory_manager/multi_graphics_allocation_tests.cpp
@@ -288,7 +288,7 @@ TEST_F(MigrationSyncDataTests, whenSetTargetLocationIsCalledThenProperLocationIs
 
 TEST(MigrationSyncDataTest, whenWaitOnCpuIsCalledThenWaitForValueSpecifiedInSignalUsageMethod) {
     auto migrationSyncData = std::make_unique<MockMigrationSyncDataWithYield>(MemoryConstants::pageSize);
-    uint32_t tagAddress = 0;
+    TagAddressType tagAddress = 0;
 
     migrationSyncData->signalUsage(&tagAddress, 2u);
     migrationSyncData->waitOnCpu();
@@ -297,7 +297,7 @@ TEST(MigrationSyncDataTest, whenWaitOnCpuIsCalledThenWaitForValueSpecifiedInSign
 
 TEST(MigrationSyncDataTest, whenTaskCountIsHigherThanExpectedThenWaitOnCpuDoesntHang) {
     auto migrationSyncData = std::make_unique<MockMigrationSyncData>(MemoryConstants::pageSize);
-    uint32_t tagAddress = 5u;
+    TagAddressType tagAddress = 5u;
 
     migrationSyncData->signalUsage(&tagAddress, 2u);
     EXPECT_EQ(&tagAddress, migrationSyncData->tagAddress);
diff --git a/shared/test/unit_test/os_interface/linux/drm_command_stream_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_command_stream_tests.cpp
index 0345be7fb3..63521c91fb 100644
--- a/shared/test/unit_test/os_interface/linux/drm_command_stream_tests.cpp
+++ b/shared/test/unit_test/os_interface/linux/drm_command_stream_tests.cpp
@@ -62,7 +62,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamTest, givenEnabledDirectSubmissionWhenGetting
 HWTEST_TEMPLATED_F(DrmCommandStreamTest, whenGettingCompletionAddressThenOffsettedTagAddressIsReturned) {
     csr->initializeTagAllocation();
     EXPECT_NE(nullptr, csr->getTagAddress());
-    uint64_t tagAddress = castToUint64(const_cast<uint32_t *>(csr->getTagAddress()));
+    uint64_t tagAddress = castToUint64(const_cast<TagAddressType *>(csr->getTagAddress()));
     auto expectedAddress = tagAddress + Drm::completionFenceOffset;
     EXPECT_EQ(expectedAddress, csr->getCompletionAddress());
 }
diff --git a/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp b/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp
index bac34dde9c..15fb9c2926 100644
--- a/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp
+++ b/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp
@@ -746,7 +746,7 @@ struct MockDrmDirectSubmissionToTestDtor : public DrmDirectSubmission<GfxFamily,
         functionsCalled.stopRingBuffer = true;
         return true;
     }
-    void wait(uint32_t taskCountToWait) override {
+    void wait(TaskCountType taskCountToWait) override {
         functionsCalled.wait = true;
     }
     void deallocateResources() override {
diff --git a/shared/test/unit_test/os_interface/linux/drm_command_stream_xehp_and_later_prelim_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_command_stream_xehp_and_later_prelim_tests.cpp
index 52eaebc000..71bdc0a86d 100644
--- a/shared/test/unit_test/os_interface/linux/drm_command_stream_xehp_and_later_prelim_tests.cpp
+++ b/shared/test/unit_test/os_interface/linux/drm_command_stream_xehp_and_later_prelim_tests.cpp
@@ -142,7 +142,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTestDrmPrelim, givenWaitUserFenceEnab
     testDrmCsr->useContextForUserFenceWait = true;
     testDrmCsr->activePartitions = static_cast<uint32_t>(drmCtxSize);
 
-    uint64_t tagAddress = castToUint64(const_cast<uint32_t *>(testDrmCsr->getTagAddress()));
+    uint64_t tagAddress = castToUint64(const_cast<TagAddressType *>(testDrmCsr->getTagAddress()));
     FlushStamp handleToWait = 123;
     testDrmCsr->waitForFlushStamp(handleToWait);
 
@@ -155,7 +155,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTestDrmPrelim, givenWaitUserFenceEnab
     EXPECT_NE(0u, mock->context.receivedGemWaitUserFence.ctxId);
     EXPECT_EQ(DrmPrelimHelper::getGTEWaitUserFenceFlag(), mock->context.receivedGemWaitUserFence.op);
     EXPECT_EQ(0u, mock->context.receivedGemWaitUserFence.flags);
-    EXPECT_EQ(DrmPrelimHelper::getU32WaitUserFenceFlag(), mock->context.receivedGemWaitUserFence.mask);
+    EXPECT_EQ(DrmPrelimHelper::getU64WaitUserFenceFlag(), mock->context.receivedGemWaitUserFence.mask);
     EXPECT_EQ(-1, mock->context.receivedGemWaitUserFence.timeout);
 }
 
@@ -175,7 +175,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTestDrmPrelim, givenWaitUserFenceEnab
     testDrmCsr->useUserFenceWait = true;
     testDrmCsr->useContextForUserFenceWait = false;
 
-    uint64_t tagAddress = castToUint64(const_cast<uint32_t *>(testDrmCsr->getTagAddress()));
+    uint64_t tagAddress = castToUint64(const_cast<TagAddressType *>(testDrmCsr->getTagAddress()));
     FlushStamp handleToWait = 123;
     testDrmCsr->waitForFlushStamp(handleToWait);
 
@@ -188,7 +188,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTestDrmPrelim, givenWaitUserFenceEnab
     EXPECT_EQ(0u, mock->context.receivedGemWaitUserFence.ctxId);
     EXPECT_EQ(DrmPrelimHelper::getGTEWaitUserFenceFlag(), mock->context.receivedGemWaitUserFence.op);
     EXPECT_EQ(0u, mock->context.receivedGemWaitUserFence.flags);
-    EXPECT_EQ(DrmPrelimHelper::getU32WaitUserFenceFlag(), mock->context.receivedGemWaitUserFence.mask);
+    EXPECT_EQ(DrmPrelimHelper::getU64WaitUserFenceFlag(), mock->context.receivedGemWaitUserFence.mask);
     EXPECT_EQ(-1, mock->context.receivedGemWaitUserFence.timeout);
 }
 
@@ -210,7 +210,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTestDrmPrelim, givenWaitUserFenceEnab
     testDrmCsr->activePartitions = 2u;
     EXPECT_NE(0u, testDrmCsr->postSyncWriteOffset);
 
-    uint64_t tagAddress = castToUint64(const_cast<uint32_t *>(testDrmCsr->getTagAddress()));
+    uint64_t tagAddress = castToUint64(const_cast<TagAddressType *>(testDrmCsr->getTagAddress()));
     FlushStamp handleToWait = 123;
     testDrmCsr->waitForFlushStamp(handleToWait);
 
@@ -223,7 +223,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTestDrmPrelim, givenWaitUserFenceEnab
     EXPECT_EQ(0u, mock->context.receivedGemWaitUserFence.ctxId);
     EXPECT_EQ(DrmPrelimHelper::getGTEWaitUserFenceFlag(), mock->context.receivedGemWaitUserFence.op);
     EXPECT_EQ(0u, mock->context.receivedGemWaitUserFence.flags);
-    EXPECT_EQ(DrmPrelimHelper::getU32WaitUserFenceFlag(), mock->context.receivedGemWaitUserFence.mask);
+    EXPECT_EQ(DrmPrelimHelper::getU64WaitUserFenceFlag(), mock->context.receivedGemWaitUserFence.mask);
     EXPECT_EQ(-1, mock->context.receivedGemWaitUserFence.timeout);
 }
 
diff --git a/shared/test/unit_test/os_interface/linux/drm_command_stream_xehp_and_later_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_command_stream_xehp_and_later_tests.cpp
index c464046765..fc31bc37d2 100644
--- a/shared/test/unit_test/os_interface/linux/drm_command_stream_xehp_and_later_tests.cpp
+++ b/shared/test/unit_test/os_interface/linux/drm_command_stream_xehp_and_later_tests.cpp
@@ -126,15 +126,15 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DrmCommandStreamMultiTileMemExecTest, GivenDrmSuppo
     auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, 1024, AllocationType::COMMAND_BUFFER});
     allocation->updateTaskCount(2, defaultEngine.osContext->getContextId());
 
-    volatile uint32_t *completionAddress = defaultEngine.commandStreamReceiver->getTagAddress();
-    completionAddress += (Drm::completionFenceOffset / sizeof(uint32_t));
+    volatile TagAddressType *completionAddress = defaultEngine.commandStreamReceiver->getTagAddress();
+    completionAddress += (Drm::completionFenceOffset / sizeof(TagAddressType));
     *completionAddress = 1;
-    completionAddress += (postSyncOffset / sizeof(uint32_t));
+    completionAddress += (postSyncOffset / sizeof(TagAddressType));
     *completionAddress = 1;
 
     memoryManager->handleFenceCompletion(allocation);
 
-    uint64_t expectedAddress = castToUint64(const_cast<uint32_t *>(defaultEngine.commandStreamReceiver->getTagAddress())) +
+    uint64_t expectedAddress = castToUint64(const_cast<TagAddressType *>(defaultEngine.commandStreamReceiver->getTagAddress())) +
                                Drm::completionFenceOffset +
                                postSyncOffset;
     constexpr uint64_t expectedValue = 2;
@@ -160,15 +160,15 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DrmCommandStreamMultiTileMemExecTest, GivenDrmSuppo
     auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, 1024, AllocationType::COMMAND_BUFFER});
     allocation->updateTaskCount(2, defaultEngine.osContext->getContextId());
 
-    volatile uint32_t *completionAddress = defaultEngine.commandStreamReceiver->getTagAddress();
-    completionAddress += (Drm::completionFenceOffset / sizeof(uint32_t));
+    volatile TagAddressType *completionAddress = defaultEngine.commandStreamReceiver->getTagAddress();
+    completionAddress += (Drm::completionFenceOffset / sizeof(TagAddressType));
     *completionAddress = 2; //1st context is ready
-    completionAddress += (postSyncOffset / sizeof(uint32_t));
+    completionAddress += (postSyncOffset / sizeof(TagAddressType));
     *completionAddress = 1;
 
     memoryManager->handleFenceCompletion(allocation);
 
-    uint64_t expectedAddress = castToUint64(const_cast<uint32_t *>(defaultEngine.commandStreamReceiver->getTagAddress())) +
+    uint64_t expectedAddress = castToUint64(const_cast<TagAddressType *>(defaultEngine.commandStreamReceiver->getTagAddress())) +
                                Drm::completionFenceOffset +
                                postSyncOffset;
     constexpr uint64_t expectedValue = 2;
diff --git a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp
index eee41dbefa..87831424df 100644
--- a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp
+++ b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp
@@ -5367,7 +5367,7 @@ TEST_F(DrmMemoryManagerTest, givenCompletionFenceEnabledWhenHandlingCompletionOf
     auto engine = memoryManager->getRegisteredEngines()[0];
     allocation->updateTaskCount(2, engine.osContext->getContextId());
 
-    uint64_t expectedFenceAddress = castToUint64(const_cast<uint32_t *>(engine.commandStreamReceiver->getTagAddress())) + Drm::completionFenceOffset;
+    uint64_t expectedFenceAddress = castToUint64(const_cast<TagAddressType *>(engine.commandStreamReceiver->getTagAddress())) + Drm::completionFenceOffset;
     constexpr uint64_t expectedValue = 2;
 
     memoryManager->handleFenceCompletion(allocation);
diff --git a/shared/test/unit_test/scratch_space_controler/scratch_space_controler_tests.cpp b/shared/test/unit_test/scratch_space_controler/scratch_space_controler_tests.cpp
index 53f30bc973..46aa7a02a3 100644
--- a/shared/test/unit_test/scratch_space_controler/scratch_space_controler_tests.cpp
+++ b/shared/test/unit_test/scratch_space_controler/scratch_space_controler_tests.cpp
@@ -22,7 +22,7 @@ class MockScratchSpaceControllerBase : public ScratchSpaceControllerBase {
                       uint32_t offset,
                       uint32_t requiredPerThreadScratchSize,
                       uint32_t requiredPerThreadPrivateScratchSize,
-                      uint32_t currentTaskCount,
+                      TaskCountType currentTaskCount,
                       OsContext &osContext,
                       bool &stateBaseAddressDirty,
                       bool &vfeStateDirty) override {
@@ -32,7 +32,7 @@ class MockScratchSpaceControllerBase : public ScratchSpaceControllerBase {
     void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
                                                uint32_t requiredPerThreadScratchSize,
                                                uint32_t requiredPerThreadPrivateScratchSize,
-                                               uint32_t currentTaskCount,
+                                               TaskCountType currentTaskCount,
                                                OsContext &osContext,
                                                bool &stateBaseAddressDirty,
                                                bool &vfeStateDirty,
diff --git a/shared/test/unit_test/scratch_space_controler/scratch_space_controler_xehp_and_later_tests.cpp b/shared/test/unit_test/scratch_space_controler/scratch_space_controler_xehp_and_later_tests.cpp
index cc185da6bc..3a386d4354 100644
--- a/shared/test/unit_test/scratch_space_controler/scratch_space_controler_xehp_and_later_tests.cpp
+++ b/shared/test/unit_test/scratch_space_controler/scratch_space_controler_xehp_and_later_tests.cpp
@@ -34,7 +34,7 @@ class MockScratchSpaceControllerXeHPAndLater : public ScratchSpaceControllerXeHP
     }
     void prepareScratchAllocation(uint32_t requiredPerThreadScratchSize,
                                   uint32_t requiredPerThreadPrivateScratchSize,
-                                  uint32_t currentTaskCount,
+                                  TaskCountType currentTaskCount,
                                   OsContext &osContext,
                                   bool &stateBaseAddressDirty,
                                   bool &scratchSurfaceDirty,
diff --git a/shared/test/unit_test/utilities/wait_util_tests.cpp b/shared/test/unit_test/utilities/wait_util_tests.cpp
index 9a2828a902..6688b2edb0 100644
--- a/shared/test/unit_test/utilities/wait_util_tests.cpp
+++ b/shared/test/unit_test/utilities/wait_util_tests.cpp
@@ -50,8 +50,8 @@ TEST(WaitTest, givenDefaultSettingsWhenPollAddressProvidedDoesNotMeetCriteriaThe
     WaitUtils::init();
     EXPECT_EQ(WaitUtils::defaultWaitCount, WaitUtils::waitCount);
 
-    volatile uint32_t pollValue = 1u;
-    uint32_t expectedValue = 3;
+    volatile TagAddressType pollValue = 1u;
+    TaskCountType expectedValue = 3;
 
     uint32_t oldCount = CpuIntrinsicsTests::pauseCounter.load();
     bool ret = WaitUtils::waitFunction(&pollValue, expectedValue);
@@ -63,8 +63,8 @@ TEST(WaitTest, givenDefaultSettingsWhenPollAddressProvidedMeetsCriteriaThenPause
     WaitUtils::init();
     EXPECT_EQ(WaitUtils::defaultWaitCount, WaitUtils::waitCount);
 
-    volatile uint32_t pollValue = 3u;
-    uint32_t expectedValue = 1;
+    volatile TagAddressType pollValue = 3u;
+    TaskCountType expectedValue = 1;
 
     uint32_t oldCount = CpuIntrinsicsTests::pauseCounter.load();
     bool ret = WaitUtils::waitFunction(&pollValue, expectedValue);
@@ -82,8 +82,8 @@ TEST(WaitTest, givenDebugFlagSetZeroWhenPollAddressProvidedMeetsCriteriaThenPaus
     WaitUtils::init();
     EXPECT_EQ(count, WaitUtils::waitCount);
 
-    volatile uint32_t pollValue = 3u;
-    uint32_t expectedValue = 1;
+    volatile TagAddressType pollValue = 3u;
+    TaskCountType expectedValue = 1;
 
     uint32_t oldCount = CpuIntrinsicsTests::pauseCounter.load();
     bool ret = WaitUtils::waitFunction(&pollValue, expectedValue);