diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index 85a1e43315..292a92768f 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -983,7 +983,7 @@ void CommandQueueHw::dispatchTaskCountPostSyncByMiFlushDw( } uint64_t postSyncAddress = this->csr->getTagAllocation()->getGpuAddress(); - uint32_t postSyncData = this->csr->peekTaskCount() + 1; + TaskCountType postSyncData = this->csr->peekTaskCount() + 1; const auto &hwInfo = this->device->getHwInfo(); NEO::MiFlushArgs args; @@ -1003,7 +1003,7 @@ void CommandQueueHw::dispatchTaskCountPostSyncRegular( } uint64_t postSyncAddress = this->csr->getTagAllocation()->getGpuAddress(); - uint32_t postSyncData = this->csr->peekTaskCount() + 1; + TaskCountType postSyncData = this->csr->peekTaskCount() + 1; const auto &hwInfo = this->device->getHwInfo(); NEO::PipeControlArgs args; diff --git a/level_zero/core/source/cmdqueue/cmdqueue_imp.h b/level_zero/core/source/cmdqueue/cmdqueue_imp.h index cb20b3f1d0..484ec077a2 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_imp.h +++ b/level_zero/core/source/cmdqueue/cmdqueue_imp.h @@ -10,6 +10,7 @@ #include "shared/source/command_container/cmdcontainer.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/submission_status.h" +#include "shared/source/command_stream/task_count_helper.h" #include "shared/source/command_stream/wait_status.h" #include "shared/source/helpers/completion_stamp.h" @@ -46,16 +47,16 @@ struct CommandQueueImp : public CommandQueue { return buffers[bufferUse]; } - void setCurrentFlushStamp(uint32_t taskCount, NEO::FlushStamp flushStamp) { + void setCurrentFlushStamp(TaskCountType taskCount, NEO::FlushStamp flushStamp) { flushId[bufferUse] = std::make_pair(taskCount, flushStamp); } - std::pair &getCurrentFlushStamp() { + std::pair &getCurrentFlushStamp() { return flushId[bufferUse]; } private: NEO::GraphicsAllocation *buffers[BUFFER_ALLOCATION::COUNT]; - std::pair flushId[BUFFER_ALLOCATION::COUNT]; + std::pair flushId[BUFFER_ALLOCATION::COUNT]; BUFFER_ALLOCATION bufferUse = BUFFER_ALLOCATION::FIRST; }; static constexpr size_t defaultQueueCmdBufferSize = 128 * MemoryConstants::kiloByte; @@ -76,7 +77,7 @@ struct CommandQueueImp : public CommandQueue { Device *getDevice() { return device; } - uint32_t getTaskCount() { return taskCount; } + TaskCountType getTaskCount() { return taskCount; } NEO::CommandStreamReceiver *getCsr() { return csr; } @@ -105,7 +106,7 @@ struct CommandQueueImp : public CommandQueue { NEO::CommandStreamReceiver *csr = nullptr; NEO::LinearStream commandStream{}; - std::atomic taskCount{0}; + std::atomic taskCount{0}; bool useKmdWaitFunction = false; }; diff --git a/level_zero/core/source/fence/fence.h b/level_zero/core/source/fence/fence.h index 9ac34370f5..d4e6fbd155 100644 --- a/level_zero/core/source/fence/fence.h +++ b/level_zero/core/source/fence/fence.h @@ -7,6 +7,8 @@ #pragma once +#include "shared/source/command_stream/task_count_helper.h" + #include #include @@ -39,7 +41,7 @@ struct Fence : _ze_fence_handle_t { std::chrono::microseconds gpuHangCheckPeriod{500'000}; CommandQueueImp *cmdQueue; - uint32_t taskCount = 0; + TaskCountType taskCount = 0; }; } // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp index b7dbf3a14b..fdf7989956 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp @@ -109,7 +109,7 @@ HWTEST_F(CommandQueueCreate, givenGpuHangOnSecondReserveWhenReservingLinearStrea auto firstAllocation = commandQueue->commandStream.getGraphicsAllocation(); EXPECT_EQ(firstAllocation, commandQueue->buffers.getCurrentBufferAllocation()); - uint32_t currentTaskCount = 33u; + TaskCountType currentTaskCount = 33u; auto &csr = neoDevice->getUltCommandStreamReceiver(); csr.latestWaitForCompletionWithTimeoutTaskCount = currentTaskCount; csr.waitForTaskCountWithKmdNotifyFallbackReturnValue = WaitStatus::Ready; @@ -150,7 +150,7 @@ HWTEST_F(CommandQueueCreate, whenReserveLinearStreamThenBufferAllocationSwitched auto firstAllocation = commandQueue->commandStream.getGraphicsAllocation(); EXPECT_EQ(firstAllocation, commandQueue->buffers.getCurrentBufferAllocation()); - uint32_t currentTaskCount = 33u; + TaskCountType currentTaskCount = 33u; auto &csr = neoDevice->getUltCommandStreamReceiver(); csr.latestWaitForCompletionWithTimeoutTaskCount = currentTaskCount; @@ -624,8 +624,8 @@ HWTEST_F(CommandQueueCreate, givenContainerWithAllocationsWhenResidencyContainer false, returnValue)); ResidencyContainer container; - uint32_t peekTaskCountBefore = commandQueue->csr->peekTaskCount(); - uint32_t flushedTaskCountBefore = commandQueue->csr->peekLatestFlushedTaskCount(); + TaskCountType peekTaskCountBefore = commandQueue->csr->peekTaskCount(); + TaskCountType flushedTaskCountBefore = commandQueue->csr->peekLatestFlushedTaskCount(); NEO::SubmissionStatus ret = commandQueue->submitBatchBuffer(0, container, nullptr, false); EXPECT_EQ(csr->makeResidentCalledTimes, 0u); EXPECT_EQ(ret, NEO::SubmissionStatus::SUCCESS); @@ -649,8 +649,8 @@ HWTEST_F(CommandQueueCreate, givenCommandStreamReceiverFailsThenSubmitBatchBuffe false, returnValue)); ResidencyContainer container; - uint32_t peekTaskCountBefore = commandQueue->csr->peekTaskCount(); - uint32_t flushedTaskCountBefore = commandQueue->csr->peekLatestFlushedTaskCount(); + TaskCountType peekTaskCountBefore = commandQueue->csr->peekTaskCount(); + TaskCountType flushedTaskCountBefore = commandQueue->csr->peekLatestFlushedTaskCount(); NEO::SubmissionStatus ret = commandQueue->submitBatchBuffer(0, container, nullptr, false); EXPECT_EQ(ret, NEO::SubmissionStatus::FAILED); EXPECT_EQ(peekTaskCountBefore, commandQueue->csr->peekTaskCount()); @@ -1263,7 +1263,7 @@ HWTEST2_F(ExecuteCommandListTests, givenFailingSubmitBatchBufferThenWaitForCompl auto commandList = new CommandListCoreFamily(); commandList->initialize(device, NEO::EngineGroupType::Compute, 0u); auto commandListHandle = commandList->toHandle(); - uint32_t flushedTaskCountPrior = csr->peekTaskCount(); + TaskCountType flushedTaskCountPrior = csr->peekTaskCount(); csr->setLatestFlushedTaskCount(flushedTaskCountPrior); auto res = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false); EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, res); @@ -1902,7 +1902,7 @@ TEST_F(CommandQueueCreate, givenCreatedCommandQueueWhenGettingTrackingFlagsThenD struct SVMAllocsManagerMock : public NEO::SVMAllocsManager { using SVMAllocsManager::mtxForIndirectAccess; SVMAllocsManagerMock(MemoryManager *memoryManager) : NEO::SVMAllocsManager(memoryManager, false) {} - void makeIndirectAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t taskCount) override { + void makeIndirectAllocationsResident(CommandStreamReceiver &commandStreamReceiver, TaskCountType taskCount) override { makeIndirectAllocationsResidentCalledTimes++; } void addInternalAllocationsToResidencyContainer(uint32_t rootDeviceIndex, diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp index de55f2ba67..56f54665ec 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp @@ -143,23 +143,23 @@ struct SynchronizeCsr : public NEO::UltCommandStreamReceiver { SynchronizeCsr(const NEO::ExecutionEnvironment &executionEnvironment, const DeviceBitfield deviceBitfield) : NEO::UltCommandStreamReceiver(const_cast(executionEnvironment), 0, deviceBitfield) { CommandStreamReceiver::tagAddress = &tagAddressData[0]; - memset(const_cast(CommandStreamReceiver::tagAddress), 0xFFFFFFFF, tagSize * sizeof(uint32_t)); + memset(const_cast(CommandStreamReceiver::tagAddress), 0xFFFFFFFF, tagSize * sizeof(uint32_t)); } - WaitStatus waitForCompletionWithTimeout(const WaitParams ¶ms, uint32_t taskCountToWait) override { + WaitStatus waitForCompletionWithTimeout(const WaitParams ¶ms, TaskCountType taskCountToWait) override { enableTimeoutSet = params.enableTimeout; waitForComplitionCalledTimes++; partitionCountSet = this->activePartitions; return waitForCompletionWithTimeoutResult; } - WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, NEO::QueueThrottle throttle) override { + WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, NEO::QueueThrottle throttle) override { waitForTaskCountWithKmdNotifyFallbackCalled++; return NEO::UltCommandStreamReceiver::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, quickKmdSleep, throttle); } static constexpr size_t tagSize = 128; - static volatile uint32_t tagAddressData[tagSize]; + static volatile TagAddressType tagAddressData[tagSize]; uint32_t waitForComplitionCalledTimes = 0; uint32_t waitForTaskCountWithKmdNotifyFallbackCalled = 0; uint32_t partitionCountSet = 0; @@ -168,7 +168,7 @@ struct SynchronizeCsr : public NEO::UltCommandStreamReceiver { }; template -volatile uint32_t SynchronizeCsr::tagAddressData[SynchronizeCsr::tagSize]; +volatile TagAddressType SynchronizeCsr::tagAddressData[SynchronizeCsr::tagSize]; HWTEST_F(CommandQueueSynchronizeTest, givenCallToSynchronizeThenCorrectEnableTimeoutAndTimeoutValuesAreUsed) { auto csr = std::unique_ptr>(new SynchronizeCsr(*device->getNEODevice()->getExecutionEnvironment(), @@ -301,7 +301,7 @@ HWTEST2_F(MultiTileCommandQueueSynchronizeTest, givenMultiplePartitionCountWhenC csr->createPreemptionAllocation(); } EXPECT_NE(0u, csr->getPostSyncWriteOffset()); - volatile uint32_t *tagAddress = csr->getTagAddress(); + volatile TagAddressType *tagAddress = csr->getTagAddress(); for (uint32_t i = 0; i < 2; i++) { *tagAddress = 0xFF; tagAddress = ptrOffset(tagAddress, csr->getPostSyncWriteOffset()); @@ -341,7 +341,7 @@ HWTEST2_F(MultiTileCommandQueueSynchronizeTest, givenCsrHasMultipleActivePartiti csr->createPreemptionAllocation(); } EXPECT_NE(0u, csr->getPostSyncWriteOffset()); - volatile uint32_t *tagAddress = csr->getTagAddress(); + volatile TagAddressType *tagAddress = csr->getTagAddress(); for (uint32_t i = 0; i < 2; i++) { *tagAddress = 0xFF; tagAddress = ptrOffset(tagAddress, csr->getPostSyncWriteOffset()); @@ -402,7 +402,7 @@ struct TestCmdQueueCsr : public NEO::UltCommandStreamReceiver { : NEO::UltCommandStreamReceiver(const_cast(executionEnvironment), 0, deviceBitfield) { } - ADDMETHOD_NOBASE(waitForCompletionWithTimeout, NEO::WaitStatus, NEO::WaitStatus::NotReady, (const WaitParams ¶ms, uint32_t taskCountToWait)); + ADDMETHOD_NOBASE(waitForCompletionWithTimeout, NEO::WaitStatus, NEO::WaitStatus::NotReady, (const WaitParams ¶ms, TaskCountType taskCountToWait)); }; HWTEST_F(CommandQueueSynchronizeTest, givenSinglePartitionCountWhenWaitFunctionFailsThenReturnNotReady) { @@ -490,7 +490,7 @@ HWTEST_F(CommandQueueSynchronizeTest, givenSynchronousCommandQueueWhenTagUpdateF auto pipeControls = findAll(cmdList.begin(), cmdList.end()); size_t pipeControlsPostSyncNumber = 0u; - uint32_t expectedData = commandQueue->getCsr()->peekTaskCount(); + TaskCountType expectedData = commandQueue->getCsr()->peekTaskCount(); for (size_t i = 0; i < pipeControls.size(); i++) { auto pipeControl = reinterpret_cast(*pipeControls[i]); if (pipeControl->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { @@ -792,7 +792,7 @@ HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenHandleScratchSpaceThenP uint32_t scratchSlot, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, - uint32_t currentTaskCount, + TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) override { @@ -855,7 +855,7 @@ HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenHandleScratchSpaceAndHe uint32_t scratchSlot, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, - uint32_t currentTaskCount, + TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) override { @@ -908,7 +908,7 @@ HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenBindlessEnabledThenHand void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, - uint32_t currentTaskCount, + TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty, diff --git a/level_zero/core/test/unit_tests/sources/event/test_event.cpp b/level_zero/core/test/unit_tests/sources/event/test_event.cpp index 8641b3b789..1ca5cc7470 100644 --- a/level_zero/core/test/unit_tests/sources/event/test_event.cpp +++ b/level_zero/core/test/unit_tests/sources/event/test_event.cpp @@ -34,8 +34,8 @@ using namespace std::chrono_literals; namespace CpuIntrinsicsTests { extern std::atomic pauseCounter; -extern volatile uint32_t *pauseAddress; -extern uint32_t pauseValue; +extern volatile TagAddressType *pauseAddress; +extern TaskCountType pauseValue; extern uint32_t pauseOffset; extern std::function setupPauseAddress; } // namespace CpuIntrinsicsTests @@ -902,12 +902,12 @@ TEST_F(EventUsedPacketSignalSynchronizeTest, givenInfiniteTimeoutWhenWaitingForN const size_t eventPacketSize = event->getSinglePacketSize(); const size_t eventCompletionOffset = event->getContextStartOffset(); - VariableBackup backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); - VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue, Event::STATE_CLEARED); + VariableBackup backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); + VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue, Event::STATE_CLEARED); VariableBackup backupPauseOffset(&CpuIntrinsicsTests::pauseOffset); VariableBackup> backupSetupPauseAddress(&CpuIntrinsicsTests::setupPauseAddress); CpuIntrinsicsTests::pauseCounter = 0u; - CpuIntrinsicsTests::pauseAddress = static_cast(ptrOffset(event->getHostAddress(), eventCompletionOffset)); + CpuIntrinsicsTests::pauseAddress = static_cast(ptrOffset(event->getHostAddress(), eventCompletionOffset)); uint32_t *hostAddr = static_cast(ptrOffset(event->getHostAddress(), eventCompletionOffset)); for (uint32_t i = 0; i < packetsInUse; i++) { @@ -917,7 +917,7 @@ TEST_F(EventUsedPacketSignalSynchronizeTest, givenInfiniteTimeoutWhenWaitingForN CpuIntrinsicsTests::setupPauseAddress = [&]() { if (CpuIntrinsicsTests::pauseCounter > 10) { - volatile uint32_t *nextPacket = CpuIntrinsicsTests::pauseAddress; + volatile TagAddressType *nextPacket = CpuIntrinsicsTests::pauseAddress; for (uint32_t i = 0; i < packetsInUse; i++) { *nextPacket = Event::STATE_SIGNALED; nextPacket = ptrOffset(nextPacket, eventPacketSize); @@ -938,12 +938,12 @@ TEST_F(EventUsedPacketSignalSynchronizeTest, givenInfiniteTimeoutWhenWaitingForO const size_t eventPacketSize = event->getSinglePacketSize(); const size_t eventCompletionOffset = event->getContextEndOffset(); - VariableBackup backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); - VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue, Event::STATE_CLEARED); + VariableBackup backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); + VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue, Event::STATE_CLEARED); VariableBackup backupPauseOffset(&CpuIntrinsicsTests::pauseOffset); VariableBackup> backupSetupPauseAddress(&CpuIntrinsicsTests::setupPauseAddress); CpuIntrinsicsTests::pauseCounter = 0u; - CpuIntrinsicsTests::pauseAddress = static_cast(ptrOffset(event->getHostAddress(), eventCompletionOffset)); + CpuIntrinsicsTests::pauseAddress = static_cast(ptrOffset(event->getHostAddress(), eventCompletionOffset)); uint32_t *hostAddr = static_cast(ptrOffset(event->getHostAddress(), eventCompletionOffset)); for (uint32_t i = 0; i < packetsInUse; i++) { @@ -953,7 +953,7 @@ TEST_F(EventUsedPacketSignalSynchronizeTest, givenInfiniteTimeoutWhenWaitingForO CpuIntrinsicsTests::setupPauseAddress = [&]() { if (CpuIntrinsicsTests::pauseCounter > 10) { - volatile uint32_t *nextPacket = CpuIntrinsicsTests::pauseAddress; + volatile TagAddressType *nextPacket = CpuIntrinsicsTests::pauseAddress; for (uint32_t i = 0; i < packetsInUse; i++) { *nextPacket = Event::STATE_SIGNALED; nextPacket = ptrOffset(nextPacket, eventPacketSize); @@ -974,12 +974,12 @@ TEST_F(EventUsedPacketSignalSynchronizeTest, givenInfiniteTimeoutWhenWaitingForT const size_t eventPacketSize = event->getSinglePacketSize(); const size_t eventCompletionOffset = event->getContextEndOffset(); - VariableBackup backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); - VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue, Event::STATE_CLEARED); + VariableBackup backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); + VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue, Event::STATE_CLEARED); VariableBackup backupPauseOffset(&CpuIntrinsicsTests::pauseOffset); VariableBackup> backupSetupPauseAddress(&CpuIntrinsicsTests::setupPauseAddress); CpuIntrinsicsTests::pauseCounter = 0u; - CpuIntrinsicsTests::pauseAddress = static_cast(ptrOffset(event->getHostAddress(), eventCompletionOffset)); + CpuIntrinsicsTests::pauseAddress = static_cast(ptrOffset(event->getHostAddress(), eventCompletionOffset)); uint32_t *hostAddr = static_cast(ptrOffset(event->getHostAddress(), eventCompletionOffset)); for (uint32_t i = 0; i < packetsInUse; i++) { @@ -989,7 +989,7 @@ TEST_F(EventUsedPacketSignalSynchronizeTest, givenInfiniteTimeoutWhenWaitingForT CpuIntrinsicsTests::setupPauseAddress = [&]() { if (CpuIntrinsicsTests::pauseCounter > 10) { - volatile uint32_t *nextPacket = CpuIntrinsicsTests::pauseAddress; + volatile TagAddressType *nextPacket = CpuIntrinsicsTests::pauseAddress; for (uint32_t i = 0; i < packetsInUse; i++) { *nextPacket = Event::STATE_SIGNALED; nextPacket = ptrOffset(nextPacket, eventPacketSize); @@ -2108,8 +2108,8 @@ HWTEST_F(EventTests, constexpr uint32_t iterations = 5; - VariableBackup backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); - VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue, Event::STATE_CLEARED); + VariableBackup backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); + VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue, Event::STATE_CLEARED); VariableBackup backupPauseOffset(&CpuIntrinsicsTests::pauseOffset); VariableBackup> backupSetupPauseAddress(&CpuIntrinsicsTests::setupPauseAddress); neoDevice->getUltCommandStreamReceiver().commandStreamReceiverType = CommandStreamReceiverType::CSR_TBX; @@ -2124,7 +2124,7 @@ HWTEST_F(EventTests, if (event->isUsingContextEndOffset()) { eventCompletionOffset = event->getContextEndOffset(); } - uint32_t *eventAddress = static_cast(ptrOffset(event->getHostAddress(), eventCompletionOffset)); + TagAddressType *eventAddress = static_cast(ptrOffset(event->getHostAddress(), eventCompletionOffset)); *eventAddress = Event::STATE_INITIAL; CpuIntrinsicsTests::pauseCounter = 0u; @@ -2132,7 +2132,7 @@ HWTEST_F(EventTests, CpuIntrinsicsTests::setupPauseAddress = [&]() { if (CpuIntrinsicsTests::pauseCounter >= iterations) { - volatile uint32_t *packet = CpuIntrinsicsTests::pauseAddress; + volatile TagAddressType *packet = CpuIntrinsicsTests::pauseAddress; *packet = Event::STATE_SIGNALED; } }; diff --git a/level_zero/core/test/unit_tests/sources/fence/test_fence.cpp b/level_zero/core/test/unit_tests/sources/fence/test_fence.cpp index 73e3fbc4f7..e68310176c 100644 --- a/level_zero/core/test/unit_tests/sources/fence/test_fence.cpp +++ b/level_zero/core/test/unit_tests/sources/fence/test_fence.cpp @@ -24,8 +24,8 @@ using namespace std::chrono_literals; namespace CpuIntrinsicsTests { extern std::atomic pauseCounter; -extern volatile uint32_t *pauseAddress; -extern uint32_t pauseValue; +extern volatile TagAddressType *pauseAddress; +extern TaskCountType pauseValue; extern uint32_t pauseOffset; extern std::function setupPauseAddress; } // namespace CpuIntrinsicsTests @@ -247,14 +247,14 @@ TEST_F(FenceSynchronizeTest, givenInfiniteTimeoutWhenWaitingForFenceCompletionTh fence->taskCount = 1; - VariableBackup backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); - VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue, 0); + VariableBackup backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); + VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue, 0); VariableBackup backupPauseOffset(&CpuIntrinsicsTests::pauseOffset); VariableBackup> backupSetupPauseAddress(&CpuIntrinsicsTests::setupPauseAddress); CpuIntrinsicsTests::pauseCounter = 0u; CpuIntrinsicsTests::pauseAddress = csr->getTagAddress(); - volatile uint32_t *hostAddr = csr->getTagAddress(); + volatile TagAddressType *hostAddr = csr->getTagAddress(); for (uint32_t i = 0; i < activePartitions; i++) { *hostAddr = 0; hostAddr = ptrOffset(hostAddr, postSyncOffset); @@ -262,7 +262,7 @@ TEST_F(FenceSynchronizeTest, givenInfiniteTimeoutWhenWaitingForFenceCompletionTh CpuIntrinsicsTests::setupPauseAddress = [&]() { if (CpuIntrinsicsTests::pauseCounter > 10) { - volatile uint32_t *nextPacket = CpuIntrinsicsTests::pauseAddress; + volatile TagAddressType *nextPacket = CpuIntrinsicsTests::pauseAddress; for (uint32_t i = 0; i < activePartitions; i++) { *nextPacket = 1; nextPacket = ptrOffset(nextPacket, postSyncOffset); diff --git a/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp b/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp index 675f9ba5bb..6f9245cb2a 100644 --- a/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp +++ b/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp @@ -2335,9 +2335,9 @@ HWTEST2_F(MultipleDevicePeerAllocationTest, EXPECT_NE(nullptr, ptr1); auto allocationData1 = svmManager->getSVMAlloc(ptr1); - uint32_t prevPeekTaskCount1 = allocationData1->gpuAllocations.getGraphicsAllocation(1u)->getTaskCount(csr0->getOsContext().getContextId()); + TaskCountType prevPeekTaskCount1 = allocationData1->gpuAllocations.getGraphicsAllocation(1u)->getTaskCount(csr0->getOsContext().getContextId()); svmManager->prepareIndirectAllocationForDestruction(allocationData1); - uint32_t postPeekTaskCount1 = allocationData1->gpuAllocations.getGraphicsAllocation(1u)->getTaskCount(csr0->getOsContext().getContextId()); + TaskCountType postPeekTaskCount1 = allocationData1->gpuAllocations.getGraphicsAllocation(1u)->getTaskCount(csr0->getOsContext().getContextId()); EXPECT_EQ(postPeekTaskCount1, prevPeekTaskCount1); diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index e129989e30..d5e344de14 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -60,7 +60,7 @@ CommandQueue *CommandQueue::create(Context *context, return funcCreate(context, device, properties, internalUsage); } -cl_int CommandQueue::getErrorCodeFromTaskCount(uint32_t taskCount) { +cl_int CommandQueue::getErrorCodeFromTaskCount(TaskCountType taskCount) { switch (taskCount) { case CompletionStamp::gpuHang: case CompletionStamp::outOfDeviceMemory: @@ -378,16 +378,16 @@ Device &CommandQueue::getDevice() const noexcept { return device->getDevice(); } -uint32_t CommandQueue::getHwTag() const { - uint32_t tag = *getHwTagAddress(); +TagAddressType CommandQueue::getHwTag() const { + TagAddressType tag = *getHwTagAddress(); return tag; } -volatile uint32_t *CommandQueue::getHwTagAddress() const { +volatile TagAddressType *CommandQueue::getHwTagAddress() const { return getGpgpuCommandStreamReceiver().getTagAddress(); } -bool CommandQueue::isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState) { +bool CommandQueue::isCompleted(TaskCountType gpgpuTaskCount, CopyEngineState bcsState) { DEBUG_BREAK_IF(getHwTag() == CompletionStamp::notReady); if (getGpgpuCommandStreamReceiver().testTaskCountReady(getHwTagAddress(), gpgpuTaskCount)) { @@ -401,7 +401,7 @@ bool CommandQueue::isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState return false; } -WaitStatus CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) { +WaitStatus CommandQueue::waitUntilComplete(TaskCountType gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) { WAIT_ENTER() WaitStatus waitStatus{WaitStatus::Ready}; @@ -496,12 +496,12 @@ cl_int CommandQueue::getCommandQueueInfo(cl_command_queue_info paramName, return getQueueInfo(this, paramName, paramValueSize, paramValue, paramValueSizeRet); } -uint32_t CommandQueue::getTaskLevelFromWaitList(uint32_t taskLevel, - cl_uint numEventsInWaitList, - const cl_event *eventWaitList) { +TaskCountType CommandQueue::getTaskLevelFromWaitList(TaskCountType taskLevel, + cl_uint numEventsInWaitList, + const cl_event *eventWaitList) { for (auto iEvent = 0u; iEvent < numEventsInWaitList; ++iEvent) { auto pEvent = (Event *)(eventWaitList[iEvent]); - uint32_t eventTaskLevel = pEvent->peekTaskLevel(); + TaskCountType eventTaskLevel = pEvent->peekTaskLevel(); taskLevel = std::max(taskLevel, eventTaskLevel); } return taskLevel; @@ -862,13 +862,13 @@ cl_uint CommandQueue::getQueueFamilyIndex() const { } } -void CommandQueue::updateBcsTaskCount(aub_stream::EngineType bcsEngineType, uint32_t newBcsTaskCount) { +void CommandQueue::updateBcsTaskCount(aub_stream::EngineType bcsEngineType, TaskCountType newBcsTaskCount) { CopyEngineState &state = bcsStates[EngineHelpers::getBcsIndex(bcsEngineType)]; state.engineType = bcsEngineType; state.taskCount = newBcsTaskCount; } -uint32_t CommandQueue::peekBcsTaskCount(aub_stream::EngineType bcsEngineType) const { +TaskCountType CommandQueue::peekBcsTaskCount(aub_stream::EngineType bcsEngineType) const { const CopyEngineState &state = bcsStates[EngineHelpers::getBcsIndex(bcsEngineType)]; return state.taskCount; } diff --git a/opencl/source/command_queue/command_queue.h b/opencl/source/command_queue/command_queue.h index 14824963b0..8dbf68c9f7 100644 --- a/opencl/source/command_queue/command_queue.h +++ b/opencl/source/command_queue/command_queue.h @@ -58,7 +58,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> { bool internalUsage, cl_int &errcodeRet); - static cl_int getErrorCodeFromTaskCount(uint32_t taskCount); + static cl_int getErrorCodeFromTaskCount(TaskCountType taskCount); CommandQueue() = delete; @@ -199,19 +199,19 @@ class CommandQueue : public BaseObject<_cl_command_queue> { size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); - uint32_t getHwTag() const; + TagAddressType getHwTag() const; - volatile uint32_t *getHwTagAddress() const; + volatile TagAddressType *getHwTagAddress() const; - bool isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState); + bool isCompleted(TaskCountType gpgpuTaskCount, CopyEngineState bcsState); bool isWaitForTimestampsEnabled() const; - virtual bool waitForTimestamps(Range copyEnginesToWait, uint32_t taskCount, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) = 0; + virtual bool waitForTimestamps(Range copyEnginesToWait, TaskCountType taskCount, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) = 0; MOCKABLE_VIRTUAL bool isQueueBlocked(); - MOCKABLE_VIRTUAL WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait); - MOCKABLE_VIRTUAL WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) { + MOCKABLE_VIRTUAL WaitStatus waitUntilComplete(TaskCountType gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait); + MOCKABLE_VIRTUAL WaitStatus waitUntilComplete(TaskCountType gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) { return this->waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, true, false); } MOCKABLE_VIRTUAL WaitStatus waitForAllEngines(bool blockedQueue, PrintfHandler *printfHandler, bool cleanTemporaryAllocationsList); @@ -219,9 +219,9 @@ class CommandQueue : public BaseObject<_cl_command_queue> { return this->waitForAllEngines(blockedQueue, printfHandler, true); } - static uint32_t getTaskLevelFromWaitList(uint32_t taskLevel, - cl_uint numEventsInWaitList, - const cl_event *eventWaitList); + static TaskCountType getTaskLevelFromWaitList(TaskCountType taskLevel, + cl_uint numEventsInWaitList, + const cl_event *eventWaitList); void initializeGpgpu() const; void initializeGpgpuInternals() const; @@ -330,8 +330,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> { template static PtrType convertAddressWithOffsetToGpuVa(PtrType ptr, InternalMemoryType memoryType, GraphicsAllocation &allocation); - void updateBcsTaskCount(aub_stream::EngineType bcsEngineType, uint32_t newBcsTaskCount); - uint32_t peekBcsTaskCount(aub_stream::EngineType bcsEngineType) const; + void updateBcsTaskCount(aub_stream::EngineType bcsEngineType, TaskCountType newBcsTaskCount); + TaskCountType peekBcsTaskCount(aub_stream::EngineType bcsEngineType) const; void updateLatestSentEnqueueType(EnqueueProperties::Operation newEnqueueType) { this->latestSentEnqueueType = newEnqueueType; } EnqueueProperties::Operation peekLatestSentEnqueueOperation() { return this->latestSentEnqueueType; } @@ -343,10 +343,10 @@ class CommandQueue : public BaseObject<_cl_command_queue> { void clearLastBcsPackets(); // taskCount of last task - uint32_t taskCount = 0; + TaskCountType taskCount = 0; // current taskLevel. Used for determining if a PIPE_CONTROL is needed. - uint32_t taskLevel = 0; + TaskCountType taskLevel = 0; std::unique_ptr flushStamp; @@ -370,7 +370,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> { void *enqueueMapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet); cl_int enqueueUnmapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest); - virtual void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType){}; + virtual void obtainTaskLevelAndBlockedStatus(TaskCountType &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType){}; bool isBlockedCommandStreamRequired(uint32_t commandType, const EventsRequest &eventsRequest, bool blockedQueue, bool isMarkerWithProfiling) const; MOCKABLE_VIRTUAL void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies, CommandStreamReceiver &csr); diff --git a/opencl/source/command_queue/command_queue_hw.h b/opencl/source/command_queue/command_queue_hw.h index 216ce6de07..00a3205603 100644 --- a/opencl/source/command_queue/command_queue_hw.h +++ b/opencl/source/command_queue/command_queue_hw.h @@ -375,7 +375,7 @@ class CommandQueueHw : public CommandQueue { TimestampPacketDependencies ×tampPacketDependencies, EventsRequest &eventsRequest, EventBuilder &eventBuilder, - uint32_t taskLevel, + TaskCountType taskLevel, PrintfHandler *printfHandler); void enqueueBlocked(uint32_t commandType, @@ -399,7 +399,7 @@ class CommandQueueHw : public CommandQueue { TimestampPacketDependencies ×tampPacketDependencies, EventsRequest &eventsRequest, EventBuilder &eventBuilder, - uint32_t taskLevel, + TaskCountType taskLevel, CsrDependencies &csrDeps, CommandStreamReceiver *bcsCsr); void processDispatchForCacheFlush(Surface **surfaces, @@ -427,7 +427,7 @@ class CommandQueueHw : public CommandQueue { bool isCacheFlushCommand(uint32_t commandType) const override; - bool waitForTimestamps(Range copyEnginesToWait, uint32_t taskCount, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) override; + bool waitForTimestamps(Range copyEnginesToWait, TaskCountType taskCount, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) override; MOCKABLE_VIRTUAL bool isCacheFlushForBcsRequired() const; @@ -481,8 +481,8 @@ class CommandQueueHw : public CommandQueue { bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const override; - bool isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType); - void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) override; + bool isTaskLevelUpdateRequired(const TaskCountType &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType); + void obtainTaskLevelAndBlockedStatus(TaskCountType &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) override; static void computeOffsetsValueForRectCommands(size_t *bufferOffset, size_t *hostOffset, const size_t *bufferOrigin, diff --git a/opencl/source/command_queue/command_queue_hw_base.inl b/opencl/source/command_queue/command_queue_hw_base.inl index 8040661212..26159e66d6 100644 --- a/opencl/source/command_queue/command_queue_hw_base.inl +++ b/opencl/source/command_queue/command_queue_hw_base.inl @@ -167,7 +167,7 @@ inline bool waitForTimestampsWithinContainer(TimestampPacketContainer *container } template -bool CommandQueueHw::waitForTimestamps(Range copyEnginesToWait, uint32_t taskCount, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) { +bool CommandQueueHw::waitForTimestamps(Range copyEnginesToWait, TaskCountType taskCount, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) { using TSPacketType = typename Family::TimestampPacketType; bool waited = false; diff --git a/opencl/source/command_queue/copy_engine_state.h b/opencl/source/command_queue/copy_engine_state.h index ee9dafd16b..b87e42acc7 100644 --- a/opencl/source/command_queue/copy_engine_state.h +++ b/opencl/source/command_queue/copy_engine_state.h @@ -7,12 +7,14 @@ #pragma once +#include "shared/source/command_stream/task_count_helper.h" + #include "aubstream/engine_node.h" namespace NEO { struct CopyEngineState { aub_stream::EngineType engineType = aub_stream::EngineType::NUM_ENGINES; - uint32_t taskCount = 0; + TaskCountType taskCount = 0; bool isValid() const { return engineType != aub_stream::EngineType::NUM_ENGINES; diff --git a/opencl/source/command_queue/cpu_data_transfer_handler.cpp b/opencl/source/command_queue/cpu_data_transfer_handler.cpp index dd3e8fff95..165fd33286 100644 --- a/opencl/source/command_queue/cpu_data_transfer_handler.cpp +++ b/opencl/source/command_queue/cpu_data_transfer_handler.cpp @@ -56,7 +56,7 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie auto commandStreamReceiverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership(); auto blockQueue = false; - auto taskLevel = 0u; + TaskCountType taskLevel = 0u; obtainTaskLevelAndBlockedStatus(taskLevel, eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, blockQueue, transferProperties.cmdType); DBG_LOG(LogTaskCounts, __FUNCTION__, "taskLevel", taskLevel); diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index 169b5a90d4..dcd88b4ee0 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -156,7 +156,7 @@ cl_int CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, auto commandStreamReceiverOwnership = computeCommandStreamReceiver.obtainUniqueOwnership(); auto blockQueue = false; - auto taskLevel = 0u; + TaskCountType taskLevel = 0u; obtainTaskLevelAndBlockedStatus(taskLevel, numEventsInWaitList, eventWaitList, blockQueue, commandType); enqueueHandlerHook(commandType, multiDispatchInfo); @@ -623,7 +623,7 @@ void CommandQueueHw::processDispatchForMarkerWithTimestampPacket(Comm } template -void CommandQueueHw::obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) { +void CommandQueueHw::obtainTaskLevelAndBlockedStatus(TaskCountType &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) { auto isQueueBlockedStatus = isQueueBlocked(); taskLevel = getTaskLevelFromWaitList(this->taskLevel, numEventsInWaitList, eventWaitList); blockQueueStatus = (taskLevel == CompletionStamp::notReady) || isQueueBlockedStatus; @@ -638,7 +638,7 @@ void CommandQueueHw::obtainTaskLevelAndBlockedStatus(unsigned int &ta } template -bool CommandQueueHw::isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType) { +bool CommandQueueHw::isTaskLevelUpdateRequired(const TaskCountType &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType) { bool updateTaskLevel = true; // if we are blocked by user event then no update if (taskLevel == CompletionStamp::notReady) { @@ -681,7 +681,7 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( TimestampPacketDependencies ×tampPacketDependencies, EventsRequest &eventsRequest, EventBuilder &eventBuilder, - uint32_t taskLevel, + TaskCountType taskLevel, PrintfHandler *printfHandler) { UNRECOVERABLE_IF(multiDispatchInfo.empty()); @@ -991,7 +991,7 @@ CompletionStamp CommandQueueHw::enqueueCommandWithoutKernel( TimestampPacketDependencies ×tampPacketDependencies, EventsRequest &eventsRequest, EventBuilder &eventBuilder, - uint32_t taskLevel, + TaskCountType taskLevel, CsrDependencies &csrDeps, CommandStreamReceiver *bcsCsr) { @@ -1234,7 +1234,7 @@ cl_int CommandQueueHw::enqueueBlit(const MultiDispatchInfo &multiDisp } auto blockQueue = false; - auto taskLevel = 0u; + TaskCountType taskLevel = 0u; obtainTaskLevelAndBlockedStatus(taskLevel, eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, blockQueue, cmdType); auto clearAllDependencies = queueDependenciesClearRequired(); diff --git a/opencl/source/event/async_events_handler.cpp b/opencl/source/event/async_events_handler.cpp index 7619721e7b..374ba05bb7 100644 --- a/opencl/source/event/async_events_handler.cpp +++ b/opencl/source/event/async_events_handler.cpp @@ -38,7 +38,7 @@ void AsyncEventsHandler::registerEvent(Event *event) { } Event *AsyncEventsHandler::processList() { - uint32_t lowestTaskCount = CompletionStamp::notReady; + TaskCountType lowestTaskCount = CompletionStamp::notReady; Event *sleepCandidate = nullptr; pendingList.clear(); diff --git a/opencl/source/event/event.cpp b/opencl/source/event/event.cpp index 84141fe10c..067856194b 100644 --- a/opencl/source/event/event.cpp +++ b/opencl/source/event/event.cpp @@ -8,6 +8,7 @@ #include "opencl/source/event/event.h" #include "shared/source/command_stream/command_stream_receiver.h" +#include "shared/source/command_stream/task_count_helper.h" #include "shared/source/device/device.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/get_info.h" @@ -30,13 +31,12 @@ #include namespace NEO { - Event::Event( Context *ctx, CommandQueue *cmdQueue, cl_command_type cmdType, - uint32_t taskLevel, - uint32_t taskCount) + TaskCountType taskLevel, + TaskCountType taskCount) : taskLevel(taskLevel), currentCmdQVirtualEvent(false), cmdToSubmit(nullptr), @@ -90,8 +90,8 @@ Event::Event( Event::Event( CommandQueue *cmdQueue, cl_command_type cmdType, - uint32_t taskLevel, - uint32_t taskCount) + TaskCountType taskLevel, + TaskCountType taskCount) : Event(nullptr, cmdQueue, cmdType, taskLevel, taskCount) { } @@ -224,7 +224,7 @@ void Event::setupBcs(aub_stream::EngineType bcsEngineType) { this->bcsState.engineType = bcsEngineType; } -uint32_t Event::peekBcsTaskCountFromCommandQueue() { +TaskCountType Event::peekBcsTaskCountFromCommandQueue() { if (bcsState.isValid()) { return this->cmdQueue->peekBcsTaskCount(bcsState.engineType); } else { @@ -232,11 +232,11 @@ uint32_t Event::peekBcsTaskCountFromCommandQueue() { } } -uint32_t Event::getCompletionStamp() const { +TaskCountType Event::getCompletionStamp() const { return this->taskCount; } -void Event::updateCompletionStamp(uint32_t gpgpuTaskCount, uint32_t bcsTaskCount, uint32_t tasklevel, FlushStamp flushStamp) { +void Event::updateCompletionStamp(TaskCountType gpgpuTaskCount, TaskCountType bcsTaskCount, TaskCountType tasklevel, FlushStamp flushStamp) { this->taskCount = gpgpuTaskCount; this->bcsState.taskCount = bcsTaskCount; this->taskLevel = tasklevel; @@ -500,7 +500,7 @@ void Event::unblockEventsBlockedByThis(int32_t transitionStatus) { (void)status; DEBUG_BREAK_IF(!(isStatusCompleted(status) || (peekIsSubmitted(status)))); - uint32_t taskLevelToPropagate = CompletionStamp::notReady; + TaskCountType taskLevelToPropagate = CompletionStamp::notReady; if (isStatusCompletedByTermination(transitionStatus) == false) { // if we are event on top of the tree , obtain taskLevel from CSR @@ -728,11 +728,11 @@ bool Event::areTimestampsCompleted() { return false; } -uint32_t Event::getTaskLevel() { +TaskCountType Event::getTaskLevel() { return taskLevel; } -inline void Event::unblockEventBy(Event &event, uint32_t taskLevel, int32_t transitionStatus) { +inline void Event::unblockEventBy(Event &event, TaskCountType taskLevel, int32_t transitionStatus) { int32_t numEventsBlockingThis = --parentCount; DEBUG_BREAK_IF(numEventsBlockingThis < 0); @@ -901,7 +901,7 @@ bool Event::checkUserEventDependencies(cl_uint numEventsInWaitList, const cl_eve return userEventsDependencies; } -uint32_t Event::peekTaskLevel() const { +TaskCountType Event::peekTaskLevel() const { return taskLevel; } diff --git a/opencl/source/event/event.h b/opencl/source/event/event.h index 1111e484c4..7468754286 100644 --- a/opencl/source/event/event.h +++ b/opencl/source/event/event.h @@ -82,7 +82,7 @@ class Event : public BaseObject<_cl_event>, public IDNode { static constexpr cl_int executionAbortedDueToGpuHang = -777; Event(CommandQueue *cmdQueue, cl_command_type cmdType, - uint32_t taskLevel, uint32_t taskCount); + TaskCountType taskLevel, TaskCountType taskCount); Event(const Event &) = delete; Event &operator=(const Event &) = delete; @@ -90,10 +90,10 @@ class Event : public BaseObject<_cl_event>, public IDNode { ~Event() override; void setupBcs(aub_stream::EngineType bcsEngineType); - uint32_t peekBcsTaskCountFromCommandQueue(); + TaskCountType peekBcsTaskCountFromCommandQueue(); - uint32_t getCompletionStamp() const; - void updateCompletionStamp(uint32_t taskCount, uint32_t bcsTaskCount, uint32_t tasklevel, FlushStamp flushStamp); + TaskCountType getCompletionStamp() const; + void updateCompletionStamp(TaskCountType taskCount, TaskCountType bcsTaskCount, TaskCountType tasklevel, FlushStamp flushStamp); cl_ulong getDelta(cl_ulong startTime, cl_ulong endTime); void setCPUProfilingPath(bool isCPUPath) { this->profilingCpuPath = isCPUPath; } @@ -130,9 +130,9 @@ class Event : public BaseObject<_cl_event>, public IDNode { TagNodeBase *getHwPerfCounterNode(); std::unique_ptr flushStamp; - std::atomic taskLevel; + std::atomic taskLevel; - uint32_t peekTaskLevel() const; + TaskCountType peekTaskLevel() const; void addChild(Event &e); virtual bool setStatus(cl_int status); @@ -239,7 +239,7 @@ class Event : public BaseObject<_cl_event>, public IDNode { return cmdType; } - virtual uint32_t getTaskLevel(); + virtual TaskCountType getTaskLevel(); cl_int peekExecutionStatus() const { return executionStatus; @@ -254,16 +254,16 @@ class Event : public BaseObject<_cl_event>, public IDNode { return (peekNumEventsBlockingThis() > 0); } - virtual void unblockEventBy(Event &event, uint32_t taskLevel, int32_t transitionStatus); + virtual void unblockEventBy(Event &event, TaskCountType taskLevel, int32_t transitionStatus); - void updateTaskCount(uint32_t gpgpuTaskCount, uint32_t bcsTaskCount) { + void updateTaskCount(TaskCountType gpgpuTaskCount, TaskCountType bcsTaskCount) { if (gpgpuTaskCount == CompletionStamp::notReady) { DEBUG_BREAK_IF(true); return; } this->bcsState.taskCount = bcsTaskCount; - uint32_t prevTaskCount = this->taskCount.exchange(gpgpuTaskCount); + TaskCountType prevTaskCount = this->taskCount.exchange(gpgpuTaskCount); if ((prevTaskCount != CompletionStamp::notReady) && (prevTaskCount > gpgpuTaskCount)) { this->taskCount = prevTaskCount; DEBUG_BREAK_IF(true); @@ -281,7 +281,7 @@ class Event : public BaseObject<_cl_event>, public IDNode { virtual void updateExecutionStatus(); void tryFlushEvent(); - uint32_t peekTaskCount() const { + TaskCountType peekTaskCount() const { return this->taskCount; } @@ -311,7 +311,7 @@ class Event : public BaseObject<_cl_event>, public IDNode { protected: Event(Context *ctx, CommandQueue *cmdQueue, cl_command_type cmdType, - uint32_t taskLevel, uint32_t taskCount); + TaskCountType taskLevel, TaskCountType taskCount); ECallbackTarget translateToCallbackTarget(cl_int execStatus) { switch (execStatus) { @@ -394,6 +394,6 @@ class Event : public BaseObject<_cl_event>, public IDNode { private: // can be accessed only with updateTaskCount - std::atomic taskCount; + std::atomic taskCount; }; } // namespace NEO diff --git a/opencl/source/event/user_event.cpp b/opencl/source/event/user_event.cpp index 1631f46228..1d7b88ff4f 100644 --- a/opencl/source/event/user_event.cpp +++ b/opencl/source/event/user_event.cpp @@ -33,7 +33,7 @@ WaitStatus UserEvent::wait(bool blocking, bool useQuickKmdSleep) { return WaitStatus::Ready; } -uint32_t UserEvent::getTaskLevel() { +TaskCountType UserEvent::getTaskLevel() { if (peekExecutionStatus() == CL_COMPLETE) { return 0; } @@ -64,8 +64,8 @@ WaitStatus VirtualEvent::wait(bool blocking, bool useQuickKmdSleep) { return WaitStatus::Ready; } -uint32_t VirtualEvent::getTaskLevel() { - uint32_t taskLevel = 0; +TaskCountType VirtualEvent::getTaskLevel() { + TaskCountType taskLevel = 0; if (cmdQueue != nullptr) { auto &csr = cmdQueue->getGpgpuCommandStreamReceiver(); taskLevel = csr.peekTaskLevel(); diff --git a/opencl/source/event/user_event.h b/opencl/source/event/user_event.h index e5c69027f3..3271599d9c 100644 --- a/opencl/source/event/user_event.h +++ b/opencl/source/event/user_event.h @@ -22,7 +22,7 @@ class UserEvent : public Event { void updateExecutionStatus() override; - uint32_t getTaskLevel() override; + TaskCountType getTaskLevel() override; bool isInitialEventStatus() const; }; @@ -39,6 +39,6 @@ class VirtualEvent : public Event { void updateExecutionStatus() override; - uint32_t getTaskLevel() override; + TaskCountType getTaskLevel() override; }; } // namespace NEO diff --git a/opencl/source/gtpin/gtpin_callbacks.cpp b/opencl/source/gtpin/gtpin_callbacks.cpp index 5651bfa6ad..e2b1febec6 100644 --- a/opencl/source/gtpin/gtpin_callbacks.cpp +++ b/opencl/source/gtpin/gtpin_callbacks.cpp @@ -169,7 +169,7 @@ void gtpinNotifyPreFlushTask(void *pCmdQueue) { } } -void gtpinNotifyFlushTask(uint32_t flushedTaskCount) { +void gtpinNotifyFlushTask(TaskCountType flushedTaskCount) { if (isGTPinInitialized) { std::unique_lock lock{kernelExecQueueLock}; size_t numElems = kernelExecQueue.size(); @@ -185,7 +185,7 @@ void gtpinNotifyFlushTask(uint32_t flushedTaskCount) { } } -void gtpinNotifyTaskCompletion(uint32_t completedTaskCount) { +void gtpinNotifyTaskCompletion(TaskCountType completedTaskCount) { std::unique_lock lock{kernelExecQueueLock}; size_t numElems = kernelExecQueue.size(); for (size_t n = 0; n < numElems;) { diff --git a/opencl/source/gtpin/gtpin_defs.h b/opencl/source/gtpin/gtpin_defs.h index e2f1766bb0..1f5584c303 100644 --- a/opencl/source/gtpin/gtpin_defs.h +++ b/opencl/source/gtpin/gtpin_defs.h @@ -20,7 +20,7 @@ struct GTPinKernelExec { cl_mem gtpinResource; CommandQueue *pCommandQueue; gtpin::command_buffer_handle_t commandBuffer; - uint32_t taskCount; + TaskCountType taskCount; bool isTaskCountValid; bool isResourceResident; diff --git a/opencl/source/gtpin/gtpin_notify.h b/opencl/source/gtpin/gtpin_notify.h index 5bcba6a907..86e87bf313 100644 --- a/opencl/source/gtpin/gtpin_notify.h +++ b/opencl/source/gtpin/gtpin_notify.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -7,6 +7,8 @@ #pragma once +#include "shared/source/command_stream/task_count_helper.h" + #include "CL/cl.h" #include @@ -19,8 +21,8 @@ void gtpinNotifyContextDestroy(cl_context context); void gtpinNotifyKernelCreate(cl_kernel kernel); void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue); void gtpinNotifyPreFlushTask(void *pCmdQueue); -void gtpinNotifyFlushTask(uint32_t flushedTaskCount); -void gtpinNotifyTaskCompletion(uint32_t completedTaskCount); +void gtpinNotifyFlushTask(TaskCountType flushedTaskCount); +void gtpinNotifyTaskCompletion(TaskCountType completedTaskCount); void gtpinNotifyMakeResident(void *pKernel, void *pCommandStreamReceiver); void gtpinNotifyUpdateResidencyList(void *pKernel, void *pResidencyVector); void gtpinNotifyPlatformShutdown(); diff --git a/opencl/source/helpers/task_information.cpp b/opencl/source/helpers/task_information.cpp index 31b4783774..aca6eda809 100644 --- a/opencl/source/helpers/task_information.cpp +++ b/opencl/source/helpers/task_information.cpp @@ -35,7 +35,7 @@ CommandMapUnmap::CommandMapUnmap(MapOperationType operationType, MemObj &memObj, memObj.incRefInternal(); } -CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) { +CompletionStamp &CommandMapUnmap::submit(TaskCountType taskLevel, bool terminated) { DecRefInternalAtScopeEnd decRefInternalAtScopeEnd{memObj}; if (terminated) { @@ -130,7 +130,7 @@ CommandComputeKernel::~CommandComputeKernel() { kernel->decRefInternal(); } -CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminated) { +CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool terminated) { if (terminated) { this->terminated = true; for (auto surface : surfaces) { @@ -295,7 +295,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate return completionStamp; } -uint32_t CommandWithoutKernel::dispatchBlitOperation() { +TaskCountType CommandWithoutKernel::dispatchBlitOperation() { auto bcsCsr = kernelOperation->bcsCsr; UNRECOVERABLE_IF(bcsCsr == nullptr); @@ -322,7 +322,7 @@ uint32_t CommandWithoutKernel::dispatchBlitOperation() { return newTaskCount; } -CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminated) { +CompletionStamp &CommandWithoutKernel::submit(TaskCountType taskLevel, bool terminated) { if (terminated) { this->terminated = true; return completionStamp; diff --git a/opencl/source/helpers/task_information.h b/opencl/source/helpers/task_information.h index be87b0000d..decbf2e112 100644 --- a/opencl/source/helpers/task_information.h +++ b/opencl/source/helpers/task_information.h @@ -85,7 +85,7 @@ class Command : public IFNode { public: // returns command's taskCount obtained from completion stamp // as acquired from command stream receiver - virtual CompletionStamp &submit(uint32_t taskLevel, bool terminated) = 0; + virtual CompletionStamp &submit(TaskCountType taskLevel, bool terminated) = 0; Command() = delete; Command(CommandQueue &commandQueue); @@ -117,7 +117,7 @@ class CommandMapUnmap : public Command { CommandMapUnmap(MapOperationType operationType, MemObj &memObj, MemObjSizeArray ©Size, MemObjOffsetArray ©Offset, bool readOnly, CommandQueue &commandQueue); ~CommandMapUnmap() override = default; - CompletionStamp &submit(uint32_t taskLevel, bool terminated) override; + CompletionStamp &submit(TaskCountType taskLevel, bool terminated) override; private: MemObj &memObj; @@ -135,7 +135,7 @@ class CommandComputeKernel : public Command { ~CommandComputeKernel() override; - CompletionStamp &submit(uint32_t taskLevel, bool terminated) override; + CompletionStamp &submit(TaskCountType taskLevel, bool terminated) override; LinearStream *getCommandStream() override { return kernelOperation->commandStream.get(); } Kernel *peekKernel() const { return kernel; } @@ -155,7 +155,7 @@ class CommandComputeKernel : public Command { class CommandWithoutKernel : public Command { public: using Command::Command; - CompletionStamp &submit(uint32_t taskLevel, bool terminated) override; - uint32_t dispatchBlitOperation(); + CompletionStamp &submit(TaskCountType taskLevel, bool terminated) override; + TaskCountType dispatchBlitOperation(); }; } // namespace NEO diff --git a/opencl/source/sharings/gl/gl_arb_sync_event.h b/opencl/source/sharings/gl/gl_arb_sync_event.h index afc2dba36b..5770498a65 100644 --- a/opencl/source/sharings/gl/gl_arb_sync_event.h +++ b/opencl/source/sharings/gl/gl_arb_sync_event.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -30,7 +30,7 @@ class GlArbSyncEvent : public Event { GlArbSyncEvent() = delete; ~GlArbSyncEvent() override; - void unblockEventBy(Event &event, uint32_t taskLevel, int32_t transitionStatus) override; + void unblockEventBy(Event &event, TaskCountType taskLevel, int32_t transitionStatus) override; static GlArbSyncEvent *create(Event &baseEvent); diff --git a/opencl/source/sharings/gl/gl_sync_event.h b/opencl/source/sharings/gl/gl_sync_event.h index 9ff8e0aa25..a5f8ff4481 100644 --- a/opencl/source/sharings/gl/gl_sync_event.h +++ b/opencl/source/sharings/gl/gl_sync_event.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -24,7 +24,7 @@ class GlSyncEvent : public Event { static GlSyncEvent *create(Context &context, cl_GLsync sync, cl_int *errCode); void updateExecutionStatus() override; - uint32_t getTaskLevel() override; + TaskCountType getTaskLevel() override; bool isExternallySynchronized() const override { return true; } diff --git a/opencl/source/sharings/gl/windows/gl_arb_sync_event_windows.cpp b/opencl/source/sharings/gl/windows/gl_arb_sync_event_windows.cpp index 309d85d7fe..eb244c499b 100644 --- a/opencl/source/sharings/gl/windows/gl_arb_sync_event_windows.cpp +++ b/opencl/source/sharings/gl/windows/gl_arb_sync_event_windows.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2021 Intel Corporation + * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -201,7 +201,7 @@ GlArbSyncEvent *GlArbSyncEvent::create(Event &baseEvent) { return arbSyncEvent; } -void GlArbSyncEvent::unblockEventBy(Event &event, uint32_t taskLevel, int32_t transitionStatus) { +void GlArbSyncEvent::unblockEventBy(Event &event, TaskCountType taskLevel, int32_t transitionStatus) { DEBUG_BREAK_IF(&event != this->baseEvent); if ((transitionStatus > CL_SUBMITTED) || (transitionStatus < 0)) { return; diff --git a/opencl/source/sharings/gl/windows/gl_sync_event_windows.cpp b/opencl/source/sharings/gl/windows/gl_sync_event_windows.cpp index 25c0209b7f..c261898acc 100644 --- a/opencl/source/sharings/gl/windows/gl_sync_event_windows.cpp +++ b/opencl/source/sharings/gl/windows/gl_sync_event_windows.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2021 Intel Corporation + * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -54,7 +54,7 @@ void GlSyncEvent::updateExecutionStatus() { } } -uint32_t GlSyncEvent::getTaskLevel() { +TaskCountType GlSyncEvent::getTaskLevel() { if (peekExecutionStatus() == CL_COMPLETE) { return 0; } diff --git a/opencl/test/unit_test/api/cl_enqueue_wait_for_events_tests.inl b/opencl/test/unit_test/api/cl_enqueue_wait_for_events_tests.inl index 4ae2e2d141..478b68857b 100644 --- a/opencl/test/unit_test/api/cl_enqueue_wait_for_events_tests.inl +++ b/opencl/test/unit_test/api/cl_enqueue_wait_for_events_tests.inl @@ -131,7 +131,7 @@ TEST_F(clEnqueueWaitForEventsTests, GivenInvalidEventWhenClEnqueueWaitForEventsI struct GTPinMockCommandQueue : MockCommandQueue { GTPinMockCommandQueue(Context *context, MockClDevice *device) : MockCommandQueue(context, device, nullptr, false) {} - WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { + WaitStatus waitUntilComplete(TaskCountType gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { return MockCommandQueue::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, true, true); } diff --git a/opencl/test/unit_test/aub_tests/command_stream/aub_command_stream_fixture.h b/opencl/test/unit_test/aub_tests/command_stream/aub_command_stream_fixture.h index a11a131eff..c0e1b3f83e 100644 --- a/opencl/test/unit_test/aub_tests/command_stream/aub_command_stream_fixture.h +++ b/opencl/test/unit_test/aub_tests/command_stream/aub_command_stream_fixture.h @@ -99,7 +99,7 @@ class AUBCommandStreamFixture : public CommandStreamFixture { return graphicsAllocation; } CommandStreamReceiver *pCommandStreamReceiver = nullptr; - volatile uint32_t *pTagMemory = nullptr; + volatile TagAddressType *pTagMemory = nullptr; private: CommandQueue *commandQueue = nullptr; diff --git a/opencl/test/unit_test/command_queue/command_queue_hw_2_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_hw_2_tests.cpp index 85575e3a3e..52c0bcee7e 100644 --- a/opencl/test/unit_test/command_queue/command_queue_hw_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_hw_2_tests.cpp @@ -254,7 +254,7 @@ HWTEST_F(OOQueueHwTest, givenBlockedOutOfOrderCmdQueueAndAsynchronouslyCompleted class MockEventWithSetCompleteOnUpdate : public Event { public: MockEventWithSetCompleteOnUpdate(CommandQueue *cmdQueue, cl_command_type cmdType, - uint32_t taskLevel, uint32_t taskCount) : Event(cmdQueue, cmdType, taskLevel, taskCount) { + uint32_t taskLevel, TaskCountType taskCount) : Event(cmdQueue, cmdType, taskLevel, taskCount) { } void updateExecutionStatus() override { setStatus(CL_COMPLETE); diff --git a/opencl/test/unit_test/command_queue/command_queue_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_tests.cpp index aae41c882c..b87838d4f2 100644 --- a/opencl/test/unit_test/command_queue/command_queue_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_tests.cpp @@ -1012,7 +1012,7 @@ struct WaitForQueueCompletionTests : public ::testing::Test { template struct MyCmdQueue : public CommandQueueHw { MyCmdQueue(Context *context, ClDevice *device) : CommandQueueHw(context, device, nullptr, false){}; - WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override { + WaitStatus waitUntilComplete(TaskCountType gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override { requestedUseQuickKmdSleep = useQuickKmdSleep; waitUntilCompleteCounter++; @@ -1069,17 +1069,17 @@ class CommandStreamReceiverHwMock : public CommandStreamReceiverHw { const DeviceBitfield deviceBitfield) : CommandStreamReceiverHw(executionEnvironment, rootDeviceIndex, deviceBitfield) {} - WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override { + WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override { waitForTaskCountWithKmdNotifyFallbackCounter++; return waitForTaskCountWithKmdNotifyFallbackReturnValue; } - WaitStatus waitForTaskCount(uint32_t requiredTaskCount) override { + WaitStatus waitForTaskCount(TaskCountType requiredTaskCount) override { waitForTaskCountCalledCounter++; return waitForTaskCountReturnValue; } - WaitStatus waitForTaskCountAndCleanTemporaryAllocationList(uint32_t requiredTaskCount) override { + WaitStatus waitForTaskCountAndCleanTemporaryAllocationList(TaskCountType requiredTaskCount) override { waitForTaskCountAndCleanTemporaryAllocationListCalledCounter++; return waitForTaskCountAndCleanTemporaryAllocationListReturnValue; } @@ -1126,7 +1126,7 @@ HWTEST_F(WaitUntilCompletionTests, givenCleanTemporaryAllocationListEqualsFalseW CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ->getGpgpuCommandStreamReceiver(); cmdQ->gpgpuEngine->commandStreamReceiver = cmdStream.get(); - constexpr uint32_t taskCount = 0u; + constexpr TaskCountType taskCount = 0u; constexpr bool cleanTemporaryAllocationList = false; StackVec activeBcsStates{}; @@ -1146,7 +1146,7 @@ HWTEST_F(WaitUntilCompletionTests, givenGpuHangAndCleanTemporaryAllocationListEq CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ->getGpgpuCommandStreamReceiver(); cmdQ->gpgpuEngine->commandStreamReceiver = cmdStream.get(); - constexpr uint32_t taskCount = 0u; + constexpr TaskCountType taskCount = 0u; constexpr bool cleanTemporaryAllocationList = true; StackVec activeBcsStates{}; @@ -1165,7 +1165,7 @@ HWTEST_F(WaitUntilCompletionTests, givenEmptyBcsStatesAndSkipWaitEqualsTrueWhenW CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ->getGpgpuCommandStreamReceiver(); cmdQ->gpgpuEngine->commandStreamReceiver = cmdStream.get(); - constexpr uint32_t taskCount = 0u; + constexpr TaskCountType taskCount = 0u; constexpr bool skipWait = true; StackVec activeBcsStates{}; @@ -1184,7 +1184,7 @@ HWTEST_F(WaitUntilCompletionTests, givenGpuHangAndSkipWaitEqualsFalseWhenWaiting CommandStreamReceiver *oldCommandStreamReceiver = &cmdQ->getGpgpuCommandStreamReceiver(); cmdQ->gpgpuEngine->commandStreamReceiver = cmdStream.get(); - constexpr uint32_t taskCount = 0u; + constexpr TaskCountType taskCount = 0u; constexpr bool skipWait = false; StackVec activeBcsStates{}; @@ -1212,7 +1212,7 @@ HWTEST_F(WaitUntilCompletionTests, givenGpuHangOnBcsCsrWhenWaitingUntilCompleteT cmdQ->gpgpuEngine->commandStreamReceiver = gpgpuCmdStream.get(); cmdQ->bcsCsrToReturn = bcsCmdStream.get(); - constexpr uint32_t taskCount = 0u; + constexpr TaskCountType taskCount = 0u; constexpr bool skipWait = false; StackVec activeBcsStates{CopyEngineState{}}; @@ -1245,7 +1245,7 @@ HWTEST_F(WaitUntilCompletionTests, givenGpuHangOnBcsCsrWhenWaitingUntilCompleteT cmdQ->gpgpuEngine->commandStreamReceiver = gpgpuCmdStream.get(); cmdQ->bcsCsrToReturn = bcsCmdStream.get(); - constexpr uint32_t taskCount = 0u; + constexpr TaskCountType taskCount = 0u; constexpr bool skipWait = false; StackVec activeBcsStates{CopyEngineState{}}; @@ -1279,7 +1279,7 @@ HWTEST_F(WaitUntilCompletionTests, givenSuccessOnBcsCsrWhenWaitingUntilCompleteT cmdQ->gpgpuEngine->commandStreamReceiver = gpgpuCmdStream.get(); cmdQ->bcsCsrToReturn = bcsCmdStream.get(); - constexpr uint32_t taskCount = 0u; + constexpr TaskCountType taskCount = 0u; constexpr bool skipWait = false; StackVec activeBcsStates{CopyEngineState{}}; diff --git a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp index 91e2955cbc..c29355a9ad 100644 --- a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp @@ -112,7 +112,7 @@ struct EnqueueHandlerWithAubSubCaptureTests : public EnqueueHandlerTest { public: MockCmdQWithAubSubCapture(Context *context, ClDevice *device) : CommandQueueHw(context, device, nullptr, false) {} - WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override { + WaitStatus waitUntilComplete(TaskCountType gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override { waitUntilCompleteCalled = true; return CommandQueueHw::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList, skipWait); } diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp index 507b998332..12cd927074 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp @@ -810,7 +810,7 @@ class MyCmdQ : public MockCommandQueueHw { auxTranslationDirection); } - WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override { + WaitStatus waitUntilComplete(TaskCountType gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override { waitCalled++; return MockCommandQueueHw::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList, skipWait); } diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_event_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_event_tests.cpp index 69b465cdf0..a849716586 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_event_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_event_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -83,7 +83,7 @@ TEST_F(EventTests, WhenWaitingForEventThenPipeControlIsNotInserted) { retVal = Event::waitForEvents(1, &event); EXPECT_EQ(CL_SUCCESS, retVal); //we expect event is completed - uint32_t taskCountOfEvent = pEvent->peekTaskCount(); + TaskCountType taskCountOfEvent = pEvent->peekTaskCount(); EXPECT_LE(taskCountOfEvent, pCmdQ->getHwTag()); // no more tasks after WFE, no need to write PC EXPECT_EQ(pEvent->taskLevel + 1, csr.peekTaskLevel()); diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_mt_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_mt_tests.cpp index ee9550f462..992beba26f 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_mt_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_mt_tests.cpp @@ -47,7 +47,7 @@ HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenFinishIsCalledThenBatchesS threads.push_back(std::thread(function)); } - auto currentTaskCount = 0; + int64_t currentTaskCount = 0; startEnqueueProcess = true; diff --git a/opencl/test/unit_test/command_queue/enqueue_map_buffer_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_map_buffer_tests.cpp index 65c7071568..7c53dce283 100644 --- a/opencl/test/unit_test/command_queue/enqueue_map_buffer_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_map_buffer_tests.cpp @@ -291,7 +291,7 @@ HWTEST_F(EnqueueMapBufferTest, givenNonBlockingReadOnlyMapBufferOnZeroCopyBuffer MockCommandQueueHw mockCmdQueue(context, pClDevice, nullptr); auto &commandStreamReceiver = mockCmdQueue.getGpgpuCommandStreamReceiver(); - uint32_t taskCount = commandStreamReceiver.peekTaskCount(); + TaskCountType taskCount = commandStreamReceiver.peekTaskCount(); EXPECT_EQ(0u, taskCount); // enqueue something that can be finished... @@ -478,7 +478,7 @@ TEST_F(EnqueueMapBufferTest, givenNonBlockingMapBufferAfterL3IsAlreadyFlushedThe EXPECT_NE(nullptr, buffer); auto &commandStreamReceiver = pCmdQ->getGpgpuCommandStreamReceiver(); - uint32_t taskCount = commandStreamReceiver.peekTaskCount(); + TaskCountType taskCount = commandStreamReceiver.peekTaskCount(); EXPECT_EQ(0u, taskCount); // enqueue something that map buffer needs to wait for @@ -561,7 +561,7 @@ HWTEST_F(EnqueueMapBufferTest, GivenBufferThatIsNotZeroCopyWhenNonBlockingMapIsC EXPECT_EQ(retVal, CL_SUCCESS); auto &commandStreamReceiver = mockCmdQueue.getGpgpuCommandStreamReceiver(); - uint32_t taskCount = commandStreamReceiver.peekTaskCount(); + TaskCountType taskCount = commandStreamReceiver.peekTaskCount(); EXPECT_EQ(1u, taskCount); auto ptrResult = clEnqueueMapBuffer( diff --git a/opencl/test/unit_test/command_queue/enqueue_map_image_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_map_image_tests.cpp index 267b6b537a..ff50efbde5 100644 --- a/opencl/test/unit_test/command_queue/enqueue_map_image_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_map_image_tests.cpp @@ -342,7 +342,7 @@ HWTEST_F(EnqueueMapImageTest, givenNonReadOnlyMapWithOutEventWhenMappedThenSetEv } }; - uint32_t taskCount = commandStreamReceiver.peekTaskCount(); + TaskCountType taskCount = commandStreamReceiver.peekTaskCount(); EXPECT_EQ(1u, taskCount); // enqueue something that can be finished... @@ -711,7 +711,7 @@ TEST_F(EnqueueMapImageTest, givenBlockedCommandQueueWhenBlockingMapWith2DImageIs class MockEventWithSetCompleteOnUpdate : public Event { public: MockEventWithSetCompleteOnUpdate(CommandQueue *cmdQueue, cl_command_type cmdType, - uint32_t taskLevel, uint32_t taskCount) : Event(cmdQueue, cmdType, taskLevel, taskCount) { + TaskCountType taskLevel, TaskCountType taskCount) : Event(cmdQueue, cmdType, taskLevel, taskCount) { } void updateExecutionStatus() override { setStatus(CL_COMPLETE); @@ -753,7 +753,7 @@ TEST_F(EnqueueMapImageTest, givenBlockedCommandQueueWhenBlockingMapWith1DImageIs class MockEventWithSetCompleteOnUpdate : public Event { public: MockEventWithSetCompleteOnUpdate(CommandQueue *cmdQueue, cl_command_type cmdType, - uint32_t taskLevel, uint32_t taskCount) : Event(cmdQueue, cmdType, taskLevel, taskCount) { + TaskCountType taskLevel, TaskCountType taskCount) : Event(cmdQueue, cmdType, taskLevel, taskCount) { } void updateExecutionStatus() override { setStatus(CL_COMPLETE); diff --git a/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp index 33dacdf184..2004a521b2 100644 --- a/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp @@ -165,7 +165,7 @@ struct CreateAllocationForHostSurfaceCsr : public CommandStreamReceiverHw(0u)}; } }; diff --git a/opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp b/opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp index ea16ebcc46..c664cc31c3 100644 --- a/opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp +++ b/opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp @@ -52,7 +52,7 @@ class SyncBufferEnqueueHandlerTest : public EnqueueHandlerTest { auto &commandStreamReceiver = pDevice->getGpgpuCommandStreamReceiver(); pTagMemory = commandStreamReceiver.getTagAddress(); - ASSERT_NE(nullptr, const_cast(pTagMemory)); + ASSERT_NE(nullptr, const_cast(pTagMemory)); context = new NEO::MockContext(pClDevice); } diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp index 2cf09607ec..a35a1e178d 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp @@ -164,7 +164,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEmptyQueueWhenFinishingThenTa MockContext ctx(pClDevice); MockCommandQueueHw mockCmdQueue(&ctx, pClDevice, nullptr); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); - uint32_t taskCount = 0; + TaskCountType taskCount = 0; taskLevel = taskCount; mockCmdQueue.taskCount = taskCount; mockCmdQueue.taskLevel = taskCount; @@ -203,7 +203,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenNonDcFlushWithInitialTaskCoun auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); size_t gws = 1; - uint32_t taskCount = 0; + TaskCountType taskCount = 0; taskLevel = taskCount; mockCmdQueue.taskCount = taskCount; mockCmdQueue.taskLevel = taskCount; @@ -237,7 +237,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenDcFlushWhenFinishingThenTaskC auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal); EXPECT_EQ(retVal, CL_SUCCESS); - uint32_t taskCount = 0; + TaskCountType taskCount = 0; taskLevel = taskCount; mockCmdQueue.taskCount = taskCount; mockCmdQueue.taskLevel = taskCount; @@ -311,7 +311,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEventIsQueriedWhenEnqueuingTh auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal); EXPECT_EQ(retVal, CL_SUCCESS); - uint32_t taskCount = 0; + TaskCountType taskCount = 0; taskLevel = taskCount; commandQueue.taskCount = taskCount; commandQueue.taskLevel = taskCount; @@ -346,7 +346,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenNonBlockingMapEnqueueWhenFini MockGraphicsAllocation allocation{cpuAllocation.get(), MemoryConstants::pageSize}; AlignedBuffer mockBuffer{&ctx, &allocation}; - uint32_t taskCount = 0; + TaskCountType taskCount = 0; taskLevel = taskCount; commandQueue.taskCount = taskCount; commandQueue.taskLevel = taskCount; @@ -588,7 +588,7 @@ struct MockScratchController : public ScratchSpaceController { uint32_t scratchSlot, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, - uint32_t currentTaskCount, + TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) override { @@ -607,7 +607,7 @@ struct MockScratchController : public ScratchSpaceController { uint32_t scratchSlot, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, - uint32_t currentTaskCount, + TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) override { @@ -615,7 +615,7 @@ struct MockScratchController : public ScratchSpaceController { void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, - uint32_t currentTaskCount, + TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty, diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp index f36762dc3f..27c8a6043a 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp @@ -1867,7 +1867,7 @@ class UltCommandStreamReceiverForDispatchFlags : public UltCommandStreamReceiver CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, - uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override { + TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override { savedDispatchFlags = dispatchFlags; return BaseClass::flushTask(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); @@ -2043,7 +2043,7 @@ class MockCsrWithFailingFlush : public CommandStreamReceiverHw { SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override { return SubmissionStatus::FAILED; } - uint32_t tag = 0; + TagAddressType tag = 0; }; HWTEST_F(CommandStreamReceiverFlushTaskTests, givenWaitForCompletionWithTimeoutIsCalledWhenFlushBatchedSubmissionsReturnsFailureThenItIsPropagated) { diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp index ae3398c719..bc38049ba0 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp @@ -765,18 +765,18 @@ HWTEST2_F(CommandStreamReceiverFlushTaskTests, givenPolicyValueChangedWhenFlushi } namespace CpuIntrinsicsTests { -extern volatile uint32_t *pauseAddress; -extern uint32_t pauseValue; +extern volatile TagAddressType *pauseAddress; +extern TaskCountType pauseValue; } // namespace CpuIntrinsicsTests HWTEST_F(CommandStreamReceiverFlushTaskTests, givenTagValueNotMeetingTaskCountToWaitWhenTagValueSwitchesThenWaitFunctionReturnsTrue) { - VariableBackup backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); - VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue); + VariableBackup backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); + VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); - uint32_t taskCountToWait = 2u; + TaskCountType taskCountToWait = 2u; *mockCsr->tagAddress = 1u; @@ -788,13 +788,13 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenTagValueNotMeetingTaskCountTo } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenTagValueNotMeetingTaskCountToWaitAndIndefinitelyPollWhenWaitForCompletionThenDoNotCallWaitUtils) { - VariableBackup backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); - VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue); + VariableBackup backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); + VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); - uint32_t taskCountToWait = 2u; + TaskCountType taskCountToWait = 2u; *mockCsr->tagAddress = 1u; diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp index 249e9b557d..1e98bdc454 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp @@ -637,7 +637,7 @@ class MyMockCsr : public UltCommandStreamReceiver { public: using UltCommandStreamReceiver::UltCommandStreamReceiver; - WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, + WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override { waitForTaskCountWithKmdNotifyFallbackCalled++; taskCountToWaitPassed = taskCountToWait; @@ -648,7 +648,7 @@ class MyMockCsr : public UltCommandStreamReceiver { } FlushStamp flushStampToWaitPassed = 0; - uint32_t taskCountToWaitPassed = 0; + TaskCountType taskCountToWaitPassed = 0; uint32_t waitForTaskCountWithKmdNotifyFallbackCalled = 0; bool useQuickKmdSleepPassed = false; QueueThrottle throttlePassed = QueueThrottle::MEDIUM; diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_fixture.h b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_fixture.h index c91d21e5c8..46ff9c6c33 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_fixture.h +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_fixture.h @@ -22,7 +22,7 @@ struct BcsTests : public Test { Test::TearDown(); } - uint32_t flushBcsTask(CommandStreamReceiver *bcsCsr, const BlitProperties &blitProperties, bool blocking, Device &device) { + TaskCountType flushBcsTask(CommandStreamReceiver *bcsCsr, const BlitProperties &blitProperties, bool blocking, Device &device) { BlitPropertiesContainer container; container.push_back(blitProperties); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xehp_and_later.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xehp_and_later.cpp index 8bcaaab82e..769b667d14 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xehp_and_later.cpp @@ -345,7 +345,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat uint32_t scratchSlot, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, - uint32_t currentTaskCount, + TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) override { diff --git a/opencl/test/unit_test/device/device_tests.cpp b/opencl/test/unit_test/device/device_tests.cpp index 0c85230703..a0b7d5dc28 100644 --- a/opencl/test/unit_test/device/device_tests.cpp +++ b/opencl/test/unit_test/device/device_tests.cpp @@ -58,7 +58,7 @@ TEST_F(DeviceTest, WhenDeviceIsCreatedThenEnabledClVersionMatchesHardwareInfo) { TEST_F(DeviceTest, givenDeviceWhenEngineIsCreatedThenSetInitialValueForTag) { for (auto &engine : pDevice->allEngines) { auto tagAddress = engine.commandStreamReceiver->getTagAddress(); - ASSERT_NE(nullptr, const_cast(tagAddress)); + ASSERT_NE(nullptr, const_cast(tagAddress)); EXPECT_EQ(initialHardwareTag, *tagAddress); } } diff --git a/opencl/test/unit_test/event/async_events_handler_tests.cpp b/opencl/test/unit_test/event/async_events_handler_tests.cpp index d74d326f27..9cea668c1b 100644 --- a/opencl/test/unit_test/event/async_events_handler_tests.cpp +++ b/opencl/test/unit_test/event/async_events_handler_tests.cpp @@ -26,7 +26,7 @@ class AsyncEventsHandlerTests : public ::testing::Test { public: class MyEvent : public Event { public: - MyEvent(Context *ctx, CommandQueue *cmdQueue, cl_command_type cmdType, uint32_t taskLevel, uint32_t taskCount) + MyEvent(Context *ctx, CommandQueue *cmdQueue, cl_command_type cmdType, TaskCountType taskLevel, TaskCountType taskCount) : Event(ctx, cmdQueue, cmdType, taskLevel, taskCount) { handler.reset(new MockHandler()); } @@ -34,7 +34,7 @@ class AsyncEventsHandlerTests : public ::testing::Test { //return execution status without updating return executionStatus.load(); } - void setTaskStamp(uint32_t taskLevel, uint32_t taskCount) { + void setTaskStamp(TaskCountType taskLevel, TaskCountType taskCount) { this->taskLevel.store(taskLevel); this->updateTaskCount(taskCount, 0); } diff --git a/opencl/test/unit_test/event/event_fixture.h b/opencl/test/unit_test/event/event_fixture.h index 1e5d589d05..2e2d9b045d 100644 --- a/opencl/test/unit_test/event/event_fixture.h +++ b/opencl/test/unit_test/event/event_fixture.h @@ -69,13 +69,13 @@ struct MyUserEvent : public VirtualEvent { WaitStatus wait(bool blocking, bool quickKmdSleep) override { return VirtualEvent::wait(blocking, quickKmdSleep); }; - uint32_t getTaskLevel() override { + TaskCountType getTaskLevel() override { return VirtualEvent::getTaskLevel(); }; }; struct MyEvent : public Event { - MyEvent(CommandQueue *cmdQueue, cl_command_type cmdType, uint32_t taskLevel, uint32_t taskCount) + MyEvent(CommandQueue *cmdQueue, cl_command_type cmdType, TaskCountType taskLevel, TaskCountType taskCount) : Event(cmdQueue, cmdType, taskLevel, taskCount) { } TimeStampData getQueueTimeStamp() { diff --git a/opencl/test/unit_test/event/event_tests.cpp b/opencl/test/unit_test/event/event_tests.cpp index 884aa7a123..1fc3e4b475 100644 --- a/opencl/test/unit_test/event/event_tests.cpp +++ b/opencl/test/unit_test/event/event_tests.cpp @@ -102,7 +102,7 @@ TEST(Event, WhenGettingTaskLevelThenCorrectTaskLevelIsReturned) { public: TempEvent() : Event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 5, 7){}; - uint32_t getTaskLevel() override { + TaskCountType getTaskLevel() override { return Event::getTaskLevel(); } }; @@ -272,7 +272,7 @@ TEST_F(EventTest, WhenGettingClEventCommandExecutionStatusThenCorrectSizeIsRetur TEST_F(EventTest, GivenTagCsLessThanTaskCountWhenGettingClEventCommandExecutionStatusThenClSubmittedIsReturned) { uint32_t tagHW = 4; - uint32_t taskCount = 5; + TaskCountType taskCount = 5; *pTagMemory = tagHW; Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 3, taskCount); @@ -288,7 +288,7 @@ TEST_F(EventTest, GivenTagCsLessThanTaskCountWhenGettingClEventCommandExecutionS TEST_F(EventTest, GivenTagCsEqualTaskCountWhenGettingClEventCommandExecutionStatusThenClCompleteIsReturned) { uint32_t tagHW = 5; - uint32_t taskCount = 5; + TaskCountType taskCount = 5; *pTagMemory = tagHW; Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 3, taskCount); @@ -304,7 +304,7 @@ TEST_F(EventTest, GivenTagCsEqualTaskCountWhenGettingClEventCommandExecutionStat TEST_F(EventTest, GivenTagCsGreaterThanTaskCountWhenGettingClEventCommandExecutionStatusThenClCompleteIsReturned) { uint32_t tagHW = 6; - uint32_t taskCount = 5; + TaskCountType taskCount = 5; *pTagMemory = tagHW; Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 3, taskCount); @@ -1032,7 +1032,7 @@ class MockCommand : public Command { public: using Command::Command; - CompletionStamp &submit(uint32_t taskLevel, bool terminated) override { + CompletionStamp &submit(TaskCountType taskLevel, bool terminated) override { return completionStamp; } }; @@ -1173,8 +1173,8 @@ HWTEST_F(EventTest, givenVirtualEventWhenCommandSubmittedThenLockCsrOccurs) { public: using Event::submitCommand; MockEvent(CommandQueue *cmdQueue, cl_command_type cmdType, - uint32_t taskLevel, uint32_t taskCount) : Event(cmdQueue, cmdType, - taskLevel, taskCount) {} + TaskCountType taskLevel, TaskCountType taskCount) : Event(cmdQueue, cmdType, + taskLevel, taskCount) {} }; MockKernelWithInternals kernel(*pClDevice); @@ -1207,8 +1207,8 @@ HWTEST_F(EventTest, givenVirtualEventWhenSubmitCommandEventNotReadyAndEventWitho public: using Event::submitCommand; MockEvent(CommandQueue *cmdQueue, cl_command_type cmdType, - uint32_t taskLevel, uint32_t taskCount) : Event(cmdQueue, cmdType, - taskLevel, taskCount) {} + TaskCountType taskLevel, TaskCountType taskCount) : Event(cmdQueue, cmdType, + taskLevel, taskCount) {} }; auto virtualEvent = makeReleaseable(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::notReady, CompletionStamp::notReady); @@ -1642,7 +1642,7 @@ struct TestEventCsr : public UltCommandStreamReceiver { TestEventCsr(const ExecutionEnvironment &executionEnvironment, const DeviceBitfield deviceBitfield) : UltCommandStreamReceiver(const_cast(executionEnvironment), 0, deviceBitfield) {} - WaitStatus waitForCompletionWithTimeout(const WaitParams ¶ms, uint32_t taskCountToWait) override { + WaitStatus waitForCompletionWithTimeout(const WaitParams ¶ms, TaskCountType taskCountToWait) override { waitForCompletionWithTimeoutCalled++; waitForCompletionWithTimeoutParamsPassed.push_back({params.enableTimeout, params.waitTimeout, taskCountToWait}); return waitForCompletionWithTimeoutResult; @@ -1651,7 +1651,7 @@ struct TestEventCsr : public UltCommandStreamReceiver { struct WaitForCompletionWithTimeoutParams { bool enableTimeout = false; int64_t timeoutMs{}; - uint32_t taskCountToWait{}; + TaskCountType taskCountToWait{}; }; uint32_t waitForCompletionWithTimeoutCalled = 0u; diff --git a/opencl/test/unit_test/fixtures/cl_device_fixture.cpp b/opencl/test/unit_test/fixtures/cl_device_fixture.cpp index e4daa6fed7..a5cb0fc73d 100644 --- a/opencl/test/unit_test/fixtures/cl_device_fixture.cpp +++ b/opencl/test/unit_test/fixtures/cl_device_fixture.cpp @@ -28,7 +28,7 @@ void ClDeviceFixture::setUpImpl(const NEO::HardwareInfo *hardwareInfo) { auto &commandStreamReceiver = pDevice->getGpgpuCommandStreamReceiver(); pTagMemory = commandStreamReceiver.getTagAddress(); - ASSERT_NE(nullptr, const_cast(pTagMemory)); + ASSERT_NE(nullptr, const_cast(pTagMemory)); this->osContext = pDevice->getDefaultEngine().osContext; } diff --git a/opencl/test/unit_test/fixtures/cl_device_fixture.h b/opencl/test/unit_test/fixtures/cl_device_fixture.h index 74ffd756ea..c3014e3587 100644 --- a/opencl/test/unit_test/fixtures/cl_device_fixture.h +++ b/opencl/test/unit_test/fixtures/cl_device_fixture.h @@ -25,7 +25,7 @@ struct ClDeviceFixture { MockDevice *pDevice = nullptr; MockClDevice *pClDevice = nullptr; - volatile uint32_t *pTagMemory = nullptr; + volatile TagAddressType *pTagMemory = nullptr; HardwareInfo hardwareInfo = {}; PLATFORM platformHelper = {}; OsContext *osContext = nullptr; diff --git a/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h b/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h index 1b9f08cfc2..ce187f18c1 100644 --- a/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h +++ b/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h @@ -173,7 +173,7 @@ struct UltCommandStreamReceiverTest } DispatchFlags flushTaskFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); - uint32_t taskLevel = 42; + TaskCountType taskLevel = 42; LinearStream commandStream; IndirectHeap dsh = {nullptr}; IndirectHeap ioh = {nullptr}; diff --git a/opencl/test/unit_test/gtpin/gtpin_tests.cpp b/opencl/test/unit_test/gtpin/gtpin_tests.cpp index dd789bceea..4963575206 100644 --- a/opencl/test/unit_test/gtpin/gtpin_tests.cpp +++ b/opencl/test/unit_test/gtpin/gtpin_tests.cpp @@ -1782,7 +1782,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelIsCreatedThenAllKerne EXPECT_EQ(CL_SUCCESS, retVal); // Verify that if flush occurs on another queue then our kernel is not flushed to CSR - uint32_t taskCount = 11; + TaskCountType taskCount = 11; gtpinNotifyPreFlushTask(nullptr); EXPECT_EQ(1u, kernelExecQueue.size()); EXPECT_FALSE(kernelExecQueue[0].isTaskCountValid); @@ -1800,7 +1800,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelIsCreatedThenAllKerne EXPECT_EQ(taskCount, kernelExecQueue[0].taskCount); // Verify that if previous task was completed then it does not affect our kernel - uint32_t taskCompleted = taskCount - 1; + TaskCountType taskCompleted = taskCount - 1; int prevCount4 = CommandBufferCompleteCallbackCount; gtpinNotifyTaskCompletion(taskCompleted); EXPECT_EQ(1u, kernelExecQueue.size()); diff --git a/opencl/test/unit_test/helpers/kmd_notify_tests.cpp b/opencl/test/unit_test/helpers/kmd_notify_tests.cpp index 69a13f0e83..8fcc6dce7f 100644 --- a/opencl/test/unit_test/helpers/kmd_notify_tests.cpp +++ b/opencl/test/unit_test/helpers/kmd_notify_tests.cpp @@ -89,7 +89,7 @@ struct KmdNotifyTests : public ::testing::Test { bool waitForFlushStampResult = true; StackVec waitForFlushStampParamsPassed{}; - WaitStatus waitForCompletionWithTimeout(const WaitParams ¶ms, uint32_t taskCountToWait) override { + WaitStatus waitForCompletionWithTimeout(const WaitParams ¶ms, TaskCountType taskCountToWait) override { waitForCompletionWithTimeoutCalled++; waitForCompletionWithTimeoutParamsPassed.push_back({params.enableTimeout, params.waitTimeout, taskCountToWait}); return waitForCompletionWithTimeoutResult; @@ -98,7 +98,7 @@ struct KmdNotifyTests : public ::testing::Test { struct WaitForCompletionWithTimeoutParams { bool enableTimeout{}; int64_t timeoutMs{}; - uint32_t taskCountToWait{}; + TaskCountType taskCountToWait{}; }; uint32_t waitForCompletionWithTimeoutCalled = 0u; @@ -123,7 +123,7 @@ struct KmdNotifyTests : public ::testing::Test { std::unique_ptr device; std::unique_ptr cmdQ; FlushStamp flushStampToWait = 1000; - uint32_t taskCountToWait = 5; + TaskCountType taskCountToWait = 5; }; HWTEST_F(KmdNotifyTests, givenTaskCountWhenWaitUntilCompletionCalledThenAlwaysTryCpuPolling) { @@ -362,7 +362,7 @@ TEST_F(KmdNotifyTests, givenTaskCountDiffLowerThanMinimumToCheckAcLineWhenObtain MockKmdNotifyHelper helper(&(hwInfo->capabilityTable.kmdNotifyProperties)); uint32_t hwTag = 9; - uint32_t taskCountToWait = 10; + TaskCountType taskCountToWait = 10; EXPECT_TRUE(taskCountToWait - hwTag < KmdNotifyConstants::minimumTaskCountDiffToCheckAcLine); EXPECT_EQ(10u, KmdNotifyConstants::minimumTaskCountDiffToCheckAcLine); @@ -376,7 +376,7 @@ TEST_F(KmdNotifyTests, givenTaskCountDiffGreaterThanMinimumToCheckAcLineAndDisab MockKmdNotifyHelper helper(&(hwInfo->capabilityTable.kmdNotifyProperties)); uint32_t hwTag = 10; - uint32_t taskCountToWait = 21; + TaskCountType taskCountToWait = 21; EXPECT_TRUE(taskCountToWait - hwTag > KmdNotifyConstants::minimumTaskCountDiffToCheckAcLine); EXPECT_EQ(10u, KmdNotifyConstants::minimumTaskCountDiffToCheckAcLine); @@ -409,7 +409,7 @@ TEST_F(KmdNotifyTests, givenTaskCountDiffGreaterThanMinimumToCheckAcLineAndEnabl MockKmdNotifyHelper helper(&(hwInfo->capabilityTable.kmdNotifyProperties)); uint32_t hwTag = 10; - uint32_t taskCountToWait = 21; + TaskCountType taskCountToWait = 21; EXPECT_TRUE(taskCountToWait - hwTag > KmdNotifyConstants::minimumTaskCountDiffToCheckAcLine); EXPECT_EQ(10u, KmdNotifyConstants::minimumTaskCountDiffToCheckAcLine); diff --git a/opencl/test/unit_test/helpers/task_information_tests.cpp b/opencl/test/unit_test/helpers/task_information_tests.cpp index c77dffdf4f..ead89cf244 100644 --- a/opencl/test/unit_test/helpers/task_information_tests.cpp +++ b/opencl/test/unit_test/helpers/task_information_tests.cpp @@ -221,7 +221,7 @@ class MockCsr1 : public CommandStreamReceiverHw { public: CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap *dsh, const IndirectHeap *ioh, - const IndirectHeap *ssh, uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override { + const IndirectHeap *ssh, TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override { passedDispatchFlags = dispatchFlags; return CompletionStamp(); } diff --git a/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp b/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp index 4f8dfaf40f..541b61d9ee 100644 --- a/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp +++ b/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp @@ -770,8 +770,8 @@ HWTEST_F(TimestampPacketTests, givenOOQAndEnableTimestampWaitForQueuesWhenFinish namespace CpuIntrinsicsTests { extern std::atomic pauseCounter; -extern volatile uint32_t *pauseAddress; -extern uint32_t pauseValue; +extern volatile TagAddressType *pauseAddress; +extern TaskCountType pauseValue; extern uint32_t pauseOffset; extern std::function setupPauseAddress; } // namespace CpuIntrinsicsTests @@ -795,18 +795,18 @@ HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitForQueuesWhenFinishThenCa EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size()); EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size()); - VariableBackup backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); - VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue); + VariableBackup backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); + VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue); VariableBackup backupPauseOffset(&CpuIntrinsicsTests::pauseOffset); VariableBackup> backupSetupPauseAddress(&CpuIntrinsicsTests::setupPauseAddress); deferredTimestampPackets->peekNodes()[0]->setPacketsUsed(1u); timestampPacketContainer->peekNodes()[0]->setPacketsUsed(1u); - CpuIntrinsicsTests::pauseAddress = reinterpret_cast(const_cast(timestampPacketContainer->peekNodes()[0]->getContextEndAddress(0u))); + CpuIntrinsicsTests::pauseAddress = reinterpret_cast(const_cast(timestampPacketContainer->peekNodes()[0]->getContextEndAddress(0u))); CpuIntrinsicsTests::pauseValue = 2u; CpuIntrinsicsTests::setupPauseAddress = [&]() { - CpuIntrinsicsTests::pauseAddress = reinterpret_cast(const_cast(deferredTimestampPackets->peekNodes()[0]->getContextEndAddress(0u))); + CpuIntrinsicsTests::pauseAddress = reinterpret_cast(const_cast(deferredTimestampPackets->peekNodes()[0]->getContextEndAddress(0u))); }; CpuIntrinsicsTests::pauseCounter = 0u; EXPECT_FALSE(device->getUltCommandStreamReceiver().downloadAllocationCalled); diff --git a/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp b/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp index bab58544ea..1c98625825 100644 --- a/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp +++ b/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp @@ -55,7 +55,7 @@ class MockCommandStreamReceiverHW : public UltCommandStreamReceiver const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, - uint32_t taskLevel, + TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override { stream = &commandStream; diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 1ed57ce4d6..02050c3358 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -540,10 +540,10 @@ class CommandStreamReceiverMock : public CommandStreamReceiver { return NEO::SubmissionStatus::SUCCESS; } - WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, QueueThrottle throttle) override { + WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, QueueThrottle throttle) override { return WaitStatus::Ready; } - uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; }; + TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; }; CompletionStamp flushTask( LinearStream &commandStream, @@ -551,7 +551,7 @@ class CommandStreamReceiverMock : public CommandStreamReceiver { const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, - uint32_t taskLevel, + TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override { CompletionStamp cs = {}; diff --git a/opencl/test/unit_test/kernel/substitute_kernel_heap_tests.cpp b/opencl/test/unit_test/kernel/substitute_kernel_heap_tests.cpp index e3f66b768a..46bc3ae07b 100644 --- a/opencl/test/unit_test/kernel/substitute_kernel_heap_tests.cpp +++ b/opencl/test/unit_test/kernel/substitute_kernel_heap_tests.cpp @@ -122,7 +122,7 @@ TEST_F(KernelSubstituteTest, givenKernelWithUsedKernelAllocationWhenSubstituteKe kernel.kernelInfo.createKernelAllocation(*pDevice, false); auto firstAllocation = kernel.kernelInfo.kernelAllocation; - uint32_t notReadyTaskCount = *commandStreamReceiver.getTagAddress() + 1u; + TaskCountType notReadyTaskCount = *commandStreamReceiver.getTagAddress() + 1u; firstAllocation->updateTaskCount(notReadyTaskCount, commandStreamReceiver.getOsContext().getContextId()); diff --git a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp index 12213cab63..f594fa76ae 100644 --- a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp @@ -37,7 +37,7 @@ struct BcsBufferTests : public ::testing::Test { public: using UltCommandStreamReceiver::UltCommandStreamReceiver; - WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, + WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override { EXPECT_EQ(this->latestFlushedTaskCount, taskCountToWait); EXPECT_EQ(0u, flushStampToWait); @@ -49,7 +49,7 @@ struct BcsBufferTests : public ::testing::Test { return WaitStatus::Ready; } - WaitStatus waitForTaskCountAndCleanTemporaryAllocationList(uint32_t requiredTaskCount) override { + WaitStatus waitForTaskCountAndCleanTemporaryAllocationList(TaskCountType requiredTaskCount) override { EXPECT_EQ(1u, waitForTaskCountWithKmdNotifyFallbackCalled); EXPECT_EQ(this->latestFlushedTaskCount, requiredTaskCount); waitForTaskCountAndCleanAllocationListCalled++; diff --git a/opencl/test/unit_test/mem_obj/mem_obj_destruction_tests.cpp b/opencl/test/unit_test/mem_obj/mem_obj_destruction_tests.cpp index 02ff027377..5f83948a6a 100644 --- a/opencl/test/unit_test/mem_obj/mem_obj_destruction_tests.cpp +++ b/opencl/test/unit_test/mem_obj/mem_obj_destruction_tests.cpp @@ -32,7 +32,7 @@ class MyCsr : public UltCommandStreamReceiver { MyCsr(const ExecutionEnvironment &executionEnvironment, const DeviceBitfield deviceBitfield) : UltCommandStreamReceiver(const_cast(executionEnvironment), 0, deviceBitfield) {} - WaitStatus waitForCompletionWithTimeout(const WaitParams ¶ms, uint32_t taskCountToWait) override { + WaitStatus waitForCompletionWithTimeout(const WaitParams ¶ms, TaskCountType taskCountToWait) override { waitForCompletionWithTimeoutCalled++; waitForCompletionWithTimeoutParamsPassed.push_back({params.enableTimeout, params.waitTimeout, taskCountToWait}); *this->getTagAddress() = getTagAddressValue; @@ -42,7 +42,7 @@ class MyCsr : public UltCommandStreamReceiver { struct WaitForCompletionWithTimeoutParams { bool enableTimeout; int64_t timeoutMs; - uint32_t taskCountToWait; + TaskCountType taskCountToWait; }; uint32_t waitForCompletionWithTimeoutCalled = 0u; @@ -100,7 +100,7 @@ class MemObjDestructionTest : public ::testing::TestWithParam { *device->getDefaultEngine().commandStreamReceiver->getTagAddress() = taskCountReady; } - constexpr static uint32_t taskCountReady = 3u; + constexpr static TaskCountType taskCountReady = 3u; ExecutionEnvironment *executionEnvironment = nullptr; std::unique_ptr device; uint32_t contextId = 0; @@ -223,8 +223,8 @@ HWTEST_P(MemObjAsyncDestructionTest, givenUsedMemObjWithAsyncDestructionsEnabled memObj->getGraphicsAllocation(rootDeviceIndex)->updateTaskCount(taskCountReady, osContextId0); memObj->getGraphicsAllocation(rootDeviceIndex)->updateTaskCount(taskCountReady, osContextId1); - uint32_t expectedTaskCount0{}; - uint32_t expectedTaskCount1{}; + TaskCountType expectedTaskCount0{}; + TaskCountType expectedTaskCount1{}; if (hasCallbacks) { expectedTaskCount0 = allocation->getTaskCount(osContextId0); @@ -266,7 +266,7 @@ HWTEST_P(MemObjAsyncDestructionTest, givenUsedMemObjWithAsyncDestructionsEnabled *mockCsr->getTagAddress() = 0; auto osContextId = mockCsr->getOsContext().getContextId(); - uint32_t expectedTaskCount{}; + TaskCountType expectedTaskCount{}; if (hasAllocatedMappedPtr) { expectedTaskCount = allocation->getTaskCount(osContextId); @@ -310,7 +310,7 @@ HWTEST_P(MemObjAsyncDestructionTest, givenUsedMemObjWithAsyncDestructionsEnabled auto osContextId = mockCsr->getOsContext().getContextId(); - uint32_t expectedTaskCount{}; + TaskCountType expectedTaskCount{}; if (hasAllocatedMappedPtr) { expectedTaskCount = allocation->getTaskCount(osContextId); @@ -346,7 +346,7 @@ HWTEST_P(MemObjSyncDestructionTest, givenMemObjWithDestructableAllocationWhenAsy auto osContextId = mockCsr->getOsContext().getContextId(); - uint32_t expectedTaskCount = allocation->getTaskCount(osContextId); + TaskCountType expectedTaskCount = allocation->getTaskCount(osContextId); delete memObj; @@ -396,7 +396,7 @@ HWTEST_P(MemObjSyncDestructionTest, givenMemObjWithMapAllocationWhenAsyncDestruc auto osContextId = mockCsr->getOsContext().getContextId(); - uint32_t expectedTaskCount{}; + TaskCountType expectedTaskCount{}; if (isMapAllocationUsed) { expectedTaskCount = mapAllocation->getTaskCount(osContextId); diff --git a/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp b/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp index 6b367f06ad..884a7d08e2 100644 --- a/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp +++ b/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp @@ -245,7 +245,7 @@ TEST_F(MemoryAllocatorTest, WhenAllocatingGraphicsMemoryThenAllocationHasCorrect ASSERT_NE(nullptr, allocation); // initial taskCount must be -1. if not, we may kill allocation before it will be used - EXPECT_EQ((uint32_t)-1, allocation->getTaskCount(csr->getOsContext().getContextId())); + EXPECT_EQ(std::numeric_limits::max(), allocation->getTaskCount(csr->getOsContext().getContextId())); // We know we want graphics memory to be page aligned EXPECT_EQ(0u, reinterpret_cast(allocation->getUnderlyingBuffer()) & (alignment - 1)); EXPECT_EQ(Sharing::nonSharedResource, allocation->peekSharedHandle()); @@ -1902,7 +1902,7 @@ TEST_F(MemoryManagerWithCsrTest, GivenAllocationsInHostPtrManagerReadyForCleanin auto fragment4 = hostPtrManager->getFragment({alignUp(cpuPtr2, MemoryConstants::pageSize), csr->getRootDeviceIndex()}); EXPECT_NE(nullptr, fragment4); - uint32_t taskCountReady = 1; + TaskCountType taskCountReady = 1; auto storage = csr->getInternalAllocationStorage(); storage->storeAllocationWithTaskCount(std::unique_ptr(graphicsAllocation1), TEMPORARY_ALLOCATION, taskCountReady); storage->storeAllocationWithTaskCount(std::unique_ptr(graphicsAllocation2), TEMPORARY_ALLOCATION, taskCountReady); diff --git a/opencl/test/unit_test/mocks/mock_command_queue.h b/opencl/test/unit_test/mocks/mock_command_queue.h index 533edb0ce5..1fe5fc2f00 100644 --- a/opencl/test/unit_test/mocks/mock_command_queue.h +++ b/opencl/test/unit_test/mocks/mock_command_queue.h @@ -94,7 +94,7 @@ class MockCommandQueue : public CommandQueue { return writeBufferRetValue; } - WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override { + WaitStatus waitUntilComplete(TaskCountType gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override { latestTaskCountWaited = gpgpuTaskCountToWait; waitUntilCompleteCalledCount++; @@ -105,7 +105,7 @@ class MockCommandQueue : public CommandQueue { return CommandQueue::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList, skipWait); } - WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { + WaitStatus waitUntilComplete(TaskCountType gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { latestTaskCountWaited = gpgpuTaskCountToWait; return CommandQueue::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep); } @@ -212,7 +212,7 @@ class MockCommandQueue : public CommandQueue { bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const override { return isCacheFlushRequired; } - bool waitForTimestamps(Range copyEnginesToWait, uint32_t taskCount, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) override { + bool waitForTimestamps(Range copyEnginesToWait, TaskCountType taskCount, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) override { waitForTimestampsCalled = true; return false; }; @@ -227,7 +227,7 @@ class MockCommandQueue : public CommandQueue { void *writeBufferPtr = nullptr; size_t requestedCmdStreamSize = 0; GraphicsAllocation *writeMapAllocation = nullptr; - std::atomic latestTaskCountWaited{std::numeric_limits::max()}; + std::atomic latestTaskCountWaited{std::numeric_limits::max()}; std::optional waitUntilCompleteReturnValue{}; int waitUntilCompleteCalledCount{0}; }; @@ -353,7 +353,7 @@ class MockCommandQueueHw : public CommandQueueHw { useBcsCsrOnNotifyEnabled = notifyBcsCsr; } - WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override { + WaitStatus waitUntilComplete(TaskCountType gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override { latestTaskCountWaited = gpgpuTaskCountToWait; if (waitUntilCompleteReturnValue.has_value()) { return *waitUntilCompleteReturnValue; @@ -417,7 +417,7 @@ class MockCommandQueueHw : public CommandQueueHw { bool returnValue = false; } overrideIsCacheFlushForBcsRequired; BuiltinOpParams kernelParams; - std::atomic latestTaskCountWaited{std::numeric_limits::max()}; + std::atomic latestTaskCountWaited{std::numeric_limits::max()}; bool flushCalled = false; std::optional waitForAllEnginesReturnValue{}; std::optional waitUntilCompleteReturnValue{}; diff --git a/opencl/test/unit_test/os_interface/linux/drm_command_stream_tests_2.cpp b/opencl/test/unit_test/os_interface/linux/drm_command_stream_tests_2.cpp index 03acd5577c..dd2964ae12 100644 --- a/opencl/test/unit_test/os_interface/linux/drm_command_stream_tests_2.cpp +++ b/opencl/test/unit_test/os_interface/linux/drm_command_stream_tests_2.cpp @@ -1140,7 +1140,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, EXPECT_NE(0u, mock->waitUserFenceCall.ctxId); EXPECT_EQ(-1, mock->waitUserFenceCall.timeout); - EXPECT_EQ(Drm::ValueWidth::U32, mock->waitUserFenceCall.dataWidth); + EXPECT_EQ(Drm::ValueWidth::U64, mock->waitUserFenceCall.dataWidth); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, @@ -1234,7 +1234,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, EXPECT_EQ(0u, mock->waitUserFenceCall.ctxId); EXPECT_EQ(1000, mock->waitUserFenceCall.timeout); - EXPECT_EQ(Drm::ValueWidth::U32, mock->waitUserFenceCall.dataWidth); + EXPECT_EQ(Drm::ValueWidth::U64, mock->waitUserFenceCall.dataWidth); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, diff --git a/opencl/test/unit_test/xe_hp_core/copy_engine_tests_xe_hp_core.cpp b/opencl/test/unit_test/xe_hp_core/copy_engine_tests_xe_hp_core.cpp index 3a9fd4b0d9..8dae5a78ea 100644 --- a/opencl/test/unit_test/xe_hp_core/copy_engine_tests_xe_hp_core.cpp +++ b/opencl/test/unit_test/xe_hp_core/copy_engine_tests_xe_hp_core.cpp @@ -40,7 +40,7 @@ struct BlitXE_HP_CORETests : public ::testing::Test { clDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); } - std::optional flushBcsTask(CommandStreamReceiver *csr, const BlitProperties &blitProperties, bool blocking, Device &device) { + std::optional flushBcsTask(CommandStreamReceiver *csr, const BlitProperties &blitProperties, bool blocking, Device &device) { BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); diff --git a/opencl/test/unit_test/xe_hpc_core/copy_engine_tests_xe_hpc_core.cpp b/opencl/test/unit_test/xe_hpc_core/copy_engine_tests_xe_hpc_core.cpp index 315ff8b982..ea6bc1e9a1 100644 --- a/opencl/test/unit_test/xe_hpc_core/copy_engine_tests_xe_hpc_core.cpp +++ b/opencl/test/unit_test/xe_hpc_core/copy_engine_tests_xe_hpc_core.cpp @@ -40,7 +40,7 @@ struct BlitXeHpcCoreTests : public ::testing::Test { clDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); } - std::optional flushBcsTask(CommandStreamReceiver *csr, const BlitProperties &blitProperties, bool blocking, Device &device) { + std::optional flushBcsTask(CommandStreamReceiver *csr, const BlitProperties &blitProperties, bool blocking, Device &device) { BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); diff --git a/opencl/test/unit_test/xe_hpg_core/copy_engine_tests_xe_hpg_core.cpp b/opencl/test/unit_test/xe_hpg_core/copy_engine_tests_xe_hpg_core.cpp index 39d8f411ab..a34a085986 100644 --- a/opencl/test/unit_test/xe_hpg_core/copy_engine_tests_xe_hpg_core.cpp +++ b/opencl/test/unit_test/xe_hpg_core/copy_engine_tests_xe_hpg_core.cpp @@ -41,7 +41,7 @@ struct BlitXeHpgCoreTests : public ::testing::Test { clDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); } - std::optional flushBcsTask(CommandStreamReceiver *csr, const BlitProperties &blitProperties, bool blocking, Device &device) { + std::optional flushBcsTask(CommandStreamReceiver *csr, const BlitProperties &blitProperties, bool blocking, Device &device) { BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); diff --git a/shared/source/command_stream/CMakeLists.txt b/shared/source/command_stream/CMakeLists.txt index 001632f641..9ecf2b794d 100644 --- a/shared/source/command_stream/CMakeLists.txt +++ b/shared/source/command_stream/CMakeLists.txt @@ -52,6 +52,7 @@ set(NEO_CORE_COMMAND_STREAM ${CMAKE_CURRENT_SOURCE_DIR}/submission_status.h ${CMAKE_CURRENT_SOURCE_DIR}/submissions_aggregator.cpp ${CMAKE_CURRENT_SOURCE_DIR}/submissions_aggregator.h + ${CMAKE_CURRENT_SOURCE_DIR}/task_count_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver.h ${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver_hw.h diff --git a/shared/source/command_stream/aub_command_stream_receiver_hw.h b/shared/source/command_stream/aub_command_stream_receiver_hw.h index 49e33951fe..6f426c75d9 100644 --- a/shared/source/command_stream/aub_command_stream_receiver_hw.h +++ b/shared/source/command_stream/aub_command_stream_receiver_hw.h @@ -56,7 +56,7 @@ class AUBCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw::flush(BatchBuffer &batch if (subCaptureManager->isSubCaptureMode()) { if (!subCaptureManager->isSubCaptureEnabled()) { if (this->standalone) { - volatile uint32_t *pollAddress = this->tagAddress; + volatile TagAddressType *pollAddress = this->tagAddress; for (uint32_t i = 0; i < this->activePartitions; i++) { *pollAddress = this->peekLatestSentTaskCount(); pollAddress = ptrOffset(pollAddress, this->postSyncWriteOffset); @@ -353,7 +353,7 @@ SubmissionStatus AUBCommandStreamReceiverHw::flush(BatchBuffer &batch submitBatchBufferAub(batchBufferGpuAddress, pBatchBuffer, sizeBatchBuffer, this->getMemoryBank(batchBuffer.commandBufferAllocation), this->getPPGTTAdditionalBits(batchBuffer.commandBufferAllocation)); if (this->standalone) { - volatile uint32_t *pollAddress = this->tagAddress; + volatile TagAddressType *pollAddress = this->tagAddress; for (uint32_t i = 0; i < this->activePartitions; i++) { *pollAddress = this->peekLatestSentTaskCount(); pollAddress = ptrOffset(pollAddress, this->postSyncWriteOffset); @@ -614,7 +614,7 @@ void AUBCommandStreamReceiverHw::pollForCompletionImpl() { } template -inline WaitStatus AUBCommandStreamReceiverHw::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) { +inline WaitStatus AUBCommandStreamReceiverHw::waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) { const auto result = CommandStreamReceiverSimulatedHw::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, throttle); pollForCompletion(); diff --git a/shared/source/command_stream/command_stream_receiver.cpp b/shared/source/command_stream/command_stream_receiver.cpp index 989325bb0f..97936e4d2c 100644 --- a/shared/source/command_stream/command_stream_receiver.cpp +++ b/shared/source/command_stream/command_stream_receiver.cpp @@ -180,7 +180,7 @@ void CommandStreamReceiver::makeResidentHostPtrAllocation(GraphicsAllocation *gf makeResident(*gfxAllocation); } -WaitStatus CommandStreamReceiver::waitForTaskCount(uint32_t requiredTaskCount) { +WaitStatus CommandStreamReceiver::waitForTaskCount(TaskCountType requiredTaskCount) { auto address = getTagAddress(); if (!skipResourceCleanup() && address) { this->downloadTagAllocation(requiredTaskCount); @@ -190,7 +190,7 @@ WaitStatus CommandStreamReceiver::waitForTaskCount(uint32_t requiredTaskCount) { return WaitStatus::Ready; } -WaitStatus CommandStreamReceiver::waitForTaskCountAndCleanAllocationList(uint32_t requiredTaskCount, uint32_t allocationUsage) { +WaitStatus CommandStreamReceiver::waitForTaskCountAndCleanAllocationList(TaskCountType requiredTaskCount, uint32_t allocationUsage) { WaitStatus waitStatus{WaitStatus::Ready}; auto &list = allocationUsage == TEMPORARY_ALLOCATION ? internalAllocationStorage->getTemporaryAllocations() : internalAllocationStorage->getAllocationsForReuse(); if (!list.peekIsEmpty()) { @@ -201,7 +201,7 @@ WaitStatus CommandStreamReceiver::waitForTaskCountAndCleanAllocationList(uint32_ return waitStatus; } -WaitStatus CommandStreamReceiver::waitForTaskCountAndCleanTemporaryAllocationList(uint32_t requiredTaskCount) { +WaitStatus CommandStreamReceiver::waitForTaskCountAndCleanTemporaryAllocationList(TaskCountType requiredTaskCount) { return waitForTaskCountAndCleanAllocationList(requiredTaskCount, TEMPORARY_ALLOCATION); } @@ -358,13 +358,13 @@ void CommandStreamReceiver::cleanupResources() { } } -WaitStatus CommandStreamReceiver::waitForCompletionWithTimeout(const WaitParams ¶ms, uint32_t taskCountToWait) { +WaitStatus CommandStreamReceiver::waitForCompletionWithTimeout(const WaitParams ¶ms, TaskCountType taskCountToWait) { bool printWaitForCompletion = DebugManager.flags.LogWaitingForCompletion.get(); if (printWaitForCompletion) { printTagAddressContent(taskCountToWait, params.waitTimeout, true); } - uint32_t latestSentTaskCount = this->latestFlushedTaskCount; + TaskCountType latestSentTaskCount = this->latestFlushedTaskCount; if (latestSentTaskCount < taskCountToWait) { if (!this->flushBatchedSubmissions()) { const auto isGpuHang{isGpuHangDetected()}; @@ -391,15 +391,15 @@ bool CommandStreamReceiver::checkGpuHangDetected(TimeType currentTime, TimeType return false; } -WaitStatus CommandStreamReceiver::baseWaitFunction(volatile uint32_t *pollAddress, const WaitParams ¶ms, uint32_t taskCountToWait) { +WaitStatus CommandStreamReceiver::baseWaitFunction(volatile TagAddressType *pollAddress, const WaitParams ¶ms, TaskCountType taskCountToWait) { std::chrono::high_resolution_clock::time_point waitStartTime, lastHangCheckTime, currentTime; int64_t timeDiff = 0; - uint32_t latestSentTaskCount = this->latestFlushedTaskCount; + TaskCountType latestSentTaskCount = this->latestFlushedTaskCount; if (latestSentTaskCount < taskCountToWait) { this->flushTagUpdate(); } - volatile uint32_t *partitionAddress = pollAddress; + volatile TagAddressType *partitionAddress = pollAddress; waitStartTime = std::chrono::high_resolution_clock::now(); lastHangCheckTime = waitStartTime; @@ -438,7 +438,7 @@ WaitStatus CommandStreamReceiver::baseWaitFunction(volatile uint32_t *pollAddres void CommandStreamReceiver::setTagAllocation(GraphicsAllocation *allocation) { this->tagAllocation = allocation; UNRECOVERABLE_IF(allocation == nullptr); - this->tagAddress = reinterpret_cast(allocation->getUnderlyingBuffer()); + this->tagAddress = reinterpret_cast(allocation->getUnderlyingBuffer()); this->debugPauseStateAddress = reinterpret_cast( reinterpret_cast(allocation->getUnderlyingBuffer()) + debugPauseStateAddressOffset); } @@ -855,7 +855,7 @@ void CommandStreamReceiver::printDeviceIndex() { } } -void CommandStreamReceiver::checkForNewResources(uint32_t submittedTaskCount, uint32_t allocationTaskCount, GraphicsAllocation &gfxAllocation) { +void CommandStreamReceiver::checkForNewResources(TaskCountType submittedTaskCount, TaskCountType allocationTaskCount, GraphicsAllocation &gfxAllocation) { if (useNewResourceImplicitFlush) { if (allocationTaskCount == GraphicsAllocation::objectNotUsed && !GraphicsAllocation::isIsaAllocationType(gfxAllocation.getAllocationType())) { newResources = true; @@ -875,7 +875,7 @@ bool CommandStreamReceiver::checkImplicitFlushForGpuIdle() { return false; } -void CommandStreamReceiver::downloadTagAllocation(uint32_t taskCountToWait) { +void CommandStreamReceiver::downloadTagAllocation(TaskCountType taskCountToWait) { if (this->getTagAllocation()) { if (taskCountToWait && taskCountToWait <= this->peekLatestFlushedTaskCount()) { this->downloadAllocation(*this->getTagAllocation()); @@ -883,7 +883,7 @@ void CommandStreamReceiver::downloadTagAllocation(uint32_t taskCountToWait) { } } -bool CommandStreamReceiver::testTaskCountReady(volatile uint32_t *pollAddress, uint32_t taskCountToWait) { +bool CommandStreamReceiver::testTaskCountReady(volatile TagAddressType *pollAddress, TaskCountType taskCountToWait) { this->downloadTagAllocation(taskCountToWait); for (uint32_t i = 0; i < activePartitions; i++) { if (!WaitUtils::waitFunction(pollAddress, taskCountToWait)) { @@ -903,7 +903,7 @@ const RootDeviceEnvironment &CommandStreamReceiver::peekRootDeviceEnvironment() return *executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]; } -uint32_t CommandStreamReceiver::getCompletionValue(const GraphicsAllocation &gfxAllocation) { +TaskCountType CommandStreamReceiver::getCompletionValue(const GraphicsAllocation &gfxAllocation) { if (completionFenceValuePointer) { return *completionFenceValuePointer; } @@ -920,7 +920,7 @@ bool CommandStreamReceiver::createPerDssBackedBuffer(Device &device) { return perDssBackedBuffer != nullptr; } -void CommandStreamReceiver::printTagAddressContent(uint32_t taskCountToWait, int64_t waitTimeout, bool start) { +void CommandStreamReceiver::printTagAddressContent(TaskCountType taskCountToWait, int64_t waitTimeout, bool start) { auto postSyncAddress = getTagAddress(); if (start) { PRINT_DEBUG_STRING(true, stdout, @@ -941,7 +941,7 @@ LogicalStateHelper *CommandStreamReceiver::getLogicalStateHelper() const { return logicalStateHelper.get(); } -uint32_t CompletionStamp::getTaskCountFromSubmissionStatusError(SubmissionStatus status) { +TaskCountType CompletionStamp::getTaskCountFromSubmissionStatusError(SubmissionStatus status) { switch (status) { case SubmissionStatus::OUT_OF_HOST_MEMORY: return CompletionStamp::outOfHostMemory; diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index 344635b656..8224711d70 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -13,6 +13,7 @@ #include "shared/source/command_stream/stream_properties.h" #include "shared/source/command_stream/submission_status.h" #include "shared/source/command_stream/submissions_aggregator.h" +#include "shared/source/command_stream/task_count_helper.h" #include "shared/source/command_stream/wait_status.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/blit_commands_helper.h" @@ -86,7 +87,7 @@ class CommandStreamReceiver { virtual CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, - uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) = 0; + TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) = 0; virtual bool flushBatchedSubmissions() = 0; MOCKABLE_VIRTUAL SubmissionStatus submitBatchBuffer(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency); @@ -112,9 +113,9 @@ class CommandStreamReceiver { virtual GmmPageTableMngr *createPageTableManager() { return nullptr; } bool needsPageTableManager() const; - MOCKABLE_VIRTUAL WaitStatus waitForTaskCount(uint32_t requiredTaskCount); - WaitStatus waitForTaskCountAndCleanAllocationList(uint32_t requiredTaskCount, uint32_t allocationUsage); - MOCKABLE_VIRTUAL WaitStatus waitForTaskCountAndCleanTemporaryAllocationList(uint32_t requiredTaskCount); + MOCKABLE_VIRTUAL WaitStatus waitForTaskCount(TaskCountType requiredTaskCount); + WaitStatus waitForTaskCountAndCleanAllocationList(TaskCountType requiredTaskCount, uint32_t allocationUsage); + MOCKABLE_VIRTUAL WaitStatus waitForTaskCountAndCleanTemporaryAllocationList(TaskCountType requiredTaskCount); LinearStream &getCS(size_t minRequiredSize = 1024u); OSInterface *getOSInterface() const; @@ -129,19 +130,19 @@ class CommandStreamReceiver { return tagsMultiAllocation; } MultiGraphicsAllocation &createTagsMultiAllocation(); - volatile uint32_t *getTagAddress() const { return tagAddress; } + volatile TagAddressType *getTagAddress() const { return tagAddress; } uint64_t getDebugPauseStateGPUAddress() const { return tagAllocation->getGpuAddress() + debugPauseStateAddressOffset; } virtual bool waitForFlushStamp(FlushStamp &flushStampToWait) { return true; } - uint32_t peekTaskCount() const { return taskCount; } + TaskCountType peekTaskCount() const { return taskCount; } - uint32_t peekTaskLevel() const { return taskLevel; } + TaskCountType peekTaskLevel() const { return taskLevel; } FlushStamp obtainCurrentFlushStamp() const; - uint32_t peekLatestSentTaskCount() const { return latestSentTaskCount; } + TaskCountType peekLatestSentTaskCount() const { return latestSentTaskCount; } - uint32_t peekLatestFlushedTaskCount() const { return latestFlushedTaskCount; } + TaskCountType peekLatestFlushedTaskCount() const { return latestFlushedTaskCount; } void enableNTo1SubmissionModel() { this->nTo1SubmissionModelEnabled = true; } bool isNTo1SubmissionModelEnabled() const { return this->nTo1SubmissionModelEnabled; } @@ -165,10 +166,10 @@ class CommandStreamReceiver { void requestStallingCommandsOnNextFlush() { stallingCommandsOnNextFlushRequired = true; } bool isStallingCommandsOnNextFlushRequired() const { return stallingCommandsOnNextFlushRequired; } - virtual WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) = 0; - virtual WaitStatus waitForCompletionWithTimeout(const WaitParams ¶ms, uint32_t taskCountToWait); - WaitStatus baseWaitFunction(volatile uint32_t *pollAddress, const WaitParams ¶ms, uint32_t taskCountToWait); - MOCKABLE_VIRTUAL bool testTaskCountReady(volatile uint32_t *pollAddress, uint32_t taskCountToWait); + virtual WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) = 0; + virtual WaitStatus waitForCompletionWithTimeout(const WaitParams ¶ms, TaskCountType taskCountToWait); + WaitStatus baseWaitFunction(volatile TagAddressType *pollAddress, const WaitParams ¶ms, TaskCountType taskCountToWait); + MOCKABLE_VIRTUAL bool testTaskCountReady(volatile TagAddressType *pollAddress, TaskCountType taskCountToWait); virtual void downloadAllocations(){}; void setSamplerCacheFlushRequired(SamplerCacheFlushState value) { this->samplerCacheFlushRequired = value; } @@ -225,14 +226,14 @@ class CommandStreamReceiver { virtual MemoryCompressionState getMemoryCompressionState(bool auxTranslationRequired, const HardwareInfo &hwInfo) const = 0; - void setLatestSentTaskCount(uint32_t latestSentTaskCount) { + void setLatestSentTaskCount(TaskCountType latestSentTaskCount) { this->latestSentTaskCount = latestSentTaskCount; } - void setLatestFlushedTaskCount(uint32_t latestFlushedTaskCount) { + void setLatestFlushedTaskCount(TaskCountType latestFlushedTaskCount) { this->latestFlushedTaskCount = latestFlushedTaskCount; } - virtual uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) = 0; + virtual TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) = 0; virtual SubmissionStatus flushTagUpdate() = 0; virtual void updateTagFromWait() = 0; @@ -333,7 +334,7 @@ class CommandStreamReceiver { MOCKABLE_VIRTUAL bool checkGpuHangDetected(TimeType currentTime, TimeType &lastHangCheckTime) const; uint64_t getCompletionAddress() const { - uint64_t completionFenceAddress = castToUint64(const_cast(getTagAddress())); + uint64_t completionFenceAddress = castToUint64(const_cast(tagAddress)); if (completionFenceAddress == 0) { return 0; } @@ -341,7 +342,7 @@ class CommandStreamReceiver { return completionFenceAddress; } - uint32_t getCompletionValue(const GraphicsAllocation &gfxAllocation); + TaskCountType getCompletionValue(const GraphicsAllocation &gfxAllocation); DispatchMode getDispatchMode() const { return this->dispatchMode; } @@ -387,10 +388,10 @@ class CommandStreamReceiver { protected: void cleanupResources(); void printDeviceIndex(); - void checkForNewResources(uint32_t submittedTaskCount, uint32_t allocationTaskCount, GraphicsAllocation &gfxAllocation); + void checkForNewResources(TaskCountType submittedTaskCount, TaskCountType allocationTaskCount, GraphicsAllocation &gfxAllocation); bool checkImplicitFlushForGpuIdle(); - void downloadTagAllocation(uint32_t taskCountToWait); - void printTagAddressContent(uint32_t taskCountToWait, int64_t waitTimeout, bool start); + void downloadTagAllocation(TaskCountType taskCountToWait); + void printTagAddressContent(TaskCountType taskCountToWait, int64_t waitTimeout, bool start); [[nodiscard]] MOCKABLE_VIRTUAL std::unique_lock obtainHostPtrSurfaceCreationLock(); std::unique_ptr flushStamp; @@ -421,7 +422,7 @@ class CommandStreamReceiver { const uint64_t debugPauseStateAddressOffset = MemoryConstants::kiloByte; uint64_t totalMemoryUsed = 0u; - volatile uint32_t *tagAddress = nullptr; + volatile TagAddressType *tagAddress = nullptr; volatile DebugPauseState *debugPauseStateAddress = nullptr; SpinLock debugPauseStateLock; static void *asyncDebugBreakConfirmation(void *arg); @@ -441,14 +442,14 @@ class CommandStreamReceiver { IndirectHeap *indirectHeap[IndirectHeap::Type::NUM_TYPES]; OsContext *osContext = nullptr; - uint32_t *completionFenceValuePointer = nullptr; + TaskCountType *completionFenceValuePointer = nullptr; // current taskLevel. Used for determining if a PIPE_CONTROL is needed. - std::atomic taskLevel{0}; - std::atomic latestSentTaskCount{0}; - std::atomic latestFlushedTaskCount{0}; + std::atomic taskLevel{0}; + std::atomic latestSentTaskCount{0}; + std::atomic latestFlushedTaskCount{0}; // taskCount - # of tasks submitted - std::atomic taskCount{0}; + std::atomic taskCount{0}; std::atomic numClients = 0u; @@ -470,7 +471,7 @@ class CommandStreamReceiver { uint32_t activePartitionsConfig = 1; uint32_t postSyncWriteOffset = 0; uint32_t completionFenceOffset = 0; - uint32_t completionFenceValue = 0; + TaskCountType completionFenceValue = 0; const uint32_t rootDeviceIndex; const DeviceBitfield deviceBitfield; diff --git a/shared/source/command_stream/command_stream_receiver_hw.h b/shared/source/command_stream/command_stream_receiver_hw.h index 1aa566c358..ffb0d1df72 100644 --- a/shared/source/command_stream/command_stream_receiver_hw.h +++ b/shared/source/command_stream/command_stream_receiver_hw.h @@ -43,7 +43,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, - uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override; + TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override; void forcePipeControl(NEO::LinearStream &commandStreamCSR); @@ -77,7 +77,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { bool isPipelineSelectAlreadyProgrammed() const; void programComputeMode(LinearStream &csr, DispatchFlags &dispatchFlags, const HardwareInfo &hwInfo); - WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override; + WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override; void collectStateBaseAddresPatchInfo( uint64_t commandBufferAddress, @@ -96,7 +96,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { return CommandStreamReceiverType::CSR_HW; } - uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override; + TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override; SubmissionStatus flushTagUpdate() override; SubmissionStatus flushMiFlushDW(); diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index c061b852ef..4ed0e9e8e8 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -178,7 +178,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, - uint32_t taskLevel, + TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) { typedef typename GfxFamily::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; @@ -926,7 +926,7 @@ inline size_t CommandStreamReceiverHw::getCmdSizeForPipelineSelect() } template -inline WaitStatus CommandStreamReceiverHw::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) { +inline WaitStatus CommandStreamReceiverHw::waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) { const auto params = kmdNotifyHelper->obtainTimeoutParams(useQuickKmdSleep, *getTagAddress(), taskCountToWait, flushStampToWait, throttle, this->isKmdWaitModeActive(), this->isAnyDirectSubmissionEnabled()); @@ -1075,7 +1075,7 @@ inline void CommandStreamReceiverHw::unregisterDirectSubmissionFromCo } template -uint32_t CommandStreamReceiverHw::flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) { +TaskCountType CommandStreamReceiverHw::flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) { using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END; using MI_FLUSH_DW = typename GfxFamily::MI_FLUSH_DW; diff --git a/shared/source/command_stream/command_stream_receiver_with_aub_dump.h b/shared/source/command_stream/command_stream_receiver_with_aub_dump.h index ed0853a643..e0d9b8e59f 100644 --- a/shared/source/command_stream/command_stream_receiver_with_aub_dump.h +++ b/shared/source/command_stream/command_stream_receiver_with_aub_dump.h @@ -40,7 +40,7 @@ class CommandStreamReceiverWithAUBDump : public BaseCSR { return CommandStreamReceiverType::CSR_HW_WITH_AUB; } - WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, + WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override; size_t getPreferredTagPoolSize() const override { return 1; } diff --git a/shared/source/command_stream/command_stream_receiver_with_aub_dump.inl b/shared/source/command_stream/command_stream_receiver_with_aub_dump.inl index 36a408d754..12ee85f55c 100644 --- a/shared/source/command_stream/command_stream_receiver_with_aub_dump.inl +++ b/shared/source/command_stream/command_stream_receiver_with_aub_dump.inl @@ -78,7 +78,7 @@ void CommandStreamReceiverWithAUBDump::setupContext(OsContext &osContex } template -WaitStatus CommandStreamReceiverWithAUBDump::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, +WaitStatus CommandStreamReceiverWithAUBDump::waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) { if (aubCSR) { aubCSR->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, throttle); diff --git a/shared/source/command_stream/csr_deps.h b/shared/source/command_stream/csr_deps.h index 3003db5f22..a601179e88 100644 --- a/shared/source/command_stream/csr_deps.h +++ b/shared/source/command_stream/csr_deps.h @@ -6,6 +6,7 @@ */ #pragma once +#include "shared/source/command_stream/task_count_helper.h" #include "shared/source/utilities/stackvec.h" namespace NEO { @@ -21,7 +22,7 @@ class CsrDependencies { All }; - StackVec, 32> taskCountContainer; + StackVec, 32> taskCountContainer; StackVec timestampPacketContainer; void makeResident(CommandStreamReceiver &commandStreamReceiver) const; diff --git a/shared/source/command_stream/scratch_space_controller.h b/shared/source/command_stream/scratch_space_controller.h index c684dbd1ea..56ae9e21c3 100644 --- a/shared/source/command_stream/scratch_space_controller.h +++ b/shared/source/command_stream/scratch_space_controller.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -43,7 +43,7 @@ class ScratchSpaceController { uint32_t scratchSlot, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, - uint32_t currentTaskCount, + TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) = 0; @@ -62,14 +62,14 @@ class ScratchSpaceController { uint32_t scratchSlot, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, - uint32_t currentTaskCount, + TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) = 0; virtual void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, - uint32_t currentTaskCount, + TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty, diff --git a/shared/source/command_stream/scratch_space_controller_base.cpp b/shared/source/command_stream/scratch_space_controller_base.cpp index d77d20de3e..6d1c05658b 100644 --- a/shared/source/command_stream/scratch_space_controller_base.cpp +++ b/shared/source/command_stream/scratch_space_controller_base.cpp @@ -26,7 +26,7 @@ void ScratchSpaceControllerBase::setRequiredScratchSpace(void *sshBaseAddress, uint32_t scratchSlot, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, - uint32_t currentTaskCount, + TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) { @@ -85,7 +85,7 @@ void ScratchSpaceControllerBase::programHeaps(HeapContainer &heapContainer, uint32_t offset, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, - uint32_t currentTaskCount, + TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) { @@ -94,7 +94,7 @@ void ScratchSpaceControllerBase::programHeaps(HeapContainer &heapContainer, void ScratchSpaceControllerBase::programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, - uint32_t currentTaskCount, + TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty, diff --git a/shared/source/command_stream/scratch_space_controller_base.h b/shared/source/command_stream/scratch_space_controller_base.h index 26a548830a..f459c08fed 100644 --- a/shared/source/command_stream/scratch_space_controller_base.h +++ b/shared/source/command_stream/scratch_space_controller_base.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -18,7 +18,7 @@ class ScratchSpaceControllerBase : public ScratchSpaceController { uint32_t scratchSlot, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, - uint32_t currentTaskCount, + TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) override; @@ -31,14 +31,14 @@ class ScratchSpaceControllerBase : public ScratchSpaceController { uint32_t scratchSlot, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, - uint32_t currentTaskCount, + TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) override; void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, - uint32_t currentTaskCount, + TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty, diff --git a/shared/source/command_stream/scratch_space_controller_xehp_and_later.cpp b/shared/source/command_stream/scratch_space_controller_xehp_and_later.cpp index afe8329e4d..4b82201468 100644 --- a/shared/source/command_stream/scratch_space_controller_xehp_and_later.cpp +++ b/shared/source/command_stream/scratch_space_controller_xehp_and_later.cpp @@ -54,7 +54,7 @@ void ScratchSpaceControllerXeHPAndLater::setRequiredScratchSpace(void *sshBaseAd uint32_t offset, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, - uint32_t currentTaskCount, + TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) { @@ -135,7 +135,7 @@ void ScratchSpaceControllerXeHPAndLater::reserveHeap(IndirectHeap::Type heapType void ScratchSpaceControllerXeHPAndLater::programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, - uint32_t currentTaskCount, + TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty, @@ -152,7 +152,7 @@ void ScratchSpaceControllerXeHPAndLater::programBindlessSurfaceStateForScratch(B void ScratchSpaceControllerXeHPAndLater::prepareScratchAllocation(uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, - uint32_t currentTaskCount, + TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &scratchSurfaceDirty, @@ -193,7 +193,7 @@ void ScratchSpaceControllerXeHPAndLater::programHeaps(HeapContainer &heapContain uint32_t scratchSlot, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, - uint32_t currentTaskCount, + TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) { diff --git a/shared/source/command_stream/scratch_space_controller_xehp_and_later.h b/shared/source/command_stream/scratch_space_controller_xehp_and_later.h index c0f169cc0a..1fd2060c66 100644 --- a/shared/source/command_stream/scratch_space_controller_xehp_and_later.h +++ b/shared/source/command_stream/scratch_space_controller_xehp_and_later.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -24,7 +24,7 @@ class ScratchSpaceControllerXeHPAndLater : public ScratchSpaceController { uint32_t scratchSlot, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, - uint32_t currentTaskCount, + TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) override; @@ -38,14 +38,14 @@ class ScratchSpaceControllerXeHPAndLater : public ScratchSpaceController { uint32_t scratchSlot, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, - uint32_t currentTaskCount, + TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) override; void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, - uint32_t currentTaskCount, + TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty, @@ -56,7 +56,7 @@ class ScratchSpaceControllerXeHPAndLater : public ScratchSpaceController { MOCKABLE_VIRTUAL void programSurfaceStateAtPtr(void *surfaceStateForScratchAllocation); MOCKABLE_VIRTUAL void prepareScratchAllocation(uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, - uint32_t currentTaskCount, + TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &scratchSurfaceDirty, diff --git a/shared/source/command_stream/submissions_aggregator.h b/shared/source/command_stream/submissions_aggregator.h index fcc59aaf9d..6119f34454 100644 --- a/shared/source/command_stream/submissions_aggregator.h +++ b/shared/source/command_stream/submissions_aggregator.h @@ -63,7 +63,7 @@ struct CommandBuffer : public IDNode { BatchBuffer batchBuffer; void *batchBufferEndLocation = nullptr; uint32_t inspectionId = 0; - uint32_t taskCount = 0u; + TaskCountType taskCount = 0u; void *pipeControlThatMayBeErasedLocation = nullptr; void *epiloguePipeControlLocation = nullptr; PipeControlArgs epiloguePipeControlArgs; diff --git a/shared/source/command_stream/task_count_helper.h b/shared/source/command_stream/task_count_helper.h new file mode 100644 index 0000000000..5fe0423ec5 --- /dev/null +++ b/shared/source/command_stream/task_count_helper.h @@ -0,0 +1,12 @@ +/* + * Copyright (C) 2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include + +using TaskCountType = uint32_t; +using TagAddressType = uint32_t; \ No newline at end of file diff --git a/shared/source/command_stream/tbx_command_stream_receiver_hw.h b/shared/source/command_stream/tbx_command_stream_receiver_hw.h index 3f369bd9c7..bcfdac3ca1 100644 --- a/shared/source/command_stream/tbx_command_stream_receiver_hw.h +++ b/shared/source/command_stream/tbx_command_stream_receiver_hw.h @@ -33,7 +33,7 @@ class TbxCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw::initAdditionalMMIO; @@ -45,8 +45,8 @@ class TbxCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw::expectMemory(const void *gfxAddress, } template -void TbxCommandStreamReceiverHw::flushSubmissionsAndDownloadAllocations(uint32_t taskCountToWait) { +void TbxCommandStreamReceiverHw::flushSubmissionsAndDownloadAllocations(TaskCountType taskCountToWait) { this->flushBatchedSubmissions(); if (this->latestFlushedTaskCount < taskCountToWait) { this->flushTagUpdate(); } - volatile uint32_t *pollAddress = this->getTagAddress(); + volatile TagAddressType *pollAddress = this->getTagAddress(); for (uint32_t i = 0; i < this->activePartitions; i++) { while (*pollAddress < this->latestFlushedTaskCount) { this->downloadAllocation(*this->getTagAllocation()); @@ -497,13 +497,13 @@ void TbxCommandStreamReceiverHw::flushSubmissionsAndDownloadAllocatio } template -WaitStatus TbxCommandStreamReceiverHw::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) { +WaitStatus TbxCommandStreamReceiverHw::waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) { flushSubmissionsAndDownloadAllocations(taskCountToWait); return BaseClass::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, throttle); } template -WaitStatus TbxCommandStreamReceiverHw::waitForCompletionWithTimeout(const WaitParams ¶ms, uint32_t taskCountToWait) { +WaitStatus TbxCommandStreamReceiverHw::waitForCompletionWithTimeout(const WaitParams ¶ms, TaskCountType taskCountToWait) { flushSubmissionsAndDownloadAllocations(taskCountToWait); return BaseClass::waitForCompletionWithTimeout(params, taskCountToWait); } @@ -554,7 +554,7 @@ void TbxCommandStreamReceiverHw::downloadAllocationTbx(GraphicsAlloca template void TbxCommandStreamReceiverHw::downloadAllocations() { - volatile uint32_t *pollAddress = this->getTagAddress(); + volatile TagAddressType *pollAddress = this->getTagAddress(); for (uint32_t i = 0; i < this->activePartitions; i++) { while (*pollAddress < this->latestFlushedTaskCount) { this->downloadAllocation(*this->getTagAllocation()); diff --git a/shared/source/direct_submission/direct_submission_controller.h b/shared/source/direct_submission/direct_submission_controller.h index 1ae8a3241f..a596c4fd67 100644 --- a/shared/source/direct_submission/direct_submission_controller.h +++ b/shared/source/direct_submission/direct_submission_controller.h @@ -7,6 +7,7 @@ #pragma once +#include "shared/source/command_stream/task_count_helper.h" #include "shared/source/helpers/common_types.h" #include @@ -35,7 +36,7 @@ class DirectSubmissionController { protected: struct DirectSubmissionState { bool isStopped = true; - uint32_t taskCount = 0u; + TaskCountType taskCount = 0u; }; static void *controlDirectSubmissionsState(void *self); diff --git a/shared/source/direct_submission/direct_submission_hw.h b/shared/source/direct_submission/direct_submission_hw.h index 163bdd07cf..022af122bf 100644 --- a/shared/source/direct_submission/direct_submission_hw.h +++ b/shared/source/direct_submission/direct_submission_hw.h @@ -87,7 +87,7 @@ class DirectSubmissionHw { static std::unique_ptr> create(const DirectSubmissionInputParams &inputParams); - virtual uint32_t *getCompletionValuePointer() { return nullptr; } + virtual TaskCountType *getCompletionValuePointer() { return nullptr; } bool isRelaxedOrderingEnabled() const { return relaxedOrderingEnabled; diff --git a/shared/source/direct_submission/linux/drm_direct_submission.h b/shared/source/direct_submission/linux/drm_direct_submission.h index 9e865fb6cf..cb224db37c 100644 --- a/shared/source/direct_submission/linux/drm_direct_submission.h +++ b/shared/source/direct_submission/linux/drm_direct_submission.h @@ -20,7 +20,7 @@ class DrmDirectSubmission : public DirectSubmissionHw { ~DrmDirectSubmission() override; - uint32_t *getCompletionValuePointer() override; + TaskCountType *getCompletionValuePointer() override; protected: bool allocateOsResources() override; @@ -37,10 +37,10 @@ class DrmDirectSubmission : public DirectSubmissionHw { bool isCompleted(uint32_t ringBufferIndex) override; bool isCompletionFenceSupported(); - MOCKABLE_VIRTUAL void wait(uint32_t taskCountToWait); + MOCKABLE_VIRTUAL void wait(TaskCountType taskCountToWait); TagData currentTagData{}; - volatile uint32_t *tagAddress; - uint32_t completionFenceValue{}; + volatile TagAddressType *tagAddress; + TaskCountType completionFenceValue{}; }; } // namespace NEO diff --git a/shared/source/direct_submission/linux/drm_direct_submission.inl b/shared/source/direct_submission/linux/drm_direct_submission.inl index 77c2b77f15..3305f57f86 100644 --- a/shared/source/direct_submission/linux/drm_direct_submission.inl +++ b/shared/source/direct_submission/linux/drm_direct_submission.inl @@ -82,7 +82,7 @@ inline DrmDirectSubmission::~DrmDirectSubmission() { } template -uint32_t *DrmDirectSubmission::getCompletionValuePointer() { +TaskCountType *DrmDirectSubmission::getCompletionValuePointer() { if (this->isCompletionFenceSupported()) { return &this->completionFenceValue; } @@ -93,7 +93,7 @@ template bool DrmDirectSubmission::allocateOsResources() { this->currentTagData.tagAddress = this->semaphoreGpuVa + offsetof(RingSemaphoreData, tagAllocation); this->currentTagData.tagValue = 0u; - this->tagAddress = reinterpret_cast(reinterpret_cast(this->semaphorePtr) + offsetof(RingSemaphoreData, tagAllocation)); + this->tagAddress = reinterpret_cast(reinterpret_cast(this->semaphorePtr) + offsetof(RingSemaphoreData, tagAllocation)); return true; } @@ -116,7 +116,7 @@ bool DrmDirectSubmission::submit(uint64_t gpuAddress, siz bool ret = false; uint32_t drmContextId = 0u; - uint32_t completionValue = 0u; + TaskCountType completionValue = 0u; uint64_t completionFenceGpuAddress = 0u; if (this->isCompletionFenceSupported()) { completionValue = ++completionFenceValue; @@ -246,7 +246,7 @@ bool DrmDirectSubmission::isCompletionFenceSupported() { } template -void DrmDirectSubmission::wait(uint32_t taskCountToWait) { +void DrmDirectSubmission::wait(TaskCountType taskCountToWait) { auto pollAddress = this->tagAddress; for (uint32_t i = 0; i < this->activeTiles; i++) { while (!WaitUtils::waitFunction(pollAddress, taskCountToWait)) { diff --git a/shared/source/helpers/completion_stamp.h b/shared/source/helpers/completion_stamp.h index d14a317b9e..1a9e6c15cf 100644 --- a/shared/source/helpers/completion_stamp.h +++ b/shared/source/helpers/completion_stamp.h @@ -7,22 +7,25 @@ #pragma once +#include "shared/source/command_stream/task_count_helper.h" + #include +#include namespace NEO { using FlushStamp = uint64_t; enum class SubmissionStatus : uint32_t; struct CompletionStamp { - static uint32_t getTaskCountFromSubmissionStatusError(SubmissionStatus submissionStatus); + static TaskCountType getTaskCountFromSubmissionStatusError(SubmissionStatus submissionStatus); - uint32_t taskCount; - uint32_t taskLevel; + TaskCountType taskCount; + TaskCountType taskLevel; FlushStamp flushStamp; - static constexpr uint32_t notReady = 0xFFFFFFF0; - static constexpr uint32_t gpuHang = 0xFFFFFFFA; - static constexpr uint32_t outOfDeviceMemory = 0xFFFFFFFB; - static constexpr uint32_t outOfHostMemory = 0xFFFFFFFC; + static constexpr TaskCountType notReady = std::numeric_limits::max() - 0xF; + static constexpr TaskCountType gpuHang = std::numeric_limits::max() - 0x5; + static constexpr TaskCountType outOfDeviceMemory = std::numeric_limits::max() - 0x4; + static constexpr TaskCountType outOfHostMemory = std::numeric_limits::max() - 0x3; }; } // namespace NEO diff --git a/shared/source/helpers/kmd_notify_properties.cpp b/shared/source/helpers/kmd_notify_properties.cpp index 926474d95d..9b605d1c69 100644 --- a/shared/source/helpers/kmd_notify_properties.cpp +++ b/shared/source/helpers/kmd_notify_properties.cpp @@ -8,6 +8,7 @@ #include "shared/source/helpers/kmd_notify_properties.h" #include "shared/source/command_stream/queue_throttle.h" +#include "shared/source/command_stream/task_count_helper.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include @@ -16,8 +17,8 @@ using namespace NEO; WaitParams KmdNotifyHelper::obtainTimeoutParams(bool quickKmdSleepRequest, - uint32_t currentHwTag, - uint32_t taskCountToWait, + TagAddressType currentHwTag, + TaskCountType taskCountToWait, FlushStamp flushStampToWait, QueueThrottle throttle, bool kmdWaitModeActive, diff --git a/shared/source/helpers/kmd_notify_properties.h b/shared/source/helpers/kmd_notify_properties.h index e64f3bc84f..ecebfb54fa 100644 --- a/shared/source/helpers/kmd_notify_properties.h +++ b/shared/source/helpers/kmd_notify_properties.h @@ -6,6 +6,7 @@ */ #pragma once +#include "shared/source/command_stream/task_count_helper.h" #include "shared/source/command_stream/wait_status.h" #include @@ -42,8 +43,8 @@ class KmdNotifyHelper { MOCKABLE_VIRTUAL ~KmdNotifyHelper() = default; WaitParams obtainTimeoutParams(bool quickKmdSleepRequest, - uint32_t currentHwTag, - uint32_t taskCountToWait, + TagAddressType currentHwTag, + TaskCountType taskCountToWait, FlushStamp flushStampToWait, QueueThrottle throttle, bool kmdWaitModeActive, diff --git a/shared/source/helpers/pause_on_gpu_properties.h b/shared/source/helpers/pause_on_gpu_properties.h index e1f1c5cefe..77a0c14eaf 100644 --- a/shared/source/helpers/pause_on_gpu_properties.h +++ b/shared/source/helpers/pause_on_gpu_properties.h @@ -7,6 +7,7 @@ #pragma once +#include "shared/source/command_stream/task_count_helper.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include @@ -28,7 +29,7 @@ inline bool featureEnabled(int32_t debugFlagValue) { return (debugFlagValue != DebugFlagValues::Disabled); } -inline bool pauseModeAllowed(int32_t debugFlagValue, uint32_t taskCount, PauseMode pauseMode) { +inline bool pauseModeAllowed(int32_t debugFlagValue, TaskCountType taskCount, PauseMode pauseMode) { if (!featureEnabled(debugFlagValue)) { // feature disabled return false; @@ -44,16 +45,16 @@ inline bool pauseModeAllowed(int32_t debugFlagValue, uint32_t taskCount, PauseMo return true; } - return (debugFlagValue == static_cast(taskCount)); + return (debugFlagValue == static_cast(taskCount)); } -inline bool gpuScratchRegWriteAllowed(int32_t debugFlagValue, uint32_t taskCount) { +inline bool gpuScratchRegWriteAllowed(int32_t debugFlagValue, TaskCountType taskCount) { if (!featureEnabled(debugFlagValue)) { // feature disabled return false; } - return (debugFlagValue == static_cast(taskCount)); + return (debugFlagValue == static_cast(taskCount)); } } // namespace PauseOnGpuProperties } // namespace NEO diff --git a/shared/source/helpers/timestamp_packet.h b/shared/source/helpers/timestamp_packet.h index 60d1082647..c42aa3d5c3 100644 --- a/shared/source/helpers/timestamp_packet.h +++ b/shared/source/helpers/timestamp_packet.h @@ -154,7 +154,7 @@ struct TimestampPacketHelper { EncodeSempahore::addMiSemaphoreWaitCommand(cmdStream, static_cast(tagAddressPreviousRootDevice), - taskCountPreviousRootDevice, + static_cast(taskCountPreviousRootDevice), COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD); } } diff --git a/shared/source/memory_manager/allocations_list.cpp b/shared/source/memory_manager/allocations_list.cpp index 8de163eb49..3a01c61021 100644 --- a/shared/source/memory_manager/allocations_list.cpp +++ b/shared/source/memory_manager/allocations_list.cpp @@ -8,12 +8,13 @@ #include "shared/source/memory_manager/allocations_list.h" #include "shared/source/command_stream/command_stream_receiver.h" +#include "shared/source/command_stream/task_count_helper.h" namespace { struct ReusableAllocationRequirements { const void *requiredPtr; size_t requiredMinimalSize; - volatile uint32_t *csrTagAddress; + volatile TagAddressType *csrTagAddress; NEO::AllocationType allocationType; uint32_t contextId; uint32_t activeTileCount; diff --git a/shared/source/memory_manager/graphics_allocation.cpp b/shared/source/memory_manager/graphics_allocation.cpp index f261994b1b..f9c443e8ba 100644 --- a/shared/source/memory_manager/graphics_allocation.cpp +++ b/shared/source/memory_manager/graphics_allocation.cpp @@ -50,7 +50,7 @@ GraphicsAllocation::GraphicsAllocation(uint32_t rootDeviceIndex, size_t numGmms, GraphicsAllocation::~GraphicsAllocation() = default; -void GraphicsAllocation::updateTaskCount(uint32_t newTaskCount, uint32_t contextId) { +void GraphicsAllocation::updateTaskCount(TaskCountType newTaskCount, uint32_t contextId) { if (usageInfos[contextId].taskCount == objectNotUsed) { registeredContextsNum++; } @@ -119,8 +119,7 @@ void GraphicsAllocation::prepareHostPtrForResidency(CommandStreamReceiver *csr) uint32_t GraphicsAllocation::getNumHandlesForKmdSharedAllocation(uint32_t numBanks) { return (numBanks > 1) && (DebugManager.flags.CreateKmdMigratedSharedAllocationWithMultipleBOs.get() != 0) ? numBanks : 1u; } - -constexpr uint32_t GraphicsAllocation::objectNotUsed; -constexpr uint32_t GraphicsAllocation::objectNotResident; -constexpr uint32_t GraphicsAllocation::objectAlwaysResident; +constexpr TaskCountType GraphicsAllocation::objectNotUsed; +constexpr TaskCountType GraphicsAllocation::objectNotResident; +constexpr TaskCountType GraphicsAllocation::objectAlwaysResident; } // namespace NEO diff --git a/shared/source/memory_manager/graphics_allocation.h b/shared/source/memory_manager/graphics_allocation.h index 0eb242f989..874e62ad64 100644 --- a/shared/source/memory_manager/graphics_allocation.h +++ b/shared/source/memory_manager/graphics_allocation.h @@ -7,6 +7,7 @@ #pragma once +#include "shared/source/command_stream/task_count_helper.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/allocation_type.h" @@ -151,22 +152,22 @@ class GraphicsAllocation : public IDNode { bool isUsed() const { return registeredContextsNum > 0; } bool isUsedByManyOsContexts() const { return registeredContextsNum > 1u; } bool isUsedByOsContext(uint32_t contextId) const { return objectNotUsed != getTaskCount(contextId); } - MOCKABLE_VIRTUAL void updateTaskCount(uint32_t newTaskCount, uint32_t contextId); - MOCKABLE_VIRTUAL uint32_t getTaskCount(uint32_t contextId) const { return usageInfos[contextId].taskCount; } + MOCKABLE_VIRTUAL void updateTaskCount(TaskCountType newTaskCount, uint32_t contextId); + MOCKABLE_VIRTUAL TaskCountType getTaskCount(uint32_t contextId) const { return usageInfos[contextId].taskCount; } void releaseUsageInOsContext(uint32_t contextId) { updateTaskCount(objectNotUsed, contextId); } uint32_t getInspectionId(uint32_t contextId) const { return usageInfos[contextId].inspectionId; } void setInspectionId(uint32_t newInspectionId, uint32_t contextId) { usageInfos[contextId].inspectionId = newInspectionId; } MOCKABLE_VIRTUAL bool isResident(uint32_t contextId) const { return GraphicsAllocation::objectNotResident != getResidencyTaskCount(contextId); } bool isAlwaysResident(uint32_t contextId) const { return GraphicsAllocation::objectAlwaysResident == getResidencyTaskCount(contextId); } - void updateResidencyTaskCount(uint32_t newTaskCount, uint32_t contextId) { + void updateResidencyTaskCount(TaskCountType newTaskCount, uint32_t contextId) { if (usageInfos[contextId].residencyTaskCount != GraphicsAllocation::objectAlwaysResident || newTaskCount == GraphicsAllocation::objectNotResident) { usageInfos[contextId].residencyTaskCount = newTaskCount; } } - uint32_t getResidencyTaskCount(uint32_t contextId) const { return usageInfos[contextId].residencyTaskCount; } + TaskCountType getResidencyTaskCount(uint32_t contextId) const { return usageInfos[contextId].residencyTaskCount; } void releaseResidencyInOsContext(uint32_t contextId) { updateResidencyTaskCount(objectNotResident, contextId); } - bool isResidencyTaskCountBelow(uint32_t taskCount, uint32_t contextId) const { return !isResident(contextId) || getResidencyTaskCount(contextId) < taskCount; } + bool isResidencyTaskCountBelow(TaskCountType taskCount, uint32_t contextId) const { return !isResident(contextId) || getResidencyTaskCount(contextId) < taskCount; } virtual std::string getAllocationInfoString() const; virtual uint64_t peekInternalHandle(MemoryManager *memoryManager) { return 0llu; } @@ -269,16 +270,16 @@ class GraphicsAllocation : public IDNode { static constexpr uint32_t defaultBank = 0b1u; static constexpr uint32_t allBanks = 0xffffffff; - constexpr static uint32_t objectNotResident = std::numeric_limits::max(); - constexpr static uint32_t objectNotUsed = std::numeric_limits::max(); - constexpr static uint32_t objectAlwaysResident = std::numeric_limits::max() - 1; + constexpr static TaskCountType objectNotResident = std::numeric_limits::max(); + constexpr static TaskCountType objectNotUsed = std::numeric_limits::max(); + constexpr static TaskCountType objectAlwaysResident = std::numeric_limits::max() - 1; std::atomic hostPtrTaskCountAssignment{0}; bool isShareableHostMemory = false; protected: struct UsageInfo { - uint32_t taskCount = objectNotUsed; - uint32_t residencyTaskCount = objectNotResident; + TaskCountType taskCount = objectNotUsed; + TaskCountType residencyTaskCount = objectNotResident; uint32_t inspectionId = 0u; }; diff --git a/shared/source/memory_manager/internal_allocation_storage.cpp b/shared/source/memory_manager/internal_allocation_storage.cpp index 9ea10ae9c7..af811ecd0d 100644 --- a/shared/source/memory_manager/internal_allocation_storage.cpp +++ b/shared/source/memory_manager/internal_allocation_storage.cpp @@ -17,7 +17,7 @@ InternalAllocationStorage::InternalAllocationStorage(CommandStreamReceiver &comm : commandStreamReceiver(commandStreamReceiver){}; void InternalAllocationStorage::storeAllocation(std::unique_ptr &&gfxAllocation, uint32_t allocationUsage) { - uint32_t taskCount = gfxAllocation->getTaskCount(commandStreamReceiver.getOsContext().getContextId()); + TaskCountType taskCount = gfxAllocation->getTaskCount(commandStreamReceiver.getOsContext().getContextId()); if (allocationUsage == REUSABLE_ALLOCATION) { taskCount = commandStreamReceiver.peekTaskCount(); @@ -25,7 +25,7 @@ void InternalAllocationStorage::storeAllocation(std::unique_ptr &&gfxAllocation, uint32_t allocationUsage, uint32_t taskCount) { +void InternalAllocationStorage::storeAllocationWithTaskCount(std::unique_ptr &&gfxAllocation, uint32_t allocationUsage, TaskCountType taskCount) { if (allocationUsage == REUSABLE_ALLOCATION) { if (DebugManager.flags.DisableResourceRecycling.get()) { commandStreamReceiver.getMemoryManager()->freeGraphicsMemory(gfxAllocation.release()); @@ -37,7 +37,7 @@ void InternalAllocationStorage::storeAllocationWithTaskCount(std::unique_ptrgetHostPtrManager()->obtainOwnership(); diff --git a/shared/source/memory_manager/internal_allocation_storage.h b/shared/source/memory_manager/internal_allocation_storage.h index cad4f5d8f9..f2464ea697 100644 --- a/shared/source/memory_manager/internal_allocation_storage.h +++ b/shared/source/memory_manager/internal_allocation_storage.h @@ -17,9 +17,9 @@ class InternalAllocationStorage { public: MOCKABLE_VIRTUAL ~InternalAllocationStorage() = default; InternalAllocationStorage(CommandStreamReceiver &commandStreamReceiver); - MOCKABLE_VIRTUAL void cleanAllocationList(uint32_t waitTaskCount, uint32_t allocationUsage); + MOCKABLE_VIRTUAL void cleanAllocationList(TaskCountType waitTaskCount, uint32_t allocationUsage); void storeAllocation(std::unique_ptr &&gfxAllocation, uint32_t allocationUsage); - void storeAllocationWithTaskCount(std::unique_ptr &&gfxAllocation, uint32_t allocationUsage, uint32_t taskCount); + void storeAllocationWithTaskCount(std::unique_ptr &&gfxAllocation, uint32_t allocationUsage, TaskCountType taskCount); std::unique_ptr obtainReusableAllocation(size_t requiredSize, AllocationType allocationType); std::unique_ptr obtainTemporaryAllocationWithPtr(size_t requiredSize, const void *requiredPtr, AllocationType allocationType); AllocationsList &getTemporaryAllocations() { return allocationLists[TEMPORARY_ALLOCATION]; } @@ -28,7 +28,7 @@ class InternalAllocationStorage { DeviceBitfield getDeviceBitfield() const; protected: - void freeAllocationsList(uint32_t waitTaskCount, AllocationsList &allocationsList); + void freeAllocationsList(TaskCountType waitTaskCount, AllocationsList &allocationsList); CommandStreamReceiver &commandStreamReceiver; std::array allocationLists = {AllocationsList(TEMPORARY_ALLOCATION), AllocationsList(REUSABLE_ALLOCATION), AllocationsList(DEFERRED_DEALLOCATION)}; diff --git a/shared/source/memory_manager/migration_sync_data.cpp b/shared/source/memory_manager/migration_sync_data.cpp index 5c33559dd6..3432431286 100644 --- a/shared/source/memory_manager/migration_sync_data.cpp +++ b/shared/source/memory_manager/migration_sync_data.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -7,6 +7,7 @@ #include "shared/source/memory_manager/migration_sync_data.h" +#include "shared/source/command_stream/task_count_helper.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/constants.h" @@ -20,14 +21,14 @@ MigrationSyncData::~MigrationSyncData() { } uint32_t MigrationSyncData::getCurrentLocation() const { return currentLocation; } -bool MigrationSyncData::isUsedByTheSameContext(volatile uint32_t *tagAddress) const { return this->tagAddress == tagAddress; } +bool MigrationSyncData::isUsedByTheSameContext(volatile TagAddressType *tagAddress) const { return this->tagAddress == tagAddress; } void MigrationSyncData::setCurrentLocation(uint32_t rootDeviceIndex) { currentLocation = rootDeviceIndex; migrationInProgress = false; } -void MigrationSyncData::signalUsage(volatile uint32_t *tagAddress, uint32_t taskCount) { +void MigrationSyncData::signalUsage(volatile TagAddressType *tagAddress, TaskCountType taskCount) { this->tagAddress = tagAddress; latestTaskCountUsed = taskCount; } diff --git a/shared/source/memory_manager/migration_sync_data.h b/shared/source/memory_manager/migration_sync_data.h index 34a72d93c8..a0c10edb24 100644 --- a/shared/source/memory_manager/migration_sync_data.h +++ b/shared/source/memory_manager/migration_sync_data.h @@ -6,6 +6,7 @@ */ #pragma once +#include "shared/source/command_stream/task_count_helper.h" #include "shared/source/utilities/reference_tracked_object.h" #include @@ -21,17 +22,17 @@ class MigrationSyncData : public ReferenceTrackedObject { uint32_t getCurrentLocation() const; void startMigration(); void setCurrentLocation(uint32_t rootDeviceIndex); - MOCKABLE_VIRTUAL void signalUsage(volatile uint32_t *tagAddress, uint32_t taskCount); - bool isUsedByTheSameContext(volatile uint32_t *tagAddress) const; + MOCKABLE_VIRTUAL void signalUsage(volatile TagAddressType *tagAddress, TaskCountType taskCount); + bool isUsedByTheSameContext(volatile TagAddressType *tagAddress) const; MOCKABLE_VIRTUAL void waitOnCpu(); bool isMigrationInProgress() const { return migrationInProgress; } void *getHostPtr() const { return hostPtr; } protected: MOCKABLE_VIRTUAL void yield() const; - volatile uint32_t *tagAddress = nullptr; + volatile TagAddressType *tagAddress = nullptr; void *hostPtr = nullptr; - uint32_t latestTaskCountUsed = 0u; + TaskCountType latestTaskCountUsed = 0u; uint32_t currentLocation = locationUndefined; bool migrationInProgress = false; }; diff --git a/shared/source/memory_manager/unified_memory_manager.cpp b/shared/source/memory_manager/unified_memory_manager.cpp index 57a476cbc5..2b550777fd 100644 --- a/shared/source/memory_manager/unified_memory_manager.cpp +++ b/shared/source/memory_manager/unified_memory_manager.cpp @@ -578,7 +578,7 @@ bool SVMAllocsManager::hasHostAllocations() { return false; } -void SVMAllocsManager::makeIndirectAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t taskCount) { +void SVMAllocsManager::makeIndirectAllocationsResident(CommandStreamReceiver &commandStreamReceiver, TaskCountType taskCount) { std::unique_lock lock(mtx); bool parseAllAllocations = false; auto entry = indirectAllocationsResidency.find(&commandStreamReceiver); diff --git a/shared/source/memory_manager/unified_memory_manager.h b/shared/source/memory_manager/unified_memory_manager.h index 7d27778eb3..867f14173d 100644 --- a/shared/source/memory_manager/unified_memory_manager.h +++ b/shared/source/memory_manager/unified_memory_manager.h @@ -6,6 +6,7 @@ */ #pragma once +#include "shared/source/command_stream/task_count_helper.h" #include "shared/source/helpers/common_types.h" #include "shared/source/memory_manager/multi_graphics_allocation.h" #include "shared/source/memory_manager/residency_container.h" @@ -108,8 +109,8 @@ class SVMAllocsManager { }; struct InternalAllocationsTracker { - uint32_t latestSentTaskCount = 0lu; - uint32_t latestResidentObjectId = 0lu; + TaskCountType latestSentTaskCount = 0lu; + TaskCountType latestResidentObjectId = 0lu; }; struct UnifiedMemoryProperties { @@ -183,7 +184,7 @@ class SVMAllocsManager { void freeSvmAllocationWithDeviceStorage(SvmAllocationData *svmData); bool hasHostAllocations(); std::atomic allocationsCounter = 0; - MOCKABLE_VIRTUAL void makeIndirectAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t taskCount); + MOCKABLE_VIRTUAL void makeIndirectAllocationsResident(CommandStreamReceiver &commandStreamReceiver, TaskCountType taskCount); void prepareIndirectAllocationForDestruction(SvmAllocationData *); void prefetchMemory(Device &device, SvmAllocationData &svmData); std::unique_lock obtainOwnership(); diff --git a/shared/source/os_interface/linux/drm_buffer_object.cpp b/shared/source/os_interface/linux/drm_buffer_object.cpp index e6328bffca..043dc5cfe8 100644 --- a/shared/source/os_interface/linux/drm_buffer_object.cpp +++ b/shared/source/os_interface/linux/drm_buffer_object.cpp @@ -7,6 +7,7 @@ #include "shared/source/os_interface/linux/drm_buffer_object.h" +#include "shared/source/command_stream/task_count_helper.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/debug_helpers.h" @@ -118,7 +119,7 @@ void BufferObject::fillExecObject(ExecObject &execObject, OsContext *osContext, } int BufferObject::exec(uint32_t used, size_t startOffset, unsigned int flags, bool requiresCoherency, OsContext *osContext, uint32_t vmHandleId, uint32_t drmContextId, - BufferObject *const residency[], size_t residencyCount, ExecObject *execObjectsStorage, uint64_t completionGpuAddress, uint32_t completionValue) { + BufferObject *const residency[], size_t residencyCount, ExecObject *execObjectsStorage, uint64_t completionGpuAddress, TaskCountType completionValue) { for (size_t i = 0; i < residencyCount; i++) { residency[i]->fillExecObject(execObjectsStorage[i], osContext, vmHandleId, drmContextId); } diff --git a/shared/source/os_interface/linux/drm_buffer_object.h b/shared/source/os_interface/linux/drm_buffer_object.h index 1ebc0f02f0..04c8edae41 100644 --- a/shared/source/os_interface/linux/drm_buffer_object.h +++ b/shared/source/os_interface/linux/drm_buffer_object.h @@ -7,6 +7,7 @@ #pragma once +#include "shared/source/command_stream/task_count_helper.h" #include "shared/source/helpers/common_types.h" #include "shared/source/helpers/constants.h" #include "shared/source/memory_manager/definitions/engine_limits.h" @@ -46,7 +47,7 @@ class BufferObject { MOCKABLE_VIRTUAL int validateHostPtr(BufferObject *const boToPin[], size_t numberOfBos, OsContext *osContext, uint32_t vmHandleId, uint32_t drmContextId); MOCKABLE_VIRTUAL int exec(uint32_t used, size_t startOffset, unsigned int flags, bool requiresCoherency, OsContext *osContext, uint32_t vmHandleId, uint32_t drmContextId, - BufferObject *const residency[], size_t residencyCount, ExecObject *execObjectsStorage, uint64_t completionGpuAddress, uint32_t completionValue); + BufferObject *const residency[], size_t residencyCount, ExecObject *execObjectsStorage, uint64_t completionGpuAddress, TaskCountType completionValue); int bind(OsContext *osContext, uint32_t vmHandleId); int unbind(OsContext *osContext, uint32_t vmHandleId); diff --git a/shared/source/os_interface/linux/drm_command_stream.h b/shared/source/os_interface/linux/drm_command_stream.h index 235f1e29f7..60701b9a03 100644 --- a/shared/source/os_interface/linux/drm_command_stream.h +++ b/shared/source/os_interface/linux/drm_command_stream.h @@ -68,7 +68,7 @@ class DrmCommandStreamReceiver : public DeviceCommandStreamReceiver { protected: MOCKABLE_VIRTUAL SubmissionStatus flushInternal(const BatchBuffer &batchBuffer, const ResidencyContainer &allocationsForResidency); MOCKABLE_VIRTUAL int exec(const BatchBuffer &batchBuffer, uint32_t vmHandleId, uint32_t drmContextId, uint32_t index); - MOCKABLE_VIRTUAL int waitUserFence(uint32_t waitValue); + MOCKABLE_VIRTUAL int waitUserFence(TaskCountType waitValue); MOCKABLE_VIRTUAL void readBackAllocation(void *source); bool isUserFenceWaitActive(); diff --git a/shared/source/os_interface/linux/drm_command_stream.inl b/shared/source/os_interface/linux/drm_command_stream.inl index ad9aea6d3c..cc8217ae84 100644 --- a/shared/source/os_interface/linux/drm_command_stream.inl +++ b/shared/source/os_interface/linux/drm_command_stream.inl @@ -224,7 +224,7 @@ int DrmCommandStreamReceiver::exec(const BatchBuffer &batchBuffer, ui } uint64_t completionGpuAddress = 0; - uint32_t completionValue = 0; + TaskCountType completionValue = 0; if (this->drm->isVmBindAvailable() && this->drm->completionFenceSupport()) { completionGpuAddress = getTagAllocation()->getGpuAddress() + (index * this->postSyncWriteOffset) + Drm::completionFenceOffset; completionValue = this->latestSentTaskCount; diff --git a/shared/source/os_interface/linux/drm_command_stream_bdw_and_later.inl b/shared/source/os_interface/linux/drm_command_stream_bdw_and_later.inl index 8f1982fd3f..1bf5cb0887 100644 --- a/shared/source/os_interface/linux/drm_command_stream_bdw_and_later.inl +++ b/shared/source/os_interface/linux/drm_command_stream_bdw_and_later.inl @@ -23,13 +23,13 @@ SubmissionStatus DrmCommandStreamReceiver::flushInternal(const BatchB } template -int DrmCommandStreamReceiver::waitUserFence(uint32_t waitValue) { +int DrmCommandStreamReceiver::waitUserFence(TaskCountType waitValue) { uint32_t ctxId = 0u; - uint64_t tagAddress = castToUint64(const_cast(getTagAddress())); + uint64_t tagAddress = castToUint64(const_cast(getTagAddress())); if (useContextForUserFenceWait) { ctxId = static_cast(osContext)->getDrmContextIds()[0]; } - return this->drm->waitUserFence(ctxId, tagAddress, waitValue, Drm::ValueWidth::U32, kmdWaitTimeout, 0u); + return this->drm->waitUserFence(ctxId, tagAddress, waitValue, Drm::ValueWidth::U64, kmdWaitTimeout, 0u); } } // namespace NEO diff --git a/shared/source/os_interface/linux/drm_command_stream_xehp_and_later.inl b/shared/source/os_interface/linux/drm_command_stream_xehp_and_later.inl index 812ecb736e..9d1441896a 100644 --- a/shared/source/os_interface/linux/drm_command_stream_xehp_and_later.inl +++ b/shared/source/os_interface/linux/drm_command_stream_xehp_and_later.inl @@ -54,10 +54,10 @@ SubmissionStatus DrmCommandStreamReceiver::flushInternal(const BatchB } template -int DrmCommandStreamReceiver::waitUserFence(uint32_t waitValue) { +int DrmCommandStreamReceiver::waitUserFence(TaskCountType waitValue) { int ret = 0; StackVec ctxIds; - uint64_t tagAddress = castToUint64(const_cast(getTagAddress())); + uint64_t tagAddress = castToUint64(const_cast(getTagAddress())); if (useContextForUserFenceWait) { for (auto tileIterator = 0u; tileIterator < this->osContext->getDeviceBitfield().size(); tileIterator++) { uint32_t ctxId = 0u; @@ -68,12 +68,12 @@ int DrmCommandStreamReceiver::waitUserFence(uint32_t waitValue) { } UNRECOVERABLE_IF(ctxIds.size() != this->activePartitions); for (uint32_t i = 0; i < this->activePartitions; i++) { - ret |= this->drm->waitUserFence(ctxIds[i], tagAddress, waitValue, Drm::ValueWidth::U32, kmdWaitTimeout, 0u); + ret |= this->drm->waitUserFence(ctxIds[i], tagAddress, waitValue, Drm::ValueWidth::U64, kmdWaitTimeout, 0u); tagAddress += this->postSyncWriteOffset; } } else { for (uint32_t i = 0; i < this->activePartitions; i++) { - ret |= this->drm->waitUserFence(0u, tagAddress, waitValue, Drm::ValueWidth::U32, kmdWaitTimeout, 0u); + ret |= this->drm->waitUserFence(0u, tagAddress, waitValue, Drm::ValueWidth::U64, kmdWaitTimeout, 0u); tagAddress += this->postSyncWriteOffset; } } diff --git a/shared/source/os_interface/linux/drm_neo.cpp b/shared/source/os_interface/linux/drm_neo.cpp index 7e481fec6a..d93e19f934 100644 --- a/shared/source/os_interface/linux/drm_neo.cpp +++ b/shared/source/os_interface/linux/drm_neo.cpp @@ -1613,7 +1613,7 @@ void Drm::waitOnUserFences(const OsContextLinux &osContext, uint64_t address, ui if (*reinterpret_cast(completionFenceCpuAddress) < value) { constexpr int64_t timeout = -1; constexpr uint16_t flags = 0; - int retVal = waitUserFence(drmContextIds[drmIterator], completionFenceCpuAddress, value, Drm::ValueWidth::U32, timeout, flags); + int retVal = waitUserFence(drmContextIds[drmIterator], completionFenceCpuAddress, value, Drm::ValueWidth::U64, timeout, flags); if (DebugManager.flags.PrintCompletionFenceUsage.get()) { std::cout << "Completion fence waited." diff --git a/shared/source/os_interface/linux/ioctl_helper.h b/shared/source/os_interface/linux/ioctl_helper.h index d557470450..892b9e66f1 100644 --- a/shared/source/os_interface/linux/ioctl_helper.h +++ b/shared/source/os_interface/linux/ioctl_helper.h @@ -6,6 +6,7 @@ */ #pragma once +#include "shared/source/command_stream/task_count_helper.h" #include "shared/source/os_interface/linux/drm_wrappers.h" #include "shared/source/utilities/stackvec.h" @@ -87,7 +88,7 @@ class IoctlHelper { virtual uint64_t getFlagsForVmBind(bool bindCapture, bool bindImmediate, bool bindMakeResident) = 0; virtual uint32_t queryDistances(std::vector &queryItems, std::vector &distanceInfos) = 0; virtual uint16_t getWaitUserFenceSoftFlag() = 0; - virtual int execBuffer(ExecBuffer *execBuffer, uint64_t completionGpuAddress, uint32_t counterValue) = 0; + virtual int execBuffer(ExecBuffer *execBuffer, uint64_t completionGpuAddress, TaskCountType counterValue) = 0; virtual bool completionFenceExtensionSupported(const bool isVmBindAvailable) = 0; virtual std::optional getHasPageFaultParamId() = 0; virtual std::unique_ptr createVmControlExtRegion(const std::optional ®ionInstanceClass) = 0; @@ -165,7 +166,7 @@ class IoctlHelperUpstream : public IoctlHelper { uint64_t getFlagsForVmBind(bool bindCapture, bool bindImmediate, bool bindMakeResident) override; uint32_t queryDistances(std::vector &queryItems, std::vector &distanceInfos) override; uint16_t getWaitUserFenceSoftFlag() override; - int execBuffer(ExecBuffer *execBuffer, uint64_t completionGpuAddress, uint32_t counterValue) override; + int execBuffer(ExecBuffer *execBuffer, uint64_t completionGpuAddress, TaskCountType counterValue) override; bool completionFenceExtensionSupported(const bool isVmBindAvailable) override; std::optional getHasPageFaultParamId() override; std::unique_ptr createVmControlExtRegion(const std::optional ®ionInstanceClass) override; @@ -231,7 +232,7 @@ class IoctlHelperPrelim20 : public IoctlHelper { uint64_t getFlagsForVmBind(bool bindCapture, bool bindImmediate, bool bindMakeResident) override; uint32_t queryDistances(std::vector &queryItems, std::vector &distanceInfos) override; uint16_t getWaitUserFenceSoftFlag() override; - int execBuffer(ExecBuffer *execBuffer, uint64_t completionGpuAddress, uint32_t counterValue) override; + int execBuffer(ExecBuffer *execBuffer, uint64_t completionGpuAddress, TaskCountType counterValue) override; bool completionFenceExtensionSupported(const bool isVmBindAvailable) override; std::optional getHasPageFaultParamId() override; std::unique_ptr createVmControlExtRegion(const std::optional ®ionInstanceClass) override; diff --git a/shared/source/os_interface/linux/ioctl_helper_prelim.cpp b/shared/source/os_interface/linux/ioctl_helper_prelim.cpp index 63022677ed..259aa501ae 100644 --- a/shared/source/os_interface/linux/ioctl_helper_prelim.cpp +++ b/shared/source/os_interface/linux/ioctl_helper_prelim.cpp @@ -256,7 +256,7 @@ uint16_t IoctlHelperPrelim20::getWaitUserFenceSoftFlag() { return PRELIM_I915_UFENCE_WAIT_SOFT; }; -int IoctlHelperPrelim20::execBuffer(ExecBuffer *execBuffer, uint64_t completionGpuAddress, uint32_t counterValue) { +int IoctlHelperPrelim20::execBuffer(ExecBuffer *execBuffer, uint64_t completionGpuAddress, TaskCountType counterValue) { prelim_drm_i915_gem_execbuffer_ext_user_fence fenceObject = {}; if (completionGpuAddress != 0) { fenceObject.base.name = PRELIM_DRM_I915_GEM_EXECBUFFER_EXT_USER_FENCE; diff --git a/shared/source/os_interface/linux/ioctl_helper_upstream.cpp b/shared/source/os_interface/linux/ioctl_helper_upstream.cpp index 9efcf54629..a0b107e90c 100644 --- a/shared/source/os_interface/linux/ioctl_helper_upstream.cpp +++ b/shared/source/os_interface/linux/ioctl_helper_upstream.cpp @@ -117,7 +117,7 @@ uint16_t IoctlHelperUpstream::getWaitUserFenceSoftFlag() { return 0; } -int IoctlHelperUpstream::execBuffer(ExecBuffer *execBuffer, uint64_t completionGpuAddress, uint32_t counterValue) { +int IoctlHelperUpstream::execBuffer(ExecBuffer *execBuffer, uint64_t completionGpuAddress, TaskCountType counterValue) { return ioctl(DrmIoctl::GemExecbuffer2, execBuffer); } diff --git a/shared/source/utilities/wait_util.h b/shared/source/utilities/wait_util.h index b3f5903518..6d7491885f 100644 --- a/shared/source/utilities/wait_util.h +++ b/shared/source/utilities/wait_util.h @@ -1,11 +1,12 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once +#include "shared/source/command_stream/task_count_helper.h" #include "shared/source/utilities/cpuintrinsics.h" #include @@ -33,8 +34,8 @@ inline bool waitFunctionWithPredicate(volatile T const *pollAddress, T expectedV return false; } -inline bool waitFunction(volatile uint32_t *pollAddress, uint32_t expectedValue) { - return waitFunctionWithPredicate(pollAddress, expectedValue, std::greater_equal()); +inline bool waitFunction(volatile TagAddressType *pollAddress, TaskCountType expectedValue) { + return waitFunctionWithPredicate(pollAddress, expectedValue, std::greater_equal()); } void init(); diff --git a/shared/test/common/fixtures/device_fixture.cpp b/shared/test/common/fixtures/device_fixture.cpp index bbe22f05cb..aebaa90c2f 100644 --- a/shared/test/common/fixtures/device_fixture.cpp +++ b/shared/test/common/fixtures/device_fixture.cpp @@ -24,7 +24,7 @@ void DeviceFixture::setUpImpl(const NEO::HardwareInfo *hardwareInfo) { auto &commandStreamReceiver = pDevice->getGpgpuCommandStreamReceiver(); pTagMemory = commandStreamReceiver.getTagAddress(); - ASSERT_NE(nullptr, const_cast(pTagMemory)); + ASSERT_NE(nullptr, const_cast(pTagMemory)); } void DeviceFixture::tearDown() { diff --git a/shared/test/common/fixtures/device_fixture.h b/shared/test/common/fixtures/device_fixture.h index 359e4dfe9e..a03238c300 100644 --- a/shared/test/common/fixtures/device_fixture.h +++ b/shared/test/common/fixtures/device_fixture.h @@ -19,7 +19,7 @@ struct DeviceFixture { MockDevice *createWithUsDeviceIdRevId(unsigned short usDeviceId, unsigned short usRevId); MockDevice *pDevice = nullptr; - volatile uint32_t *pTagMemory = nullptr; + volatile TagAddressType *pTagMemory = nullptr; HardwareInfo hardwareInfo = {}; PLATFORM platformHelper = {}; const uint32_t rootDeviceIndex = 0u; diff --git a/shared/test/common/fixtures/memory_manager_fixture.h b/shared/test/common/fixtures/memory_manager_fixture.h index 981cb89475..531070a63c 100644 --- a/shared/test/common/fixtures/memory_manager_fixture.h +++ b/shared/test/common/fixtures/memory_manager_fixture.h @@ -6,6 +6,7 @@ */ #pragma once +#include "shared/source/command_stream/task_count_helper.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/options.h" @@ -21,8 +22,8 @@ class MemoryManagerWithCsrFixture { MockMemoryManager *memoryManager; ExecutionEnvironment executionEnvironment; std::unique_ptr csr; - uint32_t taskCount = 0; - uint32_t currentGpuTag = initialHardwareTag; + TaskCountType taskCount = 0; + TagAddressType currentGpuTag = initialHardwareTag; ~MemoryManagerWithCsrFixture() = default; diff --git a/shared/test/common/libult/ult_aub_command_stream_receiver.h b/shared/test/common/libult/ult_aub_command_stream_receiver.h index 9a2c6376d0..20f1193bdd 100644 --- a/shared/test/common/libult/ult_aub_command_stream_receiver.h +++ b/shared/test/common/libult/ult_aub_command_stream_receiver.h @@ -50,7 +50,7 @@ class UltAubCommandStreamReceiver : public AUBCommandStreamReceiverHw return csr; } - uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { + TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { blitBufferCalled++; return BaseClass::flushBcsTask(blitPropertiesContainer, blocking, profilingEnabled, device); } diff --git a/shared/test/common/libult/ult_command_stream_receiver.h b/shared/test/common/libult/ult_command_stream_receiver.h index e106a4307d..66a854ccde 100644 --- a/shared/test/common/libult/ult_command_stream_receiver.h +++ b/shared/test/common/libult/ult_command_stream_receiver.h @@ -176,7 +176,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, - uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override { + TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override { recordedDispatchFlags = dispatchFlags; this->lastFlushedCommandStream = &commandStream; return BaseClass::flushTask(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); @@ -196,7 +196,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ downloadAllocationCalled = true; } - WaitStatus waitForCompletionWithTimeout(const WaitParams ¶ms, uint32_t taskCountToWait) override { + WaitStatus waitForCompletionWithTimeout(const WaitParams ¶ms, TaskCountType taskCountToWait) override { latestWaitForCompletionWithTimeoutTaskCount.store(taskCountToWait); waitForCompletionWithTimeoutTaskCountCalled++; if (callBaseWaitForCompletionWithTimeout) { @@ -212,11 +212,11 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ } } - WaitStatus waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) { + WaitStatus waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, TaskCountType taskCountToWait) { return waitForCompletionWithTimeout(WaitParams{false, enableTimeout, timeoutMicroseconds}, taskCountToWait); } - WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override { + WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override { if (waitForTaskCountWithKmdNotifyFallbackReturnValue.has_value()) { return *waitForTaskCountWithKmdNotifyFallbackReturnValue; } @@ -245,7 +245,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ return makeResidentAllocations.find(graphicsAllocation) != makeResidentAllocations.end(); } - bool isMadeResident(GraphicsAllocation *graphicsAllocation, uint32_t taskCount) const { + bool isMadeResident(GraphicsAllocation *graphicsAllocation, TaskCountType taskCount) const { auto it = makeResidentAllocations.find(graphicsAllocation); if (it == makeResidentAllocations.end()) { return false; @@ -289,7 +289,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ return CommandStreamReceiverHw::obtainUniqueOwnership(); } - uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { + TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { blitBufferCalled++; receivedBlitProperties = blitPropertiesContainer; @@ -363,13 +363,13 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ BatchBuffer latestFlushedBatchBuffer = {}; std::atomic recursiveLockCounter; - std::atomic latestWaitForCompletionWithTimeoutTaskCount{0}; + std::atomic latestWaitForCompletionWithTimeoutTaskCount{0}; std::atomic waitForCompletionWithTimeoutTaskCountCalled{0}; LinearStream *lastFlushedCommandStream = nullptr; uint32_t makeSurfacePackNonResidentCalled = false; - uint32_t latestSentTaskCountValueDuringFlush = 0; + TaskCountType latestSentTaskCountValueDuringFlush = 0; uint32_t blitBufferCalled = 0; uint32_t createPerDssBackedBufferCalled = 0; uint32_t initDirectSubmissionCalled = 0; @@ -400,7 +400,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ WaitStatus returnWaitForCompletionWithTimeout = WaitStatus::Ready; std::optional waitForTaskCountWithKmdNotifyFallbackReturnValue{}; bool callBaseFlushBcsTask{true}; - uint32_t flushBcsTaskReturnValue{}; + TaskCountType flushBcsTaskReturnValue{}; std::optional flushReturnValue{}; CommandStreamReceiverType commandStreamReceiverType = CommandStreamReceiverType::CSR_HW; }; diff --git a/shared/test/common/mocks/linux/mock_drm_allocation.h b/shared/test/common/mocks/linux/mock_drm_allocation.h index 7a4c3e21fb..456a8c6f16 100644 --- a/shared/test/common/mocks/linux/mock_drm_allocation.h +++ b/shared/test/common/mocks/linux/mock_drm_allocation.h @@ -21,14 +21,14 @@ class MockBufferObject : public BufferObject { struct ExecParams { uint64_t completionGpuAddress = 0; - uint32_t completionValue = 0; + TaskCountType completionValue = 0; }; std::vector passedExecParams{}; MockBufferObject(Drm *drm) : BufferObject(drm, CommonConstants::unsupportedPatIndex, 0, 0, 1) { } int exec(uint32_t used, size_t startOffset, unsigned int flags, bool requiresCoherency, OsContext *osContext, uint32_t vmHandleId, uint32_t drmContextId, - BufferObject *const residency[], size_t residencyCount, ExecObject *execObjectsStorage, uint64_t completionGpuAddress, uint32_t completionValue) override { + BufferObject *const residency[], size_t residencyCount, ExecObject *execObjectsStorage, uint64_t completionGpuAddress, TaskCountType completionValue) override { passedExecParams.push_back({completionGpuAddress, completionValue}); return BufferObject::exec(used, startOffset, flags, requiresCoherency, osContext, vmHandleId, drmContextId, residency, residencyCount, execObjectsStorage, completionGpuAddress, completionValue); diff --git a/shared/test/common/mocks/linux/mock_drm_command_stream_receiver.h b/shared/test/common/mocks/linux/mock_drm_command_stream_receiver.h index cea5f667e5..d61dada8ac 100644 --- a/shared/test/common/mocks/linux/mock_drm_command_stream_receiver.h +++ b/shared/test/common/mocks/linux/mock_drm_command_stream_receiver.h @@ -99,14 +99,14 @@ class TestedDrmCommandStreamReceiver : public DrmCommandStreamReceiver { CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, - uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override { + TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override { recordedDispatchFlags = dispatchFlags; return AUBCommandStreamReceiverHw::flushTask(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); @@ -123,7 +123,7 @@ struct MockAubCsr : public AUBCommandStreamReceiverHw { expectMemoryCompressedCalled = true; return AUBCommandStreamReceiverHw::expectMemoryCompressed(gfxAddress, srcAddress, length); } - WaitStatus waitForCompletionWithTimeout(const WaitParams ¶ms, uint32_t taskCountToWait) override { + WaitStatus waitForCompletionWithTimeout(const WaitParams ¶ms, TaskCountType taskCountToWait) override { return NEO::WaitStatus::Ready; } void addAubComment(const char *message) override { diff --git a/shared/test/common/mocks/mock_command_stream_receiver.cpp b/shared/test/common/mocks/mock_command_stream_receiver.cpp index f59dc2a1e6..836bdd30a9 100644 --- a/shared/test/common/mocks/mock_command_stream_receiver.cpp +++ b/shared/test/common/mocks/mock_command_stream_receiver.cpp @@ -7,7 +7,7 @@ #include "shared/test/common/mocks/mock_command_stream_receiver.h" -volatile uint32_t MockCommandStreamReceiver::mockTagAddress[MockCommandStreamReceiver::tagSize]; +volatile TagAddressType MockCommandStreamReceiver::mockTagAddress[MockCommandStreamReceiver::tagSize]; SubmissionStatus MockCommandStreamReceiver::flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) { return SubmissionStatus::SUCCESS; @@ -19,7 +19,7 @@ CompletionStamp MockCommandStreamReceiver::flushTask( const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, - uint32_t taskLevel, + TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) { ++taskCount; diff --git a/shared/test/common/mocks/mock_command_stream_receiver.h b/shared/test/common/mocks/mock_command_stream_receiver.h index 24006dfecc..e99c469e55 100644 --- a/shared/test/common/mocks/mock_command_stream_receiver.h +++ b/shared/test/common/mocks/mock_command_stream_receiver.h @@ -50,10 +50,10 @@ class MockCommandStreamReceiver : public CommandStreamReceiver { MockCommandStreamReceiver(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) : CommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield) { CommandStreamReceiver::tagAddress = &mockTagAddress[0]; - memset(const_cast(CommandStreamReceiver::tagAddress), 0xFFFFFFFF, tagSize * sizeof(uint32_t)); + memset(const_cast(CommandStreamReceiver::tagAddress), 0xFFFFFFFF, tagSize * sizeof(TagAddressType)); } - WaitStatus waitForCompletionWithTimeout(const WaitParams ¶ms, uint32_t taskCountToWait) override { + WaitStatus waitForCompletionWithTimeout(const WaitParams ¶ms, TaskCountType taskCountToWait) override { waitForCompletionWithTimeoutCalled++; return waitForCompletionWithTimeoutReturnValue; } @@ -80,7 +80,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver { } } - bool testTaskCountReady(volatile uint32_t *pollAddress, uint32_t taskCountToWait) override { + bool testTaskCountReady(volatile TagAddressType *pollAddress, TaskCountType taskCountToWait) override { if (testTaskCountReadyReturnValue.has_value()) { return *testTaskCountReadyReturnValue; } else { @@ -100,7 +100,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver { const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, - uint32_t taskLevel, + TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override; @@ -111,15 +111,15 @@ class MockCommandStreamReceiver : public CommandStreamReceiver { return true; } - WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, QueueThrottle throttle) override { + WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, QueueThrottle throttle) override { return WaitStatus::Ready; } - WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) { + WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) { return WaitStatus::Ready; } - uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; }; + TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; }; CommandStreamReceiverType getType() override { return commandStreamReceiverType; @@ -176,7 +176,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver { SubmissionStatus initializeDeviceWithFirstSubmission() override { return SubmissionStatus::SUCCESS; } static constexpr size_t tagSize = 256; - static volatile uint32_t mockTagAddress[tagSize]; + static volatile TagAddressType mockTagAddress[tagSize]; std::vector instructionHeapReserveredData; int *flushBatchedSubmissionsCallCounter = nullptr; uint32_t waitForCompletionWithTimeoutCalled = 0; @@ -286,7 +286,7 @@ class MockCsrHw2 : public CommandStreamReceiverHw { CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap *dsh, const IndirectHeap *ioh, - const IndirectHeap *ssh, uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override { + const IndirectHeap *ssh, TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override { passedDispatchFlags = dispatchFlags; recordedCommandBuffer = std::unique_ptr(new CommandBuffer(device)); @@ -306,7 +306,7 @@ class MockCsrHw2 : public CommandStreamReceiverHw { return completionStamp; } - uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { + TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { if (!skipBlitCalls) { return CommandStreamReceiverHw::flushBcsTask(blitPropertiesContainer, blocking, profilingEnabled, device); } diff --git a/shared/test/common/mocks/mock_csr.h b/shared/test/common/mocks/mock_csr.h index b6e7305023..1c17805c55 100644 --- a/shared/test/common/mocks/mock_csr.h +++ b/shared/test/common/mocks/mock_csr.h @@ -117,7 +117,7 @@ class MockCsr : public MockCsrBase { const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, - uint32_t taskLevel, + TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override { this->flushTaskStamp = *this->executionStamp; @@ -140,7 +140,7 @@ class MockCsr : public MockCsrBase { bool peekMediaVfeStateDirty() const { return mediaVfeStateDirty; } bool slmUsedInLastFlushTask = false; - uint32_t lastTaskLevelToFlushTask = 0; + TaskCountType lastTaskLevelToFlushTask = 0; }; template diff --git a/shared/test/common/mocks/mock_graphics_allocation.h b/shared/test/common/mocks/mock_graphics_allocation.h index fad91e2d31..498f9c7c9b 100644 --- a/shared/test/common/mocks/mock_graphics_allocation.h +++ b/shared/test/common/mocks/mock_graphics_allocation.h @@ -54,11 +54,11 @@ class MockGraphicsAllocation : public MemoryAllocation { class MockGraphicsAllocationTaskCount : public MockGraphicsAllocation { public: - uint32_t getTaskCount(uint32_t contextId) const override { + TaskCountType getTaskCount(uint32_t contextId) const override { getTaskCountCalleedTimes++; return MockGraphicsAllocation::getTaskCount(contextId); } - void updateTaskCount(uint32_t newTaskCount, uint32_t contextId) override { + void updateTaskCount(TaskCountType newTaskCount, uint32_t contextId) override { updateTaskCountCalleedTimes++; MockGraphicsAllocation::updateTaskCount(newTaskCount, contextId); } diff --git a/shared/test/common/mocks/mock_internal_allocation_storage.h b/shared/test/common/mocks/mock_internal_allocation_storage.h index 0335ddf234..d20619bfe7 100644 --- a/shared/test/common/mocks/mock_internal_allocation_storage.h +++ b/shared/test/common/mocks/mock_internal_allocation_storage.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -13,7 +13,7 @@ namespace NEO { class MockInternalAllocationStorage : public InternalAllocationStorage { public: using InternalAllocationStorage::InternalAllocationStorage; - void cleanAllocationList(uint32_t waitTaskCount, uint32_t allocationUsage) override { + void cleanAllocationList(TaskCountType waitTaskCount, uint32_t allocationUsage) override { cleanAllocationsCalled++; lastCleanAllocationsTaskCount = waitTaskCount; lastCleanAllocationUsage = allocationUsage; @@ -23,14 +23,14 @@ class MockInternalAllocationStorage : public InternalAllocationStorage { doUpdateCompletion = false; } } - void updateCompletionAfterCleaningList(uint32_t newValue) { + void updateCompletionAfterCleaningList(TaskCountType newValue) { doUpdateCompletion = true; valueToUpdateCompletion = newValue; } bool doUpdateCompletion = false; - uint32_t valueToUpdateCompletion; + TaskCountType valueToUpdateCompletion; uint32_t lastCleanAllocationUsage = 0; - uint32_t lastCleanAllocationsTaskCount = 0; + TaskCountType lastCleanAllocationsTaskCount = 0; uint32_t cleanAllocationsCalled = 0; }; } // namespace NEO diff --git a/shared/test/common/mocks/mock_migration_sync_data.h b/shared/test/common/mocks/mock_migration_sync_data.h index 4d4a425c93..6dc2a3e1f3 100644 --- a/shared/test/common/mocks/mock_migration_sync_data.h +++ b/shared/test/common/mocks/mock_migration_sync_data.h @@ -21,7 +21,7 @@ struct MockMigrationSyncData : public MigrationSyncData { using MigrationSyncData::latestTaskCountUsed; using MigrationSyncData::MigrationSyncData; using MigrationSyncData::tagAddress; - void signalUsage(volatile uint32_t *tagAddress, uint32_t taskCount) override { + void signalUsage(volatile TagAddressType *tagAddress, TaskCountType taskCount) override { signalUsageCalled++; MigrationSyncData::signalUsage(tagAddress, taskCount); } diff --git a/shared/test/common/mocks/mock_tbx_csr.h b/shared/test/common/mocks/mock_tbx_csr.h index c0c2d215e4..f3f8bc5e3f 100644 --- a/shared/test/common/mocks/mock_tbx_csr.h +++ b/shared/test/common/mocks/mock_tbx_csr.h @@ -91,7 +91,7 @@ struct MockTbxCsrRegisterDownloadedAllocations : TbxCommandStreamReceiverHwdownloadAllocationImpl = nullptr; } void downloadAllocationTbxMock(GraphicsAllocation &gfxAllocation) { - *reinterpret_cast(CommandStreamReceiver::getTagAllocation()->getUnderlyingBuffer()) = this->latestFlushedTaskCount; + *reinterpret_cast(CommandStreamReceiver::getTagAllocation()->getUnderlyingBuffer()) = this->latestFlushedTaskCount; downloadedAllocations.insert(&gfxAllocation); } bool flushBatchedSubmissions() override { diff --git a/shared/test/common/os_interface/linux/drm_buffer_object_fixture.h b/shared/test/common/os_interface/linux/drm_buffer_object_fixture.h index 3e091bbc0e..bd26929427 100644 --- a/shared/test/common/os_interface/linux/drm_buffer_object_fixture.h +++ b/shared/test/common/os_interface/linux/drm_buffer_object_fixture.h @@ -40,7 +40,7 @@ class TestedBufferObject : public BufferObject { } int exec(uint32_t used, size_t startOffset, unsigned int flags, bool requiresCoherency, OsContext *osContext, uint32_t vmHandleId, uint32_t drmContextId, - BufferObject *const residency[], size_t residencyCount, ExecObject *execObjectsStorage, uint64_t completionGpuAddress, uint32_t completionValue) override { + BufferObject *const residency[], size_t residencyCount, ExecObject *execObjectsStorage, uint64_t completionGpuAddress, TaskCountType completionValue) override { this->receivedCompletionGpuAddress = completionGpuAddress; this->receivedCompletionValue = completionValue; this->execCalled++; @@ -61,7 +61,7 @@ class TestedBufferObject : public BufferObject { uint64_t receivedCompletionGpuAddress = 0; ExecObject *execObjectPointerFilled = nullptr; - uint32_t receivedCompletionValue = 0; + TaskCountType receivedCompletionValue = 0; uint32_t execCalled = 0; bool callBaseEvictUnusedAllocations{true}; }; diff --git a/shared/test/common/utilities/cpuintrinsics.cpp b/shared/test/common/utilities/cpuintrinsics.cpp index 1b34ea7965..7dc9348aa2 100644 --- a/shared/test/common/utilities/cpuintrinsics.cpp +++ b/shared/test/common/utilities/cpuintrinsics.cpp @@ -7,6 +7,7 @@ #include "shared/source/utilities/cpuintrinsics.h" +#include "shared/source/command_stream/task_count_helper.h" #include "shared/source/helpers/ptr_math.h" #include @@ -14,14 +15,14 @@ #include namespace CpuIntrinsicsTests { -//std::atomic is used for sake of sanitation in MT tests +// std::atomic is used for sake of sanitation in MT tests std::atomic lastClFlushedPtr(0u); std::atomic clFlushCounter(0u); std::atomic pauseCounter(0u); std::atomic sfenceCounter(0u); -volatile uint32_t *pauseAddress = nullptr; -uint32_t pauseValue = 0u; +volatile TagAddressType *pauseAddress = nullptr; +TaskCountType pauseValue = 0u; uint32_t pauseOffset = 0u; std::function setupPauseAddress; diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index 3a5f9bea3e..81aeb77359 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -78,7 +78,7 @@ TEST_F(CommandStreamReceiverTest, givenOsAgnosticCsrWhenGettingCompletionValueTh } TEST_F(CommandStreamReceiverTest, givenOsAgnosticCsrWhenGettingCompletionAddressThenProperAddressIsReturned) { - auto expectedAddress = castToUint64(const_cast(commandStreamReceiver->getTagAddress())); + auto expectedAddress = castToUint64(const_cast(commandStreamReceiver->getTagAddress())); EXPECT_EQ(expectedAddress, commandStreamReceiver->getCompletionAddress()); } @@ -186,7 +186,7 @@ TEST_F(CommandStreamReceiverTest, givenBaseDownloadAllocationCalledThenDoesNotCh } TEST_F(CommandStreamReceiverTest, WhenCommandStreamReceiverIsCreatedThenItHasATagValue) { - EXPECT_NE(nullptr, const_cast(commandStreamReceiver->getTagAddress())); + EXPECT_NE(nullptr, const_cast(commandStreamReceiver->getTagAddress())); } TEST_F(CommandStreamReceiverTest, WhenGettingCommandStreamerThenValidPointerIsReturned) { @@ -275,7 +275,7 @@ HWTEST_F(CommandStreamReceiverTest, givenGpuHangWhenWaititingForCompletionWithTi csr.activePartitions = 1; csr.gpuHangCheckPeriod = 0us; - volatile std::uint32_t tasksCount[16] = {}; + volatile TagAddressType tasksCount[16] = {}; csr.tagAddress = tasksCount; constexpr auto enableTimeout = false; @@ -290,7 +290,7 @@ HWTEST_F(CommandStreamReceiverTest, givenNoGpuHangWhenWaititingForCompletionWith auto driverModelMock = std::make_unique(); driverModelMock->isGpuHangDetectedToReturn = false; - volatile std::uint32_t tasksCount[16] = {}; + volatile TagAddressType tasksCount[16] = {}; driverModelMock->isGpuHangDetectedSideEffect = [&tasksCount] { tasksCount[0]++; }; @@ -367,7 +367,7 @@ HWTEST_F(CommandStreamReceiverTest, givenGpuHangWhenWaititingForTaskCountThenGpu csr.activePartitions = 1; csr.gpuHangCheckPeriod = 0us; - volatile std::uint32_t tasksCount[16] = {}; + volatile TagAddressType tasksCount[16] = {}; csr.tagAddress = tasksCount; constexpr auto taskCountToWait = 1; @@ -413,8 +413,8 @@ HWTEST_F(CommandStreamReceiverTest, givenGpuHangAndNonEmptyAllocationsListWhenCa csr.activePartitions = 1; csr.gpuHangCheckPeriod = 0us; - volatile std::uint32_t tasksCount[16] = {}; - VariableBackup csrTagAddressBackup(&csr.tagAddress); + volatile TagAddressType tasksCount[16] = {}; + VariableBackup csrTagAddressBackup(&csr.tagAddress); csr.tagAddress = tasksCount; auto hostPtr = reinterpret_cast(0x1234); @@ -1473,8 +1473,8 @@ TEST(CommandStreamReceiverSimpleTest, givenGpuNotIdleImplicitFlushCheckEnabledWh namespace CpuIntrinsicsTests { extern std::atomic pauseCounter; -extern volatile uint32_t *pauseAddress; -extern uint32_t pauseValue; +extern volatile TagAddressType *pauseAddress; +extern TaskCountType pauseValue; extern uint32_t pauseOffset; } // namespace CpuIntrinsicsTests @@ -1505,8 +1505,8 @@ TEST(CommandStreamReceiverSimpleTest, givenMultipleActivePartitionsWhenWaitingFo csr.taskCount = 3u; csr.activePartitions = 2; - VariableBackup backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); - VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue); + VariableBackup backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); + VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue); VariableBackup backupPauseOffset(&CpuIntrinsicsTests::pauseOffset); CpuIntrinsicsTests::pauseAddress = &csr.mockTagAddress[0]; @@ -1532,8 +1532,8 @@ TEST(CommandStreamReceiverSimpleTest, givenEmptyTemporaryAllocationListWhenWaiti csr.mockTagAddress[0] = 0u; csr.taskCount = 3u; - VariableBackup backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); - VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue); + VariableBackup backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); + VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue); VariableBackup backupPauseOffset(&CpuIntrinsicsTests::pauseOffset); CpuIntrinsicsTests::pauseAddress = &csr.mockTagAddress[0]; @@ -2145,15 +2145,15 @@ HWTEST_F(CommandStreamReceiverTest, givenMultipleActivePartitionsWhenWaitLogIsEn auto &csr = pDevice->getUltCommandStreamReceiver(); csr.activePartitions = 2; - volatile uint32_t *tagAddress = csr.tagAddress; - constexpr uint32_t tagValue = 2; + volatile TagAddressType *tagAddress = csr.tagAddress; + constexpr TagAddressType tagValue = 2; *tagAddress = tagValue; tagAddress = ptrOffset(tagAddress, csr.postSyncWriteOffset); *tagAddress = tagValue; WaitParams waitParams; waitParams.waitTimeout = std::numeric_limits::max(); - constexpr uint32_t taskCount = 1; + constexpr TaskCountType taskCount = 1; testing::internal::CaptureStdout(); @@ -2166,7 +2166,7 @@ HWTEST_F(CommandStreamReceiverTest, givenMultipleActivePartitionsWhenWaitLogIsEn expectedOutput << std::endl << "Waiting for task count " << taskCount - << " at location " << const_cast(csr.tagAddress) + << " at location " << const_cast(csr.tagAddress) << " with timeout " << std::hex << waitParams.waitTimeout << ". Current value: " << std::dec << tagValue << " " << tagValue @@ -2298,7 +2298,7 @@ struct MockRequiredScratchSpaceController : public ScratchSpaceControllerBase { uint32_t scratchSlot, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, - uint32_t currentTaskCount, + TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) override { diff --git a/shared/test/unit_test/memory_manager/deferrable_allocation_deletion_tests.cpp b/shared/test/unit_test/memory_manager/deferrable_allocation_deletion_tests.cpp index acdb492df6..45d09e268a 100644 --- a/shared/test/unit_test/memory_manager/deferrable_allocation_deletion_tests.cpp +++ b/shared/test/unit_test/memory_manager/deferrable_allocation_deletion_tests.cpp @@ -56,7 +56,7 @@ struct DeferrableAllocationDeletionTest : ::testing::Test { MockMemoryManager *memoryManager = nullptr; std::unique_ptr device; uint32_t defaultOsContextId = 0; - volatile uint32_t *hwTag = nullptr; + volatile TagAddressType *hwTag = nullptr; }; TEST_F(DeferrableAllocationDeletionTest, givenDeferrableAllocationWhenApplyThenWaitForEachTaskCount) { diff --git a/shared/test/unit_test/memory_manager/host_ptr_manager_tests.cpp b/shared/test/unit_test/memory_manager/host_ptr_manager_tests.cpp index a35044f201..179124512b 100644 --- a/shared/test/unit_test/memory_manager/host_ptr_manager_tests.cpp +++ b/shared/test/unit_test/memory_manager/host_ptr_manager_tests.cpp @@ -876,7 +876,7 @@ TEST_F(HostPtrAllocationTest, whenOverlappedFragmentIsBiggerThenStoredAndStoredF auto fragment2 = hostPtrManager->getFragment({alignUp(cpuPtr1, MemoryConstants::pageSize), csr->getRootDeviceIndex()}); EXPECT_NE(nullptr, fragment2); - uint32_t taskCountReady = 2; + TaskCountType taskCountReady = 2; auto storage = new MockInternalAllocationStorage(*csr); csr->internalAllocationStorage.reset(storage); storage->storeAllocationWithTaskCount(std::unique_ptr(graphicsAllocation1), TEMPORARY_ALLOCATION, taskCountReady); @@ -901,16 +901,16 @@ TEST_F(HostPtrAllocationTest, whenOverlappedFragmentIsBiggerThenStoredAndStoredF } HWTEST_F(HostPtrAllocationTest, givenOverlappingFragmentsWhenCheckIsCalledThenWaitAndCleanOnAllEngines) { - uint32_t taskCountReady = 2; - uint32_t taskCountNotReady = 1; + TaskCountType taskCountReady = 2; + TaskCountType taskCountNotReady = 1; auto &engines = memoryManager->getRegisteredEngines(); EXPECT_EQ(1u, engines.size()); auto csr0 = static_cast(engines[0].commandStreamReceiver); auto csr1 = std::make_unique(executionEnvironment, 0, 1); - uint32_t csr0GpuTag = taskCountNotReady; - uint32_t csr1GpuTag = taskCountNotReady; + TaskCountType csr0GpuTag = taskCountNotReady; + TaskCountType csr1GpuTag = taskCountNotReady; csr0->tagAddress = &csr0GpuTag; csr1->tagAddress = &csr1GpuTag; auto osContext = memoryManager->createAndRegisterOsContext(csr1.get(), EngineDescriptorHelper::getDefaultDescriptor({aub_stream::EngineType::ENGINE_RCS, EngineUsage::LowPriority})); @@ -969,7 +969,7 @@ TEST_F(HostPtrAllocationTest, whenOverlappedFragmentIsBiggerThenStoredAndStoredF auto fragment2 = hostPtrManager->getFragment({alignUp(cpuPtr1, MemoryConstants::pageSize), csr->getRootDeviceIndex()}); EXPECT_NE(nullptr, fragment2); - uint32_t taskCountReady = 2; + TaskCountType taskCountReady = 2; auto storage = csr->getInternalAllocationStorage(); storage->storeAllocationWithTaskCount(std::unique_ptr(graphicsAllocation1), TEMPORARY_ALLOCATION, taskCountReady); @@ -1052,7 +1052,7 @@ TEST_F(HostPtrAllocationTest, GivenAllocationsWithBiggerOverlapWhenChckingForOve auto fragment2 = hostPtrManager->getFragment({alignUp(cpuPtr1, MemoryConstants::pageSize), csr->getRootDeviceIndex()}); EXPECT_NE(nullptr, fragment2); - uint32_t taskCountReady = 1; + TaskCountType taskCountReady = 1; auto storage = csr->getInternalAllocationStorage(); storage->storeAllocationWithTaskCount(std::unique_ptr(graphicsAllocation1), TEMPORARY_ALLOCATION, taskCountReady); diff --git a/shared/test/unit_test/memory_manager/multi_graphics_allocation_tests.cpp b/shared/test/unit_test/memory_manager/multi_graphics_allocation_tests.cpp index 66f06455bf..7e423683be 100644 --- a/shared/test/unit_test/memory_manager/multi_graphics_allocation_tests.cpp +++ b/shared/test/unit_test/memory_manager/multi_graphics_allocation_tests.cpp @@ -288,7 +288,7 @@ TEST_F(MigrationSyncDataTests, whenSetTargetLocationIsCalledThenProperLocationIs TEST(MigrationSyncDataTest, whenWaitOnCpuIsCalledThenWaitForValueSpecifiedInSignalUsageMethod) { auto migrationSyncData = std::make_unique(MemoryConstants::pageSize); - uint32_t tagAddress = 0; + TagAddressType tagAddress = 0; migrationSyncData->signalUsage(&tagAddress, 2u); migrationSyncData->waitOnCpu(); @@ -297,7 +297,7 @@ TEST(MigrationSyncDataTest, whenWaitOnCpuIsCalledThenWaitForValueSpecifiedInSign TEST(MigrationSyncDataTest, whenTaskCountIsHigherThanExpectedThenWaitOnCpuDoesntHang) { auto migrationSyncData = std::make_unique(MemoryConstants::pageSize); - uint32_t tagAddress = 5u; + TagAddressType tagAddress = 5u; migrationSyncData->signalUsage(&tagAddress, 2u); EXPECT_EQ(&tagAddress, migrationSyncData->tagAddress); diff --git a/shared/test/unit_test/os_interface/linux/drm_command_stream_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_command_stream_tests.cpp index 0345be7fb3..63521c91fb 100644 --- a/shared/test/unit_test/os_interface/linux/drm_command_stream_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_command_stream_tests.cpp @@ -62,7 +62,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamTest, givenEnabledDirectSubmissionWhenGetting HWTEST_TEMPLATED_F(DrmCommandStreamTest, whenGettingCompletionAddressThenOffsettedTagAddressIsReturned) { csr->initializeTagAllocation(); EXPECT_NE(nullptr, csr->getTagAddress()); - uint64_t tagAddress = castToUint64(const_cast(csr->getTagAddress())); + uint64_t tagAddress = castToUint64(const_cast(csr->getTagAddress())); auto expectedAddress = tagAddress + Drm::completionFenceOffset; EXPECT_EQ(expectedAddress, csr->getCompletionAddress()); } diff --git a/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp b/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp index bac34dde9c..15fb9c2926 100644 --- a/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp @@ -746,7 +746,7 @@ struct MockDrmDirectSubmissionToTestDtor : public DrmDirectSubmissionuseContextForUserFenceWait = true; testDrmCsr->activePartitions = static_cast(drmCtxSize); - uint64_t tagAddress = castToUint64(const_cast(testDrmCsr->getTagAddress())); + uint64_t tagAddress = castToUint64(const_cast(testDrmCsr->getTagAddress())); FlushStamp handleToWait = 123; testDrmCsr->waitForFlushStamp(handleToWait); @@ -155,7 +155,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTestDrmPrelim, givenWaitUserFenceEnab EXPECT_NE(0u, mock->context.receivedGemWaitUserFence.ctxId); EXPECT_EQ(DrmPrelimHelper::getGTEWaitUserFenceFlag(), mock->context.receivedGemWaitUserFence.op); EXPECT_EQ(0u, mock->context.receivedGemWaitUserFence.flags); - EXPECT_EQ(DrmPrelimHelper::getU32WaitUserFenceFlag(), mock->context.receivedGemWaitUserFence.mask); + EXPECT_EQ(DrmPrelimHelper::getU64WaitUserFenceFlag(), mock->context.receivedGemWaitUserFence.mask); EXPECT_EQ(-1, mock->context.receivedGemWaitUserFence.timeout); } @@ -175,7 +175,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTestDrmPrelim, givenWaitUserFenceEnab testDrmCsr->useUserFenceWait = true; testDrmCsr->useContextForUserFenceWait = false; - uint64_t tagAddress = castToUint64(const_cast(testDrmCsr->getTagAddress())); + uint64_t tagAddress = castToUint64(const_cast(testDrmCsr->getTagAddress())); FlushStamp handleToWait = 123; testDrmCsr->waitForFlushStamp(handleToWait); @@ -188,7 +188,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTestDrmPrelim, givenWaitUserFenceEnab EXPECT_EQ(0u, mock->context.receivedGemWaitUserFence.ctxId); EXPECT_EQ(DrmPrelimHelper::getGTEWaitUserFenceFlag(), mock->context.receivedGemWaitUserFence.op); EXPECT_EQ(0u, mock->context.receivedGemWaitUserFence.flags); - EXPECT_EQ(DrmPrelimHelper::getU32WaitUserFenceFlag(), mock->context.receivedGemWaitUserFence.mask); + EXPECT_EQ(DrmPrelimHelper::getU64WaitUserFenceFlag(), mock->context.receivedGemWaitUserFence.mask); EXPECT_EQ(-1, mock->context.receivedGemWaitUserFence.timeout); } @@ -210,7 +210,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTestDrmPrelim, givenWaitUserFenceEnab testDrmCsr->activePartitions = 2u; EXPECT_NE(0u, testDrmCsr->postSyncWriteOffset); - uint64_t tagAddress = castToUint64(const_cast(testDrmCsr->getTagAddress())); + uint64_t tagAddress = castToUint64(const_cast(testDrmCsr->getTagAddress())); FlushStamp handleToWait = 123; testDrmCsr->waitForFlushStamp(handleToWait); @@ -223,7 +223,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTestDrmPrelim, givenWaitUserFenceEnab EXPECT_EQ(0u, mock->context.receivedGemWaitUserFence.ctxId); EXPECT_EQ(DrmPrelimHelper::getGTEWaitUserFenceFlag(), mock->context.receivedGemWaitUserFence.op); EXPECT_EQ(0u, mock->context.receivedGemWaitUserFence.flags); - EXPECT_EQ(DrmPrelimHelper::getU32WaitUserFenceFlag(), mock->context.receivedGemWaitUserFence.mask); + EXPECT_EQ(DrmPrelimHelper::getU64WaitUserFenceFlag(), mock->context.receivedGemWaitUserFence.mask); EXPECT_EQ(-1, mock->context.receivedGemWaitUserFence.timeout); } diff --git a/shared/test/unit_test/os_interface/linux/drm_command_stream_xehp_and_later_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_command_stream_xehp_and_later_tests.cpp index c464046765..fc31bc37d2 100644 --- a/shared/test/unit_test/os_interface/linux/drm_command_stream_xehp_and_later_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_command_stream_xehp_and_later_tests.cpp @@ -126,15 +126,15 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DrmCommandStreamMultiTileMemExecTest, GivenDrmSuppo auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, 1024, AllocationType::COMMAND_BUFFER}); allocation->updateTaskCount(2, defaultEngine.osContext->getContextId()); - volatile uint32_t *completionAddress = defaultEngine.commandStreamReceiver->getTagAddress(); - completionAddress += (Drm::completionFenceOffset / sizeof(uint32_t)); + volatile TagAddressType *completionAddress = defaultEngine.commandStreamReceiver->getTagAddress(); + completionAddress += (Drm::completionFenceOffset / sizeof(TagAddressType)); *completionAddress = 1; - completionAddress += (postSyncOffset / sizeof(uint32_t)); + completionAddress += (postSyncOffset / sizeof(TagAddressType)); *completionAddress = 1; memoryManager->handleFenceCompletion(allocation); - uint64_t expectedAddress = castToUint64(const_cast(defaultEngine.commandStreamReceiver->getTagAddress())) + + uint64_t expectedAddress = castToUint64(const_cast(defaultEngine.commandStreamReceiver->getTagAddress())) + Drm::completionFenceOffset + postSyncOffset; constexpr uint64_t expectedValue = 2; @@ -160,15 +160,15 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DrmCommandStreamMultiTileMemExecTest, GivenDrmSuppo auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, 1024, AllocationType::COMMAND_BUFFER}); allocation->updateTaskCount(2, defaultEngine.osContext->getContextId()); - volatile uint32_t *completionAddress = defaultEngine.commandStreamReceiver->getTagAddress(); - completionAddress += (Drm::completionFenceOffset / sizeof(uint32_t)); + volatile TagAddressType *completionAddress = defaultEngine.commandStreamReceiver->getTagAddress(); + completionAddress += (Drm::completionFenceOffset / sizeof(TagAddressType)); *completionAddress = 2; //1st context is ready - completionAddress += (postSyncOffset / sizeof(uint32_t)); + completionAddress += (postSyncOffset / sizeof(TagAddressType)); *completionAddress = 1; memoryManager->handleFenceCompletion(allocation); - uint64_t expectedAddress = castToUint64(const_cast(defaultEngine.commandStreamReceiver->getTagAddress())) + + uint64_t expectedAddress = castToUint64(const_cast(defaultEngine.commandStreamReceiver->getTagAddress())) + Drm::completionFenceOffset + postSyncOffset; constexpr uint64_t expectedValue = 2; diff --git a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp index eee41dbefa..87831424df 100644 --- a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp @@ -5367,7 +5367,7 @@ TEST_F(DrmMemoryManagerTest, givenCompletionFenceEnabledWhenHandlingCompletionOf auto engine = memoryManager->getRegisteredEngines()[0]; allocation->updateTaskCount(2, engine.osContext->getContextId()); - uint64_t expectedFenceAddress = castToUint64(const_cast(engine.commandStreamReceiver->getTagAddress())) + Drm::completionFenceOffset; + uint64_t expectedFenceAddress = castToUint64(const_cast(engine.commandStreamReceiver->getTagAddress())) + Drm::completionFenceOffset; constexpr uint64_t expectedValue = 2; memoryManager->handleFenceCompletion(allocation); diff --git a/shared/test/unit_test/scratch_space_controler/scratch_space_controler_tests.cpp b/shared/test/unit_test/scratch_space_controler/scratch_space_controler_tests.cpp index 53f30bc973..46aa7a02a3 100644 --- a/shared/test/unit_test/scratch_space_controler/scratch_space_controler_tests.cpp +++ b/shared/test/unit_test/scratch_space_controler/scratch_space_controler_tests.cpp @@ -22,7 +22,7 @@ class MockScratchSpaceControllerBase : public ScratchSpaceControllerBase { uint32_t offset, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, - uint32_t currentTaskCount, + TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) override { @@ -32,7 +32,7 @@ class MockScratchSpaceControllerBase : public ScratchSpaceControllerBase { void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, - uint32_t currentTaskCount, + TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty, diff --git a/shared/test/unit_test/scratch_space_controler/scratch_space_controler_xehp_and_later_tests.cpp b/shared/test/unit_test/scratch_space_controler/scratch_space_controler_xehp_and_later_tests.cpp index cc185da6bc..3a386d4354 100644 --- a/shared/test/unit_test/scratch_space_controler/scratch_space_controler_xehp_and_later_tests.cpp +++ b/shared/test/unit_test/scratch_space_controler/scratch_space_controler_xehp_and_later_tests.cpp @@ -34,7 +34,7 @@ class MockScratchSpaceControllerXeHPAndLater : public ScratchSpaceControllerXeHP } void prepareScratchAllocation(uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, - uint32_t currentTaskCount, + TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &scratchSurfaceDirty, diff --git a/shared/test/unit_test/utilities/wait_util_tests.cpp b/shared/test/unit_test/utilities/wait_util_tests.cpp index 9a2828a902..6688b2edb0 100644 --- a/shared/test/unit_test/utilities/wait_util_tests.cpp +++ b/shared/test/unit_test/utilities/wait_util_tests.cpp @@ -50,8 +50,8 @@ TEST(WaitTest, givenDefaultSettingsWhenPollAddressProvidedDoesNotMeetCriteriaThe WaitUtils::init(); EXPECT_EQ(WaitUtils::defaultWaitCount, WaitUtils::waitCount); - volatile uint32_t pollValue = 1u; - uint32_t expectedValue = 3; + volatile TagAddressType pollValue = 1u; + TaskCountType expectedValue = 3; uint32_t oldCount = CpuIntrinsicsTests::pauseCounter.load(); bool ret = WaitUtils::waitFunction(&pollValue, expectedValue); @@ -63,8 +63,8 @@ TEST(WaitTest, givenDefaultSettingsWhenPollAddressProvidedMeetsCriteriaThenPause WaitUtils::init(); EXPECT_EQ(WaitUtils::defaultWaitCount, WaitUtils::waitCount); - volatile uint32_t pollValue = 3u; - uint32_t expectedValue = 1; + volatile TagAddressType pollValue = 3u; + TaskCountType expectedValue = 1; uint32_t oldCount = CpuIntrinsicsTests::pauseCounter.load(); bool ret = WaitUtils::waitFunction(&pollValue, expectedValue); @@ -82,8 +82,8 @@ TEST(WaitTest, givenDebugFlagSetZeroWhenPollAddressProvidedMeetsCriteriaThenPaus WaitUtils::init(); EXPECT_EQ(count, WaitUtils::waitCount); - volatile uint32_t pollValue = 3u; - uint32_t expectedValue = 1; + volatile TagAddressType pollValue = 3u; + TaskCountType expectedValue = 1; uint32_t oldCount = CpuIntrinsicsTests::pauseCounter.load(); bool ret = WaitUtils::waitFunction(&pollValue, expectedValue);