diff --git a/Jenkinsfile b/Jenkinsfile index 99b19b61b0..f94094e0c6 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -2,4 +2,4 @@ dependenciesRevision='08be0789c5aab889fbf403d2b2968b402783d713-1413' strategy='EQUAL' allowedCD=257 -allowedF=20 +allowedF=21 diff --git a/opencl/source/command_queue/command_queue.h b/opencl/source/command_queue/command_queue.h index ccce2eaa9c..30aa89e678 100644 --- a/opencl/source/command_queue/command_queue.h +++ b/opencl/source/command_queue/command_queue.h @@ -268,6 +268,10 @@ class CommandQueue : public BaseObject<_cl_command_queue> { this->isSpecialCommandQueue = newValue; } + bool isSpecial() { + return this->isSpecialCommandQueue; + } + QueuePriority getPriority() const { return priority; } diff --git a/opencl/source/command_queue/gpgpu_walker_base.inl b/opencl/source/command_queue/gpgpu_walker_base.inl index 24abbf70b3..065dd9356c 100644 --- a/opencl/source/command_queue/gpgpu_walker_base.inl +++ b/opencl/source/command_queue/gpgpu_walker_base.inl @@ -264,12 +264,11 @@ size_t EnqueueOperation::getTotalSizeRequiredCS(uint32_t eventType, c expectedSizeCS += EnqueueOperation::getSizeRequiredForTimestampPacketWrite(); } - if (DebugManager.flags.AddBlockingSemaphoreAfterSpecificEnqueue.get() != -1) { - if (DebugManager.flags.AddCacheFlushBeforeBlockingSemaphore.get()) { - expectedSizeCS += MemorySynchronizationCommands::getSizeForSinglePipeControl(); - } - expectedSizeCS += sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT); + if (DebugManager.flags.PauseOnEnqueue.get() != -1) { + expectedSizeCS += MemorySynchronizationCommands::getSizeForSinglePipeControl() * 2; + expectedSizeCS += sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT) * 2; } + return expectedSizeCS; } diff --git a/opencl/source/command_queue/hardware_interface.h b/opencl/source/command_queue/hardware_interface.h index e32c1161bc..05c0dd1519 100644 --- a/opencl/source/command_queue/hardware_interface.h +++ b/opencl/source/command_queue/hardware_interface.h @@ -73,6 +73,12 @@ class HardwareInterface { LinearStream *commandStream, CommandQueue &commandQueue); + static void dispatchDebugPauseCommands( + LinearStream *commandStream, + CommandQueue &commandQueue, + DebugPauseState confirmationTrigger, + DebugPauseState waitCondition); + static void programWalker( LinearStream &commandStream, Kernel &kernel, diff --git a/opencl/source/command_queue/hardware_interface_base.inl b/opencl/source/command_queue/hardware_interface_base.inl index 8201b052fd..ff53ba58a1 100644 --- a/opencl/source/command_queue/hardware_interface_base.inl +++ b/opencl/source/command_queue/hardware_interface_base.inl @@ -80,6 +80,7 @@ void HardwareInterface::dispatchWalker( DEBUG_BREAK_IF(offsetInterfaceDescriptorTable % 64 != 0); dispatchProfilingPerfStartCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue); + dispatchDebugPauseCommands(commandStream, commandQueue, DebugPauseState::waitingForUserStartConfirmation, DebugPauseState::hasUserStartConfirmation); size_t currentDispatchIndex = 0; for (auto &dispatchInfo : multiDispatchInfo) { @@ -101,24 +102,9 @@ void HardwareInterface::dispatchWalker( } HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(commandStream, commandQueue, mainKernel, postSyncAddress); } + + dispatchDebugPauseCommands(commandStream, commandQueue, DebugPauseState::waitingForUserEndConfirmation, DebugPauseState::hasUserEndConfirmation); dispatchProfilingPerfEndCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue); - - if (DebugManager.flags.AddBlockingSemaphoreAfterSpecificEnqueue.get() != -1) { - auto &gpgpuCsr = commandQueue.getGpgpuCommandStreamReceiver(); - - if (static_cast(DebugManager.flags.AddBlockingSemaphoreAfterSpecificEnqueue.get()) == gpgpuCsr.peekTaskCount()) { - if (DebugManager.flags.AddCacheFlushBeforeBlockingSemaphore.get()) { - NEO::PipeControlArgs args(true); - MemorySynchronizationCommands::addPipeControl(*commandStream, args); - } - - auto tagValue = *(gpgpuCsr.getTagAddress()); - auto tagAddress = gpgpuCsr.getTagAllocation()->getGpuAddress(); - - // Wait for (tag == tag - 1). This will be never satisfied. - HardwareCommandsHelper::programMiSemaphoreWait(*commandStream, tagAddress, (tagValue - 1), GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD); - } - } } template @@ -245,4 +231,44 @@ void HardwareInterface::obtainIndirectHeaps(CommandQueue &commandQueu ssh = &getIndirectHeap(commandQueue, multiDispatchInfo); } } + +template +inline void HardwareInterface::dispatchDebugPauseCommands( + LinearStream *commandStream, + CommandQueue &commandQueue, + DebugPauseState confirmationTrigger, + DebugPauseState waitCondition) { + + if (static_cast(commandQueue.getGpgpuCommandStreamReceiver().peekTaskCount()) == DebugManager.flags.PauseOnEnqueue.get() && + !commandQueue.isSpecial()) { + auto address = commandQueue.getGpgpuCommandStreamReceiver().getDebugPauseStateGPUAddress(); + { + using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; + using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; + + auto pCmd = (PIPE_CONTROL *)commandStream->getSpace(sizeof(PIPE_CONTROL)); + *pCmd = GfxFamily::cmdInitPipeControl; + + pCmd->setCommandStreamerStallEnable(true); + pCmd->setDcFlushEnable(true); + pCmd->setAddress(static_cast(address & 0x0000FFFFFFFFULL)); + pCmd->setAddressHigh(static_cast(address >> 32)); + pCmd->setPostSyncOperation(POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA); + pCmd->setImmediateData(static_cast(confirmationTrigger)); + } + + { + using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT; + + auto pCmd = (MI_SEMAPHORE_WAIT *)commandStream->getSpace(sizeof(MI_SEMAPHORE_WAIT)); + *pCmd = GfxFamily::cmdInitMiSemaphoreWait; + + pCmd->setCompareOperation(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD); + pCmd->setSemaphoreDataDword(static_cast(waitCondition)); + pCmd->setSemaphoreGraphicsAddress(address); + pCmd->setWaitMode(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE); + } + } +} + } // namespace NEO diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp index 378b3b6326..225712860e 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp @@ -486,105 +486,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueKernelTest, givenSecondEnqueueWithTheSameScra EXPECT_EQ(csr.getScratchAllocation(), scratchAlloc); } -HWTEST_F(EnqueueKernelTest, givenDebugFlagSetWhenDispatchWalkersThenSetBlockingSemaphoreAfterSpecificEnqueue) { - using WALKER_TYPE = typename FamilyType::WALKER_TYPE; - using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - DebugManagerStateRestore restore; - DebugManager.flags.AddBlockingSemaphoreAfterSpecificEnqueue.set(1); - - auto &csr = pDevice->getGpgpuCommandStreamReceiver(); - auto tagAddress = csr.getTagAllocation()->getGpuAddress(); - MockKernelWithInternals mockKernel(*pClDevice); - - size_t off[3] = {0, 0, 0}; - size_t gws[3] = {1, 1, 1}; - - pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); - pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); - - HardwareParse hwParser; - hwParser.parseCommands(*pCmdQ); - auto &cmdList = hwParser.cmdList; - - auto lastWalker = find(cmdList.begin(), cmdList.end()); - EXPECT_NE(cmdList.end(), lastWalker); - lastWalker = find(++lastWalker, cmdList.end()); - EXPECT_NE(cmdList.end(), lastWalker); - - auto semaphore = find(lastWalker, cmdList.end()); - bool semaphoreAfterWalkerFound = false; - while (semaphore != cmdList.end()) { - auto semaphoreCmd = genCmdCast(*semaphore); - if (tagAddress == semaphoreCmd->getSemaphoreGraphicsAddress()) { - EXPECT_EQ((*(csr.getTagAddress()) - 1), semaphoreCmd->getSemaphoreDataDword()); - EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, semaphoreCmd->getCompareOperation()); - semaphoreAfterWalkerFound = true; - break; - } - semaphore = find(++semaphore, cmdList.end()); - } - EXPECT_TRUE(semaphoreAfterWalkerFound); - - EXPECT_EQ(nullptr, genCmdCast(*(--semaphore))); - - semaphore = find(cmdList.begin(), lastWalker); - while (semaphore != lastWalker) { - auto semaphoreCmd = genCmdCast(*semaphore); - if (tagAddress == semaphoreCmd->getSemaphoreGraphicsAddress()) { - EXPECT_NE((*(csr.getTagAddress()) - 1), semaphoreCmd->getSemaphoreDataDword()); - } - semaphore = find(++semaphore, lastWalker); - } -} - -HWTEST_F(EnqueueKernelTest, givenDebugFlagSetWhenDispatchWalkersThenSetBlockingSemaphoreAfterSpecificEnqueueAndFlushCacheBefore) { - using WALKER_TYPE = typename FamilyType::WALKER_TYPE; - using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - DebugManagerStateRestore restore; - DebugManager.flags.AddBlockingSemaphoreAfterSpecificEnqueue.set(1); - DebugManager.flags.AddCacheFlushBeforeBlockingSemaphore.set(true); - - auto &csr = pDevice->getGpgpuCommandStreamReceiver(); - auto tagAddress = csr.getTagAllocation()->getGpuAddress(); - MockKernelWithInternals mockKernel(*pClDevice); - - size_t off[3] = {0, 0, 0}; - size_t gws[3] = {1, 1, 1}; - - pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); - pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); - - HardwareParse hwParser; - hwParser.parseCommands(*pCmdQ); - auto &cmdList = hwParser.cmdList; - - auto lastWalker = find(cmdList.begin(), cmdList.end()); - EXPECT_NE(cmdList.end(), lastWalker); - lastWalker = find(++lastWalker, cmdList.end()); - EXPECT_NE(cmdList.end(), lastWalker); - - auto semaphore = find(lastWalker, cmdList.end()); - bool semaphoreAfterWalkerFound = false; - while (semaphore != cmdList.end()) { - auto semaphoreCmd = genCmdCast(*semaphore); - if (tagAddress == semaphoreCmd->getSemaphoreGraphicsAddress()) { - EXPECT_EQ((*(csr.getTagAddress()) - 1), semaphoreCmd->getSemaphoreDataDword()); - EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, semaphoreCmd->getCompareOperation()); - semaphoreAfterWalkerFound = true; - break; - } - semaphore = find(++semaphore, cmdList.end()); - } - EXPECT_TRUE(semaphoreAfterWalkerFound); - - auto pipeControl = genCmdCast(*(--semaphore)); - EXPECT_NE(nullptr, pipeControl); - EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_TRUE(pipeControl->getDcFlushEnable()); -} - HWTEST_F(EnqueueKernelTest, whenEnqueueingKernelThatRequirePrivateScratchThenPrivateScratchIsSetInCommandStreamReceviver) { pDevice->setPreemptionMode(PreemptionMode::ThreadGroup); auto &csr = pDevice->getUltCommandStreamReceiver(); @@ -1357,3 +1258,152 @@ HWTEST_F(EnqueueKernelTest, whenEnqueueKernelWithEngineHintsThenEpilogRequiredIs EXPECT_EQ(csr.recordedDispatchFlags.epilogueRequired, true); EXPECT_EQ(csr.recordedDispatchFlags.engineHints, 1u); } + +HWTEST_F(EnqueueKernelTest, givenPauseOnEnqueueFlagSetWhenDispatchWalkersThenInsertPauseCommandsAroundSpecifiedEnqueue) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + DebugManagerStateRestore restore; + DebugManager.flags.PauseOnEnqueue.set(1); + + auto &csr = pDevice->getGpgpuCommandStreamReceiver(); + auto debugPauseStateAddress = csr.getDebugPauseStateGPUAddress(); + + MockKernelWithInternals mockKernel(*pClDevice); + + size_t off[3] = {0, 0, 0}; + size_t gws[3] = {1, 1, 1}; + + pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); + pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); + + HardwareParse hwParser; + hwParser.parseCommands(*pCmdQ); + auto &cmdList = hwParser.cmdList; + + auto semaphore = find(cmdList.begin(), cmdList.end()); + bool semaphoreBeforeWalkerFound = false; + bool semaphoreAfterWalkerFound = false; + while (semaphore != cmdList.end()) { + auto semaphoreCmd = genCmdCast(*semaphore); + if (static_cast(DebugPauseState::hasUserStartConfirmation) == semaphoreCmd->getSemaphoreDataDword()) { + EXPECT_EQ(debugPauseStateAddress, semaphoreCmd->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, semaphoreCmd->getCompareOperation()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE, semaphoreCmd->getWaitMode()); + + semaphoreBeforeWalkerFound = true; + } + + if (static_cast(DebugPauseState::hasUserEndConfirmation) == semaphoreCmd->getSemaphoreDataDword()) { + EXPECT_TRUE(semaphoreBeforeWalkerFound); + EXPECT_EQ(debugPauseStateAddress, semaphoreCmd->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, semaphoreCmd->getCompareOperation()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE, semaphoreCmd->getWaitMode()); + + semaphoreAfterWalkerFound = true; + break; + } + + semaphore = find(++semaphore, cmdList.end()); + } + + EXPECT_TRUE(semaphoreAfterWalkerFound); + + auto pipeControl = find(cmdList.begin(), cmdList.end()); + bool pipeControlBeforeWalkerFound = false; + bool pipeControlAfterWalkerFound = false; + while (pipeControl != cmdList.end()) { + auto pipeControlCmd = genCmdCast(*pipeControl); + if (static_cast(DebugPauseState::waitingForUserStartConfirmation) == pipeControlCmd->getImmediateData()) { + EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable()); + EXPECT_TRUE(pipeControlCmd->getDcFlushEnable()); + EXPECT_EQ(static_cast(debugPauseStateAddress & 0x0000FFFFFFFFULL), pipeControlCmd->getAddress()); + EXPECT_EQ(static_cast(debugPauseStateAddress >> 32), pipeControlCmd->getAddressHigh()); + EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControlCmd->getPostSyncOperation()); + + pipeControlBeforeWalkerFound = true; + } + + if (static_cast(DebugPauseState::waitingForUserEndConfirmation) == pipeControlCmd->getImmediateData()) { + EXPECT_TRUE(pipeControlBeforeWalkerFound); + EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable()); + EXPECT_TRUE(pipeControlCmd->getDcFlushEnable()); + EXPECT_EQ(static_cast(debugPauseStateAddress & 0x0000FFFFFFFFULL), pipeControlCmd->getAddress()); + EXPECT_EQ(static_cast(debugPauseStateAddress >> 32), pipeControlCmd->getAddressHigh()); + EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControlCmd->getPostSyncOperation()); + + pipeControlAfterWalkerFound = true; + break; + } + + pipeControl = find(++pipeControl, cmdList.end()); + } + + EXPECT_TRUE(pipeControlAfterWalkerFound); +} + +HWTEST_F(EnqueueKernelTest, givenPauseOnEnqueueFlagSetWhenDispatchWalkersThenDontInsertPauseCommandsWhenUsingSpecialQueue) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + DebugManagerStateRestore restore; + DebugManager.flags.PauseOnEnqueue.set(0); + + auto &csr = pDevice->getGpgpuCommandStreamReceiver(); + auto debugPauseStateAddress = csr.getDebugPauseStateGPUAddress(); + + pCmdQ->setIsSpecialCommandQueue(true); + + MockKernelWithInternals mockKernel(*pClDevice); + + size_t off[3] = {0, 0, 0}; + size_t gws[3] = {1, 1, 1}; + + pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); + + HardwareParse hwParser; + hwParser.parseCommands(*pCmdQ); + auto &cmdList = hwParser.cmdList; + + auto semaphore = find(cmdList.begin(), cmdList.end()); + bool semaphoreBeforeWalkerFound = false; + bool semaphoreAfterWalkerFound = false; + while (semaphore != cmdList.end()) { + auto semaphoreCmd = genCmdCast(*semaphore); + if (static_cast(DebugPauseState::hasUserStartConfirmation) == semaphoreCmd->getSemaphoreDataDword() && + debugPauseStateAddress == semaphoreCmd->getSemaphoreGraphicsAddress()) { + semaphoreBeforeWalkerFound = true; + } + + if (static_cast(DebugPauseState::hasUserEndConfirmation) == semaphoreCmd->getSemaphoreDataDword() && + debugPauseStateAddress == semaphoreCmd->getSemaphoreGraphicsAddress()) { + semaphoreAfterWalkerFound = true; + } + + semaphore = find(++semaphore, cmdList.end()); + } + + EXPECT_FALSE(semaphoreBeforeWalkerFound); + EXPECT_FALSE(semaphoreAfterWalkerFound); + + auto pipeControl = find(cmdList.begin(), cmdList.end()); + bool pipeControlBeforeWalkerFound = false; + bool pipeControlAfterWalkerFound = false; + while (pipeControl != cmdList.end()) { + auto pipeControlCmd = genCmdCast(*pipeControl); + if (static_cast(DebugPauseState::waitingForUserStartConfirmation) == pipeControlCmd->getImmediateData()) { + pipeControlBeforeWalkerFound = true; + } + + if (static_cast(DebugPauseState::waitingForUserEndConfirmation) == pipeControlCmd->getImmediateData()) { + pipeControlAfterWalkerFound = true; + } + + pipeControl = find(++pipeControl, cmdList.end()); + } + + EXPECT_FALSE(pipeControlBeforeWalkerFound); + EXPECT_FALSE(pipeControlAfterWalkerFound); + + pCmdQ->setIsSpecialCommandQueue(false); +} diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_tests.cpp index 61a2ab825a..fa156215e8 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -656,6 +656,98 @@ TEST_F(CommandStreamReceiverTest, givenMinimumSizeExceedsCurrentAndNoSuitableReu memoryManager->freeGraphicsMemory(commandStream.getGraphicsAllocation()); } +HWTEST_F(CommandStreamReceiverTest, givenDebugPauseThreadWhenSettingFlagProgressThenFunctionAsksTwiceForConfirmation) { + DebugManagerStateRestore restore; + DebugManager.flags.PauseOnEnqueue.set(0); + testing::internal::CaptureStdout(); + int32_t executionStamp = 0; + auto mockCSR = new MockCsr(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); + + uint32_t confirmationCounter = 0; + + mockCSR->debugConfirmationFunction = [&confirmationCounter, &mockCSR]() { + if (confirmationCounter == 0) { + EXPECT_TRUE(DebugPauseState::waitingForUserStartConfirmation == *mockCSR->debugPauseStateAddress); + confirmationCounter++; + } else if (confirmationCounter == 1) { + EXPECT_TRUE(DebugPauseState::waitingForUserEndConfirmation == *mockCSR->debugPauseStateAddress); + confirmationCounter++; + } + }; + + pDevice->resetCommandStreamReceiver(mockCSR); + + *mockCSR->debugPauseStateAddress = DebugPauseState::waitingForUserStartConfirmation; + + while (*mockCSR->debugPauseStateAddress != DebugPauseState::hasUserStartConfirmation) + ; + + *mockCSR->debugPauseStateAddress = DebugPauseState::waitingForUserEndConfirmation; + + while (*mockCSR->debugPauseStateAddress != DebugPauseState::hasUserEndConfirmation) + ; + + mockCSR->userPauseConfirmation.join(); + + EXPECT_EQ(2u, confirmationCounter); + + auto output = testing::internal::GetCapturedStdout(); + EXPECT_THAT(output, testing::HasSubstr(std::string("Debug break: Press enter to start workload"))); + EXPECT_THAT(output, testing::HasSubstr(std::string("Debug break: Workload ended, press enter to continue"))); +} + +HWTEST_F(CommandStreamReceiverTest, givenDebugPauseThreadWhenTerminatingAtFirstStageThenFunctionEndsCorrectly) { + DebugManagerStateRestore restore; + DebugManager.flags.PauseOnEnqueue.set(0); + testing::internal::CaptureStdout(); + int32_t executionStamp = 0; + auto mockCSR = new MockCsr(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); + + uint32_t confirmationCounter = 0; + + mockCSR->debugConfirmationFunction = [&confirmationCounter]() { + confirmationCounter++; + }; + + pDevice->resetCommandStreamReceiver(mockCSR); + + *mockCSR->debugPauseStateAddress = DebugPauseState::terminate; + mockCSR->userPauseConfirmation.join(); + + EXPECT_EQ(0u, confirmationCounter); + auto output = testing::internal::GetCapturedStdout(); + EXPECT_EQ(0u, output.length()); +} + +HWTEST_F(CommandStreamReceiverTest, givenDebugPauseThreadWhenTerminatingAtSecondStageThenFunctionEndsCorrectly) { + DebugManagerStateRestore restore; + DebugManager.flags.PauseOnEnqueue.set(0); + testing::internal::CaptureStdout(); + int32_t executionStamp = 0; + auto mockCSR = new MockCsr(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); + + uint32_t confirmationCounter = 0; + + mockCSR->debugConfirmationFunction = [&confirmationCounter]() { + confirmationCounter++; + }; + + pDevice->resetCommandStreamReceiver(mockCSR); + + *mockCSR->debugPauseStateAddress = DebugPauseState::waitingForUserStartConfirmation; + + while (*mockCSR->debugPauseStateAddress != DebugPauseState::hasUserStartConfirmation) + ; + + *mockCSR->debugPauseStateAddress = DebugPauseState::terminate; + mockCSR->userPauseConfirmation.join(); + + auto output = testing::internal::GetCapturedStdout(); + EXPECT_THAT(output, testing::HasSubstr(std::string("Debug break: Press enter to start workload"))); + EXPECT_THAT(output, testing::Not(testing::HasSubstr(std::string("Debug break: Workload ended, press enter to continue")))); + EXPECT_EQ(1u, confirmationCounter); +} + class CommandStreamReceiverWithAubSubCaptureTest : public CommandStreamReceiverTest, public ::testing::WithParamInterface> {}; diff --git a/opencl/test/unit_test/libult/ult_command_stream_receiver.h b/opencl/test/unit_test/libult/ult_command_stream_receiver.h index 89af05d438..ce1674b98a 100644 --- a/opencl/test/unit_test/libult/ult_command_stream_receiver.h +++ b/opencl/test/unit_test/libult/ult_command_stream_receiver.h @@ -47,6 +47,8 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ using BaseClass::CommandStreamReceiver::bindingTableBaseAddressRequired; using BaseClass::CommandStreamReceiver::cleanupResources; using BaseClass::CommandStreamReceiver::commandStream; + using BaseClass::CommandStreamReceiver::debugConfirmationFunction; + using BaseClass::CommandStreamReceiver::debugPauseStateAddress; using BaseClass::CommandStreamReceiver::dispatchMode; using BaseClass::CommandStreamReceiver::executionEnvironment; using BaseClass::CommandStreamReceiver::experimentalCmdBuffer; @@ -84,6 +86,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ using BaseClass::CommandStreamReceiver::taskLevel; using BaseClass::CommandStreamReceiver::timestampPacketAllocator; using BaseClass::CommandStreamReceiver::timestampPacketWriteEnabled; + using BaseClass::CommandStreamReceiver::userPauseConfirmation; using BaseClass::CommandStreamReceiver::waitForTaskCountAndCleanAllocationList; UltCommandStreamReceiver(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) : BaseClass(executionEnvironment, rootDeviceIndex), recursiveLockCounter(0), diff --git a/opencl/test/unit_test/test_files/igdrcl.config b/opencl/test/unit_test/test_files/igdrcl.config index fbede7fe06..a1f52a0c6b 100644 --- a/opencl/test/unit_test/test_files/igdrcl.config +++ b/opencl/test/unit_test/test_files/igdrcl.config @@ -35,8 +35,7 @@ OverrideGpuAddressSpace = -1 OverrideMaxWorkgroupSize = -1 DoCpuCopyOnReadBuffer = -1 DoCpuCopyOnWriteBuffer = -1 -AddBlockingSemaphoreAfterSpecificEnqueue = -1 -AddCacheFlushBeforeBlockingSemaphore = 0 +PauseOnEnqueue = -1 EnableDebugBreak = 1 FlushAllCaches = 0 MakeEachEnqueueBlocking = 0 diff --git a/shared/source/command_stream/command_stream_receiver.cpp b/shared/source/command_stream/command_stream_receiver.cpp index e17b38fc0a..6863255f3d 100644 --- a/shared/source/command_stream/command_stream_receiver.cpp +++ b/shared/source/command_stream/command_stream_receiver.cpp @@ -49,6 +49,11 @@ CommandStreamReceiver::CommandStreamReceiver(ExecutionEnvironment &executionEnvi } CommandStreamReceiver::~CommandStreamReceiver() { + if (userPauseConfirmation.joinable()) { + *debugPauseStateAddress = DebugPauseState::terminate; + userPauseConfirmation.join(); + } + for (int i = 0; i < IndirectHeap::NUM_TYPES; ++i) { if (indirectHeap[i] != nullptr) { auto allocation = indirectHeap[i]->getGraphicsAllocation(); @@ -238,6 +243,8 @@ void CommandStreamReceiver::setTagAllocation(GraphicsAllocation *allocation) { this->tagAllocation = allocation; UNRECOVERABLE_IF(allocation == nullptr); this->tagAddress = reinterpret_cast(allocation->getUnderlyingBuffer()); + this->debugPauseStateAddress = reinterpret_cast( + reinterpret_cast(allocation->getUnderlyingBuffer()) + debugPauseStateAddressOffset); } FlushStamp CommandStreamReceiver::obtainCurrentFlushStamp() const { @@ -390,6 +397,36 @@ bool CommandStreamReceiver::initializeTagAllocation() { this->setTagAllocation(tagAllocation); *this->tagAddress = DebugManager.flags.EnableNullHardware.get() ? -1 : initialHardwareTag; + *this->debugPauseStateAddress = DebugManager.flags.EnableNullHardware.get() ? DebugPauseState::disabled : DebugPauseState::waitingForFirstSemaphore; + + if (DebugManager.flags.PauseOnEnqueue.get() != -1) { + userPauseConfirmation = std::thread( + [this]() { + while (*debugPauseStateAddress != DebugPauseState::waitingForUserStartConfirmation) { + if (*debugPauseStateAddress == DebugPauseState::terminate) { + return; + } + std::this_thread::yield(); + } + + std::cout << "Debug break: Press enter to start workload" << std::endl; + debugConfirmationFunction(); + + *debugPauseStateAddress = DebugPauseState::hasUserStartConfirmation; + + while (*debugPauseStateAddress != DebugPauseState::waitingForUserEndConfirmation) { + if (*debugPauseStateAddress == DebugPauseState::terminate) { + return; + } + std::this_thread::yield(); + } + + std::cout << "Debug break: Workload ended, press enter to continue" << std::endl; + debugConfirmationFunction(); + + *debugPauseStateAddress = DebugPauseState::hasUserEndConfirmation; + }); + } return true; } diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index 299896decb..57b81e1938 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -52,6 +52,16 @@ enum class DispatchMode { BatchedDispatch // dispatching is batched, explicit clFlush is required }; +enum class DebugPauseState : uint32_t { + disabled, + waitingForFirstSemaphore, + waitingForUserStartConfirmation, + hasUserStartConfirmation, + waitingForUserEndConfirmation, + hasUserEndConfirmation, + terminate +}; + class CommandStreamReceiver { public: enum class SamplerCacheFlushState { @@ -59,6 +69,7 @@ class CommandStreamReceiver { samplerCacheFlushBefore, //add sampler cache flush before Walker with redescribed image samplerCacheFlushAfter //add sampler cache flush after Walker with redescribed image }; + using MutexType = std::recursive_mutex; CommandStreamReceiver(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex); virtual ~CommandStreamReceiver(); @@ -101,6 +112,7 @@ class CommandStreamReceiver { return tagAllocation; } volatile uint32_t *getTagAddress() const { return tagAddress; } + uint64_t getDebugPauseStateGPUAddress() const { return tagAllocation->getGpuAddress() + debugPauseStateAddressOffset; } virtual bool waitForFlushStamp(FlushStamp &flushStampToWait) { return true; }; @@ -231,6 +243,13 @@ class CommandStreamReceiver { LinearStream commandStream; volatile uint32_t *tagAddress = nullptr; + volatile DebugPauseState *debugPauseStateAddress = nullptr; + + // offset for debug state must be 8 bytes, if only 4 bytes are used tag writes overwrite it + const uint64_t debugPauseStateAddressOffset = 8; + + std::thread userPauseConfirmation; + std::function debugConfirmationFunction = []() { std::cin.get(); }; GraphicsAllocation *tagAllocation = nullptr; GraphicsAllocation *globalFenceAllocation = nullptr; @@ -253,6 +272,7 @@ class CommandStreamReceiver { // taskCount - # of tasks submitted uint32_t taskCount = 0; + uint32_t lastSentL3Config = 0; uint32_t latestSentStatelessMocsConfig = 0; uint32_t lastSentNumGrfRequired = GrfConfig::DefaultGrfNumber; diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index bd56890e3d..099e4b8f6f 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -45,8 +45,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, OverrideGpuAddressSpace, -1, "-1: Default, !=-1: DECLARE_DEBUG_VARIABLE(int32_t, OverrideMaxWorkgroupSize, -1, "-1: Default, !=-1: Overrides max worgkroup size to this value") DECLARE_DEBUG_VARIABLE(int32_t, DoCpuCopyOnReadBuffer, -1, "-1: default 0: do not use CPU copy, 1: triggers CPU copy path for Read Buffer calls, only supported for some basic use cases (no blocked user events in dependencies tree)") DECLARE_DEBUG_VARIABLE(int32_t, DoCpuCopyOnWriteBuffer, -1, "-1: default 0: do not use CPU copy, 1: triggers CPU copy path for Write Buffer calls, only supported for some basic use cases (no blocked user events in dependencies tree)") -DECLARE_DEBUG_VARIABLE(int32_t, AddBlockingSemaphoreAfterSpecificEnqueue, -1, "-1: Disabled. >=0: Zero based enqueue index. For debug only. It may not work correctly with multi CSR submissions") -DECLARE_DEBUG_VARIABLE(bool, AddCacheFlushBeforeBlockingSemaphore, false, "Add stalling pipe_control with cache flush before semaphore. Works only with AddBlockingSemaphoreAfterSpecificEnqueue>=0") +DECLARE_DEBUG_VARIABLE(int32_t, PauseOnEnqueue, -1, "-1: default x: pause on enqueue number x and ask for user confirmation before and after execution, counted from 0") DECLARE_DEBUG_VARIABLE(bool, EnableDebugBreak, true, "Enable DEBUG_BREAKs") DECLARE_DEBUG_VARIABLE(bool, FlushAllCaches, false, "pipe controls between enqueues flush all possible caches") DECLARE_DEBUG_VARIABLE(bool, MakeEachEnqueueBlocking, false, "equivalent of finish after each enqueue")