Debug functionality to pause before and after specified GPGPU_WALKER.

Resolves: NEO-3961

Change-Id: If797858c0f6a9758f9c1bc5472841dcfff93884b
Signed-off-by: Piotr Zdunowski <piotr.zdunowski@intel.com>
This commit is contained in:
Piotr Zdunowski
2020-04-30 17:12:01 +02:00
committed by sys_ocldev
parent 5323a99ab8
commit 5a2bff7706
12 changed files with 361 additions and 126 deletions

2
Jenkinsfile vendored
View File

@@ -2,4 +2,4 @@
dependenciesRevision='08be0789c5aab889fbf403d2b2968b402783d713-1413'
strategy='EQUAL'
allowedCD=257
allowedF=20
allowedF=21

View File

@@ -268,6 +268,10 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
this->isSpecialCommandQueue = newValue;
}
bool isSpecial() {
return this->isSpecialCommandQueue;
}
// Returns the priority value stored in this queue.
QueuePriority getPriority() const {
return priority;
}

View File

@@ -264,12 +264,11 @@ size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, c
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredForTimestampPacketWrite();
}
if (DebugManager.flags.AddBlockingSemaphoreAfterSpecificEnqueue.get() != -1) {
if (DebugManager.flags.AddCacheFlushBeforeBlockingSemaphore.get()) {
expectedSizeCS += MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl();
}
expectedSizeCS += sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
if (DebugManager.flags.PauseOnEnqueue.get() != -1) {
expectedSizeCS += MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl() * 2;
expectedSizeCS += sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT) * 2;
}
return expectedSizeCS;
}

View File

@@ -73,6 +73,12 @@ class HardwareInterface {
LinearStream *commandStream,
CommandQueue &commandQueue);
// Programs one debug pause point into commandStream for the PauseOnEnqueue debug flag.
// confirmationTrigger is the state value written to the debug pause state address,
// waitCondition is the state value the following semaphore waits for.
// Commands are emitted only when the GPGPU CSR task count equals PauseOnEnqueue
// and commandQueue is not a special queue.
static void dispatchDebugPauseCommands(
LinearStream *commandStream,
CommandQueue &commandQueue,
DebugPauseState confirmationTrigger,
DebugPauseState waitCondition);
static void programWalker(
LinearStream &commandStream,
Kernel &kernel,

View File

@@ -80,6 +80,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
DEBUG_BREAK_IF(offsetInterfaceDescriptorTable % 64 != 0);
dispatchProfilingPerfStartCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue);
dispatchDebugPauseCommands(commandStream, commandQueue, DebugPauseState::waitingForUserStartConfirmation, DebugPauseState::hasUserStartConfirmation);
size_t currentDispatchIndex = 0;
for (auto &dispatchInfo : multiDispatchInfo) {
@@ -101,24 +102,9 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
}
HardwareCommandsHelper<GfxFamily>::programCacheFlushAfterWalkerCommand(commandStream, commandQueue, mainKernel, postSyncAddress);
}
dispatchDebugPauseCommands(commandStream, commandQueue, DebugPauseState::waitingForUserEndConfirmation, DebugPauseState::hasUserEndConfirmation);
dispatchProfilingPerfEndCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue);
if (DebugManager.flags.AddBlockingSemaphoreAfterSpecificEnqueue.get() != -1) {
auto &gpgpuCsr = commandQueue.getGpgpuCommandStreamReceiver();
if (static_cast<uint32_t>(DebugManager.flags.AddBlockingSemaphoreAfterSpecificEnqueue.get()) == gpgpuCsr.peekTaskCount()) {
if (DebugManager.flags.AddCacheFlushBeforeBlockingSemaphore.get()) {
NEO::PipeControlArgs args(true);
MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandStream, args);
}
auto tagValue = *(gpgpuCsr.getTagAddress());
auto tagAddress = gpgpuCsr.getTagAllocation()->getGpuAddress();
// Wait for (tag == tag - 1). This will be never satisfied.
HardwareCommandsHelper<GfxFamily>::programMiSemaphoreWait(*commandStream, tagAddress, (tagValue - 1), GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD);
}
}
}
template <typename GfxFamily>
@@ -245,4 +231,44 @@ void HardwareInterface<GfxFamily>::obtainIndirectHeaps(CommandQueue &commandQueu
ssh = &getIndirectHeap<GfxFamily, IndirectHeap::SURFACE_STATE>(commandQueue, multiDispatchInfo);
}
}
// Programs one debug pause point for the PauseOnEnqueue debug flag:
//  1. a PIPE_CONTROL that writes confirmationTrigger to the debug pause state address,
//  2. an MI_SEMAPHORE_WAIT in polling mode that compares that address against waitCondition.
// Nothing is emitted unless the GPGPU CSR task count equals PauseOnEnqueue and the queue is not special.
template <typename GfxFamily>
inline void HardwareInterface<GfxFamily>::dispatchDebugPauseCommands(
    LinearStream *commandStream,
    CommandQueue &commandQueue,
    DebugPauseState confirmationTrigger,
    DebugPauseState waitCondition) {
    using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
    using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;

    auto &gpgpuCsr = commandQueue.getGpgpuCommandStreamReceiver();

    // The special-queue check is evaluated only when the task count matches the flag.
    const bool enqueueSelectedForPause = static_cast<int32_t>(gpgpuCsr.peekTaskCount()) == DebugManager.flags.PauseOnEnqueue.get();
    if (!enqueueSelectedForPause || commandQueue.isSpecial()) {
        return;
    }

    const auto pauseStateGpuAddress = gpgpuCsr.getDebugPauseStateGPUAddress();

    // Stalling PIPE_CONTROL with DC flush; its post-sync write stores the trigger state.
    auto pipeControl = reinterpret_cast<PIPE_CONTROL *>(commandStream->getSpace(sizeof(PIPE_CONTROL)));
    *pipeControl = GfxFamily::cmdInitPipeControl;
    pipeControl->setCommandStreamerStallEnable(true);
    pipeControl->setDcFlushEnable(true);
    pipeControl->setAddress(static_cast<uint32_t>(pauseStateGpuAddress & 0x0000FFFFFFFFULL));
    pipeControl->setAddressHigh(static_cast<uint32_t>(pauseStateGpuAddress >> 32));
    pipeControl->setPostSyncOperation(POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA);
    pipeControl->setImmediateData(static_cast<uint32_t>(confirmationTrigger));

    // Semaphore on the same address, compared for equality with the expected state.
    auto semaphoreWait = reinterpret_cast<MI_SEMAPHORE_WAIT *>(commandStream->getSpace(sizeof(MI_SEMAPHORE_WAIT)));
    *semaphoreWait = GfxFamily::cmdInitMiSemaphoreWait;
    semaphoreWait->setCompareOperation(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD);
    semaphoreWait->setSemaphoreDataDword(static_cast<uint32_t>(waitCondition));
    semaphoreWait->setSemaphoreGraphicsAddress(pauseStateGpuAddress);
    semaphoreWait->setWaitMode(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE);
}
} // namespace NEO

View File

@@ -486,105 +486,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueKernelTest, givenSecondEnqueueWithTheSameScra
EXPECT_EQ(csr.getScratchAllocation(), scratchAlloc);
}
// With AddBlockingSemaphoreAfterSpecificEnqueue set to 1 and two kernels enqueued, expects:
//  - a semaphore on the tag address waiting for (tag value - 1) after the second walker,
//  - the command directly before that semaphore is not a PIPE_CONTROL,
//  - no semaphore with that data dword on the tag address before the second walker.
HWTEST_F(EnqueueKernelTest, givenDebugFlagSetWhenDispatchWalkersThenSetBlockingSemaphoreAfterSpecificEnqueue) {
    using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    DebugManagerStateRestore restore;
    DebugManager.flags.AddBlockingSemaphoreAfterSpecificEnqueue.set(1);
    auto &csr = pDevice->getGpgpuCommandStreamReceiver();
    auto tagAddress = csr.getTagAllocation()->getGpuAddress();
    MockKernelWithInternals mockKernel(*pClDevice);
    size_t off[3] = {0, 0, 0};
    size_t gws[3] = {1, 1, 1};
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(*pCmdQ);
    auto &cmdList = hwParser.cmdList;

    // Fatal assertions: the iterator is advanced right after each check, which is invalid at end().
    auto lastWalker = find<WALKER_TYPE *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), lastWalker);
    lastWalker = find<WALKER_TYPE *>(++lastWalker, cmdList.end());
    ASSERT_NE(cmdList.end(), lastWalker);

    auto semaphore = find<MI_SEMAPHORE_WAIT *>(lastWalker, cmdList.end());
    bool semaphoreAfterWalkerFound = false;
    while (semaphore != cmdList.end()) {
        auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphore);
        if (tagAddress == semaphoreCmd->getSemaphoreGraphicsAddress()) {
            EXPECT_EQ((*(csr.getTagAddress()) - 1), semaphoreCmd->getSemaphoreDataDword());
            EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, semaphoreCmd->getCompareOperation());
            semaphoreAfterWalkerFound = true;
            break;
        }
        semaphore = find<MI_SEMAPHORE_WAIT *>(++semaphore, cmdList.end());
    }
    // Fatal: the predecessor of the semaphore is dereferenced next, which requires a found semaphore.
    ASSERT_TRUE(semaphoreAfterWalkerFound);
    EXPECT_EQ(nullptr, genCmdCast<PIPE_CONTROL *>(*(--semaphore)));

    semaphore = find<MI_SEMAPHORE_WAIT *>(cmdList.begin(), lastWalker);
    while (semaphore != lastWalker) {
        auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphore);
        if (tagAddress == semaphoreCmd->getSemaphoreGraphicsAddress()) {
            EXPECT_NE((*(csr.getTagAddress()) - 1), semaphoreCmd->getSemaphoreDataDword());
        }
        semaphore = find<MI_SEMAPHORE_WAIT *>(++semaphore, lastWalker);
    }
}
// With AddBlockingSemaphoreAfterSpecificEnqueue set to 1 and AddCacheFlushBeforeBlockingSemaphore enabled,
// expects the semaphore on the tag address after the second walker to be directly preceded by a
// PIPE_CONTROL with command streamer stall and DC flush enabled.
HWTEST_F(EnqueueKernelTest, givenDebugFlagSetWhenDispatchWalkersThenSetBlockingSemaphoreAfterSpecificEnqueueAndFlushCacheBefore) {
    using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    DebugManagerStateRestore restore;
    DebugManager.flags.AddBlockingSemaphoreAfterSpecificEnqueue.set(1);
    DebugManager.flags.AddCacheFlushBeforeBlockingSemaphore.set(true);
    auto &csr = pDevice->getGpgpuCommandStreamReceiver();
    auto tagAddress = csr.getTagAllocation()->getGpuAddress();
    MockKernelWithInternals mockKernel(*pClDevice);
    size_t off[3] = {0, 0, 0};
    size_t gws[3] = {1, 1, 1};
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(*pCmdQ);
    auto &cmdList = hwParser.cmdList;

    // Fatal assertions: the iterator is advanced right after each check, which is invalid at end().
    auto lastWalker = find<WALKER_TYPE *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), lastWalker);
    lastWalker = find<WALKER_TYPE *>(++lastWalker, cmdList.end());
    ASSERT_NE(cmdList.end(), lastWalker);

    auto semaphore = find<MI_SEMAPHORE_WAIT *>(lastWalker, cmdList.end());
    bool semaphoreAfterWalkerFound = false;
    while (semaphore != cmdList.end()) {
        auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphore);
        if (tagAddress == semaphoreCmd->getSemaphoreGraphicsAddress()) {
            EXPECT_EQ((*(csr.getTagAddress()) - 1), semaphoreCmd->getSemaphoreDataDword());
            EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, semaphoreCmd->getCompareOperation());
            semaphoreAfterWalkerFound = true;
            break;
        }
        semaphore = find<MI_SEMAPHORE_WAIT *>(++semaphore, cmdList.end());
    }
    // Fatal: the predecessor of the semaphore is dereferenced next, which requires a found semaphore.
    ASSERT_TRUE(semaphoreAfterWalkerFound);
    auto pipeControl = genCmdCast<PIPE_CONTROL *>(*(--semaphore));
    // Fatal: pipeControl is dereferenced by the expectations below.
    ASSERT_NE(nullptr, pipeControl);
    EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
    EXPECT_TRUE(pipeControl->getDcFlushEnable());
}
HWTEST_F(EnqueueKernelTest, whenEnqueueingKernelThatRequirePrivateScratchThenPrivateScratchIsSetInCommandStreamReceviver) {
pDevice->setPreemptionMode(PreemptionMode::ThreadGroup);
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
@@ -1357,3 +1258,152 @@ HWTEST_F(EnqueueKernelTest, whenEnqueueKernelWithEngineHintsThenEpilogRequiredIs
EXPECT_EQ(csr.recordedDispatchFlags.epilogueRequired, true);
EXPECT_EQ(csr.recordedDispatchFlags.engineHints, 1u);
}
// With PauseOnEnqueue set to 1 and two kernels enqueued, expects in the parsed command list:
//  - a polling MI_SEMAPHORE_WAIT on the debug pause state address for hasUserStartConfirmation,
//    followed later by one for hasUserEndConfirmation,
//  - a stalling, DC-flushing PIPE_CONTROL writing waitingForUserStartConfirmation to that address,
//    followed later by one writing waitingForUserEndConfirmation.
HWTEST_F(EnqueueKernelTest, givenPauseOnEnqueueFlagSetWhenDispatchWalkersThenInsertPauseCommandsAroundSpecifiedEnqueue) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    DebugManagerStateRestore restore;
    DebugManager.flags.PauseOnEnqueue.set(1);
    auto &csr = pDevice->getGpgpuCommandStreamReceiver();
    auto debugPauseStateAddress = csr.getDebugPauseStateGPUAddress();
    MockKernelWithInternals mockKernel(*pClDevice);
    size_t off[3] = {0, 0, 0};
    size_t gws[3] = {1, 1, 1};
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(*pCmdQ);
    auto &cmdList = hwParser.cmdList;

    // Semaphores: the "start" wait must be seen before the "end" wait.
    auto semaphore = find<MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
    bool semaphoreBeforeWalkerFound = false;
    bool semaphoreAfterWalkerFound = false;
    while (semaphore != cmdList.end()) {
        auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphore);
        if (static_cast<uint32_t>(DebugPauseState::hasUserStartConfirmation) == semaphoreCmd->getSemaphoreDataDword()) {
            EXPECT_EQ(debugPauseStateAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
            EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, semaphoreCmd->getCompareOperation());
            EXPECT_EQ(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE, semaphoreCmd->getWaitMode());
            semaphoreBeforeWalkerFound = true;
        }
        if (static_cast<uint32_t>(DebugPauseState::hasUserEndConfirmation) == semaphoreCmd->getSemaphoreDataDword()) {
            EXPECT_TRUE(semaphoreBeforeWalkerFound);
            EXPECT_EQ(debugPauseStateAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
            EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, semaphoreCmd->getCompareOperation());
            EXPECT_EQ(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE, semaphoreCmd->getWaitMode());
            semaphoreAfterWalkerFound = true;
            break;
        }
        semaphore = find<MI_SEMAPHORE_WAIT *>(++semaphore, cmdList.end());
    }
    EXPECT_TRUE(semaphoreAfterWalkerFound);

    // Pipe controls: the "start" trigger must be seen before the "end" trigger.
    auto pipeControl = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    bool pipeControlBeforeWalkerFound = false;
    bool pipeControlAfterWalkerFound = false;
    while (pipeControl != cmdList.end()) {
        auto pipeControlCmd = genCmdCast<PIPE_CONTROL *>(*pipeControl);
        if (static_cast<uint32_t>(DebugPauseState::waitingForUserStartConfirmation) == pipeControlCmd->getImmediateData()) {
            EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable());
            EXPECT_TRUE(pipeControlCmd->getDcFlushEnable());
            EXPECT_EQ(static_cast<uint32_t>(debugPauseStateAddress & 0x0000FFFFFFFFULL), pipeControlCmd->getAddress());
            EXPECT_EQ(static_cast<uint32_t>(debugPauseStateAddress >> 32), pipeControlCmd->getAddressHigh());
            EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControlCmd->getPostSyncOperation());
            pipeControlBeforeWalkerFound = true;
        }
        if (static_cast<uint32_t>(DebugPauseState::waitingForUserEndConfirmation) == pipeControlCmd->getImmediateData()) {
            EXPECT_TRUE(pipeControlBeforeWalkerFound);
            EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable());
            EXPECT_TRUE(pipeControlCmd->getDcFlushEnable());
            EXPECT_EQ(static_cast<uint32_t>(debugPauseStateAddress & 0x0000FFFFFFFFULL), pipeControlCmd->getAddress());
            EXPECT_EQ(static_cast<uint32_t>(debugPauseStateAddress >> 32), pipeControlCmd->getAddressHigh());
            EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControlCmd->getPostSyncOperation());
            pipeControlAfterWalkerFound = true;
            break;
        }
        pipeControl = find<PIPE_CONTROL *>(++pipeControl, cmdList.end());
    }
    EXPECT_TRUE(pipeControlAfterWalkerFound);
}
// With PauseOnEnqueue set to 0 but the queue marked as special, enqueues one kernel and expects
// neither a pause semaphore on the debug pause state address nor a PIPE_CONTROL carrying one of
// the "waiting for user confirmation" states as immediate data.
HWTEST_F(EnqueueKernelTest, givenPauseOnEnqueueFlagSetWhenDispatchWalkersThenDontInsertPauseCommandsWhenUsingSpecialQueue) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    DebugManagerStateRestore restore;
    DebugManager.flags.PauseOnEnqueue.set(0);
    auto &csr = pDevice->getGpgpuCommandStreamReceiver();
    auto debugPauseStateAddress = csr.getDebugPauseStateGPUAddress();
    pCmdQ->setIsSpecialCommandQueue(true);
    MockKernelWithInternals mockKernel(*pClDevice);
    size_t off[3] = {0, 0, 0};
    size_t gws[3] = {1, 1, 1};
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(*pCmdQ);
    auto &cmdList = hwParser.cmdList;

    auto semaphore = find<MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
    bool semaphoreBeforeWalkerFound = false;
    bool semaphoreAfterWalkerFound = false;
    while (semaphore != cmdList.end()) {
        auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphore);
        if (static_cast<uint32_t>(DebugPauseState::hasUserStartConfirmation) == semaphoreCmd->getSemaphoreDataDword() &&
            debugPauseStateAddress == semaphoreCmd->getSemaphoreGraphicsAddress()) {
            semaphoreBeforeWalkerFound = true;
        }
        if (static_cast<uint32_t>(DebugPauseState::hasUserEndConfirmation) == semaphoreCmd->getSemaphoreDataDword() &&
            debugPauseStateAddress == semaphoreCmd->getSemaphoreGraphicsAddress()) {
            semaphoreAfterWalkerFound = true;
        }
        semaphore = find<MI_SEMAPHORE_WAIT *>(++semaphore, cmdList.end());
    }
    EXPECT_FALSE(semaphoreBeforeWalkerFound);
    EXPECT_FALSE(semaphoreAfterWalkerFound);

    auto pipeControl = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    bool pipeControlBeforeWalkerFound = false;
    bool pipeControlAfterWalkerFound = false;
    while (pipeControl != cmdList.end()) {
        auto pipeControlCmd = genCmdCast<PIPE_CONTROL *>(*pipeControl);
        if (static_cast<uint32_t>(DebugPauseState::waitingForUserStartConfirmation) == pipeControlCmd->getImmediateData()) {
            pipeControlBeforeWalkerFound = true;
        }
        if (static_cast<uint32_t>(DebugPauseState::waitingForUserEndConfirmation) == pipeControlCmd->getImmediateData()) {
            pipeControlAfterWalkerFound = true;
        }
        pipeControl = find<PIPE_CONTROL *>(++pipeControl, cmdList.end());
    }
    EXPECT_FALSE(pipeControlBeforeWalkerFound);
    EXPECT_FALSE(pipeControlAfterWalkerFound);

    // Put the queue back into its non-special state.
    pCmdQ->setIsSpecialCommandQueue(false);
}

View File

@@ -656,6 +656,98 @@ TEST_F(CommandStreamReceiverTest, givenMinimumSizeExceedsCurrentAndNoSuitableReu
memoryManager->freeGraphicsMemory(commandStream.getGraphicsAllocation());
}
// Walks the debug pause thread through both stages by writing the "waiting" states by hand.
// Expects the confirmation callback to run once per stage, observing the matching state,
// and both prompts to appear on stdout.
HWTEST_F(CommandStreamReceiverTest, givenDebugPauseThreadWhenSettingFlagProgressThenFunctionAsksTwiceForConfirmation) {
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.PauseOnEnqueue.set(0);
    testing::internal::CaptureStdout();

    int32_t stamp = 0;
    auto csr = new MockCsr<FamilyType>(stamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex());

    uint32_t confirmations = 0;
    csr->debugConfirmationFunction = [&confirmations, &csr]() {
        switch (confirmations) {
        case 0:
            EXPECT_TRUE(DebugPauseState::waitingForUserStartConfirmation == *csr->debugPauseStateAddress);
            confirmations++;
            break;
        case 1:
            EXPECT_TRUE(DebugPauseState::waitingForUserEndConfirmation == *csr->debugPauseStateAddress);
            confirmations++;
            break;
        default:
            break;
        }
    };
    pDevice->resetCommandStreamReceiver(csr);

    // First stage: request the start confirmation and spin until it is acknowledged.
    *csr->debugPauseStateAddress = DebugPauseState::waitingForUserStartConfirmation;
    while (*csr->debugPauseStateAddress != DebugPauseState::hasUserStartConfirmation) {
    }

    // Second stage: request the end confirmation and spin until it is acknowledged.
    *csr->debugPauseStateAddress = DebugPauseState::waitingForUserEndConfirmation;
    while (*csr->debugPauseStateAddress != DebugPauseState::hasUserEndConfirmation) {
    }

    csr->userPauseConfirmation.join();
    EXPECT_EQ(2u, confirmations);

    auto capturedOutput = testing::internal::GetCapturedStdout();
    EXPECT_THAT(capturedOutput, testing::HasSubstr(std::string("Debug break: Press enter to start workload")));
    EXPECT_THAT(capturedOutput, testing::HasSubstr(std::string("Debug break: Workload ended, press enter to continue")));
}
// Writes the terminate state before any confirmation was requested.
// Expects the pause thread to be joinable to completion with no callback invocation and no stdout output.
HWTEST_F(CommandStreamReceiverTest, givenDebugPauseThreadWhenTerminatingAtFirstStageThenFunctionEndsCorrectly) {
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.PauseOnEnqueue.set(0);
    testing::internal::CaptureStdout();

    int32_t stamp = 0;
    auto csr = new MockCsr<FamilyType>(stamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex());

    uint32_t confirmations = 0;
    csr->debugConfirmationFunction = [&confirmations]() {
        confirmations++;
    };
    pDevice->resetCommandStreamReceiver(csr);

    *csr->debugPauseStateAddress = DebugPauseState::terminate;
    csr->userPauseConfirmation.join();

    EXPECT_EQ(0u, confirmations);
    auto capturedOutput = testing::internal::GetCapturedStdout();
    EXPECT_EQ(0u, capturedOutput.length());
}
// Completes the start-confirmation stage, then writes the terminate state instead of requesting
// the end confirmation. Expects exactly one callback invocation and only the first prompt on stdout.
HWTEST_F(CommandStreamReceiverTest, givenDebugPauseThreadWhenTerminatingAtSecondStageThenFunctionEndsCorrectly) {
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.PauseOnEnqueue.set(0);
    testing::internal::CaptureStdout();

    int32_t stamp = 0;
    auto csr = new MockCsr<FamilyType>(stamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex());

    uint32_t confirmations = 0;
    csr->debugConfirmationFunction = [&confirmations]() {
        confirmations++;
    };
    pDevice->resetCommandStreamReceiver(csr);

    // Request the start confirmation and spin until it is acknowledged.
    *csr->debugPauseStateAddress = DebugPauseState::waitingForUserStartConfirmation;
    while (*csr->debugPauseStateAddress != DebugPauseState::hasUserStartConfirmation) {
    }

    *csr->debugPauseStateAddress = DebugPauseState::terminate;
    csr->userPauseConfirmation.join();

    auto capturedOutput = testing::internal::GetCapturedStdout();
    EXPECT_THAT(capturedOutput, testing::HasSubstr(std::string("Debug break: Press enter to start workload")));
    EXPECT_THAT(capturedOutput, testing::Not(testing::HasSubstr(std::string("Debug break: Workload ended, press enter to continue"))));
    EXPECT_EQ(1u, confirmations);
}
// Value-parameterized variant of CommandStreamReceiverTest; each test instance receives
// a std::pair<bool, bool> parameter. The fixture adds no members of its own.
class CommandStreamReceiverWithAubSubCaptureTest : public CommandStreamReceiverTest,
public ::testing::WithParamInterface<std::pair<bool, bool>> {};

View File

@@ -47,6 +47,8 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::CommandStreamReceiver::bindingTableBaseAddressRequired;
using BaseClass::CommandStreamReceiver::cleanupResources;
using BaseClass::CommandStreamReceiver::commandStream;
using BaseClass::CommandStreamReceiver::debugConfirmationFunction;
using BaseClass::CommandStreamReceiver::debugPauseStateAddress;
using BaseClass::CommandStreamReceiver::dispatchMode;
using BaseClass::CommandStreamReceiver::executionEnvironment;
using BaseClass::CommandStreamReceiver::experimentalCmdBuffer;
@@ -84,6 +86,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::CommandStreamReceiver::taskLevel;
using BaseClass::CommandStreamReceiver::timestampPacketAllocator;
using BaseClass::CommandStreamReceiver::timestampPacketWriteEnabled;
using BaseClass::CommandStreamReceiver::userPauseConfirmation;
using BaseClass::CommandStreamReceiver::waitForTaskCountAndCleanAllocationList;
UltCommandStreamReceiver(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) : BaseClass(executionEnvironment, rootDeviceIndex), recursiveLockCounter(0),

View File

@@ -35,8 +35,7 @@ OverrideGpuAddressSpace = -1
OverrideMaxWorkgroupSize = -1
DoCpuCopyOnReadBuffer = -1
DoCpuCopyOnWriteBuffer = -1
AddBlockingSemaphoreAfterSpecificEnqueue = -1
AddCacheFlushBeforeBlockingSemaphore = 0
PauseOnEnqueue = -1
EnableDebugBreak = 1
FlushAllCaches = 0
MakeEachEnqueueBlocking = 0

View File

@@ -49,6 +49,11 @@ CommandStreamReceiver::CommandStreamReceiver(ExecutionEnvironment &executionEnvi
}
CommandStreamReceiver::~CommandStreamReceiver() {
if (userPauseConfirmation.joinable()) {
*debugPauseStateAddress = DebugPauseState::terminate;
userPauseConfirmation.join();
}
for (int i = 0; i < IndirectHeap::NUM_TYPES; ++i) {
if (indirectHeap[i] != nullptr) {
auto allocation = indirectHeap[i]->getGraphicsAllocation();
@@ -238,6 +243,8 @@ void CommandStreamReceiver::setTagAllocation(GraphicsAllocation *allocation) {
this->tagAllocation = allocation;
UNRECOVERABLE_IF(allocation == nullptr);
this->tagAddress = reinterpret_cast<uint32_t *>(allocation->getUnderlyingBuffer());
this->debugPauseStateAddress = reinterpret_cast<DebugPauseState *>(
reinterpret_cast<uint8_t *>(allocation->getUnderlyingBuffer()) + debugPauseStateAddressOffset);
}
FlushStamp CommandStreamReceiver::obtainCurrentFlushStamp() const {
@@ -390,6 +397,36 @@ bool CommandStreamReceiver::initializeTagAllocation() {
this->setTagAllocation(tagAllocation);
*this->tagAddress = DebugManager.flags.EnableNullHardware.get() ? -1 : initialHardwareTag;
*this->debugPauseStateAddress = DebugManager.flags.EnableNullHardware.get() ? DebugPauseState::disabled : DebugPauseState::waitingForFirstSemaphore;
if (DebugManager.flags.PauseOnEnqueue.get() != -1) {
userPauseConfirmation = std::thread(
[this]() {
while (*debugPauseStateAddress != DebugPauseState::waitingForUserStartConfirmation) {
if (*debugPauseStateAddress == DebugPauseState::terminate) {
return;
}
std::this_thread::yield();
}
std::cout << "Debug break: Press enter to start workload" << std::endl;
debugConfirmationFunction();
*debugPauseStateAddress = DebugPauseState::hasUserStartConfirmation;
while (*debugPauseStateAddress != DebugPauseState::waitingForUserEndConfirmation) {
if (*debugPauseStateAddress == DebugPauseState::terminate) {
return;
}
std::this_thread::yield();
}
std::cout << "Debug break: Workload ended, press enter to continue" << std::endl;
debugConfirmationFunction();
*debugPauseStateAddress = DebugPauseState::hasUserEndConfirmation;
});
}
return true;
}

View File

@@ -52,6 +52,16 @@ enum class DispatchMode {
BatchedDispatch // dispatching is batched, explicit clFlush is required
};
// States of the PauseOnEnqueue debug handshake, stored as a 32-bit value at the debug pause
// state address. The numeric values are spelled out because they are programmed into GPU
// commands as immediate data and as semaphore compare data.
enum class DebugPauseState : uint32_t {
    disabled = 0,                        // initial state when EnableNullHardware is set
    waitingForFirstSemaphore = 1,        // initial state otherwise
    waitingForUserStartConfirmation = 2, // written before the workload; the host prompts the user
    hasUserStartConfirmation = 3,        // written by the host once the user confirmed the start
    waitingForUserEndConfirmation = 4,   // written after the workload; the host prompts the user
    hasUserEndConfirmation = 5,          // written by the host once the user confirmed the end
    terminate = 6                        // written to make the host confirmation thread exit
};
class CommandStreamReceiver {
public:
enum class SamplerCacheFlushState {
@@ -59,6 +69,7 @@ class CommandStreamReceiver {
samplerCacheFlushBefore, //add sampler cache flush before Walker with redescribed image
samplerCacheFlushAfter //add sampler cache flush after Walker with redescribed image
};
using MutexType = std::recursive_mutex;
CommandStreamReceiver(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex);
virtual ~CommandStreamReceiver();
@@ -101,6 +112,7 @@ class CommandStreamReceiver {
return tagAllocation;
}
// Returns the stored pointer to the tag value (nullptr until a tag allocation has been set).
volatile uint32_t *getTagAddress() const { return tagAddress; }
// GPU address of the debug pause state: the tag allocation's GPU address plus debugPauseStateAddressOffset.
uint64_t getDebugPauseStateGPUAddress() const {
    const auto tagGpuAddress = tagAllocation->getGpuAddress();
    return tagGpuAddress + debugPauseStateAddressOffset;
}
// Base implementation: ignores flushStampToWait and always returns true; virtual so it can be overridden.
virtual bool waitForFlushStamp(FlushStamp &flushStampToWait) { return true; };
@@ -231,6 +243,13 @@ class CommandStreamReceiver {
LinearStream commandStream;
volatile uint32_t *tagAddress = nullptr;
volatile DebugPauseState *debugPauseStateAddress = nullptr;
// The debug pause state must be placed 8 bytes past the start of the tag allocation; at a 4-byte offset, tag writes overwrite it.
const uint64_t debugPauseStateAddressOffset = 8;
std::thread userPauseConfirmation;
std::function<void()> debugConfirmationFunction = []() { std::cin.get(); };
GraphicsAllocation *tagAllocation = nullptr;
GraphicsAllocation *globalFenceAllocation = nullptr;
@@ -253,6 +272,7 @@ class CommandStreamReceiver {
// taskCount - # of tasks submitted
uint32_t taskCount = 0;
uint32_t lastSentL3Config = 0;
uint32_t latestSentStatelessMocsConfig = 0;
uint32_t lastSentNumGrfRequired = GrfConfig::DefaultGrfNumber;

View File

@@ -45,8 +45,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, OverrideGpuAddressSpace, -1, "-1: Default, !=-1:
DECLARE_DEBUG_VARIABLE(int32_t, OverrideMaxWorkgroupSize, -1, "-1: Default, !=-1: Overrides max worgkroup size to this value")
DECLARE_DEBUG_VARIABLE(int32_t, DoCpuCopyOnReadBuffer, -1, "-1: default 0: do not use CPU copy, 1: triggers CPU copy path for Read Buffer calls, only supported for some basic use cases (no blocked user events in dependencies tree)")
DECLARE_DEBUG_VARIABLE(int32_t, DoCpuCopyOnWriteBuffer, -1, "-1: default 0: do not use CPU copy, 1: triggers CPU copy path for Write Buffer calls, only supported for some basic use cases (no blocked user events in dependencies tree)")
DECLARE_DEBUG_VARIABLE(int32_t, AddBlockingSemaphoreAfterSpecificEnqueue, -1, "-1: Disabled. >=0: Zero based enqueue index. For debug only. It may not work correctly with multi CSR submissions")
DECLARE_DEBUG_VARIABLE(bool, AddCacheFlushBeforeBlockingSemaphore, false, "Add stalling pipe_control with cache flush before semaphore. Works only with AddBlockingSemaphoreAfterSpecificEnqueue>=0")
DECLARE_DEBUG_VARIABLE(int32_t, PauseOnEnqueue, -1, "-1: default x: pause on enqueue number x and ask for user confirmation before and after execution, counted from 0")
DECLARE_DEBUG_VARIABLE(bool, EnableDebugBreak, true, "Enable DEBUG_BREAKs")
DECLARE_DEBUG_VARIABLE(bool, FlushAllCaches, false, "pipe controls between enqueues flush all possible caches")
DECLARE_DEBUG_VARIABLE(bool, MakeEachEnqueueBlocking, false, "equivalent of finish after each enqueue")