mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 09:14:47 +08:00
Debug functionality to pause before and after specified GPGPU_WALKER.
Resolves: NEO-3961
Change-Id: If797858c0f6a9758f9c1bc5472841dcfff93884b
Signed-off-by: Piotr Zdunowski <piotr.zdunowski@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
5323a99ab8
commit
5a2bff7706
2
Jenkinsfile
vendored
2
Jenkinsfile
vendored
@@ -2,4 +2,4 @@
|
||||
dependenciesRevision='08be0789c5aab889fbf403d2b2968b402783d713-1413'
|
||||
strategy='EQUAL'
|
||||
allowedCD=257
|
||||
allowedF=20
|
||||
allowedF=21
|
||||
|
||||
@@ -268,6 +268,10 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
this->isSpecialCommandQueue = newValue;
|
||||
}
|
||||
|
||||
bool isSpecial() {
|
||||
return this->isSpecialCommandQueue;
|
||||
}
|
||||
|
||||
// Read-only accessor for the priority stored on this queue.
QueuePriority getPriority() const {
    return this->priority;
}
|
||||
|
||||
@@ -264,12 +264,11 @@ size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, c
|
||||
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredForTimestampPacketWrite();
|
||||
}
|
||||
|
||||
if (DebugManager.flags.AddBlockingSemaphoreAfterSpecificEnqueue.get() != -1) {
|
||||
if (DebugManager.flags.AddCacheFlushBeforeBlockingSemaphore.get()) {
|
||||
expectedSizeCS += MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl();
|
||||
}
|
||||
expectedSizeCS += sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
|
||||
if (DebugManager.flags.PauseOnEnqueue.get() != -1) {
|
||||
expectedSizeCS += MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl() * 2;
|
||||
expectedSizeCS += sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT) * 2;
|
||||
}
|
||||
|
||||
return expectedSizeCS;
|
||||
}
|
||||
|
||||
|
||||
@@ -73,6 +73,12 @@ class HardwareInterface {
|
||||
LinearStream *commandStream,
|
||||
CommandQueue &commandQueue);
|
||||
|
||||
// Programs the debug-pause command pair into commandStream when the queue's
// GPGPU CSR task count matches the PauseOnEnqueue debug flag and the queue is
// not special: a PIPE_CONTROL writing confirmationTrigger to the debug pause
// state address, followed by an MI_SEMAPHORE_WAIT polling that address until
// it equals waitCondition.
static void dispatchDebugPauseCommands(
|
||||
LinearStream *commandStream,
|
||||
CommandQueue &commandQueue,
|
||||
DebugPauseState confirmationTrigger,
|
||||
DebugPauseState waitCondition);
|
||||
|
||||
static void programWalker(
|
||||
LinearStream &commandStream,
|
||||
Kernel &kernel,
|
||||
|
||||
@@ -80,6 +80,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
DEBUG_BREAK_IF(offsetInterfaceDescriptorTable % 64 != 0);
|
||||
|
||||
dispatchProfilingPerfStartCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue);
|
||||
dispatchDebugPauseCommands(commandStream, commandQueue, DebugPauseState::waitingForUserStartConfirmation, DebugPauseState::hasUserStartConfirmation);
|
||||
|
||||
size_t currentDispatchIndex = 0;
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
@@ -101,24 +102,9 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
}
|
||||
HardwareCommandsHelper<GfxFamily>::programCacheFlushAfterWalkerCommand(commandStream, commandQueue, mainKernel, postSyncAddress);
|
||||
}
|
||||
|
||||
dispatchDebugPauseCommands(commandStream, commandQueue, DebugPauseState::waitingForUserEndConfirmation, DebugPauseState::hasUserEndConfirmation);
|
||||
dispatchProfilingPerfEndCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue);
|
||||
|
||||
if (DebugManager.flags.AddBlockingSemaphoreAfterSpecificEnqueue.get() != -1) {
|
||||
auto &gpgpuCsr = commandQueue.getGpgpuCommandStreamReceiver();
|
||||
|
||||
if (static_cast<uint32_t>(DebugManager.flags.AddBlockingSemaphoreAfterSpecificEnqueue.get()) == gpgpuCsr.peekTaskCount()) {
|
||||
if (DebugManager.flags.AddCacheFlushBeforeBlockingSemaphore.get()) {
|
||||
NEO::PipeControlArgs args(true);
|
||||
MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandStream, args);
|
||||
}
|
||||
|
||||
auto tagValue = *(gpgpuCsr.getTagAddress());
|
||||
auto tagAddress = gpgpuCsr.getTagAllocation()->getGpuAddress();
|
||||
|
||||
// Wait for (tag == tag - 1). This will be never satisfied.
|
||||
HardwareCommandsHelper<GfxFamily>::programMiSemaphoreWait(*commandStream, tagAddress, (tagValue - 1), GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
@@ -245,4 +231,44 @@ void HardwareInterface<GfxFamily>::obtainIndirectHeaps(CommandQueue &commandQueu
|
||||
ssh = &getIndirectHeap<GfxFamily, IndirectHeap::SURFACE_STATE>(commandQueue, multiDispatchInfo);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline void HardwareInterface<GfxFamily>::dispatchDebugPauseCommands(
|
||||
LinearStream *commandStream,
|
||||
CommandQueue &commandQueue,
|
||||
DebugPauseState confirmationTrigger,
|
||||
DebugPauseState waitCondition) {
|
||||
|
||||
if (static_cast<int32_t>(commandQueue.getGpgpuCommandStreamReceiver().peekTaskCount()) == DebugManager.flags.PauseOnEnqueue.get() &&
|
||||
!commandQueue.isSpecial()) {
|
||||
auto address = commandQueue.getGpgpuCommandStreamReceiver().getDebugPauseStateGPUAddress();
|
||||
{
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
|
||||
|
||||
auto pCmd = (PIPE_CONTROL *)commandStream->getSpace(sizeof(PIPE_CONTROL));
|
||||
*pCmd = GfxFamily::cmdInitPipeControl;
|
||||
|
||||
pCmd->setCommandStreamerStallEnable(true);
|
||||
pCmd->setDcFlushEnable(true);
|
||||
pCmd->setAddress(static_cast<uint32_t>(address & 0x0000FFFFFFFFULL));
|
||||
pCmd->setAddressHigh(static_cast<uint32_t>(address >> 32));
|
||||
pCmd->setPostSyncOperation(POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA);
|
||||
pCmd->setImmediateData(static_cast<uint32_t>(confirmationTrigger));
|
||||
}
|
||||
|
||||
{
|
||||
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
|
||||
|
||||
auto pCmd = (MI_SEMAPHORE_WAIT *)commandStream->getSpace(sizeof(MI_SEMAPHORE_WAIT));
|
||||
*pCmd = GfxFamily::cmdInitMiSemaphoreWait;
|
||||
|
||||
pCmd->setCompareOperation(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD);
|
||||
pCmd->setSemaphoreDataDword(static_cast<uint32_t>(waitCondition));
|
||||
pCmd->setSemaphoreGraphicsAddress(address);
|
||||
pCmd->setWaitMode(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -486,105 +486,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueKernelTest, givenSecondEnqueueWithTheSameScra
|
||||
EXPECT_EQ(csr.getScratchAllocation(), scratchAlloc);
|
||||
}
|
||||
|
||||
// With AddBlockingSemaphoreAfterSpecificEnqueue=1, checks that the second
// enqueue's walker is followed by an MI_SEMAPHORE_WAIT on the tag address that
// waits for (tag - 1), that no PIPE_CONTROL directly precedes that semaphore,
// and that no such semaphore is programmed before the second walker.
//
// Iterator validity is checked with ASSERT_* (fatal) because the iterators are
// advanced or dereferenced right afterwards; a non-fatal EXPECT_* would let
// the test run into undefined behavior when a command is missing.
HWTEST_F(EnqueueKernelTest, givenDebugFlagSetWhenDispatchWalkersThenSetBlockingSemaphoreAfterSpecificEnqueue) {
    using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    DebugManagerStateRestore restore;
    DebugManager.flags.AddBlockingSemaphoreAfterSpecificEnqueue.set(1);

    auto &csr = pDevice->getGpgpuCommandStreamReceiver();
    auto tagAddress = csr.getTagAllocation()->getGpuAddress();
    MockKernelWithInternals mockKernel(*pClDevice);

    size_t off[3] = {0, 0, 0};
    size_t gws[3] = {1, 1, 1};

    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(*pCmdQ);
    auto &cmdList = hwParser.cmdList;

    // Locate the walker of the second enqueue.
    auto lastWalker = find<WALKER_TYPE *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), lastWalker);
    lastWalker = find<WALKER_TYPE *>(++lastWalker, cmdList.end());
    ASSERT_NE(cmdList.end(), lastWalker);

    auto semaphore = find<MI_SEMAPHORE_WAIT *>(lastWalker, cmdList.end());
    bool semaphoreAfterWalkerFound = false;
    while (semaphore != cmdList.end()) {
        auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphore);
        if (tagAddress == semaphoreCmd->getSemaphoreGraphicsAddress()) {
            EXPECT_EQ((*(csr.getTagAddress()) - 1), semaphoreCmd->getSemaphoreDataDword());
            EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, semaphoreCmd->getCompareOperation());
            semaphoreAfterWalkerFound = true;
            break;
        }
        semaphore = find<MI_SEMAPHORE_WAIT *>(++semaphore, cmdList.end());
    }
    // Fatal: the iterator is stepped back and dereferenced below.
    ASSERT_TRUE(semaphoreAfterWalkerFound);

    // Cache flush was not requested, so the previous command must not be a PIPE_CONTROL.
    EXPECT_EQ(nullptr, genCmdCast<PIPE_CONTROL *>(*(--semaphore)));

    // No blocking semaphore may appear before the second walker.
    semaphore = find<MI_SEMAPHORE_WAIT *>(cmdList.begin(), lastWalker);
    while (semaphore != lastWalker) {
        auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphore);
        if (tagAddress == semaphoreCmd->getSemaphoreGraphicsAddress()) {
            EXPECT_NE((*(csr.getTagAddress()) - 1), semaphoreCmd->getSemaphoreDataDword());
        }
        semaphore = find<MI_SEMAPHORE_WAIT *>(++semaphore, lastWalker);
    }
}
|
||||
|
||||
// With AddBlockingSemaphoreAfterSpecificEnqueue=1 and
// AddCacheFlushBeforeBlockingSemaphore enabled, checks that the blocking
// MI_SEMAPHORE_WAIT after the second walker is directly preceded by a
// PIPE_CONTROL with CS stall and DC flush enabled.
//
// ASSERT_* (fatal) is used wherever the checked iterator or pointer is used
// immediately afterwards, so a missing command fails the test instead of
// causing undefined behavior.
HWTEST_F(EnqueueKernelTest, givenDebugFlagSetWhenDispatchWalkersThenSetBlockingSemaphoreAfterSpecificEnqueueAndFlushCacheBefore) {
    using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    DebugManagerStateRestore restore;
    DebugManager.flags.AddBlockingSemaphoreAfterSpecificEnqueue.set(1);
    DebugManager.flags.AddCacheFlushBeforeBlockingSemaphore.set(true);

    auto &csr = pDevice->getGpgpuCommandStreamReceiver();
    auto tagAddress = csr.getTagAllocation()->getGpuAddress();
    MockKernelWithInternals mockKernel(*pClDevice);

    size_t off[3] = {0, 0, 0};
    size_t gws[3] = {1, 1, 1};

    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(*pCmdQ);
    auto &cmdList = hwParser.cmdList;

    // Locate the walker of the second enqueue.
    auto lastWalker = find<WALKER_TYPE *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), lastWalker);
    lastWalker = find<WALKER_TYPE *>(++lastWalker, cmdList.end());
    ASSERT_NE(cmdList.end(), lastWalker);

    auto semaphore = find<MI_SEMAPHORE_WAIT *>(lastWalker, cmdList.end());
    bool semaphoreAfterWalkerFound = false;
    while (semaphore != cmdList.end()) {
        auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphore);
        if (tagAddress == semaphoreCmd->getSemaphoreGraphicsAddress()) {
            EXPECT_EQ((*(csr.getTagAddress()) - 1), semaphoreCmd->getSemaphoreDataDword());
            EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, semaphoreCmd->getCompareOperation());
            semaphoreAfterWalkerFound = true;
            break;
        }
        semaphore = find<MI_SEMAPHORE_WAIT *>(++semaphore, cmdList.end());
    }
    // Fatal: the iterator is stepped back and dereferenced below.
    ASSERT_TRUE(semaphoreAfterWalkerFound);

    // The command right before the semaphore must be the flushing PIPE_CONTROL.
    auto pipeControl = genCmdCast<PIPE_CONTROL *>(*(--semaphore));
    ASSERT_NE(nullptr, pipeControl);
    EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
    EXPECT_TRUE(pipeControl->getDcFlushEnable());
}
|
||||
|
||||
HWTEST_F(EnqueueKernelTest, whenEnqueueingKernelThatRequirePrivateScratchThenPrivateScratchIsSetInCommandStreamReceviver) {
|
||||
pDevice->setPreemptionMode(PreemptionMode::ThreadGroup);
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
@@ -1357,3 +1258,152 @@ HWTEST_F(EnqueueKernelTest, whenEnqueueKernelWithEngineHintsThenEpilogRequiredIs
|
||||
EXPECT_EQ(csr.recordedDispatchFlags.epilogueRequired, true);
|
||||
EXPECT_EQ(csr.recordedDispatchFlags.engineHints, 1u);
|
||||
}
|
||||
|
||||
// With PauseOnEnqueue=1 and two enqueues, checks that the command stream
// contains the debug-pause handshake in order: a PIPE_CONTROL writing
// waitingForUserStartConfirmation and a semaphore waiting for
// hasUserStartConfirmation, later followed by a PIPE_CONTROL writing
// waitingForUserEndConfirmation and a semaphore waiting for
// hasUserEndConfirmation.
//
// Pause commands are identified by BOTH the debug pause state address and the
// state value. Matching on the value alone would also pick up unrelated
// commands whose immediate data / data dword happens to equal an enum value.
HWTEST_F(EnqueueKernelTest, givenPauseOnEnqueueFlagSetWhenDispatchWalkersThenInsertPauseCommandsAroundSpecifiedEnqueue) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    DebugManagerStateRestore restore;
    DebugManager.flags.PauseOnEnqueue.set(1);

    auto &csr = pDevice->getGpgpuCommandStreamReceiver();
    auto debugPauseStateAddress = csr.getDebugPauseStateGPUAddress();
    const auto expectedAddressLow = static_cast<uint32_t>(debugPauseStateAddress & 0x0000FFFFFFFFULL);
    const auto expectedAddressHigh = static_cast<uint32_t>(debugPauseStateAddress >> 32);

    MockKernelWithInternals mockKernel(*pClDevice);

    size_t off[3] = {0, 0, 0};
    size_t gws[3] = {1, 1, 1};

    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(*pCmdQ);
    auto &cmdList = hwParser.cmdList;

    // Semaphores: start confirmation must come before end confirmation.
    auto semaphore = find<MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
    bool semaphoreBeforeWalkerFound = false;
    bool semaphoreAfterWalkerFound = false;
    while (semaphore != cmdList.end()) {
        auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphore);
        const bool waitsOnPauseState = debugPauseStateAddress == semaphoreCmd->getSemaphoreGraphicsAddress();

        if (waitsOnPauseState &&
            static_cast<uint32_t>(DebugPauseState::hasUserStartConfirmation) == semaphoreCmd->getSemaphoreDataDword()) {
            EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, semaphoreCmd->getCompareOperation());
            EXPECT_EQ(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE, semaphoreCmd->getWaitMode());

            semaphoreBeforeWalkerFound = true;
        }

        if (waitsOnPauseState &&
            static_cast<uint32_t>(DebugPauseState::hasUserEndConfirmation) == semaphoreCmd->getSemaphoreDataDword()) {
            EXPECT_TRUE(semaphoreBeforeWalkerFound);
            EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, semaphoreCmd->getCompareOperation());
            EXPECT_EQ(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE, semaphoreCmd->getWaitMode());

            semaphoreAfterWalkerFound = true;
            break;
        }

        semaphore = find<MI_SEMAPHORE_WAIT *>(++semaphore, cmdList.end());
    }

    EXPECT_TRUE(semaphoreAfterWalkerFound);

    // Pipe controls: the "waiting for start" write must come before "waiting for end".
    auto pipeControl = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    bool pipeControlBeforeWalkerFound = false;
    bool pipeControlAfterWalkerFound = false;
    while (pipeControl != cmdList.end()) {
        auto pipeControlCmd = genCmdCast<PIPE_CONTROL *>(*pipeControl);
        const bool writesPauseState = expectedAddressLow == pipeControlCmd->getAddress() &&
                                      expectedAddressHigh == pipeControlCmd->getAddressHigh();

        if (writesPauseState &&
            static_cast<uint32_t>(DebugPauseState::waitingForUserStartConfirmation) == pipeControlCmd->getImmediateData()) {
            EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable());
            EXPECT_TRUE(pipeControlCmd->getDcFlushEnable());
            EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControlCmd->getPostSyncOperation());

            pipeControlBeforeWalkerFound = true;
        }

        if (writesPauseState &&
            static_cast<uint32_t>(DebugPauseState::waitingForUserEndConfirmation) == pipeControlCmd->getImmediateData()) {
            EXPECT_TRUE(pipeControlBeforeWalkerFound);
            EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable());
            EXPECT_TRUE(pipeControlCmd->getDcFlushEnable());
            EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControlCmd->getPostSyncOperation());

            pipeControlAfterWalkerFound = true;
            break;
        }

        pipeControl = find<PIPE_CONTROL *>(++pipeControl, cmdList.end());
    }

    EXPECT_TRUE(pipeControlAfterWalkerFound);
}
|
||||
|
||||
// With PauseOnEnqueue=0 but the queue marked as special, checks that no
// debug-pause PIPE_CONTROL or MI_SEMAPHORE_WAIT is programmed for the enqueue.
//
// A command counts as a pause command only when it targets the debug pause
// state address AND carries the matching state value. The PIPE_CONTROL check
// uses the same address criterion as the semaphore check, so unrelated pipe
// controls whose immediate data coincides with an enum value are ignored.
HWTEST_F(EnqueueKernelTest, givenPauseOnEnqueueFlagSetWhenDispatchWalkersThenDontInsertPauseCommandsWhenUsingSpecialQueue) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    DebugManagerStateRestore restore;
    DebugManager.flags.PauseOnEnqueue.set(0);

    auto &csr = pDevice->getGpgpuCommandStreamReceiver();
    auto debugPauseStateAddress = csr.getDebugPauseStateGPUAddress();
    const auto expectedAddressLow = static_cast<uint32_t>(debugPauseStateAddress & 0x0000FFFFFFFFULL);
    const auto expectedAddressHigh = static_cast<uint32_t>(debugPauseStateAddress >> 32);

    pCmdQ->setIsSpecialCommandQueue(true);

    MockKernelWithInternals mockKernel(*pClDevice);

    size_t off[3] = {0, 0, 0};
    size_t gws[3] = {1, 1, 1};

    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(*pCmdQ);
    auto &cmdList = hwParser.cmdList;

    auto semaphore = find<MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
    bool semaphoreBeforeWalkerFound = false;
    bool semaphoreAfterWalkerFound = false;
    while (semaphore != cmdList.end()) {
        auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphore);
        if (static_cast<uint32_t>(DebugPauseState::hasUserStartConfirmation) == semaphoreCmd->getSemaphoreDataDword() &&
            debugPauseStateAddress == semaphoreCmd->getSemaphoreGraphicsAddress()) {
            semaphoreBeforeWalkerFound = true;
        }

        if (static_cast<uint32_t>(DebugPauseState::hasUserEndConfirmation) == semaphoreCmd->getSemaphoreDataDword() &&
            debugPauseStateAddress == semaphoreCmd->getSemaphoreGraphicsAddress()) {
            semaphoreAfterWalkerFound = true;
        }

        semaphore = find<MI_SEMAPHORE_WAIT *>(++semaphore, cmdList.end());
    }

    EXPECT_FALSE(semaphoreBeforeWalkerFound);
    EXPECT_FALSE(semaphoreAfterWalkerFound);

    auto pipeControl = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    bool pipeControlBeforeWalkerFound = false;
    bool pipeControlAfterWalkerFound = false;
    while (pipeControl != cmdList.end()) {
        auto pipeControlCmd = genCmdCast<PIPE_CONTROL *>(*pipeControl);
        const bool writesPauseState = expectedAddressLow == pipeControlCmd->getAddress() &&
                                      expectedAddressHigh == pipeControlCmd->getAddressHigh();

        if (writesPauseState &&
            static_cast<uint32_t>(DebugPauseState::waitingForUserStartConfirmation) == pipeControlCmd->getImmediateData()) {
            pipeControlBeforeWalkerFound = true;
        }

        if (writesPauseState &&
            static_cast<uint32_t>(DebugPauseState::waitingForUserEndConfirmation) == pipeControlCmd->getImmediateData()) {
            pipeControlAfterWalkerFound = true;
        }

        pipeControl = find<PIPE_CONTROL *>(++pipeControl, cmdList.end());
    }

    EXPECT_FALSE(pipeControlBeforeWalkerFound);
    EXPECT_FALSE(pipeControlAfterWalkerFound);

    // Put the queue back into its regular (non-special) state.
    pCmdQ->setIsSpecialCommandQueue(false);
}
|
||||
|
||||
@@ -656,6 +656,98 @@ TEST_F(CommandStreamReceiverTest, givenMinimumSizeExceedsCurrentAndNoSuitableReu
|
||||
memoryManager->freeGraphicsMemory(commandStream.getGraphicsAllocation());
|
||||
}
|
||||
|
||||
// Walks the debug pause state through the full handshake by hand (playing the
// role of the GPU writes) and checks that the pause thread calls the
// confirmation function exactly twice, each time in the expected state, and
// prints both prompts to stdout.
HWTEST_F(CommandStreamReceiverTest, givenDebugPauseThreadWhenSettingFlagProgressThenFunctionAsksTwiceForConfirmation) {
    DebugManagerStateRestore restore;
    DebugManager.flags.PauseOnEnqueue.set(0);
    testing::internal::CaptureStdout();
    int32_t executionStamp = 0;
    auto pauseCsr = new MockCsr<FamilyType>(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex());

    uint32_t confirmationsSeen = 0;

    // Replaces the default stdin-based confirmation so the test never blocks on input.
    pauseCsr->debugConfirmationFunction = [&confirmationsSeen, pauseCsr]() {
        switch (confirmationsSeen) {
        case 0:
            EXPECT_TRUE(DebugPauseState::waitingForUserStartConfirmation == *pauseCsr->debugPauseStateAddress);
            confirmationsSeen++;
            break;
        case 1:
            EXPECT_TRUE(DebugPauseState::waitingForUserEndConfirmation == *pauseCsr->debugPauseStateAddress);
            confirmationsSeen++;
            break;
        default:
            break;
        }
    };

    pDevice->resetCommandStreamReceiver(pauseCsr);

    // First stage: request start confirmation and spin until the thread acknowledges it.
    *pauseCsr->debugPauseStateAddress = DebugPauseState::waitingForUserStartConfirmation;
    while (*pauseCsr->debugPauseStateAddress != DebugPauseState::hasUserStartConfirmation) {
    }

    // Second stage: request end confirmation and spin until the thread acknowledges it.
    *pauseCsr->debugPauseStateAddress = DebugPauseState::waitingForUserEndConfirmation;
    while (*pauseCsr->debugPauseStateAddress != DebugPauseState::hasUserEndConfirmation) {
    }

    pauseCsr->userPauseConfirmation.join();

    EXPECT_EQ(2u, confirmationsSeen);

    auto capturedOutput = testing::internal::GetCapturedStdout();
    EXPECT_THAT(capturedOutput, testing::HasSubstr(std::string("Debug break: Press enter to start workload")));
    EXPECT_THAT(capturedOutput, testing::HasSubstr(std::string("Debug break: Workload ended, press enter to continue")));
}
|
||||
|
||||
// Writes DebugPauseState::terminate before any confirmation was requested and
// checks that the pause thread exits without calling the confirmation function
// and without printing anything.
HWTEST_F(CommandStreamReceiverTest, givenDebugPauseThreadWhenTerminatingAtFirstStageThenFunctionEndsCorrectly) {
    DebugManagerStateRestore restore;
    DebugManager.flags.PauseOnEnqueue.set(0);
    testing::internal::CaptureStdout();
    int32_t executionStamp = 0;
    auto pauseCsr = new MockCsr<FamilyType>(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex());

    uint32_t confirmationsSeen = 0;
    pauseCsr->debugConfirmationFunction = [&confirmationsSeen]() { confirmationsSeen++; };

    pDevice->resetCommandStreamReceiver(pauseCsr);

    // Terminate while the thread is still waiting for the first stage.
    *pauseCsr->debugPauseStateAddress = DebugPauseState::terminate;
    pauseCsr->userPauseConfirmation.join();

    EXPECT_EQ(0u, confirmationsSeen);
    auto capturedOutput = testing::internal::GetCapturedStdout();
    EXPECT_EQ(0u, capturedOutput.length());
}
|
||||
|
||||
// Completes the start-confirmation stage, then writes
// DebugPauseState::terminate and checks that the pause thread exits after a
// single confirmation: only the "start workload" prompt is printed.
HWTEST_F(CommandStreamReceiverTest, givenDebugPauseThreadWhenTerminatingAtSecondStageThenFunctionEndsCorrectly) {
    DebugManagerStateRestore restore;
    DebugManager.flags.PauseOnEnqueue.set(0);
    testing::internal::CaptureStdout();
    int32_t executionStamp = 0;
    auto pauseCsr = new MockCsr<FamilyType>(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex());

    uint32_t confirmationsSeen = 0;
    pauseCsr->debugConfirmationFunction = [&confirmationsSeen]() { confirmationsSeen++; };

    pDevice->resetCommandStreamReceiver(pauseCsr);

    // First stage: request start confirmation and spin until the thread acknowledges it.
    *pauseCsr->debugPauseStateAddress = DebugPauseState::waitingForUserStartConfirmation;
    while (*pauseCsr->debugPauseStateAddress != DebugPauseState::hasUserStartConfirmation) {
    }

    // Terminate while the thread is waiting for the second stage.
    *pauseCsr->debugPauseStateAddress = DebugPauseState::terminate;
    pauseCsr->userPauseConfirmation.join();

    auto capturedOutput = testing::internal::GetCapturedStdout();
    EXPECT_EQ(1u, confirmationsSeen);
    EXPECT_THAT(capturedOutput, testing::HasSubstr(std::string("Debug break: Press enter to start workload")));
    EXPECT_THAT(capturedOutput, testing::Not(testing::HasSubstr(std::string("Debug break: Workload ended, press enter to continue"))));
}
|
||||
|
||||
class CommandStreamReceiverWithAubSubCaptureTest : public CommandStreamReceiverTest,
|
||||
public ::testing::WithParamInterface<std::pair<bool, bool>> {};
|
||||
|
||||
|
||||
@@ -47,6 +47,8 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
using BaseClass::CommandStreamReceiver::bindingTableBaseAddressRequired;
|
||||
using BaseClass::CommandStreamReceiver::cleanupResources;
|
||||
using BaseClass::CommandStreamReceiver::commandStream;
|
||||
using BaseClass::CommandStreamReceiver::debugConfirmationFunction;
|
||||
using BaseClass::CommandStreamReceiver::debugPauseStateAddress;
|
||||
using BaseClass::CommandStreamReceiver::dispatchMode;
|
||||
using BaseClass::CommandStreamReceiver::executionEnvironment;
|
||||
using BaseClass::CommandStreamReceiver::experimentalCmdBuffer;
|
||||
@@ -84,6 +86,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
using BaseClass::CommandStreamReceiver::taskLevel;
|
||||
using BaseClass::CommandStreamReceiver::timestampPacketAllocator;
|
||||
using BaseClass::CommandStreamReceiver::timestampPacketWriteEnabled;
|
||||
using BaseClass::CommandStreamReceiver::userPauseConfirmation;
|
||||
using BaseClass::CommandStreamReceiver::waitForTaskCountAndCleanAllocationList;
|
||||
|
||||
UltCommandStreamReceiver(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) : BaseClass(executionEnvironment, rootDeviceIndex), recursiveLockCounter(0),
|
||||
|
||||
@@ -35,8 +35,7 @@ OverrideGpuAddressSpace = -1
|
||||
OverrideMaxWorkgroupSize = -1
|
||||
DoCpuCopyOnReadBuffer = -1
|
||||
DoCpuCopyOnWriteBuffer = -1
|
||||
AddBlockingSemaphoreAfterSpecificEnqueue = -1
|
||||
AddCacheFlushBeforeBlockingSemaphore = 0
|
||||
PauseOnEnqueue = -1
|
||||
EnableDebugBreak = 1
|
||||
FlushAllCaches = 0
|
||||
MakeEachEnqueueBlocking = 0
|
||||
|
||||
@@ -49,6 +49,11 @@ CommandStreamReceiver::CommandStreamReceiver(ExecutionEnvironment &executionEnvi
|
||||
}
|
||||
|
||||
CommandStreamReceiver::~CommandStreamReceiver() {
|
||||
if (userPauseConfirmation.joinable()) {
|
||||
*debugPauseStateAddress = DebugPauseState::terminate;
|
||||
userPauseConfirmation.join();
|
||||
}
|
||||
|
||||
for (int i = 0; i < IndirectHeap::NUM_TYPES; ++i) {
|
||||
if (indirectHeap[i] != nullptr) {
|
||||
auto allocation = indirectHeap[i]->getGraphicsAllocation();
|
||||
@@ -238,6 +243,8 @@ void CommandStreamReceiver::setTagAllocation(GraphicsAllocation *allocation) {
|
||||
this->tagAllocation = allocation;
|
||||
UNRECOVERABLE_IF(allocation == nullptr);
|
||||
this->tagAddress = reinterpret_cast<uint32_t *>(allocation->getUnderlyingBuffer());
|
||||
this->debugPauseStateAddress = reinterpret_cast<DebugPauseState *>(
|
||||
reinterpret_cast<uint8_t *>(allocation->getUnderlyingBuffer()) + debugPauseStateAddressOffset);
|
||||
}
|
||||
|
||||
FlushStamp CommandStreamReceiver::obtainCurrentFlushStamp() const {
|
||||
@@ -390,6 +397,36 @@ bool CommandStreamReceiver::initializeTagAllocation() {
|
||||
|
||||
this->setTagAllocation(tagAllocation);
|
||||
*this->tagAddress = DebugManager.flags.EnableNullHardware.get() ? -1 : initialHardwareTag;
|
||||
*this->debugPauseStateAddress = DebugManager.flags.EnableNullHardware.get() ? DebugPauseState::disabled : DebugPauseState::waitingForFirstSemaphore;
|
||||
|
||||
if (DebugManager.flags.PauseOnEnqueue.get() != -1) {
|
||||
userPauseConfirmation = std::thread(
|
||||
[this]() {
|
||||
while (*debugPauseStateAddress != DebugPauseState::waitingForUserStartConfirmation) {
|
||||
if (*debugPauseStateAddress == DebugPauseState::terminate) {
|
||||
return;
|
||||
}
|
||||
std::this_thread::yield();
|
||||
}
|
||||
|
||||
std::cout << "Debug break: Press enter to start workload" << std::endl;
|
||||
debugConfirmationFunction();
|
||||
|
||||
*debugPauseStateAddress = DebugPauseState::hasUserStartConfirmation;
|
||||
|
||||
while (*debugPauseStateAddress != DebugPauseState::waitingForUserEndConfirmation) {
|
||||
if (*debugPauseStateAddress == DebugPauseState::terminate) {
|
||||
return;
|
||||
}
|
||||
std::this_thread::yield();
|
||||
}
|
||||
|
||||
std::cout << "Debug break: Workload ended, press enter to continue" << std::endl;
|
||||
debugConfirmationFunction();
|
||||
|
||||
*debugPauseStateAddress = DebugPauseState::hasUserEndConfirmation;
|
||||
});
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -52,6 +52,16 @@ enum class DispatchMode {
|
||||
BatchedDispatch // dispatching is batched, explicit clFlush is required
|
||||
};
|
||||
|
||||
// States of the debug pause handshake. The value is stored as a 32-bit word
// next to the CSR tag; it is written both by GPU commands (PIPE_CONTROL
// immediate data) and by the CPU-side pause thread.
enum class DebugPauseState : uint32_t {
    disabled = 0,                        // initial state when EnableNullHardware is set
    waitingForFirstSemaphore = 1,        // initial state otherwise
    waitingForUserStartConfirmation = 2, // written before the walker; pause thread prompts the user
    hasUserStartConfirmation = 3,        // written by the pause thread; releases the "start" semaphore
    waitingForUserEndConfirmation = 4,   // written after the walker; pause thread prompts the user
    hasUserEndConfirmation = 5,          // written by the pause thread; releases the "end" semaphore
    terminate = 6                        // written by the CSR destructor; makes the pause thread return
};
|
||||
|
||||
class CommandStreamReceiver {
|
||||
public:
|
||||
enum class SamplerCacheFlushState {
|
||||
@@ -59,6 +69,7 @@ class CommandStreamReceiver {
|
||||
samplerCacheFlushBefore, //add sampler cache flush before Walker with redescribed image
|
||||
samplerCacheFlushAfter //add sampler cache flush after Walker with redescribed image
|
||||
};
|
||||
|
||||
using MutexType = std::recursive_mutex;
|
||||
CommandStreamReceiver(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex);
|
||||
virtual ~CommandStreamReceiver();
|
||||
@@ -101,6 +112,7 @@ class CommandStreamReceiver {
|
||||
return tagAllocation;
|
||||
}
|
||||
volatile uint32_t *getTagAddress() const { return tagAddress; }
|
||||
// GPU address of the debug pause state: the tag allocation's GPU address plus debugPauseStateAddressOffset.
uint64_t getDebugPauseStateGPUAddress() const { return tagAllocation->getGpuAddress() + debugPauseStateAddressOffset; }
|
||||
|
||||
virtual bool waitForFlushStamp(FlushStamp &flushStampToWait) { return true; };
|
||||
|
||||
@@ -231,6 +243,13 @@ class CommandStreamReceiver {
|
||||
LinearStream commandStream;
|
||||
|
||||
volatile uint32_t *tagAddress = nullptr;
|
||||
volatile DebugPauseState *debugPauseStateAddress = nullptr;
|
||||
|
||||
// The debug pause state must be placed at an 8-byte offset; with only a 4-byte offset, tag writes would overwrite it.
|
||||
const uint64_t debugPauseStateAddressOffset = 8;
|
||||
|
||||
std::thread userPauseConfirmation;
|
||||
std::function<void()> debugConfirmationFunction = []() { std::cin.get(); };
|
||||
|
||||
GraphicsAllocation *tagAllocation = nullptr;
|
||||
GraphicsAllocation *globalFenceAllocation = nullptr;
|
||||
@@ -253,6 +272,7 @@ class CommandStreamReceiver {
|
||||
|
||||
// taskCount - # of tasks submitted
|
||||
uint32_t taskCount = 0;
|
||||
|
||||
uint32_t lastSentL3Config = 0;
|
||||
uint32_t latestSentStatelessMocsConfig = 0;
|
||||
uint32_t lastSentNumGrfRequired = GrfConfig::DefaultGrfNumber;
|
||||
|
||||
@@ -45,8 +45,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, OverrideGpuAddressSpace, -1, "-1: Default, !=-1:
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideMaxWorkgroupSize, -1, "-1: Default, !=-1: Overrides max worgkroup size to this value")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DoCpuCopyOnReadBuffer, -1, "-1: default 0: do not use CPU copy, 1: triggers CPU copy path for Read Buffer calls, only supported for some basic use cases (no blocked user events in dependencies tree)")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DoCpuCopyOnWriteBuffer, -1, "-1: default 0: do not use CPU copy, 1: triggers CPU copy path for Write Buffer calls, only supported for some basic use cases (no blocked user events in dependencies tree)")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, AddBlockingSemaphoreAfterSpecificEnqueue, -1, "-1: Disabled. >=0: Zero based enqueue index. For debug only. It may not work correctly with multi CSR submissions")
|
||||
DECLARE_DEBUG_VARIABLE(bool, AddCacheFlushBeforeBlockingSemaphore, false, "Add stalling pipe_control with cache flush before semaphore. Works only with AddBlockingSemaphoreAfterSpecificEnqueue>=0")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, PauseOnEnqueue, -1, "-1: default x: pause on enqueue number x and ask for user confirmation before and after execution, counted from 0")
|
||||
DECLARE_DEBUG_VARIABLE(bool, EnableDebugBreak, true, "Enable DEBUG_BREAKs")
|
||||
DECLARE_DEBUG_VARIABLE(bool, FlushAllCaches, false, "pipe controls between enqueues flush all possible caches")
|
||||
DECLARE_DEBUG_VARIABLE(bool, MakeEachEnqueueBlocking, false, "equivalent of finish after each enqueue")
|
||||
|
||||
Reference in New Issue
Block a user