diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.h b/level_zero/core/source/cmdqueue/cmdqueue_hw.h index fd97df19e2..e2acc5c3d5 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.h +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.h @@ -156,7 +156,7 @@ struct CommandQueueHw : public CommandQueueImp { inline void assignCsrTaskCountToFenceIfAvailable(ze_fence_handle_t hFence); inline void dispatchTaskCountPostSyncRegular(bool isDispatchTaskCountPostSyncRequired, NEO::LinearStream &commandStream); inline void dispatchTaskCountPostSyncByMiFlushDw(bool isDispatchTaskCountPostSyncRequired, NEO::LinearStream &commandStream); - inline NEO::SubmissionStatus prepareAndSubmitBatchBuffer(CommandListExecutionContext &ctx, NEO::LinearStream &innerCommandStream); + /* 'inline' is dropped from this declaration. NOTE(review): presumably so the test mock can re-export the method and tests can call it directly - confirm. */ NEO::SubmissionStatus prepareAndSubmitBatchBuffer(CommandListExecutionContext &ctx, NEO::LinearStream &innerCommandStream); inline void updateTaskCountAndPostSync(bool isDispatchTaskCountPostSyncRequired); inline ze_result_t waitForCommandQueueCompletionAndCleanHeapContainer(); inline ze_result_t handleSubmissionAndCompletionResults(NEO::SubmissionStatus submitRet, ze_result_t completionRet); diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index 9a5112df37..eaa6004222 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -1017,7 +1017,15 @@ NEO::SubmissionStatus CommandQueueHw::prepareAndSubmitBatchBuffer *(MI_BATCH_BUFFER_END *)buffer = GfxFamily::cmdInitBatchBufferEnd; } - if (this->alignedChildStreamPadding) { + /* When the NEO debugger is active or the EnableSWTags debug flag is set, zero the inner stream from its current position up to the outer stream's used size, rather than only the alignment padding handled by the else-branch. leftoverSpace = outer used - inner used - (inner base - outer base); assumes ptrDiff returns the byte offset of the inner base within the outer stream - TODO confirm. */ if (ctx.isNEODebuggerActive(this->device) || NEO::DebugManager.flags.EnableSWTags.get()) { + auto leftoverSpace = outerCommandStream.getUsed() - innerCommandStream.getUsed(); + leftoverSpace -= ptrDiff(innerCommandStream.getCpuBase(), outerCommandStream.getCpuBase()); + + /* NOTE(review): if getUsed() and ptrDiff yield unsigned values, an underflow above would wrap to a huge count and still pass this check - confirm the outer stream always extends at least as far as the inner one. */ if (leftoverSpace > 0) { + auto memory = innerCommandStream.getSpace(leftoverSpace); + 
memset(memory, 0, leftoverSpace); + } + } else if (this->alignedChildStreamPadding) { void *paddingPtr = innerCommandStream.getSpace(this->alignedChildStreamPadding); memset(paddingPtr, 0, this->alignedChildStreamPadding); } diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h b/level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h index 5778c2b076..2c070f73fc 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h @@ -61,6 +61,7 @@ template struct MockCommandQueueHw : public L0::CommandQueueHw { using BaseClass = ::L0::CommandQueueHw; using BaseClass::commandStream; + /* re-exported so tests can call it on the mock */ using BaseClass::prepareAndSubmitBatchBuffer; using BaseClass::printfFunctionContainer; using L0::CommandQueue::activeSubDevices; using L0::CommandQueue::internalUsage; @@ -68,6 +69,7 @@ struct MockCommandQueueHw : public L0::CommandQueueHw { using L0::CommandQueue::partitionCount; using L0::CommandQueue::preemptionCmdSyncProgramming; using L0::CommandQueueImp::csr; + /* re-exported so tests can construct the execution context passed to prepareAndSubmitBatchBuffer */ using typename BaseClass::CommandListExecutionContext; MockCommandQueueHw(L0::Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) : L0::CommandQueueHw(device, csr, desc) { } diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp index eaf8fe00b0..597bfe9b7d 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp @@ -377,6 +377,39 @@ HWTEST2_F(CommandQueueCreate, givenGpuHangInReservingLinearStreamWhenExecutingCo EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, result); } +/* With EnableSWTags set, reserves 4096 bytes of the queue's command stream, fills them with 0xD, calls prepareAndSubmitBatchBuffer, and expects every byte after the first sizeof(MI_BATCH_BUFFER_END) bytes to be zero. NOTE(review): commandList is created and null-checked but not otherwise used here - confirm it is needed. */ HWTEST2_F(CommandQueueCreate, givenSwTagsEnabledWhenPrepareAndSubmitBatchBufferThenLeftoverIsZeroed, IsAtLeastSkl) { + DebugManagerStateRestore restorer; + NEO::DebugManager.flags.EnableSWTags.set(1); + const ze_command_queue_desc_t desc = {}; + auto commandQueue = 
new MockCommandQueueHw(device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc); + commandQueue->initialize(false, false); + ze_result_t returnValue; + auto commandList = std::unique_ptr(whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); + ASSERT_NE(nullptr, commandList); + auto &commandStream = commandQueue->commandStream; + + auto estimatedSize = 4096u; + NEO::LinearStream linearStream(commandStream->getSpace(estimatedSize), estimatedSize); + // fill with a fixed non-zero pattern (0xD) so the zeroing is observable + memset(commandStream->getCpuBase(), 0xD, estimatedSize); + typename MockCommandQueueHw::CommandListExecutionContext ctx{}; + + commandQueue->prepareAndSubmitBatchBuffer(ctx, linearStream); + + // MI_BATCH_BUFFER_END is added during prepareAndSubmitBatchBuffer, so the check starts right after it + auto offsetInBytes = sizeof(typename FamilyType::MI_BATCH_BUFFER_END); + auto isLeftoverZeroed = true; + for (auto i = offsetInBytes; i < estimatedSize; i++) { + uint8_t *data = reinterpret_cast(commandStream->getCpuBase()); + if (data[i] != 0) { + isLeftoverZeroed = false; + break; + } + } + EXPECT_TRUE(isLeftoverZeroed); + commandQueue->destroy(); +} + template struct MockCommandQueueHwEstimateSizeTest : public MockCommandQueueHw { diff --git a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_sba_tracking.cpp b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_sba_tracking.cpp index 35388466f6..7bd5923652 100644 --- a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_sba_tracking.cpp +++ b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_sba_tracking.cpp @@ -330,6 +330,35 @@ HWTEST2_F(L0DebuggerTest, givenDebuggingEnabledWhenCommandListIsExecutedThenSbaB commandList->destroy(); } +/* Same leftover-zeroing check, but triggered through ctx.isDebugEnabled = true instead of a debug flag. NOTE(review): "Debuger" in the test name looks like a typo for "Debugger". */ HWTEST2_F(L0DebuggerTest, givenDebugerEnabledWhenPrepareAndSubmitBatchBufferThenLeftoverIsZeroed, Gen12Plus) { + ze_command_queue_desc_t queueDesc = {}; + std::unique_ptr, Deleter> commandQueue(new 
MockCommandQueueHw(device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc)); + commandQueue->initialize(false, false); + + auto &commandStream = commandQueue->commandStream; + auto estimatedSize = 4096u; + NEO::LinearStream linearStream(commandStream->getSpace(estimatedSize), estimatedSize); + // fill with a fixed non-zero pattern (0xD) so the zeroing is observable + memset(commandStream->getCpuBase(), 0xD, estimatedSize); + + typename MockCommandQueueHw::CommandListExecutionContext ctx{}; + ctx.isDebugEnabled = true; + + commandQueue->prepareAndSubmitBatchBuffer(ctx, linearStream); + + // MI_BATCH_BUFFER_END is added during prepareAndSubmitBatchBuffer, so the check starts right after it + auto offsetInBytes = sizeof(typename FamilyType::MI_BATCH_BUFFER_END); + auto isLeftoverZeroed = true; + for (auto i = offsetInBytes; i < estimatedSize; i++) { + uint8_t *data = reinterpret_cast(commandStream->getCpuBase()); + if (data[i] != 0) { + isLeftoverZeroed = false; + break; + } + } + EXPECT_TRUE(isLeftoverZeroed); +} + INSTANTIATE_TEST_CASE_P(SBAModesForDebugger, L0DebuggerParameterizedTests, ::testing::Values(0, 1)); struct L0DebuggerSingleAddressSpace : public Test {