diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp index 5a9666dbdd..794923066b 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp @@ -107,22 +107,17 @@ HWTEST2_F(MultiTileCopyEngineCommandListTest, GivenMultiTileDeviceWhenCreatingCo } using CommandListExecuteImmediate = Test; -HWTEST_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlushTaskThenRequiredStreamStateIsCorrectlyReported) { +HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlushTaskThenRequiredStreamStateIsCorrectlyReported, IsAtMostXe3Core) { DebugManagerStateRestore restorer; debugManager.flags.UseImmediateFlushTask.set(0); - UnitTestSetter::disableHeaplessStateInit(restorer); - auto &compilerProductHelper = device->getCompilerProductHelper(); + auto &productHelper = device->getProductHelper(); - auto heaplessEnabled = compilerProductHelper.isHeaplessModeEnabled(*defaultHwInfo); std::unique_ptr commandList; const ze_command_queue_desc_t desc = {}; ze_result_t returnValue; commandList.reset(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::renderCompute, returnValue)); auto &commandListImmediate = static_cast &>(*commandList); - if (commandListImmediate.isHeaplessStateInitEnabled()) { - GTEST_SKIP(); - } auto &currentCsrStreamProperties = commandListImmediate.getCsr(false)->getStreamProperties(); @@ -147,11 +142,9 @@ HWTEST_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlush int expectedDisableEuFusion = frontEndPropertiesSupport.disableEuFusion ? 1 : -1; expectedDisableOverdispatch = frontEndPropertiesSupport.disableOverdispatch ? 
expectedDisableOverdispatch : -1; - if (heaplessEnabled == false) { - EXPECT_EQ(expectedComputeDispatchAllWalkerEnable, currentCsrStreamProperties.frontEndState.computeDispatchAllWalkerEnable.value); - EXPECT_EQ(expectedDisableEuFusion, currentCsrStreamProperties.frontEndState.disableEUFusion.value); - EXPECT_EQ(expectedDisableOverdispatch, currentCsrStreamProperties.frontEndState.disableOverdispatch.value); - } + EXPECT_EQ(expectedComputeDispatchAllWalkerEnable, currentCsrStreamProperties.frontEndState.computeDispatchAllWalkerEnable.value); + EXPECT_EQ(expectedDisableEuFusion, currentCsrStreamProperties.frontEndState.disableEUFusion.value); + EXPECT_EQ(expectedDisableOverdispatch, currentCsrStreamProperties.frontEndState.disableOverdispatch.value); EXPECT_EQ(expectedLargeGrfMode, currentCsrStreamProperties.stateComputeMode.largeGrfMode.value); EXPECT_EQ(expectedThreadArbitrationPolicy, currentCsrStreamProperties.stateComputeMode.threadArbitrationPolicy.value); @@ -171,11 +164,9 @@ HWTEST_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlush expectedDisableOverdispatch = frontEndPropertiesSupport.disableOverdispatch ? 0 : -1; expectedDisableEuFusion = frontEndPropertiesSupport.disableEuFusion ? 
0 : -1; - if (heaplessEnabled == false) { - EXPECT_EQ(expectedComputeDispatchAllWalkerEnable, currentCsrStreamProperties.frontEndState.computeDispatchAllWalkerEnable.value); - EXPECT_EQ(expectedDisableEuFusion, currentCsrStreamProperties.frontEndState.disableEUFusion.value); - EXPECT_EQ(expectedDisableOverdispatch, currentCsrStreamProperties.frontEndState.disableOverdispatch.value); - } + EXPECT_EQ(expectedComputeDispatchAllWalkerEnable, currentCsrStreamProperties.frontEndState.computeDispatchAllWalkerEnable.value); + EXPECT_EQ(expectedDisableEuFusion, currentCsrStreamProperties.frontEndState.disableEUFusion.value); + EXPECT_EQ(expectedDisableOverdispatch, currentCsrStreamProperties.frontEndState.disableOverdispatch.value); EXPECT_EQ(expectedLargeGrfMode, currentCsrStreamProperties.stateComputeMode.largeGrfMode.value); EXPECT_EQ(expectedThreadArbitrationPolicy, currentCsrStreamProperties.stateComputeMode.threadArbitrationPolicy.value); diff --git a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_1.cpp b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_1.cpp index f8a5ac2585..cc33e6fd44 100644 --- a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_1.cpp +++ b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_1.cpp @@ -764,7 +764,9 @@ HWTEST_F(L0DebuggerTest, givenFlushTaskSubmissionAndSharedHeapsEnabledWhenAppend returnValue = commandList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, launchParams); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); - ASSERT_EQ(0u, debugSurfaceState->getSurfaceBaseAddress()); + if (!commandList->isHeaplessStateInitEnabled()) { + ASSERT_EQ(0u, debugSurfaceState->getSurfaceBaseAddress()); + } kernelImmData->isaGraphicsAllocation.reset(nullptr); commandList->destroy(); diff --git a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_2.cpp b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_2.cpp index 
9742d0187d..5d473f0f64 100644 --- a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_2.cpp +++ b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_2.cpp @@ -222,7 +222,13 @@ HWTEST2_F(singleAddressSpaceModeTest, givenUseCsrImmediateSubmissionEnabledAndSh break; } } - EXPECT_TRUE(gpr15Found); + + if (csr.getHeaplessStateInitEnabled()) { + EXPECT_FALSE(gpr15Found); + } else { + EXPECT_TRUE(gpr15Found); + } + commandList->destroy(); } diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index 1e3bb8fb7e..5a3e7740a0 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -964,24 +964,15 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( PRINT_DEBUG_STRING(debugManager.flags.PrintDebugMessages.get(), stdout, "preemption = %d.\n", static_cast(dispatchFlags.preemptionMode)); - CompletionStamp completionStamp = getHeaplessStateInitEnabled() ? csr.flushTaskStateless( - commandStream, - commandStreamStart, - dsh, - ioh, - &getIndirectHeap(IndirectHeap::Type::surfaceState, 0u), - taskLevel, - dispatchFlags, - getDevice()) - : csr.flushTask( - commandStream, - commandStreamStart, - dsh, - ioh, - &getIndirectHeap(IndirectHeap::Type::surfaceState, 0u), - taskLevel, - dispatchFlags, - getDevice()); + CompletionStamp completionStamp = csr.flushTask( + commandStream, + commandStreamStart, + dsh, + ioh, + &getIndirectHeap(IndirectHeap::Type::surfaceState, 0u), + taskLevel, + dispatchFlags, + getDevice()); if (isHandlingBarrier) { clearLastBcsPackets(); @@ -1193,24 +1184,15 @@ CompletionStamp CommandQueueHw::enqueueCommandWithoutKernel( dispatchFlags.csrDependencies.makeResident(getGpgpuCommandStreamReceiver()); } - completionStamp = getHeaplessStateInitEnabled() ? 
getGpgpuCommandStreamReceiver().flushTaskStateless( - *commandStream, - commandStreamStart, - &getIndirectHeap(IndirectHeap::Type::dynamicState, 0u), - &getIndirectHeap(IndirectHeap::Type::indirectObject, 0u), - &getIndirectHeap(IndirectHeap::Type::surfaceState, 0u), - taskLevel, - dispatchFlags, - getDevice()) - : getGpgpuCommandStreamReceiver().flushTask( - *commandStream, - commandStreamStart, - &getIndirectHeap(IndirectHeap::Type::dynamicState, 0u), - &getIndirectHeap(IndirectHeap::Type::indirectObject, 0u), - &getIndirectHeap(IndirectHeap::Type::surfaceState, 0u), - taskLevel, - dispatchFlags, - getDevice()); + completionStamp = getGpgpuCommandStreamReceiver().flushTask( + *commandStream, + commandStreamStart, + &getIndirectHeap(IndirectHeap::Type::dynamicState, 0u), + &getIndirectHeap(IndirectHeap::Type::indirectObject, 0u), + &getIndirectHeap(IndirectHeap::Type::surfaceState, 0u), + taskLevel, + dispatchFlags, + getDevice()); if (isHandlingBarrier) { clearLastBcsPackets(); diff --git a/opencl/source/helpers/task_information.cpp b/opencl/source/helpers/task_information.cpp index 79e5bd3dc3..72585be734 100644 --- a/opencl/source/helpers/task_information.cpp +++ b/opencl/source/helpers/task_information.cpp @@ -91,22 +91,14 @@ CompletionStamp &CommandMapUnmap::submit(TaskCountType taskLevel, bool terminate gtpinNotifyPreFlushTask(&commandQueue); - completionStamp = commandQueue.getHeaplessStateInitEnabled() ? 
commandStreamReceiver.flushTaskStateless(queueCommandStream, - offset, - &commandQueue.getIndirectHeap(IndirectHeap::Type::dynamicState, 0u), - &commandQueue.getIndirectHeap(IndirectHeap::Type::indirectObject, 0u), - &commandQueue.getIndirectHeap(IndirectHeap::Type::surfaceState, 0u), - taskLevel, - dispatchFlags, - commandQueue.getDevice()) - : commandStreamReceiver.flushTask(queueCommandStream, - offset, - &commandQueue.getIndirectHeap(IndirectHeap::Type::dynamicState, 0u), - &commandQueue.getIndirectHeap(IndirectHeap::Type::indirectObject, 0u), - &commandQueue.getIndirectHeap(IndirectHeap::Type::surfaceState, 0u), - taskLevel, - dispatchFlags, - commandQueue.getDevice()); + completionStamp = commandStreamReceiver.flushTask(queueCommandStream, + offset, + &commandQueue.getIndirectHeap(IndirectHeap::Type::dynamicState, 0u), + &commandQueue.getIndirectHeap(IndirectHeap::Type::indirectObject, 0u), + &commandQueue.getIndirectHeap(IndirectHeap::Type::surfaceState, 0u), + taskLevel, + dispatchFlags, + commandQueue.getDevice()); commandQueue.updateLatestSentEnqueueType(EnqueueProperties::Operation::dependencyResolveOnGpu); @@ -263,22 +255,14 @@ CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool term } } - completionStamp = commandQueue.getHeaplessStateInitEnabled() ? 
commandStreamReceiver.flushTaskStateless(*kernelOperation->commandStream, - 0, - dsh, - ioh, - ssh, - taskLevel, - dispatchFlags, - commandQueue.getDevice()) - : commandStreamReceiver.flushTask(*kernelOperation->commandStream, - 0, - dsh, - ioh, - ssh, - taskLevel, - dispatchFlags, - commandQueue.getDevice()); + completionStamp = commandStreamReceiver.flushTask(*kernelOperation->commandStream, + 0, + dsh, + ioh, + ssh, + taskLevel, + dispatchFlags, + commandQueue.getDevice()); if (isHandlingBarrier) { commandQueue.clearLastBcsPackets(); @@ -436,22 +420,14 @@ CompletionStamp &CommandWithoutKernel::submit(TaskCountType taskLevel, bool term gtpinNotifyPreFlushTask(&commandQueue); - completionStamp = commandQueue.getHeaplessStateInitEnabled() ? commandStreamReceiver.flushTaskStateless(*kernelOperation->commandStream, - 0, - &commandQueue.getIndirectHeap(IndirectHeap::Type::dynamicState, 0u), - &commandQueue.getIndirectHeap(IndirectHeap::Type::indirectObject, 0u), - &commandQueue.getIndirectHeap(IndirectHeap::Type::surfaceState, 0u), - taskLevel, - dispatchFlags, - commandQueue.getDevice()) - : commandStreamReceiver.flushTask(*kernelOperation->commandStream, - 0, - &commandQueue.getIndirectHeap(IndirectHeap::Type::dynamicState, 0u), - &commandQueue.getIndirectHeap(IndirectHeap::Type::indirectObject, 0u), - &commandQueue.getIndirectHeap(IndirectHeap::Type::surfaceState, 0u), - taskLevel, - dispatchFlags, - commandQueue.getDevice()); + completionStamp = commandStreamReceiver.flushTask(*kernelOperation->commandStream, + 0, + &commandQueue.getIndirectHeap(IndirectHeap::Type::dynamicState, 0u), + &commandQueue.getIndirectHeap(IndirectHeap::Type::indirectObject, 0u), + &commandQueue.getIndirectHeap(IndirectHeap::Type::surfaceState, 0u), + taskLevel, + dispatchFlags, + commandQueue.getDevice()); if (isHandlingBarrier) { commandQueue.clearLastBcsPackets(); diff --git a/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp 
b/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp index 18a4c2a98b..0fcc253ec9 100644 --- a/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp @@ -167,12 +167,7 @@ struct CreateAllocationForHostSurfaceCsr : public CommandStreamReceiverHw(0u)}; - } - CompletionStamp flushTaskStateless(LinearStream &commandStream, size_t commandStreamStart, - const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, - TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override { return CompletionStamp{0u, 0u, static_cast(0u)}; } }; diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp index 2db368df27..9ae12f76d3 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp @@ -186,6 +186,11 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeAndMidThread debugManager.flags.ForcePreemptionMode.set(static_cast(NEO::PreemptionMode::MidThread)); auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto &mockCsr = mockDevice->getUltCommandStreamReceiver(); + + if (mockCsr.getHeaplessStateInitEnabled()) { + GTEST_SKIP(); + } + mockCsr.overrideDispatchPolicy(DispatchMode::batchedDispatch); mockCsr.useNewResourceImplicitFlush = false; mockCsr.useGpuIdleImplicitFlush = false; @@ -222,7 +227,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeAndMidThread SipKernel::freeSipKernels(&mockDevice->getRootDeviceEnvironmentRef(), mockDevice->getMemoryManager()); } -HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInDefaultModeAndMidThreadPreemptionWhenFlushTaskIsCalledThenSipKernelIsMadeResident) { 
+HWTEST2_F(CommandStreamReceiverFlushTaskTests, givenCsrInDefaultModeAndMidThreadPreemptionWhenFlushTaskIsCalledThenSipKernelIsMadeResident, IsAtMostXe3Core) { EnvironmentWithCsrWrapper environment; environment.setCsrType>(); DebugManagerStateRestore dbgRestorer; @@ -1055,7 +1060,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBothCsWhenFlushingTaskThenFlu commandStream.getSpace(sizeof(typename FamilyType::MI_NOOP)); flushTask(commandStreamReceiver); - EXPECT_EQ(1, commandStreamReceiver.flushCount); + EXPECT_EQ(commandStreamReceiver.getHeaplessStateInitEnabled() + 1, commandStreamReceiver.flushCount); } HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBothCsWhenFlushingTaskThenChainWithBatchBufferStart) { @@ -1301,6 +1306,9 @@ struct CommandStreamReceiverFlushTaskTestsWithMockCsrHw2DebugFlag : public UltCo HWTEST_TEMPLATED_F(CommandStreamReceiverFlushTaskTestsWithMockCsrHw2DebugFlag, givenDispatchFlagsWhenCallFlushTaskThenThreadArbitrationPolicyIsSetProperly) { auto mockCsr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); + if (mockCsr->getHeaplessStateInitEnabled()) { + GTEST_SKIP(); + } CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); @@ -1349,6 +1357,9 @@ HWTEST_P(CommandStreamReceiverFlushTaskMemoryCompressionTests, givenCsrWithMemor HWTEST_P(CommandStreamReceiverFlushTaskMemoryCompressionTests, givenCsrWithMemoryCompressionStateApplicableWhenFlushTaskIsCalledThenUpdateLastMemoryCompressionState) { auto &mockCsr = pDevice->getUltCommandStreamReceiver(); + if (mockCsr.getHeaplessStateInitEnabled()) { + GTEST_SKIP(); + } CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp index 5f99648f17..52a9777f22 100644 --- 
a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp @@ -443,7 +443,7 @@ HWCMDTEST_F(IGFX_GEN12LP_CORE, CommandStreamReceiverFlushTaskTests, retVal = clReleaseMemObject(buffer); } -HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDefaultCommandStreamReceiverThenRoundRobinPolicyIsSelected) { +HWTEST2_F(CommandStreamReceiverFlushTaskTests, givenDefaultCommandStreamReceiverThenRoundRobinPolicyIsSelected, IsAtMostXe3Core) { DebugManagerStateRestore restorer; debugManager.flags.ForceThreadArbitrationPolicyProgrammingWithScm.set(1); @@ -516,6 +516,7 @@ HWTEST2_TEMPLATED_F(CommandStreamReceiverFlushTaskTestsWithMockCsrHw, WhenFlushi auto commandStreamReceiver = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); commandStreamReceiver->heaplessModeEnabled = false; + commandStreamReceiver->heaplessStateInitEnabled = false; commandStreamReceiver->setRequiredScratchSizes(1024, 0); // whatever > 0 @@ -652,6 +653,7 @@ HWTEST2_TEMPLATED_F(CommandStreamReceiverFlushTaskTestsWithMockCsrHw, whenScratc auto commandStreamReceiver = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); commandStreamReceiver->heaplessModeEnabled = false; + commandStreamReceiver->heaplessStateInitEnabled = false; auto scratchController = new MockScratchController(pDevice->getRootDeviceIndex(), *pDevice->executionEnvironment, *commandStreamReceiver->getInternalAllocationStorage()); commandStreamReceiver->scratchSpaceController.reset(scratchController); @@ -690,6 +692,7 @@ HWTEST2_TEMPLATED_F(CommandStreamReceiverFlushTaskTestsWithMockCsrHw, whenPrivat UnitTestSetter::disableHeapless(restorer); auto commandStreamReceiver = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); commandStreamReceiver->heaplessModeEnabled = false; + commandStreamReceiver->heaplessStateInitEnabled = false; auto scratchController = new MockScratchController(pDevice->getRootDeviceIndex(), 
*pDevice->executionEnvironment, *commandStreamReceiver->getInternalAllocationStorage()); commandStreamReceiver->scratchSpaceController.reset(scratchController); @@ -1219,6 +1222,10 @@ HWTEST_TEMPLATED_F(CommandStreamReceiverFlushTaskTestsWithMockCsrHw2, givenComma auto csr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); auto csr2 = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + if (csr->getHeaplessStateInitEnabled()) { + GTEST_SKIP(); + } + MockGmmPageTableMngr *pageTableManager = new MockGmmPageTableMngr(); csr->pageTableManager.reset(pageTableManager); MockGmmPageTableMngr *pageTableManager2 = new MockGmmPageTableMngr(); @@ -1385,6 +1392,9 @@ HWTEST_TEMPLATED_F(CommandStreamReceiverFlushTaskTestsWithMockCsrHw2, givenNullP HWTEST_TEMPLATED_F(CommandStreamReceiverFlushTaskTestsWithMockCsrHw2, givenCommandStreamReceiverWhenInitializingPageTableManagerRegisterFailsThenPageTableManagerIsNotInitialized) { auto csr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); + if (csr->getHeaplessStateInitEnabled()) { + GTEST_SKIP(); + } MockGmmPageTableMngr *pageTableManager = new MockGmmPageTableMngr(); csr->pageTableManager.reset(pageTableManager); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp index 8aa5c7c951..68f43a89ea 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp @@ -48,6 +48,10 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTas auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); + if (mockCsr->getHeaplessStateInitEnabled()) { + GTEST_SKIP(); + } + 
mockCsr->overrideDispatchPolicy(DispatchMode::batchedDispatch); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; @@ -252,12 +256,14 @@ HWTEST_TEMPLATED_F(CommandStreamReceiverFlushTaskTestsWithMockCsrHw2, givenCsrIn auto lastbbEndPtr = lastBatchBuffer->batchBuffer.endCmdPtr; + auto flushCalledCountBeforeFlush = mockCsr->flushCalledCount; + mockCsr->flushBatchedSubmissions(); auto batchBufferStart = genCmdCast(bbEndLocation); ASSERT_NE(nullptr, batchBufferStart); EXPECT_EQ(lastBatchBufferAddress, batchBufferStart->getBatchBufferStartAddress()); - EXPECT_EQ(1u, mockCsr->flushCalledCount); + EXPECT_EQ(flushCalledCountBeforeFlush + 1u, mockCsr->flushCalledCount); EXPECT_EQ(mockCsr->recordedCommandBuffer->batchBuffer.endCmdPtr, lastbbEndPtr); } @@ -271,6 +277,7 @@ HWTEST_TEMPLATED_F(CommandStreamReceiverFlushTaskTestsWithMockCsrHw2, givenCsrIn auto mockCsr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; + mockCsr->heaplessStateInitialized = true; mockCsr->overrideDispatchPolicy(DispatchMode::batchedDispatch); auto mockedSubmissionsAggregator = new MockSubmissionsAggregator(); @@ -485,6 +492,7 @@ HWTEST_TEMPLATED_F(CommandStreamReceiverFlushTaskTestsWithMockCsrHw2, givenCsrIn mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::batchedDispatch); + mockCsr->heaplessStateInitialized = true; mockCsr->flushCalledCount = 0; auto mockedSubmissionsAggregator = new MockSubmissionsAggregator(); @@ -495,6 +503,7 @@ HWTEST_TEMPLATED_F(CommandStreamReceiverFlushTaskTestsWithMockCsrHw2, givenCsrIn DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.blocking = true; dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); + auto flushCalledCountBeforeFlush = 
mockCsr->flushCalledCount; mockCsr->flushTask(commandStream, 0, @@ -505,12 +514,13 @@ HWTEST_TEMPLATED_F(CommandStreamReceiverFlushTaskTestsWithMockCsrHw2, givenCsrIn dispatchFlags, *pDevice); - EXPECT_EQ(1u, mockCsr->flushCalledCount); + EXPECT_EQ(flushCalledCountBeforeFlush + 1u, mockCsr->flushCalledCount); EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); } HWTEST_TEMPLATED_F(CommandStreamReceiverFlushTaskTestsWithMockCsrHw2, givenBufferToFlushWhenFlushTaskCalledThenUpdateFlushStamp) { auto mockCsr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); + mockCsr->heaplessStateInitialized = true; commandStream.getSpace(1); EXPECT_EQ(0u, mockCsr->flushCalledCount); @@ -546,6 +556,7 @@ HWTEST_TEMPLATED_F(CommandStreamReceiverFlushTaskTestsWithMockCsrHw2, givenCsrIn mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::batchedDispatch); + mockCsr->heaplessStateInitialized = true; DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); @@ -577,6 +588,7 @@ HWTEST_TEMPLATED_F(CommandStreamReceiverFlushTaskTestsWithMockCsrHw2, givenUpdat mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::batchedDispatch); + mockCsr->heaplessStateInitialized = true; DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); @@ -732,6 +744,7 @@ HWTEST_TEMPLATED_F(CommandStreamReceiverFlushTaskTestsWithMockCsrHw2, givenCsrIn mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::batchedDispatch); + mockCsr->heaplessStateInitialized = true; auto mockedSubmissionsAggregator 
= new MockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); @@ -773,6 +786,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenEnqueueI mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::batchedDispatch); + mockCsr->heaplessStateInitialized = true; auto mockedSubmissionsAggregator = new MockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); @@ -814,6 +828,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenSusbsequ mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::batchedDispatch); + mockCsr->heaplessStateInitialized = true; auto mockedSubmissionsAggregator = new MockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); @@ -882,6 +897,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenTotalRes mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::batchedDispatch); + mockCsr->heaplessStateInitialized = true; auto mockedSubmissionsAggregator = new MockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); @@ -930,13 +946,18 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenTotalRes HWTEST_TEMPLATED_F(CommandStreamReceiverFlushTaskTestsWithMockCsrHw2, givenCsrInBatchingModeWhenTwoTasksArePassedWithTheSameLevelThenThereIsNoPipeControlBetweenThemAfterFlush) { + using MI_NOOP = typename FamilyType::MI_NOOP; + CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); - + MI_NOOP *pCmd = static_cast(commandStream.getSpace(sizeof(MI_NOOP))); + *pCmd = FamilyType::cmdInitNoop; auto mockCsr = static_cast 
*>(&pDevice->getGpgpuCommandStreamReceiver()); + mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::batchedDispatch); + mockCsr->heaplessStateInitialized = true; auto mockedSubmissionsAggregator = new MockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); @@ -994,11 +1015,14 @@ HWTEST_TEMPLATED_F(CommandStreamReceiverFlushTaskTestsWithMockCsrHw2, // make sure they are not the same EXPECT_NE(cmdList.end(), itorBatchBufferStartFirst); - EXPECT_NE(cmdList.end(), itorBatchBufferStartSecond); - EXPECT_NE(itorBatchBufferStartFirst, itorBatchBufferStartSecond); - - auto itorPipeControl = find(itorBatchBufferStartFirst, itorBatchBufferStartSecond); - EXPECT_EQ(itorPipeControl, itorBatchBufferStartSecond); + if (mockCsr->getHeaplessStateInitEnabled()) { + EXPECT_EQ(cmdList.end(), itorBatchBufferStartSecond); + } else { + EXPECT_NE(cmdList.end(), itorBatchBufferStartSecond); + EXPECT_NE(itorBatchBufferStartFirst, itorBatchBufferStartSecond); + auto itorPipeControl = find(itorBatchBufferStartFirst, itorBatchBufferStartSecond); + EXPECT_EQ(itorPipeControl, itorBatchBufferStartSecond); + } // first pipe control is nooped, second pipe control is untouched auto noop1 = genCmdCast(ppc); @@ -1590,20 +1614,23 @@ HWTEST_TEMPLATED_F(CommandStreamReceiverFlushTaskTestsWithMockCsrHw2, auto itorBatchBufferStartFirst = find(cmdList.begin(), cmdList.end()); auto itorBatchBufferStartSecond = find(++itorBatchBufferStartFirst, cmdList.end()); - auto itorBatchBufferStartThird = find(++itorBatchBufferStartSecond, cmdList.end()); // make sure they are not the same EXPECT_NE(cmdList.end(), itorBatchBufferStartFirst); EXPECT_NE(cmdList.end(), itorBatchBufferStartSecond); - EXPECT_NE(cmdList.end(), itorBatchBufferStartThird); EXPECT_NE(itorBatchBufferStartFirst, itorBatchBufferStartSecond); - EXPECT_NE(itorBatchBufferStartThird, itorBatchBufferStartSecond); auto 
itorPipeControl = find(itorBatchBufferStartFirst, itorBatchBufferStartSecond); EXPECT_EQ(itorPipeControl, itorBatchBufferStartSecond); - itorPipeControl = find(itorBatchBufferStartSecond, itorBatchBufferStartThird); - EXPECT_EQ(itorPipeControl, itorBatchBufferStartThird); + if (!mockCsr->getHeaplessStateInitEnabled()) { + auto itorBatchBufferStartThird = find(++itorBatchBufferStartSecond, cmdList.end()); + EXPECT_NE(cmdList.end(), itorBatchBufferStartThird); + EXPECT_NE(itorBatchBufferStartThird, itorBatchBufferStartSecond); + + itorPipeControl = find(itorBatchBufferStartSecond, itorBatchBufferStartThird); + EXPECT_EQ(itorPipeControl, itorBatchBufferStartThird); + } // first pipe control is nooped, second pipe control is untouched auto noop1 = genCmdCast(ppc); @@ -1618,7 +1645,7 @@ HWTEST_TEMPLATED_F(CommandStreamReceiverFlushTaskTestsWithMockCsrHw2, EXPECT_NE(nullptr, ppcAfterChange); } -typedef UltCommandStreamReceiverTest CommandStreamReceiverCleanupTests; +using CommandStreamReceiverCleanupTests = UltCommandStreamReceiverTest; HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenTemporaryAndReusableAllocationsArePresentThenCleanupResourcesOnlyCleansThoseAboveLatestFlushTaskLevel) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto memoryManager = pDevice->getMemoryManager(); @@ -1660,11 +1687,13 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenTemporaryAndReusableAl HWTEST_TEMPLATED_F(CommandStreamReceiverFlushTaskTestsWithMockCsrHw2, givenDispatchFlagsWithThrottleSetToLowWhenFlushTaskIsCalledThenThrottleIsSetInBatchBuffer) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); + commandStream.getSpace(16u); auto mockCsr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::batchedDispatch); + mockCsr->heaplessStateInitialized 
= true; auto mockedSubmissionsAggregator = new MockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); @@ -1691,11 +1720,13 @@ HWTEST_TEMPLATED_F(CommandStreamReceiverFlushTaskTestsWithMockCsrHw2, givenDispa HWTEST_TEMPLATED_F(CommandStreamReceiverFlushTaskTestsWithMockCsrHw2, givenDispatchFlagsWithThrottleSetToMediumWhenFlushTaskIsCalledThenThrottleIsSetInBatchBuffer) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); + commandStream.getSpace(16u); auto mockCsr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::batchedDispatch); + mockCsr->heaplessStateInitialized = true; auto mockedSubmissionsAggregator = new MockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); @@ -1738,11 +1769,13 @@ HWTEST_TEMPLATED_F(CommandStreamReceiverFlushTaskTestsWithMockCsrHw2, givenComma HWTEST_TEMPLATED_F(CommandStreamReceiverFlushTaskTestsWithMockCsrHw2, givenDispatchFlagsWithThrottleSetToHighWhenFlushTaskIsCalledThenThrottleIsSetInBatchBuffer) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); + commandStream.getSpace(16u); auto mockCsr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::batchedDispatch); + mockCsr->heaplessStateInitialized = true; auto mockedSubmissionsAggregator = new MockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); @@ -1819,11 +1852,13 @@ HWCMDTEST_F(IGFX_GEN12LP_CORE, CommandStreamReceiverFlushTaskTests, givenEpilogu HWTEST_TEMPLATED_F(CommandStreamReceiverFlushTaskTestsWithMockCsrHw2, 
givenDispatchFlagsWithNewSliceCountWhenFlushTaskThenNewSliceCountIsSet) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); + commandStream.getSpace(16u); auto mockCsr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::batchedDispatch); + mockCsr->heaplessStateInitialized = true; auto mockedSubmissionsAggregator = new MockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); @@ -1869,14 +1904,6 @@ class UltCommandStreamReceiverForDispatchFlags : public UltCommandStreamReceiver dsh, ioh, ssh, taskLevel, dispatchFlags, device); } - CompletionStamp flushTaskStateless(LinearStream &commandStream, size_t commandStreamStart, - const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, - TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override { - savedDispatchFlags = dispatchFlags; - return BaseClass::flushTaskStateless(commandStream, commandStreamStart, - dsh, ioh, ssh, taskLevel, dispatchFlags, device); - } - DispatchFlags savedDispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); }; @@ -1993,7 +2020,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, whenPerDssBackBufferIsAllocatedThe EXPECT_EQ(nullptr, commandStreamReceiver.perDssBackedBuffer); } -HWTEST_F(CommandStreamReceiverFlushTaskTests, whenPerDssBackBufferProgrammingEnabledThenAllocationIsCreated) { +HWTEST2_F(CommandStreamReceiverFlushTaskTests, whenPerDssBackBufferProgrammingEnabledThenAllocationIsCreated, IsAtMostXe3Core) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_gmock_tests.cpp 
b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_gmock_tests.cpp index 2230500599..675865ca09 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_gmock_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_gmock_tests.cpp @@ -76,7 +76,7 @@ HWTEST2_F(CommandStreamReceiverFlushTaskGmockTests, debugManager.flags.AddPatchInfoCommentsForAUBDump.set(true); debugManager.flags.FlattenBatchBufferForAUBDump.set(true); - typedef typename FamilyType::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; + using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); @@ -85,7 +85,7 @@ HWTEST2_F(CommandStreamReceiverFlushTaskGmockTests, auto mockHelper = new MockFlatBatchBufferHelper(*pDevice->executionEnvironment); mockCsr->overwriteFlatBatchBufferHelper(mockHelper); pDevice->resetCommandStreamReceiver(mockCsr); - + bool heaplessStateInitEnabled = mockCsr->getHeaplessStateInitEnabled(); mockCsr->overrideDispatchPolicy(DispatchMode::batchedDispatch); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; @@ -101,6 +101,10 @@ HWTEST2_F(CommandStreamReceiverFlushTaskGmockTests, --expectedCallsCount; } + if (mockCsr->getHeaplessStateInitEnabled()) { + expectedCallsCount = 6; + } + size_t removePatchInfoDataCount = 4 * UltMemorySynchronizationCommands::getExpectedPipeControlCount(pDevice->getRootDeviceEnvironment()); mockCsr->flushTask(commandStream, @@ -142,11 +146,11 @@ HWTEST2_F(CommandStreamReceiverFlushTaskGmockTests, auto batchBufferStart = genCmdCast(bbEndLocation); ASSERT_NE(nullptr, batchBufferStart); EXPECT_EQ(lastBatchBufferAddress, batchBufferStart->getBatchBufferStartAddress()); - EXPECT_EQ(1u, mockCsr->flushCalledCount); + EXPECT_EQ(heaplessStateInitEnabled ? 
2u : 1u, mockCsr->flushCalledCount); EXPECT_EQ(expectedCallsCount, mockHelper->setPatchInfoDataCalled); EXPECT_EQ(static_cast(removePatchInfoDataCount), mockHelper->removePatchInfoDataCalled); - EXPECT_EQ(4u, mockHelper->registerCommandChunkCalled); - EXPECT_EQ(3u, mockHelper->registerBatchBufferStartAddressCalled); + EXPECT_EQ(heaplessStateInitEnabled ? 3u : 4u, mockHelper->registerCommandChunkCalled); + EXPECT_EQ(heaplessStateInitEnabled ? 2u : 3u, mockHelper->registerBatchBufferStartAddressCalled); } HWTEST_TEMPLATED_F(CommandStreamReceiverFlushTaskGmockTestsWithMockCsrHw2, givenMockCommandStreamerWhenAddPatchInfoCommentsForAUBDumpIsNotSetThenAddPatchInfoDataIsNotCollected) { @@ -190,6 +194,10 @@ HWTEST2_TEMPLATED_F(CommandStreamReceiverFlushTaskGmockTestsWithMockCsrHw2, give --expectedCallsCount; } + if (mockCsr->getHeaplessStateInitEnabled()) { + expectedCallsCount = 0; + } + mockCsr->flushTask(commandStream, 0, &dsh, diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp index 0ff5441543..db15b34eaa 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp @@ -299,6 +299,10 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, gi debugManager.flags.ForcePreemptionMode.set(static_cast(PreemptionMode::ThreadGroup)); auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + if (commandStreamReceiver->getHeaplessStateInitEnabled()) { + GTEST_SKIP(); + } + pDevice->setPreemptionMode(PreemptionMode::ThreadGroup); pDevice->resetCommandStreamReceiver(commandStreamReceiver); @@ -489,9 +493,10 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, 
CommandStreamReceiverFlushTaskXeHPAndLaterTests, Wh } HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, GivenBlockingWhenFlushingTaskThenPipeControlProgrammedCorrectly) { - typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + commandStreamReceiver->heaplessStateInitialized = true; pDevice->resetCommandStreamReceiver(commandStreamReceiver); // Configure the CSR to not need to submit any state or commands @@ -501,6 +506,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, Gi auto blocking = true; auto &commandStreamTask = commandQueue.getCS(1024); auto &commandStreamCSR = commandStreamReceiver->getCS(); + auto sizeUsedBeforeFlushCSR = commandStreamCSR.getUsed(); + commandStreamReceiver->streamProperties.stateComputeMode.isCoherencyRequired.value = 0; DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); @@ -520,7 +527,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, Gi // Verify that taskCS got modified, while csrCS remained intact EXPECT_GT(commandStreamTask.getUsed(), 0u); - EXPECT_EQ(0u, commandStreamCSR.getUsed()); + EXPECT_EQ(sizeUsedBeforeFlushCSR, commandStreamCSR.getUsed()); // Parse command list to verify that PC got added to taskCS cmdList.clear(); @@ -530,7 +537,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, Gi // Parse command list to verify that PC wasn't added to csrCS cmdList.clear(); - parseCommands(commandStreamCSR, 0); + parseCommands(commandStreamCSR, sizeUsedBeforeFlushCSR); auto numberOfPC = getCommandsList().size(); EXPECT_EQ(0u, numberOfPC); } @@ -540,6 +547,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, 
CommandStreamReceiverFlushTaskXeHPAndLaterTests, gi auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + mockCsr->heaplessStateInitialized = true; pDevice->resetCommandStreamReceiver(mockCsr); configureCSRtoNonDirtyState(true); @@ -585,7 +593,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, gi dispatchFlags, *pDevice); - EXPECT_EQ(0u, mockCsr->flushCalledCount); + EXPECT_EQ(mockCsr->getHeaplessStateInitEnabled() ? 1u : 0u, mockCsr->flushCalledCount); EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); @@ -603,6 +611,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, gi mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::batchedDispatch); + mockCsr->heaplessStateInitialized = true; auto mockedSubmissionsAggregator = new MockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); @@ -614,8 +623,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, gi mockCsr->streamProperties.stateComputeMode.isCoherencyRequired.value = 0; - commandStream.getSpace(4); - mockCsr->flushTask(commandStream, 4, &dsh, @@ -669,6 +676,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, gi HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenNothingToFlushWhenFlushTaskCalledThenDontFlushStamp) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + mockCsr->heaplessStateInitialized = true; pDevice->resetCommandStreamReceiver(mockCsr); configureCSRtoNonDirtyState(true); diff --git a/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h 
b/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h index 1a164d1868..aaca84a5c5 100644 --- a/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h +++ b/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h @@ -99,12 +99,7 @@ struct UltCommandStreamReceiverTest const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) { - if (reinterpret_cast *>(&commandStreamReceiver)->heaplessStateInitialized) { - return commandStreamReceiver.flushTaskStateless(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); - - } else { - return commandStreamReceiver.flushTask(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); - } + return commandStreamReceiver.flushTask(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); } template @@ -114,35 +109,19 @@ struct UltCommandStreamReceiverTest bool requiresCoherency = false, bool lowPriority = false) { - if (commandStreamReceiver.heaplessStateInitialized) { - flushTaskFlags.blocking = block; - flushTaskFlags.lowPriority = lowPriority; - flushTaskFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); + flushTaskFlags.blocking = block; + flushTaskFlags.lowPriority = lowPriority; + flushTaskFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); - return commandStreamReceiver.flushTaskStateless( - commandStream, - startOffset, - &dsh, - &ioh, - &ssh, - taskLevel, - flushTaskFlags, - *pDevice); - } else { - flushTaskFlags.blocking = block; - flushTaskFlags.lowPriority = lowPriority; - flushTaskFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); - - return commandStreamReceiver.flushTask( - commandStream, - startOffset, - &dsh, - &ioh, - &ssh, - taskLevel, - flushTaskFlags, - *pDevice); - } + 
return commandStreamReceiver.flushTask( + commandStream, + startOffset, + &dsh, + &ioh, + &ssh, + taskLevel, + flushTaskFlags, + *pDevice); } template diff --git a/opencl/test/unit_test/helpers/task_information_tests.cpp b/opencl/test/unit_test/helpers/task_information_tests.cpp index 76816e00f9..9457b7e1ac 100644 --- a/opencl/test/unit_test/helpers/task_information_tests.cpp +++ b/opencl/test/unit_test/helpers/task_information_tests.cpp @@ -228,13 +228,23 @@ class MockCsr1 : public CommandStreamReceiverHw { CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override { + if (this->getHeaplessStateInitEnabled()) { + return flushTaskHeapless(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); + } else { + return flushTaskHeapful(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); + } + } + + CompletionStamp flushTaskHeapless(LinearStream &commandStream, size_t commandStreamStart, + const IndirectHeap *dsh, const IndirectHeap *ioh, + const IndirectHeap *ssh, TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override { passedDispatchFlags = dispatchFlags; return CompletionStamp(); } - CompletionStamp flushTaskStateless(LinearStream &commandStream, size_t commandStreamStart, - const IndirectHeap *dsh, const IndirectHeap *ioh, - const IndirectHeap *ssh, TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override { + CompletionStamp flushTaskHeapful(LinearStream &commandStream, size_t commandStreamStart, + const IndirectHeap *dsh, const IndirectHeap *ioh, + const IndirectHeap *ssh, TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override { passedDispatchFlags = dispatchFlags; return CompletionStamp(); } diff --git 
a/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp b/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp index c07564718e..c2483b7432 100644 --- a/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp +++ b/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp @@ -70,26 +70,6 @@ class MockCommandStreamReceiverHW : public UltCommandStreamReceiver device); } - CompletionStamp flushTaskStateless( - LinearStream &commandStream, - size_t commandStreamStart, - const IndirectHeap *dsh, - const IndirectHeap *ioh, - const IndirectHeap *ssh, - TaskCountType taskLevel, - DispatchFlags &dispatchFlags, - Device &device) override { - stream = &commandStream; - return UltCommandStreamReceiver::flushTaskStateless( - commandStream, - commandStreamStart, - dsh, - ioh, - ssh, - taskLevel, - dispatchFlags, - device); - } LinearStream *stream = nullptr; }; diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index a17b32695f..ab123c8b09 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -831,6 +831,22 @@ class CommandStreamReceiverMock : public CommandStreamReceiver { TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, Device &device) override { return taskCount; }; CompletionStamp flushTask( + LinearStream &commandStream, + size_t commandStreamStart, + const IndirectHeap *dsh, + const IndirectHeap *ioh, + const IndirectHeap *ssh, + TaskCountType taskLevel, + DispatchFlags &dispatchFlags, + Device &device) override { + if (getHeaplessStateInitEnabled()) { + return flushTaskHeapless(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); + } else { + return flushTaskHeapful(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); + } + } + + CompletionStamp flushTaskHeapful( LinearStream &commandStream, size_t commandStreamStart, const 
IndirectHeap *dsh, @@ -843,7 +859,7 @@ class CommandStreamReceiverMock : public CommandStreamReceiver { return cs; } - CompletionStamp flushTaskStateless( + CompletionStamp flushTaskHeapless( LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap *dsh, diff --git a/opencl/test/unit_test/os_interface/linux/linux_create_command_queue_with_properties_tests.cpp b/opencl/test/unit_test/os_interface/linux/linux_create_command_queue_with_properties_tests.cpp index 84cafac5b9..3de07ffe29 100644 --- a/opencl/test/unit_test/os_interface/linux/linux_create_command_queue_with_properties_tests.cpp +++ b/opencl/test/unit_test/os_interface/linux/linux_create_command_queue_with_properties_tests.cpp @@ -121,6 +121,9 @@ HWTEST2_F(ClCreateCommandQueueWithPropertiesLinux, givenPropertiesWithClQueueSli auto mockCsr = new TestedDrmCommandStreamReceiver(*mdevice->executionEnvironment, rootDeviceIndex, 1); mockCsr->flushInternalCallBase = false; mdevice->resetCommandStreamReceiver(mockCsr); + if (mockCsr->getHeaplessStateInitEnabled()) { + GTEST_SKIP(); + } cl_command_queue cmdQ = clCreateCommandQueueWithProperties(context.get(), clDevice, properties, &retVal); diff --git a/shared/source/command_stream/command_stream_receiver.cpp b/shared/source/command_stream/command_stream_receiver.cpp index 4c3c4f580d..902b8d5143 100644 --- a/shared/source/command_stream/command_stream_receiver.cpp +++ b/shared/source/command_stream/command_stream_receiver.cpp @@ -96,6 +96,7 @@ CommandStreamReceiver::CommandStreamReceiver(ExecutionEnvironment &executionEnvi auto &compilerProductHelper = rootDeviceEnvironment.getHelper(); this->heaplessModeEnabled = compilerProductHelper.isHeaplessModeEnabled(hwInfo); + this->heaplessStateInitEnabled = compilerProductHelper.isHeaplessStateInitEnabled(heaplessModeEnabled); this->evictionAllocations.reserve(2 * MemoryConstants::kiloByte); } diff --git a/shared/source/command_stream/command_stream_receiver.h 
b/shared/source/command_stream/command_stream_receiver.h index 8a5de89dfb..c15e80618b 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -110,10 +110,6 @@ class CommandStreamReceiver : NEO::NonCopyableAndNonMovableClass { const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) = 0; - virtual CompletionStamp flushTaskStateless(LinearStream &commandStreamTask, size_t commandStreamTaskStart, - const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, - TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) = 0; - virtual CompletionStamp flushBcsTask(LinearStream &commandStream, size_t commandStreamStart, const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) = 0; virtual CompletionStamp flushImmediateTask(LinearStream &immediateCommandStream, size_t immediateCommandStreamStart, ImmediateDispatchFlags &dispatchFlags, Device &device) = 0; @@ -499,6 +495,10 @@ class CommandStreamReceiver : NEO::NonCopyableAndNonMovableClass { return externalCondition ? 
dcFlushSupport : false; } + bool getHeaplessStateInitEnabled() const { + return heaplessStateInitEnabled; + } + bool isTbxMode() const; bool ensureTagAllocationForRootDeviceIndex(uint32_t rootDeviceIndex); @@ -563,6 +563,14 @@ class CommandStreamReceiver : NEO::NonCopyableAndNonMovableClass { bool isLatestFlushIsTaskCountUpdateOnly() const { return latestFlushIsTaskCountUpdateOnly; } protected: + virtual CompletionStamp flushTaskHeapless(LinearStream &commandStreamTask, size_t commandStreamTaskStart, + const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, + TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) = 0; + + virtual CompletionStamp flushTaskHeapful(LinearStream &commandStreamTask, size_t commandStreamTaskStart, + const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, + TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) = 0; + void cleanupResources(); void printDeviceIndex(); void checkForNewResources(TaskCountType submittedTaskCount, TaskCountType allocationTaskCount, GraphicsAllocation &gfxAllocation); @@ -699,6 +707,7 @@ class CommandStreamReceiver : NEO::NonCopyableAndNonMovableClass { bool forceSkipResourceCleanupRequired = false; bool resourcesInitialized = false; bool heaplessStateInitialized = false; + bool heaplessStateInitEnabled = false; bool doubleSbaWa = false; bool dshSupported = false; bool heaplessModeEnabled = false; diff --git a/shared/source/command_stream/command_stream_receiver_hw.h b/shared/source/command_stream/command_stream_receiver_hw.h index 1a85288cc5..d10547ac53 100644 --- a/shared/source/command_stream/command_stream_receiver_hw.h +++ b/shared/source/command_stream/command_stream_receiver_hw.h @@ -63,10 +63,6 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override; - CompletionStamp 
flushTaskStateless(LinearStream &commandStream, size_t commandStreamStart, - const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, - TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override; - void addPipeControlFlushTaskIfNeeded(LinearStream &commandStreamCSR, TaskCountType taskLevel); CompletionStamp flushBcsTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart, const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) override; @@ -209,6 +205,14 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { void unblockPagingFenceSemaphore(uint64_t pagingFenceValue) override; protected: + CompletionStamp flushTaskHeapful(LinearStream &commandStream, size_t commandStreamStart, + const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, + TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override; + + CompletionStamp flushTaskHeapless(LinearStream &commandStream, size_t commandStreamStart, + const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, + TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override; + void programPreemption(LinearStream &csr, DispatchFlags &dispatchFlags); void programL3(LinearStream &csr, uint32_t &newL3Config, bool isBcs); void programPreamble(LinearStream &csr, Device &device, uint32_t &newL3Config); diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index c15fd695c3..fdeceda14c 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -400,6 +400,24 @@ CompletionStamp CommandStreamReceiverHw::flushTask( DispatchFlags &dispatchFlags, Device &device) { + if (this->getHeaplessStateInitEnabled()) { + return flushTaskHeapless(commandStreamTask, commandStreamStartTask, dsh, ioh, ssh, taskLevel, dispatchFlags, 
device); + } else { + return flushTaskHeapful(commandStreamTask, commandStreamStartTask, dsh, ioh, ssh, taskLevel, dispatchFlags, device); + } +} + +template +CompletionStamp CommandStreamReceiverHw::flushTaskHeapful( + LinearStream &commandStreamTask, + size_t commandStreamStartTask, + const IndirectHeap *dsh, + const IndirectHeap *ioh, + const IndirectHeap *ssh, + TaskCountType taskLevel, + DispatchFlags &dispatchFlags, + Device &device) { + DEBUG_BREAK_IF(&commandStreamTask == &commandStream); DEBUG_BREAK_IF(!(dispatchFlags.preemptionMode == PreemptionMode::Disabled ? device.getPreemptionMode() == PreemptionMode::Disabled : true)); DEBUG_BREAK_IF(taskLevel >= CompletionStamp::notReady); diff --git a/shared/source/command_stream/command_stream_receiver_hw_heap_addressing.inl b/shared/source/command_stream/command_stream_receiver_hw_heap_addressing.inl index a585d92825..4f0ca207d5 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_heap_addressing.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_heap_addressing.inl @@ -25,7 +25,7 @@ SubmissionStatus CommandStreamReceiverHw::initializeDeviceWithFirstSu } template -CompletionStamp CommandStreamReceiverHw::flushTaskStateless( +CompletionStamp CommandStreamReceiverHw::flushTaskHeapless( LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) { diff --git a/shared/test/common/libult/ult_command_stream_receiver.h b/shared/test/common/libult/ult_command_stream_receiver.h index cfed492081..45b4d9448a 100644 --- a/shared/test/common/libult/ult_command_stream_receiver.h +++ b/shared/test/common/libult/ult_command_stream_receiver.h @@ -61,6 +61,8 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw { using BaseClass::dcFlushSupport; using BaseClass::directSubmission; using BaseClass::dshState; + using BaseClass::flushTaskHeapful; + 
using BaseClass::flushTaskHeapless; using BaseClass::getCmdSizeForExceptions; using BaseClass::getCmdSizeForHeaplessPrologue; using BaseClass::getCmdSizeForPrologue; @@ -125,6 +127,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw { using BaseClass::CommandStreamReceiver::gpuHangCheckPeriod; using BaseClass::CommandStreamReceiver::gsbaFor32BitProgrammed; using BaseClass::CommandStreamReceiver::heaplessModeEnabled; + using BaseClass::CommandStreamReceiver::heaplessStateInitEnabled; using BaseClass::CommandStreamReceiver::heaplessStateInitialized; using BaseClass::CommandStreamReceiver::immWritePostSyncWriteOffset; using BaseClass::CommandStreamReceiver::initDirectSubmission; @@ -228,15 +231,6 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw { return BaseClass::flushTask(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); } - CompletionStamp flushTaskStateless(LinearStream &commandStream, size_t commandStreamStart, - const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, - TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override { - recordedDispatchFlags = dispatchFlags; - recordedSsh = ssh; - this->lastFlushedCommandStream = &commandStream; - return BaseClass::flushTaskStateless(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); - } - CompletionStamp flushImmediateTask(LinearStream &immediateCommandStream, size_t immediateCommandStreamStart, ImmediateDispatchFlags &dispatchFlags, diff --git a/shared/test/common/mocks/linux/mock_drm_command_stream_receiver.h b/shared/test/common/mocks/linux/mock_drm_command_stream_receiver.h index b1e3a2e3b4..0fdd1f9b61 100644 --- a/shared/test/common/mocks/linux/mock_drm_command_stream_receiver.h +++ b/shared/test/common/mocks/linux/mock_drm_command_stream_receiver.h @@ -33,6 +33,7 @@ class TestedDrmCommandStreamReceiver : public DrmCommandStreamReceiver { return 
AUBCommandStreamReceiverHw::flushTask(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); } - CompletionStamp flushTaskStateless(LinearStream &commandStream, size_t commandStreamStart, - const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, - TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override { - recordedDispatchFlags = dispatchFlags; - - return AUBCommandStreamReceiverHw::flushTaskStateless(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); - } - DispatchMode peekDispatchMode() const { return this->dispatchMode; } diff --git a/shared/test/common/mocks/mock_command_stream_receiver.cpp b/shared/test/common/mocks/mock_command_stream_receiver.cpp index 605fd6816b..40226615e5 100644 --- a/shared/test/common/mocks/mock_command_stream_receiver.cpp +++ b/shared/test/common/mocks/mock_command_stream_receiver.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2024 Intel Corporation + * Copyright (C) 2021-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -23,12 +23,29 @@ CompletionStamp MockCommandStreamReceiver::flushTask( TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) { + + if (this->getHeaplessStateInitEnabled()) { + return flushTaskHeapless(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); + } else { + return flushTaskHeapful(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); + } +} + +CompletionStamp MockCommandStreamReceiver::flushTaskHeapless( + LinearStream &commandStream, + size_t commandStreamStart, + const IndirectHeap *dsh, + const IndirectHeap *ioh, + const IndirectHeap *ssh, + TaskCountType taskLevel, + DispatchFlags &dispatchFlags, + Device &device) { ++taskCount; CompletionStamp stamp = {taskCount, taskLevel, flushStamp->peekStamp()}; return stamp; } -CompletionStamp MockCommandStreamReceiver::flushTaskStateless( +CompletionStamp 
MockCommandStreamReceiver::flushTaskHeapful( LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap *dsh, diff --git a/shared/test/common/mocks/mock_command_stream_receiver.h b/shared/test/common/mocks/mock_command_stream_receiver.h index aad5add46a..b132e87a09 100644 --- a/shared/test/common/mocks/mock_command_stream_receiver.h +++ b/shared/test/common/mocks/mock_command_stream_receiver.h @@ -41,6 +41,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver { using CommandStreamReceiver::CommandStreamReceiver; using CommandStreamReceiver::globalFenceAllocation; using CommandStreamReceiver::gpuHangCheckPeriod; + using CommandStreamReceiver::heaplessStateInitEnabled; using CommandStreamReceiver::heaplessStateInitialized; using CommandStreamReceiver::immWritePostSyncWriteOffset; using CommandStreamReceiver::internalAllocationStorage; @@ -131,7 +132,17 @@ class MockCommandStreamReceiver : public CommandStreamReceiver { DispatchFlags &dispatchFlags, Device &device) override; - CompletionStamp flushTaskStateless( + CompletionStamp flushTaskHeapless( + LinearStream &commandStream, + size_t commandStreamStart, + const IndirectHeap *dsh, + const IndirectHeap *ioh, + const IndirectHeap *ssh, + TaskCountType taskLevel, + DispatchFlags &dispatchFlags, + Device &device) override; + + CompletionStamp flushTaskHeapful( LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap *dsh, @@ -340,6 +351,8 @@ class MockCsrHw2 : public CommandStreamReceiverHw { using CommandStreamReceiver::dispatchMode; using CommandStreamReceiver::feSupportFlags; using CommandStreamReceiver::globalFenceAllocation; + using CommandStreamReceiver::heaplessModeEnabled; + using CommandStreamReceiver::heaplessStateInitEnabled; using CommandStreamReceiver::heaplessStateInitialized; using CommandStreamReceiver::heapStorageRequiresRecyclingTag; using CommandStreamReceiver::immWritePostSyncWriteOffset; @@ -401,19 +414,6 @@ class MockCsrHw2 : public 
CommandStreamReceiverHw { return completionStamp; } - CompletionStamp flushTaskStateless(LinearStream &commandStream, size_t commandStreamStart, - const IndirectHeap *dsh, const IndirectHeap *ioh, - const IndirectHeap *ssh, TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override { - passedDispatchFlags = dispatchFlags; - - recordedCommandBuffer = std::unique_ptr(new CommandBuffer(device)); - auto completionStamp = CommandStreamReceiverHw::flushTaskStateless(commandStream, commandStreamStart, - dsh, ioh, ssh, taskLevel, dispatchFlags, device); - storeCommandStream(commandStream, commandStreamStart); - - return completionStamp; - } - TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, Device &device) override { if (!skipBlitCalls) { return CommandStreamReceiverHw::flushBcsTask(blitPropertiesContainer, blocking, device); diff --git a/shared/test/common/mocks/mock_csr.h b/shared/test/common/mocks/mock_csr.h index 99a564dccb..ba70fd9d6a 100644 --- a/shared/test/common/mocks/mock_csr.h +++ b/shared/test/common/mocks/mock_csr.h @@ -163,32 +163,6 @@ class MockCsr : public MockCsrBase { device); } - CompletionStamp flushTaskStateless( - LinearStream &commandStream, - size_t commandStreamStart, - const IndirectHeap *dsh, - const IndirectHeap *ioh, - const IndirectHeap *ssh, - TaskCountType taskLevel, - DispatchFlags &dispatchFlags, - Device &device) override { - this->flushTaskStamp = *this->executionStamp; - (*this->executionStamp)++; - slmUsedInLastFlushTask = dispatchFlags.useSLM; - this->latestSentTaskCount = ++this->taskCount; - lastTaskLevelToFlushTask = taskLevel; - - return CommandStreamReceiverHw::flushTaskStateless( - commandStream, - commandStreamStart, - dsh, - ioh, - ssh, - taskLevel, - dispatchFlags, - device); - } - bool peekMediaVfeStateDirty() const { return mediaVfeStateDirty; } bool slmUsedInLastFlushTask = false; diff --git 
a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index 538ecfb389..9c8eaee09c 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -3198,7 +3198,7 @@ HWTEST_F(CommandStreamReceiverHwTest, givenOutOfHostMemoryFailureOnFlushWhenFlus commandStreamReceiver.flushReturnValue = SubmissionStatus::outOfHostMemory; auto completionStamp = commandStreamReceiver.flushTask(commandStream, - 0, + 64, &dsh, &ioh, nullptr, @@ -3214,7 +3214,7 @@ HWTEST_F(CommandStreamReceiverHwTest, givenOutOfDeviceMemoryFailureOnFlushWhenFl commandStreamReceiver.flushReturnValue = SubmissionStatus::outOfMemory; auto completionStamp = commandStreamReceiver.flushTask(commandStream, - 0, + 64, &dsh, &ioh, nullptr, @@ -3231,7 +3231,7 @@ HWTEST_F(CommandStreamReceiverHwTest, givenFailedFailureOnFlushWhenFlushingTaskT commandStreamReceiver.flushReturnValue = SubmissionStatus::failed; auto completionStamp = commandStreamReceiver.flushTask(commandStream, - 0, + 64, &dsh, &ioh, nullptr, @@ -3346,7 +3346,7 @@ HWTEST_F(CommandStreamReceiverHwTest, whenFlushTaskCalledThenSetPassNumClients) commandStreamReceiver.registerClient(&client2); commandStreamReceiver.flushTask(commandStream, - 0, + 64, &dsh, &ioh, nullptr, @@ -5284,6 +5284,10 @@ HWTEST2_F(CommandStreamReceiverHwTest, GivenDirtyFlagForContextInBindlessHelperW auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + if (commandStreamReceiver.heaplessStateInitialized) { + GTEST_SKIP(); + } + auto bindlessHeapsHelper = std::make_unique(pDevice, pDevice->getNumGenericSubDevices() > 1); MockBindlesHeapsHelper *bindlessHeapsHelperPtr = bindlessHeapsHelper.get(); pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->bindlessHeapsHelper.reset(bindlessHeapsHelper.release()); @@ -5538,6 +5542,9 @@ 
HWTEST_F(CommandStreamReceiverHwTest, GivenFlushHeapStorageRequiresRecyclingTagW HWTEST_F(CommandStreamReceiverHwTest, givenEpilogueStreamAvailableWhenFlushTaskCalledThenDispachEpilogueCommandsIntoEpilogueStream) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + if (commandStreamReceiver.heaplessModeEnabled) { + GTEST_SKIP(); + } GraphicsAllocation *commandBuffer = commandStreamReceiver.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{commandStreamReceiver.getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); @@ -5971,7 +5978,7 @@ HWTEST_F(CommandStreamReceiverHwHeaplessTest, whenHeaplessCommandStreamReceiverF LinearStream commandStream(0, 0); - EXPECT_ANY_THROW(csr->flushTaskStateless(commandStream, 0, nullptr, nullptr, nullptr, 0, csr->recordedDispatchFlags, *pDevice)); + EXPECT_ANY_THROW(csr->flushTaskHeapless(commandStream, 0, nullptr, nullptr, nullptr, 0, csr->recordedDispatchFlags, *pDevice)); EXPECT_ANY_THROW(csr->programHeaplessProlog(*pDevice)); EXPECT_ANY_THROW(csr->programStateBaseAddressHeapless(*pDevice, commandStream)); EXPECT_ANY_THROW(csr->programComputeModeHeapless(*pDevice, commandStream)); diff --git a/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp b/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp index 43d5006491..5a34b15540 100644 --- a/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp @@ -102,7 +102,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamTest, givenDebugFlagSetWhenSubmittingThenCall for (uint32_t i = 0; i <= expectedExitCounter + 3; i++) { SysCalls::exitCalled = false; - csr->flushTask(cs, 0u, &ih, &ih, &ih, 0u, dispatchFlags, *device); + csr->flushTask(cs, 16u, &ih, &ih, &ih, 0u, dispatchFlags, *device); bool enabled = (i >= expectedExitCounter); @@ -543,12 +543,14 @@ 
HWTEST_TEMPLATED_F(DrmCommandStreamBatchingTests, givenCsrWhenDispatchPolicyIsSe testedCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); testedCsr->useNewResourceImplicitFlush = false; testedCsr->useGpuIdleImplicitFlush = false; + testedCsr->heaplessStateInitialized = true; auto commandBuffer = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); auto dummyAllocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); ASSERT_EQ(0u, reinterpret_cast(commandBuffer->getUnderlyingBuffer()) & 0xFFF); IndirectHeap cs(commandBuffer); + cs.getSpace(4u); // use some bytes csr->makeResident(*dummyAllocation); @@ -585,7 +587,11 @@ HWTEST_TEMPLATED_F(DrmCommandStreamBatchingTests, givenCsrWhenDispatchPolicyIsSe elementInVector = std::find(recordedCmdBuffer->surfaces.begin(), recordedCmdBuffer->surfaces.end(), allocations->getGraphicsAllocation(0u)); EXPECT_NE(elementInVector, recordedCmdBuffer->surfaces.end()); - EXPECT_EQ(testedCsr->commandStream.getGraphicsAllocation(), recordedCmdBuffer->batchBuffer.commandBufferAllocation); + if (testedCsr->getHeaplessStateInitEnabled()) { + EXPECT_EQ(cs.getGraphicsAllocation(), recordedCmdBuffer->batchBuffer.commandBufferAllocation); + } else { + EXPECT_EQ(testedCsr->commandStream.getGraphicsAllocation(), recordedCmdBuffer->batchBuffer.commandBufferAllocation); + } int ioctlUserPtrCnt = 3; ioctlUserPtrCnt += testedCsr->clearColorAllocation ? 
1 : 0; @@ -610,10 +616,12 @@ HWTEST_TEMPLATED_F(DrmCommandStreamBatchingTests, givenRecordedCommandBufferWhen testedCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); testedCsr->useNewResourceImplicitFlush = false; testedCsr->useGpuIdleImplicitFlush = false; + testedCsr->heaplessStateInitialized = true; auto commandBuffer = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); IndirectHeap cs(commandBuffer); - + // use some bytes + cs.getSpace(4u); auto allocations = device->getDefaultEngine().commandStreamReceiver->getTagsMultiAllocation(); csr->setTagAllocation(static_cast(allocations->getGraphicsAllocation(csr->getRootDeviceIndex()))); @@ -638,8 +646,10 @@ HWTEST_TEMPLATED_F(DrmCommandStreamBatchingTests, givenRecordedCommandBufferWhen EXPECT_TRUE(cmdBuffers.peekIsEmpty()); - auto commandBufferGraphicsAllocation = submittedCommandBuffer.getGraphicsAllocation(); - EXPECT_TRUE(commandBufferGraphicsAllocation->isResident(csr->getOsContext().getContextId())); + if (!csr->getHeaplessStateInitEnabled()) { + auto commandBufferGraphicsAllocation = submittedCommandBuffer.getGraphicsAllocation(); + EXPECT_TRUE(commandBufferGraphicsAllocation->isResident(csr->getOsContext().getContextId())); + } // preemption allocation size_t csrSurfaceCount = (device->getPreemptionMode() == PreemptionMode::MidThread) ? 2 : 0; @@ -649,8 +659,9 @@ HWTEST_TEMPLATED_F(DrmCommandStreamBatchingTests, givenRecordedCommandBufferWhen // validate that submited command buffer has what we want EXPECT_EQ(3u + csrSurfaceCount, this->mock->execBuffer.getBufferCount()); - EXPECT_EQ(4u, this->mock->execBuffer.getBatchStartOffset()); - EXPECT_EQ(submittedCommandBuffer.getUsed(), this->mock->execBuffer.getBatchLen()); + + EXPECT_EQ(csr->getHeaplessStateInitEnabled() ? 0u : 4u, this->mock->execBuffer.getBatchStartOffset()); + EXPECT_EQ(csr->getHeaplessStateInitEnabled() ? 
cs.getUsed() : submittedCommandBuffer.getUsed(), this->mock->execBuffer.getBatchLen()); auto *execObjects = reinterpret_cast(this->mock->execBuffer.getBuffersPtr()); @@ -690,11 +701,13 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenProcessResidencyFailingOnO auto testedCsr = static_cast *>(csr); testedCsr->processResidencyCallBase = false; testedCsr->processResidencyResult = SubmissionStatus::outOfMemory; + auto flushInternalCalledBeforeFlush = testedCsr->flushInternalCalled; + auto processResidencyCalledBeforeFlush = testedCsr->processResidencyCalled; SubmissionStatus ret = csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_EQ(SubmissionStatus::outOfMemory, ret); - EXPECT_EQ(testedCsr->flushInternalCalled, 1u); - EXPECT_EQ(testedCsr->processResidencyCalled, 1u); + EXPECT_EQ(testedCsr->flushInternalCalled, flushInternalCalledBeforeFlush + 1); + EXPECT_EQ(testedCsr->processResidencyCalled, processResidencyCalledBeforeFlush + 1); mm->freeGraphicsMemory(allocation); mm->freeGraphicsMemory(commandBuffer); } @@ -712,11 +725,13 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenProcessResidencyFailingOnO auto testedCsr = static_cast *>(csr); testedCsr->processResidencyCallBase = false; testedCsr->processResidencyResult = SubmissionStatus::outOfHostMemory; + auto flushInternalCalledBeforeFlush = testedCsr->flushInternalCalled; + auto processResidencyCalledBeforeFlush = testedCsr->processResidencyCalled; SubmissionStatus ret = csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_EQ(SubmissionStatus::outOfHostMemory, ret); - EXPECT_EQ(testedCsr->flushInternalCalled, 1u); - EXPECT_EQ(testedCsr->processResidencyCalled, 1u); + EXPECT_EQ(testedCsr->flushInternalCalled, flushInternalCalledBeforeFlush + 1); + EXPECT_EQ(testedCsr->processResidencyCalled, processResidencyCalledBeforeFlush + 1); mm->freeGraphicsMemory(allocation); mm->freeGraphicsMemory(commandBuffer); } @@ -736,12 +751,15 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, 
givenFailingExecWhenFlushingThe testedCsr->processResidencyResult = SubmissionStatus::success; testedCsr->execCallBase = false; testedCsr->execResult = -1; + auto flushInternalCalledBeforeFlush = testedCsr->flushInternalCalled; + auto processResidencyCalledBeforeFlush = testedCsr->processResidencyCalled; + auto execCalledBeforeFlush = testedCsr->execCalled; SubmissionStatus ret = csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_EQ(SubmissionStatus::failed, ret); - EXPECT_EQ(testedCsr->flushInternalCalled, 1u); - EXPECT_EQ(testedCsr->processResidencyCalled, 1u); - EXPECT_EQ(testedCsr->execCalled, 1u); + EXPECT_EQ(testedCsr->flushInternalCalled, flushInternalCalledBeforeFlush + 1); + EXPECT_EQ(testedCsr->processResidencyCalled, processResidencyCalledBeforeFlush + 1); + EXPECT_EQ(testedCsr->execCalled, execCalledBeforeFlush + 1); mm->freeGraphicsMemory(allocation); mm->freeGraphicsMemory(commandBuffer); } @@ -922,6 +940,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamDirectSubmissionTest, givenDirectSubmissionLi auto testedCsr = static_cast *>(csr); testedCsr->completionFenceValuePointer = nullptr; testedCsr->directSubmission = std::make_unique>(*device->getDefaultEngine().commandStreamReceiver); + testedCsr->heaplessStateInitialized = true; auto oldMemoryOperationsInterface = executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->memoryOperationsInterface.release(); executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->memoryOperationsInterface = std::make_unique(device->getRootDeviceIndex());