diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl b/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl index 26e648f0b3..3124083763 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl @@ -177,6 +177,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K ssh, // surfaceStateHeap dsh, // dynamicStateHeap reinterpret_cast(threadGroupDimensions), // threadGroupDimensions + nullptr, // outWalkerPtr &additionalCommands, // additionalCommands commandListPreemptionMode, // preemptionMode 0, // partitionCount @@ -256,13 +257,9 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K if (this->inOrderExecutionEnabled && !launchParams.isKernelSplitOperation) { NEO::PipeControlArgs args; - uint64_t counterAddress = this->inOrderDependencyCounterAllocation->getGpuAddress() + this->inOrderAllocationOffset; - NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), - NEO::PostSyncMode::ImmediateData, - counterAddress, - this->inOrderDependencyCounter + 1, - args); + NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), args); + appendSignalInOrderDependencyCounter(); } return ZE_RESULT_SUCCESS; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index b5b19191ca..1e9f358fc7 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -278,6 +278,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K ssh, // surfaceStateHeap dsh, // dynamicStateHeap reinterpret_cast(threadGroupDimensions), // threadGroupDimensions + nullptr, // outWalkerPtr &additionalCommands, // additionalCommands kernelPreemptionMode, // preemptionMode this->partitionCount, // partitionCount @@ -326,10 +327,14 @@ 
ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K } } - if (inOrderExecSignalRequired && inOrderNonWalkerSignalling) { - appendWaitOnSingleEvent(event, false); - - appendSignalInOrderDependencyCounter(); + if (inOrderExecSignalRequired) { + if (inOrderNonWalkerSignalling) { + appendWaitOnSingleEvent(event, false); + appendSignalInOrderDependencyCounter(); + } else { + UNRECOVERABLE_IF(!dispatchKernelArgs.outWalkerPtr); + addCmdForPatching(dispatchKernelArgs.outWalkerPtr, dispatchKernelArgs.postSyncImmValue, InOrderPatchCommandTypes::CmdType::Walker); + } } if (neoDevice->getDebugger() && !this->immediateCmdListHeapSharing) { diff --git a/level_zero/core/source/helpers/in_order_patch_cmds.h b/level_zero/core/source/helpers/in_order_patch_cmds.h index c7ab90ac1d..b50af5fbd1 100644 --- a/level_zero/core/source/helpers/in_order_patch_cmds.h +++ b/level_zero/core/source/helpers/in_order_patch_cmds.h @@ -15,8 +15,10 @@ namespace L0 { namespace InOrderPatchCommandTypes { enum class CmdType { + None, Sdi, - Semaphore + Semaphore, + Walker }; template @@ -24,17 +26,25 @@ struct BaseCmd { BaseCmd(void *cmd, uint64_t baseCounterValue, CmdType cmdType) : cmd(cmd), baseCounterValue(baseCounterValue), cmdType(cmdType) {} void patch(uint64_t appendCunterValue) { - if (CmdType::Sdi == cmdType) { + switch (cmdType) { + case CmdType::Sdi: patchSdi(appendCunterValue); - } else { - UNRECOVERABLE_IF(CmdType::Semaphore != cmdType); + break; + case CmdType::Semaphore: patchSemaphore(appendCunterValue); + break; + case CmdType::Walker: + patchComputeWalker(appendCunterValue); + break; + default: + UNRECOVERABLE_IF(true); + break; } } void *cmd = nullptr; const uint64_t baseCounterValue = 0; - const CmdType cmdType; + const CmdType cmdType = CmdType::None; protected: void patchSdi(uint64_t appendCunterValue) { @@ -48,6 +58,16 @@ struct BaseCmd { semaphoreCmd->setSemaphoreDataDword(static_cast(baseCounterValue + appendCunterValue)); } + void patchComputeWalker(uint64_t 
appendCunterValue) { + if constexpr (GfxFamily::walkerPostSyncSupport) { + auto walkerCmd = reinterpret_cast(cmd); + auto &postSync = walkerCmd->getPostSync(); + postSync.setImmediateData(baseCounterValue + appendCunterValue); + } else { + UNRECOVERABLE_IF(true); + } + } + BaseCmd() = delete; }; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp index ca5840f11b..2ede64ecee 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp @@ -188,6 +188,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA nullptr, threadGroupDimensions, nullptr, + nullptr, PreemptionMode::MidBatch, 0, 0, diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index 977fa05519..753d5c6eff 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -629,6 +629,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA nullptr, threadGroupDimensions, nullptr, + nullptr, PreemptionMode::MidBatch, 0, 0, @@ -1600,9 +1601,20 @@ HWTEST2_F(InOrderCmdListTests, givenHostVisibleEventOnLatestFlushWhenCallingSync using NonPostSyncWalkerMatcher = IsWithinGfxCore; +HWTEST2_F(InOrderCmdListTests, givenNonPostSyncWalkerWhenPatchingThenThrow, NonPostSyncWalkerMatcher) { + InOrderPatchCommandTypes::BaseCmd incorrectCmd(nullptr, 1, InOrderPatchCommandTypes::CmdType::None); + + EXPECT_ANY_THROW(incorrectCmd.patch(1)); + + InOrderPatchCommandTypes::BaseCmd walkerCmd(nullptr, 1, 
InOrderPatchCommandTypes::CmdType::Walker); + + EXPECT_ANY_THROW(walkerCmd.patch(1)); +} + HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenProgramPipeControlWithSignalAllocation, NonPostSyncWalkerMatcher) { using WALKER = typename FamilyType::WALKER_TYPE; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; auto immCmdList = createImmCmdList(); immCmdList->inOrderAllocationOffset = 64; @@ -1624,12 +1636,18 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenProgramP auto pcCmd = genCmdCast(*pcItor); ASSERT_NE(nullptr, pcCmd); + EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_NO_WRITE, pcCmd->getPostSyncOperation()); + + auto sdiItor = find(pcItor, cmdList.end()); + ASSERT_NE(cmdList.end(), sdiItor); + + auto sdiCmd = genCmdCast(*sdiItor); + uint64_t expectedAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress() + immCmdList->inOrderAllocationOffset; - EXPECT_EQ(static_cast(expectedAddress & 0x0000FFFFFFFFULL), pcCmd->getAddress()); - EXPECT_EQ(static_cast(expectedAddress >> 32), pcCmd->getAddressHigh()); - EXPECT_EQ(static_cast(immCmdList->inOrderDependencyCounter), pcCmd->getImmediateData()); - EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pcCmd->getPostSyncOperation()); + EXPECT_EQ(expectedAddress, sdiCmd->getAddress()); + EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword()); + EXPECT_EQ(immCmdList->inOrderDependencyCounter, sdiCmd->getDataDword0()); } HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitThenProgramPcAndSignalAlloc, NonPostSyncWalkerMatcher) { @@ -3094,6 +3112,77 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenMultiTileInOrderModeWhenProgramming EXPECT_EQ(eventEndGpuVa + offset, semaphoreCmd->getSemaphoreGraphicsAddress()); } +HWTEST2_F(MultiTileInOrderCmdListTests, 
givenDebugFlagSetWhenUsingRegularCmdListThenAddWalkerToPatch, IsAtLeastXeHpCore) { + using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; + + DebugManager.flags.EnableInOrderRegularCmdListPatching.set(1); + + ze_command_queue_desc_t desc = {}; + + auto mockCmdQHw = makeZeUniquePtr>(device, device->getNEODevice()->getDefaultEngine().commandStreamReceiver, &desc); + mockCmdQHw->initialize(true, false, false); + auto regularCmdList = createRegularCmdList(false); + regularCmdList->partitionCount = 2; + + auto cmdStream = regularCmdList->getCmdContainer().getCommandStream(); + + size_t offset = cmdStream->getUsed(); + + regularCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + regularCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + + ASSERT_EQ(4u, regularCmdList->inOrderPatchCmds.size()); // Walker + 2x Semaphore + Walker + + auto walkerFromContainer1 = genCmdCast(regularCmdList->inOrderPatchCmds[0].cmd); + ASSERT_NE(nullptr, walkerFromContainer1); + auto walkerFromContainer2 = genCmdCast(regularCmdList->inOrderPatchCmds[3].cmd); + ASSERT_NE(nullptr, walkerFromContainer2); + COMPUTE_WALKER *walkerFromParser1 = nullptr; + COMPUTE_WALKER *walkerFromParser2 = nullptr; + + { + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, + ptrOffset(cmdStream->getCpuBase(), offset), + (cmdStream->getUsed() - offset))); + + auto itor = find(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), itor); + + walkerFromParser1 = genCmdCast(*itor); + + itor = find(++itor, cmdList.end()); + ASSERT_NE(cmdList.end(), itor); + + walkerFromParser2 = genCmdCast(*itor); + } + + EXPECT_EQ(2u, regularCmdList->inOrderDependencyCounter); + + auto verifyPatching = [&](uint64_t executionCounter) { + auto appendValue = regularCmdList->inOrderDependencyCounter * executionCounter; + + EXPECT_EQ(1u + appendValue, 
walkerFromContainer1->getPostSync().getImmediateData()); + EXPECT_EQ(1u + appendValue, walkerFromParser1->getPostSync().getImmediateData()); + + EXPECT_EQ(2u + appendValue, walkerFromContainer2->getPostSync().getImmediateData()); + EXPECT_EQ(2u + appendValue, walkerFromParser2->getPostSync().getImmediateData()); + }; + + regularCmdList->close(); + + auto handle = regularCmdList->toHandle(); + + mockCmdQHw->executeCommandLists(1, &handle, nullptr, false); + verifyPatching(0); + + mockCmdQHw->executeCommandLists(1, &handle, nullptr, false); + verifyPatching(1); + + mockCmdQHw->executeCommandLists(1, &handle, nullptr, false); + verifyPatching(2); +} + struct BcsSplitInOrderCmdListTests : public InOrderCmdListTests { void SetUp() override { NEO::DebugManager.flags.SplitBcsCopy.set(1); @@ -3250,7 +3339,8 @@ HWTEST2_F(InOrderRegularCmdListTests, givenDebugFlagSetWhenUsingRegularCmdListTh EXPECT_EQ(1u, regularCmdList->inOrderPatchCmds.size()); // SDI - auto sdiFromContainer1 = reinterpret_cast(regularCmdList->inOrderPatchCmds[0].cmd); + auto sdiFromContainer1 = genCmdCast(regularCmdList->inOrderPatchCmds[0].cmd); + ASSERT_NE(nullptr, sdiFromContainer1); MI_STORE_DATA_IMM *sdiFromParser1 = nullptr; { @@ -3269,10 +3359,12 @@ HWTEST2_F(InOrderRegularCmdListTests, givenDebugFlagSetWhenUsingRegularCmdListTh regularCmdList->appendMemoryCopy(&copyData, &copyData, 1, nullptr, 0, nullptr, false, false); ASSERT_EQ(3u, regularCmdList->inOrderPatchCmds.size()); // SDI + Semaphore + SDI - auto semaphoreFromContainer2 = reinterpret_cast(regularCmdList->inOrderPatchCmds[1].cmd); + auto semaphoreFromContainer2 = genCmdCast(regularCmdList->inOrderPatchCmds[1].cmd); + ASSERT_NE(nullptr, semaphoreFromContainer2); MI_SEMAPHORE_WAIT *semaphoreFromParser2 = nullptr; - auto sdiFromContainer2 = reinterpret_cast(regularCmdList->inOrderPatchCmds[2].cmd); + auto sdiFromContainer2 = genCmdCast(regularCmdList->inOrderPatchCmds[2].cmd); + ASSERT_NE(nullptr, sdiFromContainer2); MI_STORE_DATA_IMM 
*sdiFromParser2 = nullptr; { @@ -3321,6 +3413,76 @@ HWTEST2_F(InOrderRegularCmdListTests, givenDebugFlagSetWhenUsingRegularCmdListTh verifyPatching(2); } +HWTEST2_F(InOrderRegularCmdListTests, givenDebugFlagSetWhenUsingRegularCmdListThenAddWalkerToPatch, IsAtLeastXeHpCore) { + using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; + + DebugManager.flags.EnableInOrderRegularCmdListPatching.set(1); + + ze_command_queue_desc_t desc = {}; + + auto mockCmdQHw = makeZeUniquePtr>(device, device->getNEODevice()->getDefaultEngine().commandStreamReceiver, &desc); + mockCmdQHw->initialize(true, false, false); + auto regularCmdList = createRegularCmdList(false); + + auto cmdStream = regularCmdList->getCmdContainer().getCommandStream(); + + size_t offset = cmdStream->getUsed(); + + regularCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + regularCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + + ASSERT_EQ(3u, regularCmdList->inOrderPatchCmds.size()); // Walker + Semaphore + Walker + + auto walkerFromContainer1 = genCmdCast(regularCmdList->inOrderPatchCmds[0].cmd); + ASSERT_NE(nullptr, walkerFromContainer1); + auto walkerFromContainer2 = genCmdCast(regularCmdList->inOrderPatchCmds[2].cmd); + ASSERT_NE(nullptr, walkerFromContainer2); + COMPUTE_WALKER *walkerFromParser1 = nullptr; + COMPUTE_WALKER *walkerFromParser2 = nullptr; + + { + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, + ptrOffset(cmdStream->getCpuBase(), offset), + (cmdStream->getUsed() - offset))); + + auto itor = find(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), itor); + + walkerFromParser1 = genCmdCast(*itor); + + itor = find(++itor, cmdList.end()); + ASSERT_NE(cmdList.end(), itor); + + walkerFromParser2 = genCmdCast(*itor); + } + + EXPECT_EQ(2u, regularCmdList->inOrderDependencyCounter); + + auto verifyPatching = [&](uint64_t executionCounter) { + 
auto appendValue = regularCmdList->inOrderDependencyCounter * executionCounter; + + EXPECT_EQ(1u + appendValue, walkerFromContainer1->getPostSync().getImmediateData()); + EXPECT_EQ(1u + appendValue, walkerFromParser1->getPostSync().getImmediateData()); + + EXPECT_EQ(2u + appendValue, walkerFromContainer2->getPostSync().getImmediateData()); + EXPECT_EQ(2u + appendValue, walkerFromParser2->getPostSync().getImmediateData()); + }; + + regularCmdList->close(); + + auto handle = regularCmdList->toHandle(); + + mockCmdQHw->executeCommandLists(1, &handle, nullptr, false); + verifyPatching(0); + + mockCmdQHw->executeCommandLists(1, &handle, nullptr, false); + verifyPatching(1); + + mockCmdQHw->executeCommandLists(1, &handle, nullptr, false); + verifyPatching(2); +} + HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdListThenProgramPipeControlsToHandleDependencies, IsAtLeastXeHpCore) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; diff --git a/opencl/source/command_queue/hardware_interface_xehp_and_later.inl b/opencl/source/command_queue/hardware_interface_xehp_and_later.inl index 368b94b326..3315b70b3a 100644 --- a/opencl/source/command_queue/hardware_interface_xehp_and_later.inl +++ b/opencl/source/command_queue/hardware_interface_xehp_and_later.inl @@ -140,6 +140,7 @@ inline void HardwareInterface::programWalker( uint32_t partitionCount = 0u; ImplicitScalingDispatch::dispatchCommands(commandStream, walkerCmd, + nullptr, devices, partitionCount, false, diff --git a/opencl/test/unit_test/aub_tests/command_queue/aub_multicontext_tests_xehp_and_later.cpp b/opencl/test/unit_test/aub_tests/command_queue/aub_multicontext_tests_xehp_and_later.cpp index 876999d345..2c0e7b63e6 100644 --- a/opencl/test/unit_test/aub_tests/command_queue/aub_multicontext_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/aub_tests/command_queue/aub_multicontext_tests_xehp_and_later.cpp @@ 
-454,6 +454,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, StaticWalkerPartitionFourTilesTests, givenPreWalker WalkerPartition::constructStaticallyPartitionedCommandBuffer( taskStreamCpu, + nullptr, taskStreamGpu, &walkerCmd, totalBytesProgrammed, @@ -499,6 +500,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, StaticWalkerPartitionFourTilesTests, whenNoPreWalke WalkerPartition::constructStaticallyPartitionedCommandBuffer( taskStreamCpu, + nullptr, taskStreamGpu, &walkerCmd, totalBytesProgrammed, diff --git a/opencl/test/unit_test/aub_tests/command_stream/aub_walker_partition_tests_xehp_and_later.cpp b/opencl/test/unit_test/aub_tests/command_stream/aub_walker_partition_tests_xehp_and_later.cpp index b23dc4a2f9..7ce2ce2e18 100644 --- a/opencl/test/unit_test/aub_tests/command_stream/aub_walker_partition_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/aub_tests/command_stream/aub_walker_partition_tests_xehp_and_later.cpp @@ -1000,6 +1000,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, AubWalkerPartitionZeroTest, givenPredicatedCommandB WalkerPartition::constructDynamicallyPartitionedCommandBuffer( streamCpuPointer, + nullptr, taskStream->getGraphicsAllocation()->getGpuAddress(), &walkerCmd, totalBytesProgrammed, diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index 88ce05f05c..f56444035c 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -48,6 +48,7 @@ struct EncodeDispatchKernelArgs { IndirectHeap *surfaceStateHeap = nullptr; IndirectHeap *dynamicStateHeap = nullptr; const void *threadGroupDimensions = nullptr; + void *outWalkerPtr = nullptr; std::list *additionalCommands = nullptr; PreemptionMode preemptionMode = PreemptionMode::Initial; uint32_t partitionCount = 0u; diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index 19355787a6..2f7ce1d99c 100644 --- 
a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -348,6 +348,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis } ImplicitScalingDispatch::dispatchCommands(*listCmdBufferStream, walkerCmd, + &args.outWalkerPtr, args.device->getDeviceBitfield(), args.partitionCount, !(container.getFlushTaskUsedForImmediate() || container.isUsingPrimaryBuffer()), @@ -360,6 +361,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis } else { args.partitionCount = 1; auto buffer = listCmdBufferStream->getSpace(sizeof(walkerCmd)); + args.outWalkerPtr = buffer; *(decltype(walkerCmd) *)buffer = walkerCmd; } diff --git a/shared/source/command_container/implicit_scaling.h b/shared/source/command_container/implicit_scaling.h index 8dc062ea57..9de1708ddb 100644 --- a/shared/source/command_container/implicit_scaling.h +++ b/shared/source/command_container/implicit_scaling.h @@ -48,6 +48,7 @@ struct ImplicitScalingDispatch { static void dispatchCommands(LinearStream &commandStream, WALKER_TYPE &walkerCmd, + void **outWalkerPtr, const DeviceBitfield &devices, uint32_t &partitionCount, bool useSecondaryBatchBuffer, diff --git a/shared/source/command_container/implicit_scaling_xehp_and_later.inl b/shared/source/command_container/implicit_scaling_xehp_and_later.inl index 40e442bfc9..8dfd07a3ed 100644 --- a/shared/source/command_container/implicit_scaling_xehp_and_later.inl +++ b/shared/source/command_container/implicit_scaling_xehp_and_later.inl @@ -89,6 +89,7 @@ size_t ImplicitScalingDispatch::getSize(bool apiSelfCleanup, template void ImplicitScalingDispatch::dispatchCommands(LinearStream &commandStream, WALKER_TYPE &walkerCmd, + void **outWalkerPtr, const DeviceBitfield &devices, uint32_t &partitionCount, bool useSecondaryBatchBuffer, @@ -122,6 +123,7 @@ void ImplicitScalingDispatch::dispatchCommands(LinearStream &commandS if (staticPartitioning) { 
UNRECOVERABLE_IF(tileCount != partitionCount); WalkerPartition::constructStaticallyPartitionedCommandBuffer(commandBuffer, + outWalkerPtr, cmdBufferGpuAddress, &walkerCmd, totalProgrammedSize, @@ -137,6 +139,7 @@ void ImplicitScalingDispatch::dispatchCommands(LinearStream &commandS } WalkerPartition::constructDynamicallyPartitionedCommandBuffer(commandBuffer, + outWalkerPtr, cmdBufferGpuAddress, &walkerCmd, totalProgrammedSize, diff --git a/shared/source/command_container/walker_partition_xehp_and_later.h b/shared/source/command_container/walker_partition_xehp_and_later.h index a311a691d7..19c6464ca0 100644 --- a/shared/source/command_container/walker_partition_xehp_and_later.h +++ b/shared/source/command_container/walker_partition_xehp_and_later.h @@ -473,10 +473,10 @@ uint64_t computeWalkerSectionStart(WalkerPartitionArgs &args) { } template -void programPartitionedWalker(void *&inputAddress, uint32_t &totalBytesProgrammed, - COMPUTE_WALKER *inputWalker, - uint32_t partitionCount, - bool forceExecutionOnSingleTile) { +void *programPartitionedWalker(void *&inputAddress, uint32_t &totalBytesProgrammed, + COMPUTE_WALKER *inputWalker, + uint32_t partitionCount, + bool forceExecutionOnSingleTile) { auto computeWalker = putCommand>(inputAddress, totalBytesProgrammed); COMPUTE_WALKER cmd = *inputWalker; @@ -506,6 +506,8 @@ void programPartitionedWalker(void *&inputAddress, uint32_t &totalBytesProgramme } } *computeWalker = cmd; + + return computeWalker; } /* SAMPLE COMMAND BUFFER STRUCTURE, birds eye view for 16 partitions, 4 tiles @@ -540,6 +542,7 @@ void programPartitionedWalker(void *&inputAddress, uint32_t &totalBytesProgramme template void constructDynamicallyPartitionedCommandBuffer(void *cpuPointer, + void **outWalkerPtr, uint64_t gpuAddressOfAllocation, COMPUTE_WALKER *inputWalker, uint32_t &totalBytesProgrammed, @@ -614,7 +617,10 @@ void constructDynamicallyPartitionedCommandBuffer(void *cpuPointer, args.secondaryBatchBuffer); // Walker section - 
programPartitionedWalker(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args.partitionCount, args.forceExecutionOnSingleTile); + auto walkerPtr = programPartitionedWalker(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args.partitionCount, args.forceExecutionOnSingleTile); + if (outWalkerPtr) { + *outWalkerPtr = walkerPtr; + } programMiBatchBufferStart(currentBatchBufferPointer, totalBytesProgrammed, gpuAddressOfAllocation, false, args.secondaryBatchBuffer); @@ -682,6 +688,7 @@ uint64_t computeStaticPartitioningControlSectionOffset(WalkerPartitionArgs &args template void constructStaticallyPartitionedCommandBuffer(void *cpuPointer, + void **outWalkerPtr, uint64_t gpuAddressOfAllocation, COMPUTE_WALKER *inputWalker, uint32_t &totalBytesProgrammed, @@ -704,7 +711,10 @@ void constructStaticallyPartitionedCommandBuffer(void *cpuPointer, if (args.initializeWparidRegister) { programMiLoadRegisterMem(currentBatchBufferPointer, totalBytesProgrammed, args.workPartitionAllocationGpuVa, wparidCCSOffset); } - programPartitionedWalker(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args.partitionCount, args.forceExecutionOnSingleTile); + auto walkerPtr = programPartitionedWalker(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args.partitionCount, args.forceExecutionOnSingleTile); + if (outWalkerPtr) { + *outWalkerPtr = walkerPtr; + } // Prepare for cleanup section if (args.emitSelfCleanup) { diff --git a/shared/source/gen11/hw_cmds_base.h b/shared/source/gen11/hw_cmds_base.h index 4f5fa38e19..e6e3921a29 100644 --- a/shared/source/gen11/hw_cmds_base.h +++ b/shared/source/gen11/hw_cmds_base.h @@ -130,6 +130,7 @@ struct Gen11Family : public Gen11 { static const MI_FLUSH_DW cmdInitMiFlushDw; static const XY_COLOR_BLT cmdInitXyColorBlt; static constexpr bool isQwordInOrderCounter = false; + static constexpr bool walkerPostSyncSupport = false; static constexpr bool supportsCmdSet(GFXCORE_FAMILY cmdSetBaseFamily) { return 
cmdSetBaseFamily == IGFX_GEN8_CORE; diff --git a/shared/source/gen12lp/hw_cmds_base.h b/shared/source/gen12lp/hw_cmds_base.h index d8badd62fe..0c20ee7f50 100644 --- a/shared/source/gen12lp/hw_cmds_base.h +++ b/shared/source/gen12lp/hw_cmds_base.h @@ -131,6 +131,7 @@ struct Gen12LpFamily : public Gen12Lp { static const MI_FLUSH_DW cmdInitMiFlushDw; static const XY_FAST_COLOR_BLT cmdInitXyColorBlt; static constexpr bool isQwordInOrderCounter = false; + static constexpr bool walkerPostSyncSupport = false; static constexpr bool supportsCmdSet(GFXCORE_FAMILY cmdSetBaseFamily) { return cmdSetBaseFamily == IGFX_GEN8_CORE; diff --git a/shared/source/gen8/hw_cmds_base.h b/shared/source/gen8/hw_cmds_base.h index 4dc9d0229b..c33bf6197c 100644 --- a/shared/source/gen8/hw_cmds_base.h +++ b/shared/source/gen8/hw_cmds_base.h @@ -130,6 +130,7 @@ struct Gen8Family : public Gen8 { static const MI_FLUSH_DW cmdInitMiFlushDw; static const XY_COLOR_BLT cmdInitXyColorBlt; static constexpr bool isQwordInOrderCounter = false; + static constexpr bool walkerPostSyncSupport = false; static constexpr bool supportsCmdSet(GFXCORE_FAMILY cmdSetBaseFamily) { return cmdSetBaseFamily == IGFX_GEN8_CORE; diff --git a/shared/source/gen9/hw_cmds_base.h b/shared/source/gen9/hw_cmds_base.h index 7ff2faaf3f..33ed66c956 100644 --- a/shared/source/gen9/hw_cmds_base.h +++ b/shared/source/gen9/hw_cmds_base.h @@ -130,6 +130,7 @@ struct Gen9Family : public Gen9 { static const MI_FLUSH_DW cmdInitMiFlushDw; static const XY_COLOR_BLT cmdInitXyColorBlt; static constexpr bool isQwordInOrderCounter = false; + static constexpr bool walkerPostSyncSupport = false; static constexpr bool supportsCmdSet(GFXCORE_FAMILY cmdSetBaseFamily) { return cmdSetBaseFamily == IGFX_GEN8_CORE; diff --git a/shared/source/xe_hp_core/hw_cmds_base.h b/shared/source/xe_hp_core/hw_cmds_base.h index 4338c971bc..2b8f974db1 100644 --- a/shared/source/xe_hp_core/hw_cmds_base.h +++ b/shared/source/xe_hp_core/hw_cmds_base.h @@ -141,6 +141,7 @@ 
struct XeHpFamily : public XeHpCore { static const _3DSTATE_BTD_BODY cmd3dStateBtdBody; static const STATE_SIP cmdInitStateSip; static constexpr bool isQwordInOrderCounter = false; + static constexpr bool walkerPostSyncSupport = true; static constexpr bool supportsCmdSet(GFXCORE_FAMILY cmdSetBaseFamily) { return cmdSetBaseFamily == IGFX_XE_HP_CORE; diff --git a/shared/source/xe_hpc_core/hw_cmds_xe_hpc_core_base.h b/shared/source/xe_hpc_core/hw_cmds_xe_hpc_core_base.h index 41b6cb5d68..92bb2b76ce 100644 --- a/shared/source/xe_hpc_core/hw_cmds_xe_hpc_core_base.h +++ b/shared/source/xe_hpc_core/hw_cmds_xe_hpc_core_base.h @@ -127,6 +127,7 @@ struct XeHpcCoreFamily : public XeHpcCore { static const STATE_SIP cmdInitStateSip; static const STATE_SYSTEM_MEM_FENCE_ADDRESS cmdInitStateSystemMemFenceAddress; static constexpr bool isQwordInOrderCounter = false; + static constexpr bool walkerPostSyncSupport = true; static constexpr bool supportsCmdSet(GFXCORE_FAMILY cmdSetBaseFamily) { return cmdSetBaseFamily == IGFX_XE_HP_CORE; diff --git a/shared/source/xe_hpg_core/hw_cmds_xe_hpg_core_base.h b/shared/source/xe_hpg_core/hw_cmds_xe_hpg_core_base.h index 43b37aa942..2a7cdd7c7f 100644 --- a/shared/source/xe_hpg_core/hw_cmds_xe_hpg_core_base.h +++ b/shared/source/xe_hpg_core/hw_cmds_xe_hpg_core_base.h @@ -143,6 +143,7 @@ struct XeHpgCoreFamily : public XeHpgCore { static const _3DSTATE_BTD_BODY cmd3dStateBtdBody; static const STATE_SIP cmdInitStateSip; static constexpr bool isQwordInOrderCounter = false; + static constexpr bool walkerPostSyncSupport = true; static constexpr bool supportsCmdSet(GFXCORE_FAMILY cmdSetBaseFamily) { return cmdSetBaseFamily == IGFX_XE_HP_CORE; diff --git a/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp b/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp index a18fed828c..83458a9040 100644 --- a/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp +++ 
b/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp @@ -36,7 +36,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenGetSizeWhenDispatchingCm expectedSize = ImplicitScalingDispatch::getSize(false, false, twoTile, Vec3(0, 0, 0), Vec3(32, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, nullptr, twoTile, partitionCount, true, false, false, dcFlushFlag, forceExecutionOnSingleTileFlag, 0u, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -79,7 +79,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenWorkgroupOneAndNoPartiti expectedSize = ImplicitScalingDispatch::getSize(false, false, twoTile, Vec3(0, 0, 0), Vec3(1, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, false, false, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, nullptr, twoTile, partitionCount, false, false, false, dcFlushFlag, forceExecutionOnSingleTileFlag, 0u, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -123,7 +123,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenWorkgroupOneAndPartition expectedSize = ImplicitScalingDispatch::getSize(false, false, twoTile, Vec3(0, 0, 0), Vec3(1, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, nullptr, twoTile, partitionCount, true, false, false, dcFlushFlag, forceExecutionOnSingleTileFlag, 0u, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -170,7 
+170,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningWhenDi expectedSize = ImplicitScalingDispatch::getSize(false, true, twoTile, Vec3(0, 0, 0), Vec3(32, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, nullptr, twoTile, partitionCount, true, false, false, dcFlushFlag, forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -222,7 +222,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningWhenPa expectedSize = ImplicitScalingDispatch::getSize(false, true, twoTile, Vec3(0, 0, 0), Vec3(32, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, nullptr, twoTile, partitionCount, true, false, false, dcFlushFlag, forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -276,7 +276,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningPrefer expectedSize = ImplicitScalingDispatch::getSize(false, true, twoTile, Vec3(0, 0, 0), Vec3(1, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, nullptr, twoTile, partitionCount, true, false, false, dcFlushFlag, forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -327,7 
+327,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningPrefer expectedSize = ImplicitScalingDispatch::getSize(false, true, twoTile, Vec3(0, 0, 0), Vec3(1, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, nullptr, twoTile, partitionCount, true, false, false, dcFlushFlag, forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -364,7 +364,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningPrefer expectedSize = ImplicitScalingDispatch::getSize(false, true, twoTile, Vec3(0, 0, 0), Vec3(1, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, nullptr, twoTile, partitionCount, true, false, false, dcFlushFlag, forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -401,7 +401,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenDynamicPartitioningPrefe expectedSize = ImplicitScalingDispatch::getSize(false, false, twoTile, Vec3(0, 0, 0), Vec3(1, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, nullptr, twoTile, partitionCount, true, false, false, dcFlushFlag, forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -449,7 
+449,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, nullptr, twoTile, partitionCount, true, true, false, dcFlushFlag, forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -517,7 +517,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, nullptr, twoTile, partitionCount, true, true, false, dcFlushFlag, forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -577,7 +577,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, nullptr, twoTile, partitionCount, true, true, false, dcFlushFlag, forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -637,7 +637,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, dcFlushFlag, + 
ImplicitScalingDispatch::dispatchCommands(commandStream, walker, nullptr, twoTile, partitionCount, true, true, false, dcFlushFlag, forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -704,7 +704,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, nullptr, twoTile, partitionCount, true, true, false, dcFlushFlag, forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -767,7 +767,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, nullptr, twoTile, partitionCount, true, false, false, dcFlushFlag, forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -832,7 +832,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, nullptr, twoTile, partitionCount, true, false, false, dcFlushFlag, forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); 
EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -900,7 +900,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, nullptr, twoTile, partitionCount, true, true, false, dcFlushFlag, forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -967,7 +967,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, nullptr, twoTile, partitionCount, true, false, false, dcFlushFlag, forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); diff --git a/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp b/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp index 74912cafb6..e09131a6e6 100644 --- a/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp +++ b/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp @@ -49,6 +49,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerPartitionWhenConst testArgs.tileCount = 4u; testArgs.emitBatchBufferEnd = true; WalkerPartition::constructDynamicallyPartitionedCommandBuffer(cmdBuffer, + nullptr, gpuVirtualAddress, &walker, totalBytesProgrammed, @@ -171,6 +172,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe const auto controlSectionOffset = 
computeStaticPartitioningControlSectionOffset(testArgs); const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter); WalkerPartition::constructStaticallyPartitionedCommandBuffer(cmdBuffer, + nullptr, cmdBufferGpuAddress, &walker, totalBytesProgrammed, @@ -254,6 +256,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter); const auto preWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter); WalkerPartition::constructStaticallyPartitionedCommandBuffer(cmdBuffer, + nullptr, cmdBufferGpuAddress, &walker, totalBytesProgrammed, @@ -355,6 +358,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd uint32_t totalBytesProgrammed{}; const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset(testArgs); WalkerPartition::constructStaticallyPartitionedCommandBuffer(cmdBuffer, + nullptr, cmdBufferGpuAddress, &walker, totalBytesProgrammed, @@ -444,6 +448,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter); const auto finalSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter); WalkerPartition::constructStaticallyPartitionedCommandBuffer(cmdBuffer, + nullptr, cmdBufferGpuAddress, &walker, totalBytesProgrammed, @@ -596,6 +601,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + 
offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter); const auto finalSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter); WalkerPartition::constructStaticallyPartitionedCommandBuffer(cmdBuffer, + nullptr, cmdBufferGpuAddress, &walker, totalBytesProgrammed, @@ -748,6 +754,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter); const auto finalSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter); WalkerPartition::constructStaticallyPartitionedCommandBuffer(cmdBuffer, + nullptr, cmdBufferGpuAddress, &walker, totalBytesProgrammed, @@ -907,6 +914,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter); const auto finalSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter); WalkerPartition::constructStaticallyPartitionedCommandBuffer(cmdBuffer, + nullptr, cmdBufferGpuAddress, &walker, totalBytesProgrammed, @@ -1083,6 +1091,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDebugModesForWalkerParti auto totalProgrammedSize = optionalBatchBufferEndOffset + sizeof(WalkerPartition::BATCH_BUFFER_END); WalkerPartition::constructDynamicallyPartitionedCommandBuffer(cmdBuffer, + nullptr, gpuVirtualAddress, &walker, totalBytesProgrammed, @@ -1203,6 +1212,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset(testArgs); EXPECT_EQ(expectedControlSectionOffset, 
controlSectionOffset); WalkerPartition::constructStaticallyPartitionedCommandBuffer(cmdBuffer, + nullptr, cmdBufferGpuAddress, &walker, totalBytesProgrammed, @@ -1242,6 +1252,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset(testArgs); EXPECT_EQ(expectedControlSectionOffset, controlSectionOffset); WalkerPartition::constructStaticallyPartitionedCommandBuffer(cmdBuffer, + nullptr, cmdBufferGpuAddress, &walker, totalBytesProgrammed, @@ -1307,6 +1318,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitionIsPreferr testArgs.tileCount = 2; uint64_t gpuVirtualAddress = 0x8000123000; WalkerPartition::constructDynamicallyPartitionedCommandBuffer(cmdBuffer, + nullptr, gpuVirtualAddress, &walker, totalBytesProgrammed, diff --git a/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_2.cpp b/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_2.cpp index 855d357b60..b581ef50a7 100644 --- a/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_2.cpp +++ b/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_2.cpp @@ -36,6 +36,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerPartitionWhenConst walker.setPartitionType(COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X); WalkerPartition::constructDynamicallyPartitionedCommandBuffer(cmdBuffer, + nullptr, gpuVirtualAddress, &walker, totalBytesProgrammed, @@ -882,6 +883,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupSectionWhenDe testArgs.tileCount = 4u; WalkerPartition::constructDynamicallyPartitionedCommandBuffer(cmdBuffer, + nullptr, gpuVirtualAddress, &walker, totalBytesProgrammed, @@ -1088,6 +1090,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupAndAtomicsUse testArgs.tileCount = 4u; WalkerPartition::constructDynamicallyPartitionedCommandBuffer(cmdBuffer, + nullptr, 
gpuVirtualAddress, &walker, totalBytesProgrammed, @@ -1294,6 +1297,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDynamicPartitioningWhenP auto totalProgrammedSize = cleanupSectionOffset; WalkerPartition::constructDynamicallyPartitionedCommandBuffer(cmdBuffer, + nullptr, gpuVirtualAddress, &walker, totalBytesProgrammed, diff --git a/shared/test/unit_test/fixtures/command_container_fixture.cpp b/shared/test/unit_test/fixtures/command_container_fixture.cpp index cedb7bbace..d1d0869949 100644 --- a/shared/test/unit_test/fixtures/command_container_fixture.cpp +++ b/shared/test/unit_test/fixtures/command_container_fixture.cpp @@ -43,6 +43,7 @@ EncodeDispatchKernelArgs CommandEncodeStatesFixture::createDefaultDispatchKernel nullptr, // surfaceStateHeap nullptr, // dynamicStateHeap threadGroupDimensions, // threadGroupDimensions + nullptr, // outWalkerPtr nullptr, // additionalCommands PreemptionMode::Disabled, // preemptionMode 1, // partitionCount