diff --git a/opencl/source/command_queue/hardware_interface_xehp_and_later.inl b/opencl/source/command_queue/hardware_interface_xehp_and_later.inl index d361f1a02d..b957ca41b2 100644 --- a/opencl/source/command_queue/hardware_interface_xehp_and_later.inl +++ b/opencl/source/command_queue/hardware_interface_xehp_and_later.inl @@ -125,8 +125,7 @@ inline void HardwareInterface::programWalker( auto partitionWalker = ImplicitScalingHelper::isImplicitScalingEnabled(devices, !kernel.isSingleSubdevicePreferred()); if (partitionWalker) { - auto csr = commandQueue.getDevice().getDefaultEngine().commandStreamReceiver; - const uint64_t workPartitionAllocationGpuVa = csr->getWorkPartitionAllocationGpuAddress(); + const uint64_t workPartitionAllocationGpuVa = commandQueue.getDevice().getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress(); uint32_t partitionCount = 0u; ImplicitScalingDispatch::dispatchCommands(commandStream, walkerCmd, @@ -134,7 +133,6 @@ inline void HardwareInterface::programWalker( partitionCount, false, false, - csr->getDcFlushSupport(), kernel.usesImages(), workPartitionAllocationGpuVa, hwInfo); diff --git a/opencl/test/unit_test/aub_tests/command_queue/aub_multicontext_tests_xehp_and_later.cpp b/opencl/test/unit_test/aub_tests/command_queue/aub_multicontext_tests_xehp_and_later.cpp index 061ee6ccfe..1400b29496 100644 --- a/opencl/test/unit_test/aub_tests/command_queue/aub_multicontext_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/aub_tests/command_queue/aub_multicontext_tests_xehp_and_later.cpp @@ -452,7 +452,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, StaticWalkerPartitionFourTilesTests, givenPreWalker testArgs.emitSelfCleanup = false; testArgs.staticPartitioning = true; testArgs.workPartitionAllocationGpuVa = rootCsr->getWorkPartitionAllocationGpuAddress(); - testArgs.dcFlushEnable = rootCsr->getDcFlushSupport(); WalkerPartition::constructStaticallyPartitionedCommandBuffer( taskStreamCpu, taskStreamGpu, @@ -496,7 +495,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, StaticWalkerPartitionFourTilesTests, whenNoPreWalke testArgs.emitSelfCleanup = false; testArgs.staticPartitioning = true; testArgs.workPartitionAllocationGpuVa = rootCsr->getWorkPartitionAllocationGpuAddress(); - testArgs.dcFlushEnable = rootCsr->getDcFlushSupport(); + WalkerPartition::constructStaticallyPartitionedCommandBuffer( taskStreamCpu, taskStreamGpu, diff --git a/opencl/test/unit_test/aub_tests/command_stream/aub_walker_partition_tests_xehp_and_later.cpp b/opencl/test/unit_test/aub_tests/command_stream/aub_walker_partition_tests_xehp_and_later.cpp index 69312563a5..1a6d894513 100644 --- a/opencl/test/unit_test/aub_tests/command_stream/aub_walker_partition_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/aub_tests/command_stream/aub_walker_partition_tests_xehp_and_later.cpp @@ -992,7 +992,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, AubWalkerPartitionZeroTest, givenPredicatedCommandB testArgs.synchronizeBeforeExecution = false; testArgs.secondaryBatchBuffer = false; testArgs.emitSelfCleanup = false; - testArgs.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo); + WalkerPartition::constructDynamicallyPartitionedCommandBuffer( streamCpuPointer, taskStream->getGraphicsAllocation()->getGpuAddress(), diff --git a/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp b/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp index 64f2268ec0..b3e6afa9d3 100644 --- a/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp @@ -1112,7 +1112,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr testArgs.crossTileAtomicSynchronization = true; testArgs.emitPipeControlStall = true; testArgs.partitionCount = 2u; - testArgs.dcFlushEnable = csr.getDcFlushSupport(); testArgs.tileCount = static_cast(device->getDeviceBitfield().count()); DebugManager.flags.SynchronizeWalkerInWparidMode.set(0); @@ -1170,7 +1169,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr testArgs.crossTileAtomicSynchronization = false; testArgs.emitPipeControlStall = false; testArgs.partitionCount = 2u; - testArgs.dcFlushEnable = csr.getDcFlushSupport(); testArgs.tileCount = static_cast(device->getDeviceBitfield().count()); DebugManager.flags.SynchronizeWalkerInWparidMode.set(0); @@ -1243,7 +1241,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr VariableBackup pipeControlConfigBackup(&ImplicitScalingDispatch::getPipeControlStallRequired(), true); auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr); - auto &csr = cmdQ->getUltCommandStreamReceiver(); size_t numPipeControls = MemorySynchronizationCommands::isBarrierWaRequired(device->getHardwareInfo()) ? 2 : 1; @@ -1257,7 +1254,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr testArgs.emitPipeControlStall = true; testArgs.crossTileAtomicSynchronization = true; testArgs.partitionCount = 16u; - testArgs.dcFlushEnable = csr.getDcFlushSupport(); testArgs.tileCount = static_cast(device->getDeviceBitfield().count()); auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs); @@ -1274,7 +1270,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr VariableBackup pipeControlConfigBackup(&ImplicitScalingDispatch::getPipeControlStallRequired(), false); auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr); - auto &csr = cmdQ->getUltCommandStreamReceiver(); size_t numPipeControls = MemorySynchronizationCommands::isBarrierWaRequired(device->getHardwareInfo()) ? 2 : 1; @@ -1288,7 +1283,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr testArgs.emitPipeControlStall = false; testArgs.crossTileAtomicSynchronization = false; testArgs.partitionCount = 16u; - testArgs.dcFlushEnable = csr.getDcFlushSupport(); testArgs.tileCount = static_cast(device->getDeviceBitfield().count()); auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs); diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index 440fcb03ea..629bdb64e4 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -308,7 +308,6 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis !container.getFlushTaskUsedForImmediate(), !args.isKernelDispatchedFromImmediateCmdList, false, - args.dcFlushEnable, workPartitionAllocationGpuVa, hwInfo); } else { diff --git a/shared/source/command_container/implicit_scaling.h b/shared/source/command_container/implicit_scaling.h index 42a31d8772..a9874e6029 100644 --- a/shared/source/command_container/implicit_scaling.h +++ b/shared/source/command_container/implicit_scaling.h @@ -51,7 +51,6 @@ struct ImplicitScalingDispatch { bool useSecondaryBatchBuffer, bool apiSelfCleanup, bool usesImages, - bool dcFlush, uint64_t workPartitionAllocationGpuVa, const HardwareInfo &hwInfo); diff --git a/shared/source/command_container/implicit_scaling_xehp_and_later.inl b/shared/source/command_container/implicit_scaling_xehp_and_later.inl index 9f2bc7e1f3..86b78b32fa 100644 --- a/shared/source/command_container/implicit_scaling_xehp_and_later.inl +++ b/shared/source/command_container/implicit_scaling_xehp_and_later.inl @@ -21,8 +21,7 @@ WalkerPartition::WalkerPartitionArgs prepareWalkerPartitionArgs(uint64_t workPar bool emitSelfCleanup, bool preferStaticPartitioning, bool staticPartitioning, - bool useSecondaryBatchBuffer, - bool dcFlush) { + bool useSecondaryBatchBuffer) { WalkerPartition::WalkerPartitionArgs args = {}; args.workPartitionAllocationGpuVa = workPartitionAllocationGpuVa; @@ -44,8 +43,6 @@ WalkerPartition::WalkerPartitionArgs prepareWalkerPartitionArgs(uint64_t workPar args.emitBatchBufferEnd = false; args.secondaryBatchBuffer = useSecondaryBatchBuffer; - args.dcFlushEnable = dcFlush; - return args; } @@ -73,7 +70,6 @@ size_t ImplicitScalingDispatch::getSize(bool apiSelfCleanup, apiSelfCleanup, preferStaticPartitioning, staticPartitioning, - false, false); return static_cast(WalkerPartition::estimateSpaceRequiredInCommandBuffer(args)); @@ -87,7 +83,6 @@ void ImplicitScalingDispatch::dispatchCommands(LinearStream &commandS bool useSecondaryBatchBuffer, bool apiSelfCleanup, bool usesImages, - bool dcFlush, uint64_t workPartitionAllocationGpuVa, const HardwareInfo &hwInfo) { uint32_t totalProgrammedSize = 0u; @@ -103,8 +98,7 @@ void ImplicitScalingDispatch::dispatchCommands(LinearStream &commandS apiSelfCleanup, preferStaticPartitioning, staticPartitioning, - useSecondaryBatchBuffer, - dcFlush); + useSecondaryBatchBuffer); auto dispatchCommandsSize = getSize(apiSelfCleanup, preferStaticPartitioning, devices, {walkerCmd.getThreadGroupIdStartingX(), walkerCmd.getThreadGroupIdStartingY(), walkerCmd.getThreadGroupIdStartingZ()}, {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}); void *commandBuffer = commandStream.getSpace(dispatchCommandsSize); diff --git a/shared/source/command_container/walker_partition_interface.h b/shared/source/command_container/walker_partition_interface.h index 874d4af822..a5de13aca3 100644 --- a/shared/source/command_container/walker_partition_interface.h +++ b/shared/source/command_container/walker_partition_interface.h @@ -29,7 +29,6 @@ struct WalkerPartitionArgs { bool emitPipeControlStall = false; bool preferredStaticPartitioning = false; bool usePostSync = false; - bool dcFlushEnable = false; }; constexpr uint32_t wparidCCSOffset = 0x221C; diff --git a/shared/source/command_container/walker_partition_xehp_and_later.h b/shared/source/command_container/walker_partition_xehp_and_later.h index 8765844c50..b62c5f18e7 100644 --- a/shared/source/command_container/walker_partition_xehp_and_later.h +++ b/shared/source/command_container/walker_partition_xehp_and_later.h @@ -573,9 +573,9 @@ void constructDynamicallyPartitionedCommandBuffer(void *cpuPointer, } if (args.emitPipeControlStall) { - NEO::PipeControlArgs pipeControlArgs; - pipeControlArgs.dcFlushEnable = args.dcFlushEnable; - programPipeControlCommand(currentBatchBufferPointer, totalBytesProgrammed, pipeControlArgs); + NEO::PipeControlArgs args; + args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); + programPipeControlCommand(currentBatchBufferPointer, totalBytesProgrammed, args); } if (args.semaphoreProgrammingRequired) { @@ -703,9 +703,9 @@ void constructStaticallyPartitionedCommandBuffer(void *cpuPointer, } if (args.emitPipeControlStall) { - NEO::PipeControlArgs pipeControlArgs; - pipeControlArgs.dcFlushEnable = args.dcFlushEnable; - programPipeControlCommand(currentBatchBufferPointer, totalBytesProgrammed, pipeControlArgs); + NEO::PipeControlArgs args; + args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); + programPipeControlCommand(currentBatchBufferPointer, totalBytesProgrammed, args); } // Synchronize tiles after walker diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp index 4aec11a64c..fb1272d2c0 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp @@ -1092,7 +1092,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); dispatchArgs.isInternal = isInternal; dispatchArgs.partitionCount = 2; - dispatchArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, pDevice->getHardwareInfo()); EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs, nullptr); @@ -1128,7 +1127,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp args.emitPipeControlStall = true; args.partitionCount = dispatchArgs.partitionCount; args.emitSelfCleanup = true; - args.dcFlushEnable = dispatchArgs.dcFlushEnable; auto cleanupSectionOffset = WalkerPartition::computeControlSectionOffset(args); uint64_t expectedCleanupGpuVa = cmdContainer->getCommandStream()->getGraphicsAllocation()->getGpuAddress() + diff --git a/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp b/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp index e6d31e2bbd..4a743d0d36 100644 --- a/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp +++ b/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp @@ -30,7 +30,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenGetSizeWhenDispatchingCm expectedSize = ImplicitScalingDispatch::getSize(false, false, twoTile, Vec3(0, 0, 0), Vec3(32, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, 0u, *defaultHwInfo); + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, 0u, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); EXPECT_EQ(2u, partitionCount); @@ -72,7 +72,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenWorkgroupOneAndNoPartiti expectedSize = ImplicitScalingDispatch::getSize(false, false, twoTile, Vec3(0, 0, 0), Vec3(1, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, false, false, false, dcFlushFlag, 0u, *defaultHwInfo); + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, false, false, false, 0u, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); EXPECT_EQ(1u, partitionCount); @@ -115,7 +115,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenWorkgroupOneAndPartition expectedSize = ImplicitScalingDispatch::getSize(false, false, twoTile, Vec3(0, 0, 0), Vec3(1, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, 0u, *defaultHwInfo); + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, 0u, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); EXPECT_EQ(1u, partitionCount); @@ -161,7 +161,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningWhenDi expectedSize = ImplicitScalingDispatch::getSize(false, true, twoTile, Vec3(0, 0, 0), Vec3(32, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -213,7 +213,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningWhenPa expectedSize = ImplicitScalingDispatch::getSize(false, true, twoTile, Vec3(0, 0, 0), Vec3(32, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -267,7 +267,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningPrefer expectedSize = ImplicitScalingDispatch::getSize(false, true, twoTile, Vec3(0, 0, 0), Vec3(1, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -318,7 +318,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningPrefer expectedSize = ImplicitScalingDispatch::getSize(false, true, twoTile, Vec3(0, 0, 0), Vec3(1, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -355,7 +355,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningPrefer expectedSize = ImplicitScalingDispatch::getSize(false, true, twoTile, Vec3(0, 0, 0), Vec3(1, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -392,7 +392,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenDynamicPartitioningPrefe expectedSize = ImplicitScalingDispatch::getSize(false, false, twoTile, Vec3(0, 0, 0), Vec3(1, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -440,7 +440,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -508,7 +508,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -568,7 +568,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -628,7 +628,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -695,7 +695,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -758,7 +758,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -823,7 +823,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -891,7 +891,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -958,7 +958,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); diff --git a/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp b/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp index 417c457cb5..dc49ef80e4 100644 --- a/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp +++ b/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp @@ -12,8 +12,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerPartitionWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) { testArgs.partitionCount = 16u; - testArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo); - checkForProperCmdBufferAddressOffset = false; uint64_t gpuVirtualAddress = 0x8000123000; uint64_t postSyncAddress = 0x8000456000; @@ -103,7 +101,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerPartitionWhenConst auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); @@ -159,8 +157,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe uint64_t cmdBufferGpuAddress = 0x8000123000; uint64_t postSyncAddress = 0x8000456000; testArgs.workPartitionAllocationGpuVa = 0x8000444000; - testArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo); - auto walker = createWalker(postSyncAddress); uint32_t totalBytesProgrammed{}; @@ -194,7 +190,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe ASSERT_NE(nullptr, pipeControl); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); } { auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); @@ -237,8 +233,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd testArgs.partitionCount = testArgs.tileCount; checkForProperCmdBufferAddressOffset = false; testArgs.synchronizeBeforeExecution = true; - testArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo); - uint64_t cmdBufferGpuAddress = 0x8000123000; uint64_t postSyncAddress = 0x8000456000; testArgs.workPartitionAllocationGpuVa = 0x8000444000; @@ -295,7 +289,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd ASSERT_NE(nullptr, pipeControl); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); } { auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); @@ -338,8 +332,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd testArgs.crossTileAtomicSynchronization = false; testArgs.tileCount = 4u; testArgs.partitionCount = testArgs.tileCount; - testArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo); - checkForProperCmdBufferAddressOffset = false; uint64_t cmdBufferGpuAddress = 0x8000123000; uint64_t postSyncAddress = 0x8000456000; @@ -376,7 +368,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd ASSERT_NE(nullptr, pipeControl); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); } { auto miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); @@ -423,7 +415,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit testArgs.partitionCount = testArgs.tileCount; testArgs.emitSelfCleanup = true; testArgs.staticPartitioning = true; - testArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo); checkForProperCmdBufferAddressOffset = false; uint64_t cmdBufferGpuAddress = 0x8000123000; @@ -474,7 +465,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit ASSERT_NE(nullptr, pipeControl); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); } { auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); @@ -574,8 +565,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit testArgs.partitionCount = testArgs.tileCount; testArgs.emitSelfCleanup = true; testArgs.staticPartitioning = true; - testArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo); - checkForProperCmdBufferAddressOffset = false; uint64_t cmdBufferGpuAddress = 0x8000123000; uint64_t postSyncAddress = 0x8000456000; @@ -625,7 +614,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit ASSERT_NE(nullptr, pipeControl); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); } { auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); @@ -725,8 +714,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit testArgs.useAtomicsForSelfCleanup = true; testArgs.emitSelfCleanup = true; testArgs.staticPartitioning = true; - testArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo); - checkForProperCmdBufferAddressOffset = false; uint64_t cmdBufferGpuAddress = 0x8000123000; uint64_t postSyncAddress = 0x8000456000; @@ -778,7 +765,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit ASSERT_NE(nullptr, pipeControl); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); } { auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); @@ -883,8 +870,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit testArgs.emitSelfCleanup = true; testArgs.useAtomicsForSelfCleanup = true; testArgs.staticPartitioning = true; - testArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo); - checkForProperCmdBufferAddressOffset = false; uint64_t cmdBufferGpuAddress = 0x8000123000; uint64_t postSyncAddress = 0x8000456000; @@ -936,7 +921,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit ASSERT_NE(nullptr, pipeControl); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); } { auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); @@ -1040,7 +1025,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDebugModesForWalkerParti testArgs.tileCount = 4u; testArgs.partitionCount = 16u; testArgs.emitBatchBufferEnd = true; - testArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo); checkForProperCmdBufferAddressOffset = false; uint64_t gpuVirtualAddress = 0x8000123000; @@ -1129,7 +1113,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDebugModesForWalkerParti auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); @@ -1274,7 +1258,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitionIsPreferr testArgs.staticPartitioning = staticPartitioning; testArgs.preferredStaticPartitioning = preferredStaticPartitioning; testArgs.workPartitionAllocationGpuVa = 0x800BADA55000; - testArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo); auto expectedCommandUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM) + sizeof(WalkerPartition::MI_ATOMIC) * 2 + @@ -1349,7 +1332,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitionIsPreferr auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); diff --git a/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_2.cpp b/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_2.cpp index 573f13f28f..63af96de25 100644 --- a/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_2.cpp +++ b/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_2.cpp @@ -839,8 +839,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningWhenZD HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupSectionWhenDebugForceDisableCrossTileSyncThenSelfCleanupOverridesDebugAndAddsOwnCleanupSection) { testArgs.crossTileAtomicSynchronization = false; testArgs.partitionCount = 16u; - testArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo); - checkForProperCmdBufferAddressOffset = false; testArgs.emitSelfCleanup = true; uint64_t gpuVirtualAddress = 0x8000123000; @@ -941,7 +939,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupSectionWhenDe auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); @@ -1045,8 +1043,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupAndAtomicsUse checkForProperCmdBufferAddressOffset = false; testArgs.emitSelfCleanup = true; testArgs.useAtomicsForSelfCleanup = true; - testArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo); - uint64_t gpuVirtualAddress = 0x8000123000; uint64_t postSyncAddress = 0x8000456000; WalkerPartition::COMPUTE_WALKER walker; @@ -1146,7 +1142,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupAndAtomicsUse auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); diff --git a/shared/test/unit_test/fixtures/implicit_scaling_fixture.cpp b/shared/test/unit_test/fixtures/implicit_scaling_fixture.cpp index a5871d778c..f469000ad4 100644 --- a/shared/test/unit_test/fixtures/implicit_scaling_fixture.cpp +++ b/shared/test/unit_test/fixtures/implicit_scaling_fixture.cpp @@ -30,8 +30,6 @@ void ImplicitScalingFixture::setUp() { commandStream.replaceGraphicsAllocation(&cmdBufferAlloc); testHardwareInfo = *defaultHwInfo; - - dcFlushFlag = pDevice->getDefaultEngine().commandStreamReceiver->getDcFlushSupport(); } void ImplicitScalingFixture::tearDown() { diff --git a/shared/test/unit_test/fixtures/implicit_scaling_fixture.h b/shared/test/unit_test/fixtures/implicit_scaling_fixture.h index 420c364794..271d092d6b 100644 --- a/shared/test/unit_test/fixtures/implicit_scaling_fixture.h +++ b/shared/test/unit_test/fixtures/implicit_scaling_fixture.h @@ -33,7 +33,6 @@ struct ImplicitScalingFixture : public CommandEncodeStatesFixture { DeviceBitfield singleTile; DeviceBitfield twoTile; void *alignedMemory = nullptr; - bool dcFlushFlag = false; }; using ImplicitScalingTests = Test;