diff --git a/opencl/source/command_queue/hardware_interface_xehp_and_later.inl b/opencl/source/command_queue/hardware_interface_xehp_and_later.inl index b957ca41b2..d361f1a02d 100644 --- a/opencl/source/command_queue/hardware_interface_xehp_and_later.inl +++ b/opencl/source/command_queue/hardware_interface_xehp_and_later.inl @@ -125,7 +125,8 @@ inline void HardwareInterface::programWalker( auto partitionWalker = ImplicitScalingHelper::isImplicitScalingEnabled(devices, !kernel.isSingleSubdevicePreferred()); if (partitionWalker) { - const uint64_t workPartitionAllocationGpuVa = commandQueue.getDevice().getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress(); + auto csr = commandQueue.getDevice().getDefaultEngine().commandStreamReceiver; + const uint64_t workPartitionAllocationGpuVa = csr->getWorkPartitionAllocationGpuAddress(); uint32_t partitionCount = 0u; ImplicitScalingDispatch::dispatchCommands(commandStream, walkerCmd, @@ -133,6 +134,7 @@ inline void HardwareInterface::programWalker( partitionCount, false, false, + csr->getDcFlushSupport(), kernel.usesImages(), workPartitionAllocationGpuVa, hwInfo); diff --git a/opencl/test/unit_test/aub_tests/command_queue/aub_multicontext_tests_xehp_and_later.cpp b/opencl/test/unit_test/aub_tests/command_queue/aub_multicontext_tests_xehp_and_later.cpp index 1400b29496..061ee6ccfe 100644 --- a/opencl/test/unit_test/aub_tests/command_queue/aub_multicontext_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/aub_tests/command_queue/aub_multicontext_tests_xehp_and_later.cpp @@ -452,6 +452,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, StaticWalkerPartitionFourTilesTests, givenPreWalker testArgs.emitSelfCleanup = false; testArgs.staticPartitioning = true; testArgs.workPartitionAllocationGpuVa = rootCsr->getWorkPartitionAllocationGpuAddress(); + testArgs.dcFlushEnable = rootCsr->getDcFlushSupport(); WalkerPartition::constructStaticallyPartitionedCommandBuffer( taskStreamCpu, taskStreamGpu, @@ -495,7 +496,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, StaticWalkerPartitionFourTilesTests, whenNoPreWalke testArgs.emitSelfCleanup = false; testArgs.staticPartitioning = true; testArgs.workPartitionAllocationGpuVa = rootCsr->getWorkPartitionAllocationGpuAddress(); - + testArgs.dcFlushEnable = rootCsr->getDcFlushSupport(); WalkerPartition::constructStaticallyPartitionedCommandBuffer( taskStreamCpu, taskStreamGpu, diff --git a/opencl/test/unit_test/aub_tests/command_stream/aub_walker_partition_tests_xehp_and_later.cpp b/opencl/test/unit_test/aub_tests/command_stream/aub_walker_partition_tests_xehp_and_later.cpp index 1a6d894513..69312563a5 100644 --- a/opencl/test/unit_test/aub_tests/command_stream/aub_walker_partition_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/aub_tests/command_stream/aub_walker_partition_tests_xehp_and_later.cpp @@ -992,7 +992,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, AubWalkerPartitionZeroTest, givenPredicatedCommandB testArgs.synchronizeBeforeExecution = false; testArgs.secondaryBatchBuffer = false; testArgs.emitSelfCleanup = false; - + testArgs.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo); WalkerPartition::constructDynamicallyPartitionedCommandBuffer( streamCpuPointer, taskStream->getGraphicsAllocation()->getGpuAddress(), diff --git a/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp b/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp index b3e6afa9d3..64f2268ec0 100644 --- a/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp @@ -1112,6 +1112,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr testArgs.crossTileAtomicSynchronization = true; testArgs.emitPipeControlStall = true; testArgs.partitionCount = 2u; + testArgs.dcFlushEnable = csr.getDcFlushSupport(); testArgs.tileCount = static_cast(device->getDeviceBitfield().count()); DebugManager.flags.SynchronizeWalkerInWparidMode.set(0); @@ -1169,6 +1170,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr testArgs.crossTileAtomicSynchronization = false; testArgs.emitPipeControlStall = false; testArgs.partitionCount = 2u; + testArgs.dcFlushEnable = csr.getDcFlushSupport(); testArgs.tileCount = static_cast(device->getDeviceBitfield().count()); DebugManager.flags.SynchronizeWalkerInWparidMode.set(0); @@ -1241,6 +1243,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr VariableBackup pipeControlConfigBackup(&ImplicitScalingDispatch::getPipeControlStallRequired(), true); auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr); + auto &csr = cmdQ->getUltCommandStreamReceiver(); size_t numPipeControls = MemorySynchronizationCommands::isBarrierWaRequired(device->getHardwareInfo()) ? 2 : 1; @@ -1254,6 +1257,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr testArgs.emitPipeControlStall = true; testArgs.crossTileAtomicSynchronization = true; testArgs.partitionCount = 16u; + testArgs.dcFlushEnable = csr.getDcFlushSupport(); testArgs.tileCount = static_cast(device->getDeviceBitfield().count()); auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs); @@ -1270,6 +1274,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr VariableBackup pipeControlConfigBackup(&ImplicitScalingDispatch::getPipeControlStallRequired(), false); auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr); + auto &csr = cmdQ->getUltCommandStreamReceiver(); size_t numPipeControls = MemorySynchronizationCommands::isBarrierWaRequired(device->getHardwareInfo()) ? 2 : 1; @@ -1283,6 +1288,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr testArgs.emitPipeControlStall = false; testArgs.crossTileAtomicSynchronization = false; testArgs.partitionCount = 16u; + testArgs.dcFlushEnable = csr.getDcFlushSupport(); testArgs.tileCount = static_cast(device->getDeviceBitfield().count()); auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs); diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index 629bdb64e4..440fcb03ea 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -308,6 +308,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis !container.getFlushTaskUsedForImmediate(), !args.isKernelDispatchedFromImmediateCmdList, false, + args.dcFlushEnable, workPartitionAllocationGpuVa, hwInfo); } else { diff --git a/shared/source/command_container/implicit_scaling.h b/shared/source/command_container/implicit_scaling.h index a9874e6029..42a31d8772 100644 --- a/shared/source/command_container/implicit_scaling.h +++ b/shared/source/command_container/implicit_scaling.h @@ -51,6 +51,7 @@ struct ImplicitScalingDispatch { bool useSecondaryBatchBuffer, bool apiSelfCleanup, bool usesImages, + bool dcFlush, uint64_t workPartitionAllocationGpuVa, const HardwareInfo &hwInfo); diff --git a/shared/source/command_container/implicit_scaling_xehp_and_later.inl b/shared/source/command_container/implicit_scaling_xehp_and_later.inl index 86b78b32fa..9f2bc7e1f3 100644 --- a/shared/source/command_container/implicit_scaling_xehp_and_later.inl +++ b/shared/source/command_container/implicit_scaling_xehp_and_later.inl @@ -21,7 +21,8 @@ WalkerPartition::WalkerPartitionArgs prepareWalkerPartitionArgs(uint64_t workPar bool emitSelfCleanup, bool preferStaticPartitioning, bool staticPartitioning, - bool useSecondaryBatchBuffer) { + bool useSecondaryBatchBuffer, + bool dcFlush) { WalkerPartition::WalkerPartitionArgs args = {}; args.workPartitionAllocationGpuVa = workPartitionAllocationGpuVa; @@ -43,6 +44,8 @@ WalkerPartition::WalkerPartitionArgs prepareWalkerPartitionArgs(uint64_t workPar args.emitBatchBufferEnd = false; args.secondaryBatchBuffer = useSecondaryBatchBuffer; + args.dcFlushEnable = dcFlush; + return args; } @@ -70,6 +73,7 @@ size_t ImplicitScalingDispatch::getSize(bool apiSelfCleanup, apiSelfCleanup, preferStaticPartitioning, staticPartitioning, + false, false); return static_cast(WalkerPartition::estimateSpaceRequiredInCommandBuffer(args)); @@ -83,6 +87,7 @@ void ImplicitScalingDispatch::dispatchCommands(LinearStream &commandS bool useSecondaryBatchBuffer, bool apiSelfCleanup, bool usesImages, + bool dcFlush, uint64_t workPartitionAllocationGpuVa, const HardwareInfo &hwInfo) { uint32_t totalProgrammedSize = 0u; @@ -98,7 +103,8 @@ void ImplicitScalingDispatch::dispatchCommands(LinearStream &commandS apiSelfCleanup, preferStaticPartitioning, staticPartitioning, - useSecondaryBatchBuffer); + useSecondaryBatchBuffer, + dcFlush); auto dispatchCommandsSize = getSize(apiSelfCleanup, preferStaticPartitioning, devices, {walkerCmd.getThreadGroupIdStartingX(), walkerCmd.getThreadGroupIdStartingY(), walkerCmd.getThreadGroupIdStartingZ()}, {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}); void *commandBuffer = commandStream.getSpace(dispatchCommandsSize); diff --git a/shared/source/command_container/walker_partition_interface.h b/shared/source/command_container/walker_partition_interface.h index d3481e6526..874d4af822 100644 --- a/shared/source/command_container/walker_partition_interface.h +++ b/shared/source/command_container/walker_partition_interface.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -29,6 +29,7 @@ struct WalkerPartitionArgs { bool emitPipeControlStall = false; bool preferredStaticPartitioning = false; bool usePostSync = false; + bool dcFlushEnable = false; }; constexpr uint32_t wparidCCSOffset = 0x221C; diff --git a/shared/source/command_container/walker_partition_xehp_and_later.h b/shared/source/command_container/walker_partition_xehp_and_later.h index b62c5f18e7..8765844c50 100644 --- a/shared/source/command_container/walker_partition_xehp_and_later.h +++ b/shared/source/command_container/walker_partition_xehp_and_later.h @@ -573,9 +573,9 @@ void constructDynamicallyPartitionedCommandBuffer(void *cpuPointer, } if (args.emitPipeControlStall) { - NEO::PipeControlArgs args; - args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); - programPipeControlCommand(currentBatchBufferPointer, totalBytesProgrammed, args); + NEO::PipeControlArgs pipeControlArgs; + pipeControlArgs.dcFlushEnable = args.dcFlushEnable; + programPipeControlCommand(currentBatchBufferPointer, totalBytesProgrammed, pipeControlArgs); } if (args.semaphoreProgrammingRequired) { @@ -703,9 +703,9 @@ void constructStaticallyPartitionedCommandBuffer(void *cpuPointer, } if (args.emitPipeControlStall) { - NEO::PipeControlArgs args; - args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); - programPipeControlCommand(currentBatchBufferPointer, totalBytesProgrammed, args); + NEO::PipeControlArgs pipeControlArgs; + pipeControlArgs.dcFlushEnable = args.dcFlushEnable; + programPipeControlCommand(currentBatchBufferPointer, totalBytesProgrammed, pipeControlArgs); } // Synchronize tiles after walker diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp index fb1272d2c0..4aec11a64c 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp @@ -1092,6 +1092,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); dispatchArgs.isInternal = isInternal; dispatchArgs.partitionCount = 2; + dispatchArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, pDevice->getHardwareInfo()); EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs, nullptr); @@ -1127,6 +1128,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp args.emitPipeControlStall = true; args.partitionCount = dispatchArgs.partitionCount; args.emitSelfCleanup = true; + args.dcFlushEnable = dispatchArgs.dcFlushEnable; auto cleanupSectionOffset = WalkerPartition::computeControlSectionOffset(args); uint64_t expectedCleanupGpuVa = cmdContainer->getCommandStream()->getGraphicsAllocation()->getGpuAddress() + diff --git a/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp b/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp index 4a743d0d36..e6d31e2bbd 100644 --- a/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp +++ b/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp @@ -30,7 +30,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenGetSizeWhenDispatchingCm expectedSize = ImplicitScalingDispatch::getSize(false, false, twoTile, Vec3(0, 0, 0), Vec3(32, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, 0u, *defaultHwInfo); + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, 0u, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); EXPECT_EQ(2u, partitionCount); @@ -72,7 +72,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenWorkgroupOneAndNoPartiti expectedSize = ImplicitScalingDispatch::getSize(false, false, twoTile, Vec3(0, 0, 0), Vec3(1, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, false, false, false, 0u, *defaultHwInfo); + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, false, false, false, dcFlushFlag, 0u, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); EXPECT_EQ(1u, partitionCount); @@ -115,7 +115,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenWorkgroupOneAndPartition expectedSize = ImplicitScalingDispatch::getSize(false, false, twoTile, Vec3(0, 0, 0), Vec3(1, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, 0u, *defaultHwInfo); + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, 0u, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); EXPECT_EQ(1u, partitionCount); @@ -161,7 +161,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningWhenDi expectedSize = ImplicitScalingDispatch::getSize(false, true, twoTile, Vec3(0, 0, 0), Vec3(32, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -213,7 +213,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningWhenPa expectedSize = ImplicitScalingDispatch::getSize(false, true, twoTile, Vec3(0, 0, 0), Vec3(32, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -267,7 +267,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningPrefer expectedSize = ImplicitScalingDispatch::getSize(false, true, twoTile, Vec3(0, 0, 0), Vec3(1, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -318,7 +318,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningPrefer expectedSize = ImplicitScalingDispatch::getSize(false, true, twoTile, Vec3(0, 0, 0), Vec3(1, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -355,7 +355,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningPrefer expectedSize = ImplicitScalingDispatch::getSize(false, true, twoTile, Vec3(0, 0, 0), Vec3(1, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -392,7 +392,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenDynamicPartitioningPrefe expectedSize = ImplicitScalingDispatch::getSize(false, false, twoTile, Vec3(0, 0, 0), Vec3(1, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -440,7 +440,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, dcFlushFlag, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -508,7 +508,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, dcFlushFlag, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -568,7 +568,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, dcFlushFlag, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -628,7 +628,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, dcFlushFlag, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -695,7 +695,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, dcFlushFlag, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -758,7 +758,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -823,7 +823,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -891,7 +891,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, dcFlushFlag, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); @@ -958,7 +958,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); diff --git a/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp b/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp index dc49ef80e4..417c457cb5 100644 --- a/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp +++ b/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp @@ -12,6 +12,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerPartitionWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) { testArgs.partitionCount = 16u; + testArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo); + checkForProperCmdBufferAddressOffset = false; uint64_t gpuVirtualAddress = 0x8000123000; uint64_t postSyncAddress = 0x8000456000; @@ -101,7 +103,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerPartitionWhenConst auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); + EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable()); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); @@ -157,6 +159,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe uint64_t cmdBufferGpuAddress = 0x8000123000; uint64_t postSyncAddress = 0x8000456000; testArgs.workPartitionAllocationGpuVa = 0x8000444000; + testArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo); + auto walker = createWalker(postSyncAddress); uint32_t totalBytesProgrammed{}; @@ -190,7 +194,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe ASSERT_NE(nullptr, pipeControl); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); + EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable()); } { auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); @@ -233,6 +237,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd testArgs.partitionCount = testArgs.tileCount; checkForProperCmdBufferAddressOffset = false; testArgs.synchronizeBeforeExecution = true; + testArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo); + uint64_t cmdBufferGpuAddress = 0x8000123000; uint64_t postSyncAddress = 0x8000456000; testArgs.workPartitionAllocationGpuVa = 0x8000444000; @@ -289,7 +295,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd ASSERT_NE(nullptr, pipeControl); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); + EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable()); } { auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); @@ -332,6 +338,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd testArgs.crossTileAtomicSynchronization = false; testArgs.tileCount = 4u; testArgs.partitionCount = testArgs.tileCount; + testArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo); + checkForProperCmdBufferAddressOffset = false; uint64_t cmdBufferGpuAddress = 0x8000123000; uint64_t postSyncAddress = 0x8000456000; @@ -368,7 +376,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd ASSERT_NE(nullptr, pipeControl); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); + EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable()); } { auto miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); @@ -415,6 +423,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit testArgs.partitionCount = testArgs.tileCount; testArgs.emitSelfCleanup = true; testArgs.staticPartitioning = true; + testArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo); checkForProperCmdBufferAddressOffset = false; uint64_t cmdBufferGpuAddress = 0x8000123000; @@ -465,7 +474,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit ASSERT_NE(nullptr, pipeControl); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); + EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable()); } { auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); @@ -565,6 +574,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit testArgs.partitionCount = testArgs.tileCount; testArgs.emitSelfCleanup = true; testArgs.staticPartitioning = true; + testArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo); + checkForProperCmdBufferAddressOffset = false; uint64_t cmdBufferGpuAddress = 0x8000123000; uint64_t postSyncAddress = 0x8000456000; @@ -614,7 +625,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit ASSERT_NE(nullptr, pipeControl); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); + EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable()); } { auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); @@ -714,6 +725,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit testArgs.useAtomicsForSelfCleanup = true; testArgs.emitSelfCleanup = true; testArgs.staticPartitioning = true; + testArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo); + checkForProperCmdBufferAddressOffset = false; uint64_t cmdBufferGpuAddress = 0x8000123000; uint64_t postSyncAddress = 0x8000456000; @@ -765,7 +778,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit ASSERT_NE(nullptr, pipeControl); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); + EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable()); } { auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); @@ -870,6 +883,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit testArgs.emitSelfCleanup = true; testArgs.useAtomicsForSelfCleanup = true; testArgs.staticPartitioning = true; + testArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo); + checkForProperCmdBufferAddressOffset = false; uint64_t cmdBufferGpuAddress = 0x8000123000; uint64_t postSyncAddress = 0x8000456000; @@ -921,7 +936,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit ASSERT_NE(nullptr, pipeControl); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); + EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable()); } { auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); @@ -1025,6 +1040,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDebugModesForWalkerParti testArgs.tileCount = 4u; testArgs.partitionCount = 16u; testArgs.emitBatchBufferEnd = true; + testArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo); checkForProperCmdBufferAddressOffset = false; uint64_t gpuVirtualAddress = 0x8000123000; @@ -1113,7 +1129,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDebugModesForWalkerParti auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); + EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable()); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); @@ -1258,6 +1274,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitionIsPreferr testArgs.staticPartitioning = staticPartitioning; testArgs.preferredStaticPartitioning = preferredStaticPartitioning; testArgs.workPartitionAllocationGpuVa = 0x800BADA55000; + testArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo); auto expectedCommandUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM) + sizeof(WalkerPartition::MI_ATOMIC) * 2 + @@ -1332,7 +1349,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitionIsPreferr auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); + EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable()); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); diff --git a/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_2.cpp b/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_2.cpp index c9ac5804d2..573f13f28f 100644 --- a/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_2.cpp +++ b/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_2.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -839,6 +839,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningWhenZD HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupSectionWhenDebugForceDisableCrossTileSyncThenSelfCleanupOverridesDebugAndAddsOwnCleanupSection) { testArgs.crossTileAtomicSynchronization = false; testArgs.partitionCount = 16u; + testArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo); + checkForProperCmdBufferAddressOffset = false; testArgs.emitSelfCleanup = true; uint64_t gpuVirtualAddress = 0x8000123000; @@ -939,7 +941,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupSectionWhenDe auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); + EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable()); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); @@ -1043,6 +1045,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupAndAtomicsUse checkForProperCmdBufferAddressOffset = false; testArgs.emitSelfCleanup = true; testArgs.useAtomicsForSelfCleanup = true; + testArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo); + uint64_t gpuVirtualAddress = 0x8000123000; uint64_t postSyncAddress = 0x8000456000; WalkerPartition::COMPUTE_WALKER walker; @@ -1142,7 +1146,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupAndAtomicsUse auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); + EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable()); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); diff --git a/shared/test/unit_test/fixtures/implicit_scaling_fixture.cpp b/shared/test/unit_test/fixtures/implicit_scaling_fixture.cpp index f469000ad4..a5871d778c 100644 --- a/shared/test/unit_test/fixtures/implicit_scaling_fixture.cpp +++ b/shared/test/unit_test/fixtures/implicit_scaling_fixture.cpp @@ -30,6 +30,8 @@ void ImplicitScalingFixture::setUp() { commandStream.replaceGraphicsAllocation(&cmdBufferAlloc); testHardwareInfo = *defaultHwInfo; + + dcFlushFlag = pDevice->getDefaultEngine().commandStreamReceiver->getDcFlushSupport(); } void ImplicitScalingFixture::tearDown() { diff --git a/shared/test/unit_test/fixtures/implicit_scaling_fixture.h b/shared/test/unit_test/fixtures/implicit_scaling_fixture.h index 271d092d6b..420c364794 100644 --- a/shared/test/unit_test/fixtures/implicit_scaling_fixture.h +++ b/shared/test/unit_test/fixtures/implicit_scaling_fixture.h @@ -33,6 +33,7 @@ struct ImplicitScalingFixture : public CommandEncodeStatesFixture { DeviceBitfield singleTile; DeviceBitfield twoTile; void *alignedMemory = nullptr; + bool dcFlushFlag = false; }; using ImplicitScalingTests = Test;