Replace virtual method call for DC flush with stored bool value 3/n

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz 2022-10-11 23:57:08 +00:00 committed by Compute-Runtime-Automation
parent 33f5915062
commit 9d94089a95
15 changed files with 87 additions and 43 deletions

View File

@ -125,7 +125,8 @@ inline void HardwareInterface<GfxFamily>::programWalker(
auto partitionWalker = ImplicitScalingHelper::isImplicitScalingEnabled(devices, !kernel.isSingleSubdevicePreferred());
if (partitionWalker) {
const uint64_t workPartitionAllocationGpuVa = commandQueue.getDevice().getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress();
auto csr = commandQueue.getDevice().getDefaultEngine().commandStreamReceiver;
const uint64_t workPartitionAllocationGpuVa = csr->getWorkPartitionAllocationGpuAddress();
uint32_t partitionCount = 0u;
ImplicitScalingDispatch<GfxFamily>::dispatchCommands(commandStream,
walkerCmd,
@ -133,6 +134,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
partitionCount,
false,
false,
csr->getDcFlushSupport(),
kernel.usesImages(),
workPartitionAllocationGpuVa,
hwInfo);

View File

@ -452,6 +452,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, StaticWalkerPartitionFourTilesTests, givenPreWalker
testArgs.emitSelfCleanup = false;
testArgs.staticPartitioning = true;
testArgs.workPartitionAllocationGpuVa = rootCsr->getWorkPartitionAllocationGpuAddress();
testArgs.dcFlushEnable = rootCsr->getDcFlushSupport();
WalkerPartition::constructStaticallyPartitionedCommandBuffer<FamilyType>(
taskStreamCpu,
taskStreamGpu,
@ -495,7 +496,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, StaticWalkerPartitionFourTilesTests, whenNoPreWalke
testArgs.emitSelfCleanup = false;
testArgs.staticPartitioning = true;
testArgs.workPartitionAllocationGpuVa = rootCsr->getWorkPartitionAllocationGpuAddress();
testArgs.dcFlushEnable = rootCsr->getDcFlushSupport();
WalkerPartition::constructStaticallyPartitionedCommandBuffer<FamilyType>(
taskStreamCpu,
taskStreamGpu,

View File

@ -992,7 +992,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, AubWalkerPartitionZeroTest, givenPredicatedCommandB
testArgs.synchronizeBeforeExecution = false;
testArgs.secondaryBatchBuffer = false;
testArgs.emitSelfCleanup = false;
testArgs.dcFlushEnable = NEO::MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo);
WalkerPartition::constructDynamicallyPartitionedCommandBuffer<FamilyType>(
streamCpuPointer,
taskStream->getGraphicsAllocation()->getGpuAddress(),

View File

@ -1112,6 +1112,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr
testArgs.crossTileAtomicSynchronization = true;
testArgs.emitPipeControlStall = true;
testArgs.partitionCount = 2u;
testArgs.dcFlushEnable = csr.getDcFlushSupport();
testArgs.tileCount = static_cast<uint32_t>(device->getDeviceBitfield().count());
DebugManager.flags.SynchronizeWalkerInWparidMode.set(0);
@ -1169,6 +1170,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr
testArgs.crossTileAtomicSynchronization = false;
testArgs.emitPipeControlStall = false;
testArgs.partitionCount = 2u;
testArgs.dcFlushEnable = csr.getDcFlushSupport();
testArgs.tileCount = static_cast<uint32_t>(device->getDeviceBitfield().count());
DebugManager.flags.SynchronizeWalkerInWparidMode.set(0);
@ -1241,6 +1243,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr
VariableBackup<bool> pipeControlConfigBackup(&ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired(), true);
auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
auto &csr = cmdQ->getUltCommandStreamReceiver();
size_t numPipeControls = MemorySynchronizationCommands<FamilyType>::isBarrierWaRequired(device->getHardwareInfo()) ? 2 : 1;
@ -1254,6 +1257,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr
testArgs.emitPipeControlStall = true;
testArgs.crossTileAtomicSynchronization = true;
testArgs.partitionCount = 16u;
testArgs.dcFlushEnable = csr.getDcFlushSupport();
testArgs.tileCount = static_cast<uint32_t>(device->getDeviceBitfield().count());
auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs);
@ -1270,6 +1274,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr
VariableBackup<bool> pipeControlConfigBackup(&ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired(), false);
auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
auto &csr = cmdQ->getUltCommandStreamReceiver();
size_t numPipeControls = MemorySynchronizationCommands<FamilyType>::isBarrierWaRequired(device->getHardwareInfo()) ? 2 : 1;
@ -1283,6 +1288,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr
testArgs.emitPipeControlStall = false;
testArgs.crossTileAtomicSynchronization = false;
testArgs.partitionCount = 16u;
testArgs.dcFlushEnable = csr.getDcFlushSupport();
testArgs.tileCount = static_cast<uint32_t>(device->getDeviceBitfield().count());
auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs);

View File

@ -308,6 +308,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
!container.getFlushTaskUsedForImmediate(),
!args.isKernelDispatchedFromImmediateCmdList,
false,
args.dcFlushEnable,
workPartitionAllocationGpuVa,
hwInfo);
} else {

View File

@ -51,6 +51,7 @@ struct ImplicitScalingDispatch {
bool useSecondaryBatchBuffer,
bool apiSelfCleanup,
bool usesImages,
bool dcFlush,
uint64_t workPartitionAllocationGpuVa,
const HardwareInfo &hwInfo);

View File

@ -21,7 +21,8 @@ WalkerPartition::WalkerPartitionArgs prepareWalkerPartitionArgs(uint64_t workPar
bool emitSelfCleanup,
bool preferStaticPartitioning,
bool staticPartitioning,
bool useSecondaryBatchBuffer) {
bool useSecondaryBatchBuffer,
bool dcFlush) {
WalkerPartition::WalkerPartitionArgs args = {};
args.workPartitionAllocationGpuVa = workPartitionAllocationGpuVa;
@ -43,6 +44,8 @@ WalkerPartition::WalkerPartitionArgs prepareWalkerPartitionArgs(uint64_t workPar
args.emitBatchBufferEnd = false;
args.secondaryBatchBuffer = useSecondaryBatchBuffer;
args.dcFlushEnable = dcFlush;
return args;
}
@ -70,6 +73,7 @@ size_t ImplicitScalingDispatch<GfxFamily>::getSize(bool apiSelfCleanup,
apiSelfCleanup,
preferStaticPartitioning,
staticPartitioning,
false,
false);
return static_cast<size_t>(WalkerPartition::estimateSpaceRequiredInCommandBuffer<GfxFamily>(args));
@ -83,6 +87,7 @@ void ImplicitScalingDispatch<GfxFamily>::dispatchCommands(LinearStream &commandS
bool useSecondaryBatchBuffer,
bool apiSelfCleanup,
bool usesImages,
bool dcFlush,
uint64_t workPartitionAllocationGpuVa,
const HardwareInfo &hwInfo) {
uint32_t totalProgrammedSize = 0u;
@ -98,7 +103,8 @@ void ImplicitScalingDispatch<GfxFamily>::dispatchCommands(LinearStream &commandS
apiSelfCleanup,
preferStaticPartitioning,
staticPartitioning,
useSecondaryBatchBuffer);
useSecondaryBatchBuffer,
dcFlush);
auto dispatchCommandsSize = getSize(apiSelfCleanup, preferStaticPartitioning, devices, {walkerCmd.getThreadGroupIdStartingX(), walkerCmd.getThreadGroupIdStartingY(), walkerCmd.getThreadGroupIdStartingZ()}, {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()});
void *commandBuffer = commandStream.getSpace(dispatchCommandsSize);

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2021 Intel Corporation
* Copyright (C) 2021-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -29,6 +29,7 @@ struct WalkerPartitionArgs {
bool emitPipeControlStall = false;
bool preferredStaticPartitioning = false;
bool usePostSync = false;
bool dcFlushEnable = false;
};
constexpr uint32_t wparidCCSOffset = 0x221C;

View File

@ -573,9 +573,9 @@ void constructDynamicallyPartitionedCommandBuffer(void *cpuPointer,
}
if (args.emitPipeControlStall) {
NEO::PipeControlArgs args;
args.dcFlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, hwInfo);
programPipeControlCommand<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, args);
NEO::PipeControlArgs pipeControlArgs;
pipeControlArgs.dcFlushEnable = args.dcFlushEnable;
programPipeControlCommand<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, pipeControlArgs);
}
if (args.semaphoreProgrammingRequired) {
@ -703,9 +703,9 @@ void constructStaticallyPartitionedCommandBuffer(void *cpuPointer,
}
if (args.emitPipeControlStall) {
NEO::PipeControlArgs args;
args.dcFlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, hwInfo);
programPipeControlCommand<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, args);
NEO::PipeControlArgs pipeControlArgs;
pipeControlArgs.dcFlushEnable = args.dcFlushEnable;
programPipeControlCommand<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, pipeControlArgs);
}
// Synchronize tiles after walker

View File

@ -1092,6 +1092,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.isInternal = isInternal;
dispatchArgs.partitionCount = 2;
dispatchArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, pDevice->getHardwareInfo());
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs, nullptr);
@ -1127,6 +1128,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp
args.emitPipeControlStall = true;
args.partitionCount = dispatchArgs.partitionCount;
args.emitSelfCleanup = true;
args.dcFlushEnable = dispatchArgs.dcFlushEnable;
auto cleanupSectionOffset = WalkerPartition::computeControlSectionOffset<FamilyType>(args);
uint64_t expectedCleanupGpuVa = cmdContainer->getCommandStream()->getGraphicsAllocation()->getGpuAddress() +

View File

@ -30,7 +30,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenGetSizeWhenDispatchingCm
expectedSize = ImplicitScalingDispatch<FamilyType>::getSize(false, false, twoTile, Vec3<size_t>(0, 0, 0), Vec3<size_t>(32, 1, 1));
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, 0u, *defaultHwInfo);
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, 0u, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
EXPECT_EQ(2u, partitionCount);
@ -72,7 +72,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenWorkgroupOneAndNoPartiti
expectedSize = ImplicitScalingDispatch<FamilyType>::getSize(false, false, twoTile, Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1));
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, false, false, false, 0u, *defaultHwInfo);
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, false, false, false, dcFlushFlag, 0u, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
EXPECT_EQ(1u, partitionCount);
@ -115,7 +115,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenWorkgroupOneAndPartition
expectedSize = ImplicitScalingDispatch<FamilyType>::getSize(false, false, twoTile, Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1));
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, 0u, *defaultHwInfo);
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, 0u, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
EXPECT_EQ(1u, partitionCount);
@ -161,7 +161,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningWhenDi
expectedSize = ImplicitScalingDispatch<FamilyType>::getSize(false, true, twoTile, Vec3<size_t>(0, 0, 0), Vec3<size_t>(32, 1, 1));
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false,
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag,
workPartitionAllocationAddress, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
@ -213,7 +213,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningWhenPa
expectedSize = ImplicitScalingDispatch<FamilyType>::getSize(false, true, twoTile, Vec3<size_t>(0, 0, 0), Vec3<size_t>(32, 1, 1));
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false,
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag,
workPartitionAllocationAddress, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
@ -267,7 +267,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningPrefer
expectedSize = ImplicitScalingDispatch<FamilyType>::getSize(false, true, twoTile, Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1));
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false,
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag,
workPartitionAllocationAddress, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
@ -318,7 +318,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningPrefer
expectedSize = ImplicitScalingDispatch<FamilyType>::getSize(false, true, twoTile, Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1));
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false,
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag,
workPartitionAllocationAddress, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
@ -355,7 +355,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningPrefer
expectedSize = ImplicitScalingDispatch<FamilyType>::getSize(false, true, twoTile, Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1));
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false,
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag,
workPartitionAllocationAddress, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
@ -392,7 +392,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenDynamicPartitioningPrefe
expectedSize = ImplicitScalingDispatch<FamilyType>::getSize(false, false, twoTile, Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1));
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false,
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag,
workPartitionAllocationAddress, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
@ -440,7 +440,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
EXPECT_EQ(expectedSize, estimatedSize);
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false,
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, dcFlushFlag,
workPartitionAllocationAddress, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
@ -508,7 +508,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
EXPECT_EQ(expectedSize, estimatedSize);
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false,
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, dcFlushFlag,
workPartitionAllocationAddress, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
@ -568,7 +568,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
EXPECT_EQ(expectedSize, estimatedSize);
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false,
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, dcFlushFlag,
workPartitionAllocationAddress, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
@ -628,7 +628,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
EXPECT_EQ(expectedSize, estimatedSize);
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false,
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, dcFlushFlag,
workPartitionAllocationAddress, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
@ -695,7 +695,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
EXPECT_EQ(expectedSize, estimatedSize);
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false,
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, dcFlushFlag,
workPartitionAllocationAddress, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
@ -758,7 +758,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
EXPECT_EQ(expectedSize, estimatedSize);
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false,
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag,
workPartitionAllocationAddress, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
@ -823,7 +823,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
EXPECT_EQ(expectedSize, estimatedSize);
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false,
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag,
workPartitionAllocationAddress, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
@ -891,7 +891,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
EXPECT_EQ(expectedSize, estimatedSize);
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false,
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, dcFlushFlag,
workPartitionAllocationAddress, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
@ -958,7 +958,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
EXPECT_EQ(expectedSize, estimatedSize);
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false,
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag,
workPartitionAllocationAddress, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);

View File

@ -12,6 +12,8 @@
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerPartitionWhenConstructCommandBufferIsCalledThenBatchBufferIsBeingProgrammed) {
testArgs.partitionCount = 16u;
testArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo);
checkForProperCmdBufferAddressOffset = false;
uint64_t gpuVirtualAddress = 0x8000123000;
uint64_t postSyncAddress = 0x8000456000;
@ -101,7 +103,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerPartitionWhenConst
auto pipeControl = genCmdCast<WalkerPartition::PIPE_CONTROL<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable());
EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable());
parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>);
@ -157,6 +159,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe
uint64_t cmdBufferGpuAddress = 0x8000123000;
uint64_t postSyncAddress = 0x8000456000;
testArgs.workPartitionAllocationGpuVa = 0x8000444000;
testArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo);
auto walker = createWalker<FamilyType>(postSyncAddress);
uint32_t totalBytesProgrammed{};
@ -190,7 +194,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe
ASSERT_NE(nullptr, pipeControl);
parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>);
EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable());
EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable());
}
{
auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
@ -233,6 +237,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd
testArgs.partitionCount = testArgs.tileCount;
checkForProperCmdBufferAddressOffset = false;
testArgs.synchronizeBeforeExecution = true;
testArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo);
uint64_t cmdBufferGpuAddress = 0x8000123000;
uint64_t postSyncAddress = 0x8000456000;
testArgs.workPartitionAllocationGpuVa = 0x8000444000;
@ -289,7 +295,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd
ASSERT_NE(nullptr, pipeControl);
parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>);
EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable());
EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable());
}
{
auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
@ -332,6 +338,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd
testArgs.crossTileAtomicSynchronization = false;
testArgs.tileCount = 4u;
testArgs.partitionCount = testArgs.tileCount;
testArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo);
checkForProperCmdBufferAddressOffset = false;
uint64_t cmdBufferGpuAddress = 0x8000123000;
uint64_t postSyncAddress = 0x8000456000;
@ -368,7 +376,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd
ASSERT_NE(nullptr, pipeControl);
parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>);
EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable());
EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable());
}
{
auto miSemaphoreWait = genCmdCast<WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
@ -415,6 +423,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit
testArgs.partitionCount = testArgs.tileCount;
testArgs.emitSelfCleanup = true;
testArgs.staticPartitioning = true;
testArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo);
checkForProperCmdBufferAddressOffset = false;
uint64_t cmdBufferGpuAddress = 0x8000123000;
@ -465,7 +474,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit
ASSERT_NE(nullptr, pipeControl);
parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>);
EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable());
EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable());
}
{
auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
@ -565,6 +574,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit
testArgs.partitionCount = testArgs.tileCount;
testArgs.emitSelfCleanup = true;
testArgs.staticPartitioning = true;
testArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo);
checkForProperCmdBufferAddressOffset = false;
uint64_t cmdBufferGpuAddress = 0x8000123000;
uint64_t postSyncAddress = 0x8000456000;
@ -614,7 +625,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit
ASSERT_NE(nullptr, pipeControl);
parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>);
EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable());
EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable());
}
{
auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
@ -714,6 +725,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit
testArgs.useAtomicsForSelfCleanup = true;
testArgs.emitSelfCleanup = true;
testArgs.staticPartitioning = true;
testArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo);
checkForProperCmdBufferAddressOffset = false;
uint64_t cmdBufferGpuAddress = 0x8000123000;
uint64_t postSyncAddress = 0x8000456000;
@ -765,7 +778,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit
ASSERT_NE(nullptr, pipeControl);
parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>);
EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable());
EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable());
}
{
auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
@ -870,6 +883,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit
testArgs.emitSelfCleanup = true;
testArgs.useAtomicsForSelfCleanup = true;
testArgs.staticPartitioning = true;
testArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo);
checkForProperCmdBufferAddressOffset = false;
uint64_t cmdBufferGpuAddress = 0x8000123000;
uint64_t postSyncAddress = 0x8000456000;
@ -921,7 +936,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit
ASSERT_NE(nullptr, pipeControl);
parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>);
EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable());
EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable());
}
{
auto miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
@ -1025,6 +1040,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDebugModesForWalkerParti
testArgs.tileCount = 4u;
testArgs.partitionCount = 16u;
testArgs.emitBatchBufferEnd = true;
testArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo);
checkForProperCmdBufferAddressOffset = false;
uint64_t gpuVirtualAddress = 0x8000123000;
@ -1113,7 +1129,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDebugModesForWalkerParti
auto pipeControl = genCmdCast<WalkerPartition::PIPE_CONTROL<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable());
EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable());
parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>);
@ -1258,6 +1274,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitionIsPreferr
testArgs.staticPartitioning = staticPartitioning;
testArgs.preferredStaticPartitioning = preferredStaticPartitioning;
testArgs.workPartitionAllocationGpuVa = 0x800BADA55000;
testArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo);
auto expectedCommandUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM<FamilyType>) +
sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) * 2 +
@ -1332,7 +1349,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitionIsPreferr
auto pipeControl = genCmdCast<WalkerPartition::PIPE_CONTROL<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable());
EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable());
parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>);
miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2021 Intel Corporation
* Copyright (C) 2021-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -839,6 +839,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitioningWhenZD
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupSectionWhenDebugForceDisableCrossTileSyncThenSelfCleanupOverridesDebugAndAddsOwnCleanupSection) {
testArgs.crossTileAtomicSynchronization = false;
testArgs.partitionCount = 16u;
testArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo);
checkForProperCmdBufferAddressOffset = false;
testArgs.emitSelfCleanup = true;
uint64_t gpuVirtualAddress = 0x8000123000;
@ -939,7 +941,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupSectionWhenDe
auto pipeControl = genCmdCast<WalkerPartition::PIPE_CONTROL<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
ASSERT_NE(nullptr, pipeControl);
EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable());
EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable());
parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>);
miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
@ -1043,6 +1045,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupAndAtomicsUse
checkForProperCmdBufferAddressOffset = false;
testArgs.emitSelfCleanup = true;
testArgs.useAtomicsForSelfCleanup = true;
testArgs.dcFlushEnable = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo);
uint64_t gpuVirtualAddress = 0x8000123000;
uint64_t postSyncAddress = 0x8000456000;
WalkerPartition::COMPUTE_WALKER<FamilyType> walker;
@ -1142,7 +1146,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupAndAtomicsUse
auto pipeControl = genCmdCast<WalkerPartition::PIPE_CONTROL<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
ASSERT_NE(nullptr, pipeControl);
EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable());
EXPECT_EQ(testArgs.dcFlushEnable, pipeControl->getDcFlushEnable());
parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>);
miAtomic = genCmdCast<WalkerPartition::MI_ATOMIC<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));

View File

@ -30,6 +30,8 @@ void ImplicitScalingFixture::setUp() {
commandStream.replaceGraphicsAllocation(&cmdBufferAlloc);
testHardwareInfo = *defaultHwInfo;
dcFlushFlag = pDevice->getDefaultEngine().commandStreamReceiver->getDcFlushSupport();
}
void ImplicitScalingFixture::tearDown() {

View File

@ -33,6 +33,7 @@ struct ImplicitScalingFixture : public CommandEncodeStatesFixture {
DeviceBitfield singleTile;
DeviceBitfield twoTile;
void *alignedMemory = nullptr;
bool dcFlushFlag = false;
};
using ImplicitScalingTests = Test<ImplicitScalingFixture>;