Fix execution of cooperative kernels on multi-tile devices

Add a flag for forcing execution of kernels on a single tile
Force cooperative kernels to use only a single tile

Related-to: NEO-6729
Signed-off-by: Naklicki, Mateusz <mateusz.naklicki@intel.com>
This commit is contained in:
Naklicki, Mateusz
2022-11-15 13:48:45 +00:00
committed by Compute-Runtime-Automation
parent 359b9278b8
commit 914939c377
15 changed files with 182 additions and 62 deletions

View File

@@ -301,8 +301,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
PreemptionHelper::applyPreemptionWaCmdsBegin<Family>(listCmdBufferStream, *args.device);
if ((args.partitionCount > 1 && !args.isCooperative) &&
!args.isInternal) {
if (args.partitionCount > 1 && !args.isInternal) {
const uint64_t workPartitionAllocationGpuVa = args.device->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress();
if (args.eventAddress != 0) {
postSync.setOperation(POSTSYNC_DATA::OPERATION_WRITE_TIMESTAMP);
@@ -315,6 +314,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
!args.isKernelDispatchedFromImmediateCmdList,
false,
args.dcFlushEnable,
args.isCooperative,
workPartitionAllocationGpuVa,
hwInfo);
} else {

View File

@@ -53,6 +53,7 @@ struct ImplicitScalingDispatch {
bool apiSelfCleanup,
bool usesImages,
bool dcFlush,
bool forceExecutionOnSingleTile,
uint64_t workPartitionAllocationGpuVa,
const HardwareInfo &hwInfo);

View File

@@ -22,7 +22,8 @@ WalkerPartition::WalkerPartitionArgs prepareWalkerPartitionArgs(uint64_t workPar
bool preferStaticPartitioning,
bool staticPartitioning,
bool useSecondaryBatchBuffer,
bool dcFlush) {
bool dcFlush,
bool forceExecutionOnSingleTile) {
WalkerPartition::WalkerPartitionArgs args = {};
args.workPartitionAllocationGpuVa = workPartitionAllocationGpuVa;
@@ -30,6 +31,7 @@ WalkerPartition::WalkerPartitionArgs prepareWalkerPartitionArgs(uint64_t workPar
args.tileCount = tileCount;
args.staticPartitioning = staticPartitioning;
args.preferredStaticPartitioning = preferStaticPartitioning;
args.forceExecutionOnSingleTile = forceExecutionOnSingleTile;
args.useAtomicsForSelfCleanup = ImplicitScalingHelper::isAtomicsUsedForSelfCleanup();
args.initializeWparidRegister = ImplicitScalingHelper::isWparidRegisterInitializationRequired();
@@ -76,6 +78,7 @@ size_t ImplicitScalingDispatch<GfxFamily>::getSize(bool apiSelfCleanup,
preferStaticPartitioning,
staticPartitioning,
false,
false,
false);
return static_cast<size_t>(WalkerPartition::estimateSpaceRequiredInCommandBuffer<GfxFamily>(args));
@@ -90,6 +93,7 @@ void ImplicitScalingDispatch<GfxFamily>::dispatchCommands(LinearStream &commandS
bool apiSelfCleanup,
bool usesImages,
bool dcFlush,
bool forceExecutionOnSingleTile,
uint64_t workPartitionAllocationGpuVa,
const HardwareInfo &hwInfo) {
uint32_t totalProgrammedSize = 0u;
@@ -106,7 +110,8 @@ void ImplicitScalingDispatch<GfxFamily>::dispatchCommands(LinearStream &commandS
preferStaticPartitioning,
staticPartitioning,
useSecondaryBatchBuffer,
dcFlush);
dcFlush,
forceExecutionOnSingleTile);
auto dispatchCommandsSize = getSize(apiSelfCleanup, preferStaticPartitioning, devices, {walkerCmd.getThreadGroupIdStartingX(), walkerCmd.getThreadGroupIdStartingY(), walkerCmd.getThreadGroupIdStartingZ()}, {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()});
void *commandBuffer = commandStream.getSpace(dispatchCommandsSize);

View File

@@ -31,6 +31,7 @@ struct WalkerPartitionArgs {
bool usePostSync = false;
bool pipeControlBeforeCleanupCrossTileSync = false;
bool dcFlushEnable = false;
bool forceExecutionOnSingleTile = false;
};
constexpr uint32_t wparidCCSOffset = 0x221C;

View File

@@ -480,7 +480,8 @@ uint64_t computeWalkerSectionStart(WalkerPartitionArgs &args) {
template <typename GfxFamily>
void programPartitionedWalker(void *&inputAddress, uint32_t &totalBytesProgrammed,
COMPUTE_WALKER<GfxFamily> *inputWalker,
uint32_t partitionCount) {
uint32_t partitionCount,
bool forceExecutionOnSingleTile) {
auto computeWalker = putCommand<COMPUTE_WALKER<GfxFamily>>(inputAddress, totalBytesProgrammed);
COMPUTE_WALKER<GfxFamily> cmd = *inputWalker;
@@ -503,7 +504,11 @@ void programPartitionedWalker(void *&inputAddress, uint32_t &totalBytesProgramme
workgroupCount = inputWalker->getThreadGroupIdZDimension();
}
cmd.setPartitionSize((workgroupCount + partitionCount - 1u) / partitionCount);
if (forceExecutionOnSingleTile) {
cmd.setPartitionSize(workgroupCount);
} else {
cmd.setPartitionSize(Math::divideAndRoundUp(workgroupCount, partitionCount));
}
}
*computeWalker = cmd;
}
@@ -614,7 +619,7 @@ void constructDynamicallyPartitionedCommandBuffer(void *cpuPointer,
args.secondaryBatchBuffer);
// Walker section
programPartitionedWalker<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args.partitionCount);
programPartitionedWalker<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args.partitionCount, args.forceExecutionOnSingleTile);
programMiBatchBufferStart<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, gpuAddressOfAllocation, false, args.secondaryBatchBuffer);
@@ -704,7 +709,7 @@ void constructStaticallyPartitionedCommandBuffer(void *cpuPointer,
if (args.initializeWparidRegister) {
programMiLoadRegisterMem<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, args.workPartitionAllocationGpuVa, wparidCCSOffset);
}
programPartitionedWalker<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args.partitionCount);
programPartitionedWalker<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args.partitionCount, args.forceExecutionOnSingleTile);
// Prepare for cleanup section
if (args.emitSelfCleanup) {

View File

@@ -1026,6 +1026,36 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesImplicitScaling,
EXPECT_EQ(eventAddress, postSync.getDestinationAddress());
}
// Encodes a cooperative, non-internal dispatch of 16x1x1 workgroups with a
// partition count of two, then inspects the first walker command found in the
// command stream. The walker is expected to be partitioned along X with a
// partition size equal to the whole X workgroup count (dims[0]) rather than
// that count divided by the partition count.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesImplicitScaling, givenCooperativeKernelWhenEncodingDispatchKernelThenExpectPartitionSizeEqualWorkgroupSize) {
    using WALKER_TYPE = typename FamilyType::WALKER_TYPE;

    uint32_t dims[] = {16, 1, 1};
    auto dispatchInterface = std::make_unique<MockDispatchKernelEncoder>();
    bool requiresUncachedMocs = false;

    EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
    dispatchArgs.isInternal = false;
    dispatchArgs.isCooperative = true;
    dispatchArgs.partitionCount = 2;

    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs, nullptr);

    // Parse everything written to the command stream so far and locate the walker.
    size_t containerUsedAfterBase = cmdContainer->getCommandStream()->getUsed();
    GenCmdList partitionedWalkerList;
    CmdParse<FamilyType>::parseCommandBuffer(partitionedWalkerList, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), containerUsedAfterBase);
    auto itor = find<WALKER_TYPE *>(partitionedWalkerList.begin(), partitionedWalkerList.end());
    ASSERT_NE(itor, partitionedWalkerList.end());

    auto partitionWalkerCmd = genCmdCast<WALKER_TYPE *>(*itor);
    EXPECT_EQ(WALKER_TYPE::PARTITION_TYPE::PARTITION_TYPE_X, partitionWalkerCmd->getPartitionType());

    // One partition has to span all workgroups in the partitioned dimension.
    uint32_t expectedPartitionSize = dims[0];
    EXPECT_EQ(expectedPartitionSize, partitionWalkerCmd->getPartitionSize());
}
struct CommandEncodeStatesDynamicImplicitScalingFixture : CommandEncodeStatesImplicitScalingFixture {
void setUp() {
DebugManager.flags.EnableStaticPartitioning.set(0);

View File

@@ -30,7 +30,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenGetSizeWhenDispatchingCm
expectedSize = ImplicitScalingDispatch<FamilyType>::getSize(false, false, twoTile, Vec3<size_t>(0, 0, 0), Vec3<size_t>(32, 1, 1));
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, 0u, *defaultHwInfo);
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag,
forceExecutionOnSingleTileFlag, 0u, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
EXPECT_EQ(2u, partitionCount);
@@ -72,7 +73,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenWorkgroupOneAndNoPartiti
expectedSize = ImplicitScalingDispatch<FamilyType>::getSize(false, false, twoTile, Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1));
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, false, false, false, dcFlushFlag, 0u, *defaultHwInfo);
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, false, false, false, dcFlushFlag,
forceExecutionOnSingleTileFlag, 0u, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
EXPECT_EQ(1u, partitionCount);
@@ -115,7 +117,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenWorkgroupOneAndPartition
expectedSize = ImplicitScalingDispatch<FamilyType>::getSize(false, false, twoTile, Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1));
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag, 0u, *defaultHwInfo);
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag,
forceExecutionOnSingleTileFlag, 0u, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
EXPECT_EQ(1u, partitionCount);
@@ -162,7 +165,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningWhenDi
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag,
workPartitionAllocationAddress, *defaultHwInfo);
forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
EXPECT_EQ(2u, partitionCount);
@@ -214,7 +217,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningWhenPa
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag,
workPartitionAllocationAddress, *defaultHwInfo);
forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
EXPECT_EQ(2u, partitionCount);
@@ -268,7 +271,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningPrefer
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag,
workPartitionAllocationAddress, *defaultHwInfo);
forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
EXPECT_EQ(twoTile.count(), partitionCount);
@@ -319,7 +322,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningPrefer
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag,
workPartitionAllocationAddress, *defaultHwInfo);
forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
EXPECT_EQ(twoTile.count(), partitionCount);
@@ -356,7 +359,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningPrefer
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag,
workPartitionAllocationAddress, *defaultHwInfo);
forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
EXPECT_EQ(twoTile.count(), partitionCount);
@@ -393,7 +396,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenDynamicPartitioningPrefe
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag,
workPartitionAllocationAddress, *defaultHwInfo);
forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
EXPECT_EQ(twoTile.count(), partitionCount);
@@ -441,7 +444,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, dcFlushFlag,
workPartitionAllocationAddress, *defaultHwInfo);
forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
EXPECT_EQ(twoTile.count(), partitionCount);
@@ -509,7 +512,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, dcFlushFlag,
workPartitionAllocationAddress, *defaultHwInfo);
forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
EXPECT_EQ(twoTile.count(), partitionCount);
@@ -569,7 +572,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, dcFlushFlag,
workPartitionAllocationAddress, *defaultHwInfo);
forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
EXPECT_EQ(twoTile.count(), partitionCount);
@@ -629,7 +632,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, dcFlushFlag,
workPartitionAllocationAddress, *defaultHwInfo);
forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
EXPECT_EQ(twoTile.count(), partitionCount);
@@ -696,7 +699,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, dcFlushFlag,
workPartitionAllocationAddress, *defaultHwInfo);
forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
EXPECT_EQ(twoTile.count(), partitionCount);
@@ -759,7 +762,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag,
workPartitionAllocationAddress, *defaultHwInfo);
forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
EXPECT_EQ(twoTile.count(), partitionCount);
@@ -824,7 +827,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag,
workPartitionAllocationAddress, *defaultHwInfo);
forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
EXPECT_EQ(twoTile.count(), partitionCount);
@@ -892,7 +895,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, dcFlushFlag,
workPartitionAllocationAddress, *defaultHwInfo);
forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
EXPECT_EQ(twoTile.count(), partitionCount);
@@ -959,7 +962,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
uint32_t partitionCount = 0;
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, dcFlushFlag,
workPartitionAllocationAddress, *defaultHwInfo);
forceExecutionOnSingleTileFlag, workPartitionAllocationAddress, *defaultHwInfo);
totalBytesProgrammed = commandStream.getUsed();
EXPECT_EQ(expectedSize, totalBytesProgrammed);
EXPECT_EQ(twoTile.count(), partitionCount);

View File

@@ -400,7 +400,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramComputeWalkerWhen
walker.setPartitionType(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_X);
void *walkerCommandAddress = cmdBufferAddress;
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u);
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u, false);
auto walkerCommand = genCmdCast<COMPUTE_WALKER<FamilyType> *>(walkerCommandAddress);
ASSERT_NE(nullptr, walkerCommand);
@@ -411,7 +411,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramComputeWalkerWhen
walker.setPartitionType(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_Y);
walkerCommandAddress = cmdBufferAddress;
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u);
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u, false);
walkerCommand = genCmdCast<COMPUTE_WALKER<FamilyType> *>(walkerCommandAddress);
ASSERT_NE(nullptr, walkerCommand);
@@ -420,7 +420,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramComputeWalkerWhen
walker.setPartitionType(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_Z);
walkerCommandAddress = cmdBufferAddress;
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u);
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 2u, false);
walkerCommand = genCmdCast<COMPUTE_WALKER<FamilyType> *>(walkerCommandAddress);
ASSERT_NE(nullptr, walkerCommand);
@@ -430,7 +430,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramComputeWalkerWhen
//if we program with partition Count == 1 then do not trigger partition stuff
walker.setPartitionType(COMPUTE_WALKER<FamilyType>::PARTITION_TYPE::PARTITION_TYPE_DISABLED);
walkerCommandAddress = cmdBufferAddress;
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 1u);
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, 1u, false);
walkerCommand = genCmdCast<COMPUTE_WALKER<FamilyType> *>(walkerCommandAddress);
ASSERT_NE(nullptr, walkerCommand);
@@ -506,7 +506,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerWithDifferentWorkg
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z, walker.getPartitionType());
}
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDisalbedMinimalPartitionSizeWhenCoomputePartitionSizeThenProperValueIsReturned) {
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDisabledMinimalPartitionSizeWhenComputePartitionSizeThenProperValueIsReturned) {
WalkerPartition::COMPUTE_WALKER<FamilyType> walker;
walker = FamilyType::cmdInitGpgpuWalker;
walker.setThreadGroupIdXDimension(64u);
@@ -1672,3 +1672,32 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenBarrierProgrammingWhenEm
EXPECT_EQ(parsedOffset, expectedCommandUsedSize);
}
// Programs the same 32x1x1, X-partitioned walker twice with a partition count
// of two. Without forceExecutionOnSingleTile the programmed partition size is
// 16; with the flag set the partition size is the full 32 workgroups.
// Workload partitioning stays enabled and the partition type stays X in both cases.
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenForceExecutionOnSingleTileWhenProgramComputeWalkerThenWalkerIsProperlyProgrammed) {
    using WalkerCmd = WalkerPartition::COMPUTE_WALKER<FamilyType>;
    constexpr uint32_t tileCount = 2u;

    WalkerCmd inputWalker = FamilyType::cmdInitGpgpuWalker;
    inputWalker.setPartitionType(WalkerCmd::PARTITION_TYPE::PARTITION_TYPE_X);
    inputWalker.setThreadGroupIdXDimension(32u);
    inputWalker.setThreadGroupIdYDimension(1u);
    inputWalker.setThreadGroupIdZDimension(1u);

    // Flag cleared: partition size is 16 for 32 workgroups and two partitions.
    void *splitWalkerAddress = cmdBufferAddress;
    programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &inputWalker, tileCount, false);
    auto splitWalker = genCmdCast<WalkerCmd *>(splitWalkerAddress);
    ASSERT_NE(nullptr, splitWalker);
    EXPECT_TRUE(splitWalker->getWorkloadPartitionEnable());
    EXPECT_EQ(WalkerCmd::PARTITION_TYPE::PARTITION_TYPE_X, splitWalker->getPartitionType());
    EXPECT_EQ(16u, splitWalker->getPartitionSize());

    // Flag set: the partition size equals the full workgroup count.
    void *singleTileWalkerAddress = cmdBufferAddress;
    programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &inputWalker, tileCount, true);
    auto singleTileWalker = genCmdCast<WalkerCmd *>(singleTileWalkerAddress);
    ASSERT_NE(nullptr, singleTileWalker);
    EXPECT_TRUE(singleTileWalker->getWorkloadPartitionEnable());
    EXPECT_EQ(WalkerCmd::PARTITION_TYPE::PARTITION_TYPE_X, singleTileWalker->getPartitionType());
    EXPECT_EQ(32u, singleTileWalker->getPartitionSize());
}

View File

@@ -34,6 +34,7 @@ struct ImplicitScalingFixture : public CommandEncodeStatesFixture {
DeviceBitfield twoTile;
void *alignedMemory = nullptr;
bool dcFlushFlag = false;
bool forceExecutionOnSingleTileFlag = false;
};
using ImplicitScalingTests = Test<ImplicitScalingFixture>;