mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
refactor: add thread group count parameter to implicit scaling functions
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
b7d7424aab
commit
897c890d03
@ -427,6 +427,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||
args.requiredPartitionDim, // requiredPartitionDim
|
||||
args.partitionCount, // partitionCount
|
||||
workgroupSize, // workgroupSize
|
||||
threadGroupCount, // threadGroupCount
|
||||
args.maxWgCountPerTile, // maxWgCountPerTile
|
||||
!(container.getFlushTaskUsedForImmediate() || container.isUsingPrimaryBuffer()), // useSecondaryBatchBuffer
|
||||
!args.isKernelDispatchedFromImmediateCmdList, // apiSelfCleanup
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
- * Copyright (C) 2021-2024 Intel Corporation
|
||||
+ * Copyright (C) 2021-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -46,6 +46,7 @@ struct ImplicitScalingDispatchCommandArgs {
|
||||
RequiredPartitionDim requiredPartitionDim = RequiredPartitionDim::none;
|
||||
uint32_t partitionCount = 0;
|
||||
uint32_t workgroupSize = 0;
|
||||
uint32_t threadGroupCount = 0;
|
||||
uint32_t maxWgCountPerTile = 0;
|
||||
|
||||
bool useSecondaryBatchBuffer = false;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
- * Copyright (C) 2021-2024 Intel Corporation
|
||||
+ * Copyright (C) 2021-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -50,6 +50,7 @@ WalkerPartition::WalkerPartitionArgs prepareWalkerPartitionArgs(ImplicitScalingD
|
||||
args.blockDispatchToCommandBuffer = dispatchCommandArgs.blockDispatchToCommandBuffer;
|
||||
|
||||
args.workgroupSize = dispatchCommandArgs.workgroupSize;
|
||||
args.threadGroupCount = dispatchCommandArgs.threadGroupCount;
|
||||
args.maxWgCountPerTile = dispatchCommandArgs.maxWgCountPerTile;
|
||||
args.isRequiredDispatchWorkGroupOrder = dispatchCommandArgs.isRequiredDispatchWorkGroupOrder;
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
- * Copyright (C) 2021-2024 Intel Corporation
|
||||
+ * Copyright (C) 2021-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -18,6 +18,7 @@ struct WalkerPartitionArgs {
|
||||
uint32_t partitionCount = 0;
|
||||
uint32_t tileCount = 0;
|
||||
uint32_t workgroupSize = 0;
|
||||
uint32_t threadGroupCount = 0;
|
||||
uint32_t maxWgCountPerTile = 0;
|
||||
bool emitBatchBufferEnd = false;
|
||||
bool secondaryBatchBuffer = false;
|
||||
|
@ -526,15 +526,11 @@ void *programPartitionedWalker(void *&inputAddress, uint32_t &totalBytesProgramm
|
||||
inputWalker->setPartitionSize(Math::divideAndRoundUp(workgroupCount, args.partitionCount));
|
||||
}
|
||||
|
||||
- uint32_t threadGroupCount = inputWalker->getThreadGroupIdXDimension() *
|
||||
- inputWalker->getThreadGroupIdYDimension() *
|
||||
- inputWalker->getThreadGroupIdZDimension();
|
||||
|
||||
NEO::EncodeDispatchKernel<GfxFamily>::setWalkerRegionSettings(*inputWalker,
|
||||
device,
|
||||
args.partitionCount,
|
||||
args.workgroupSize,
|
||||
- threadGroupCount,
|
||||
+ args.threadGroupCount,
|
||||
args.maxWgCountPerTile,
|
||||
args.isRequiredDispatchWorkGroupOrder);
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
- * Copyright (C) 2020-2024 Intel Corporation
|
||||
+ * Copyright (C) 2020-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -846,11 +846,15 @@ HWTEST2_F(CommandEncoderTests, whenAskingForImplicitScalingValuesThenAlwaysRetur
|
||||
&ptr, // outWalkerPtr
|
||||
RequiredPartitionDim::x, // requiredPartitionDim
|
||||
partitionCount, // partitionCount
|
||||
1, // workgroupSize
|
||||
1, // threadGroupCount
|
||||
1, // maxWgCountPerTile
|
||||
false, // useSecondaryBatchBuffer
|
||||
false, // apiSelfCleanup
|
||||
false, // dcFlush
|
||||
false, // forceExecutionOnSingleTile
|
||||
- false}; // blockDispatchToCommandBuffer
|
||||
+ false, // blockDispatchToCommandBuffer
|
||||
+ false}; // isRequiredDispatchWorkGroupOrder
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(linearStream, walkerCmd, deviceBitField, args);
|
||||
EXPECT_EQ(0u, linearStream.getUsed());
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
- * Copyright (C) 2021-2024 Intel Corporation
|
||||
+ * Copyright (C) 2021-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -37,6 +37,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenGetSizeWhenDispatchingCm
|
||||
|
||||
uint32_t partitionCount = 0;
|
||||
auto dispatchArgs = createDispatchCommandArgs(0, partitionCount);
|
||||
dispatchArgs.threadGroupCount = 32;
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
totalBytesProgrammed = commandStream.getUsed();
|
||||
@ -175,6 +176,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningWhenDi
|
||||
|
||||
uint32_t partitionCount = 0;
|
||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||
dispatchArgs.threadGroupCount = 32;
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
totalBytesProgrammed = commandStream.getUsed();
|
||||
@ -228,6 +230,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningWhenPa
|
||||
|
||||
uint32_t partitionCount = 0;
|
||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||
dispatchArgs.threadGroupCount = 32;
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
totalBytesProgrammed = commandStream.getUsed();
|
||||
@ -408,6 +411,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenDynamicPartitioningPrefe
|
||||
|
||||
uint32_t partitionCount = 0;
|
||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||
dispatchArgs.threadGroupCount = 32;
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
totalBytesProgrammed = commandStream.getUsed();
|
||||
@ -458,6 +462,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
||||
uint32_t partitionCount = 0;
|
||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||
dispatchArgs.apiSelfCleanup = true;
|
||||
dispatchArgs.threadGroupCount = 32;
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
totalBytesProgrammed = commandStream.getUsed();
|
||||
@ -528,6 +533,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
||||
uint32_t partitionCount = 0;
|
||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||
dispatchArgs.apiSelfCleanup = true;
|
||||
dispatchArgs.threadGroupCount = 32;
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
totalBytesProgrammed = commandStream.getUsed();
|
||||
@ -589,6 +595,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
||||
uint32_t partitionCount = 0;
|
||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||
dispatchArgs.apiSelfCleanup = true;
|
||||
dispatchArgs.threadGroupCount = 32;
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
totalBytesProgrammed = commandStream.getUsed();
|
||||
@ -649,6 +656,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
||||
|
||||
uint32_t partitionCount = 0;
|
||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||
dispatchArgs.threadGroupCount = 32;
|
||||
dispatchArgs.apiSelfCleanup = true;
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
@ -717,6 +725,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
||||
|
||||
uint32_t partitionCount = 0;
|
||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||
dispatchArgs.threadGroupCount = 32;
|
||||
dispatchArgs.apiSelfCleanup = true;
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
@ -781,6 +790,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
||||
|
||||
uint32_t partitionCount = 0;
|
||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||
dispatchArgs.threadGroupCount = 32;
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
totalBytesProgrammed = commandStream.getUsed();
|
||||
@ -846,6 +856,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
||||
|
||||
uint32_t partitionCount = 0;
|
||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||
dispatchArgs.threadGroupCount = 32;
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
totalBytesProgrammed = commandStream.getUsed();
|
||||
@ -914,6 +925,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
||||
|
||||
uint32_t partitionCount = 0;
|
||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||
dispatchArgs.threadGroupCount = 32;
|
||||
dispatchArgs.apiSelfCleanup = true;
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
@ -982,6 +994,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
||||
|
||||
uint32_t partitionCount = 0;
|
||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||
dispatchArgs.threadGroupCount = 32;
|
||||
|
||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||
totalBytesProgrammed = commandStream.getUsed();
|
||||
@ -1601,6 +1614,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
||||
|
||||
uint32_t partitionCount = 0;
|
||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||
dispatchArgs.threadGroupCount = 32;
|
||||
dispatchArgs.blockDispatchToCommandBuffer = true;
|
||||
dispatchArgs.outWalkerPtr = &outWalkerPtr;
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
- * Copyright (C) 2021-2024 Intel Corporation
|
||||
+ * Copyright (C) 2021-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -434,6 +434,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramComputeWalkerWhen
|
||||
WalkerPartition::WalkerPartitionArgs args = {};
|
||||
args.partitionCount = 2;
|
||||
args.tileCount = 2;
|
||||
args.threadGroupCount = 7 * 10 * 11;
|
||||
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, args, *device);
|
||||
auto walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
||||
|
||||
@ -448,6 +449,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramComputeWalkerWhen
|
||||
args = {};
|
||||
args.partitionCount = 2;
|
||||
args.tileCount = 2;
|
||||
args.threadGroupCount = 7 * 10 * 11;
|
||||
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, args, *device);
|
||||
walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
||||
|
||||
@ -460,6 +462,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramComputeWalkerWhen
|
||||
args = {};
|
||||
args.partitionCount = 2;
|
||||
args.tileCount = 2;
|
||||
args.threadGroupCount = 7 * 10 * 11;
|
||||
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, args, *device);
|
||||
walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
||||
|
||||
@ -473,6 +476,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramComputeWalkerWhen
|
||||
args = {};
|
||||
args.partitionCount = 1;
|
||||
args.tileCount = 2;
|
||||
args.threadGroupCount = 7 * 10 * 11;
|
||||
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, args, *device);
|
||||
walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
||||
|
||||
@ -1793,6 +1797,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenForceExecutionOnSingleTi
|
||||
WalkerPartition::WalkerPartitionArgs args = {};
|
||||
args.partitionCount = 2;
|
||||
args.tileCount = 2;
|
||||
args.threadGroupCount = 32;
|
||||
args.forceExecutionOnSingleTile = forceExecutionOnSingleTile;
|
||||
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, args, *device);
|
||||
auto walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
||||
@ -1807,6 +1812,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenForceExecutionOnSingleTi
|
||||
args = {};
|
||||
args.partitionCount = 2;
|
||||
args.tileCount = 2;
|
||||
args.threadGroupCount = 32;
|
||||
args.forceExecutionOnSingleTile = forceExecutionOnSingleTile;
|
||||
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, args, *device);
|
||||
walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
- * Copyright (C) 2021-2024 Intel Corporation
|
||||
+ * Copyright (C) 2021-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -48,6 +48,7 @@ ImplicitScalingDispatchCommandArgs ImplicitScalingFixture::createDispatchCommand
|
||||
NEO::RequiredPartitionDim::none, // requiredPartitionDim
|
||||
partitionCount, // partitionCount
|
||||
1, // workgroupSize
|
||||
1, // threadGroupCount
|
||||
1, // maxWgCountPerTile
|
||||
true, // useSecondaryBatchBuffer
|
||||
false, // apiSelfCleanup
|
||||
|
Reference in New Issue
Block a user