Mirror of https://github.com/intel/compute-runtime.git (synced 2025-12-20 08:53:55 +08:00)
refactor: add thread group count parameter to implicit scaling functions
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Committed by: Compute-Runtime-Automation
Parent: b7d7424aab
Commit: 897c890d03
@@ -186,6 +186,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
|||||||
requiredPartitionDim, // requiredPartitionDim
|
requiredPartitionDim, // requiredPartitionDim
|
||||||
partitionCount, // partitionCount
|
partitionCount, // partitionCount
|
||||||
workgroupSize, // workgroupSize
|
workgroupSize, // workgroupSize
|
||||||
|
threadGroupCount, // threadGroupCount
|
||||||
maxWgCountPerTile, // maxWgCountPerTile
|
maxWgCountPerTile, // maxWgCountPerTile
|
||||||
false, // useSecondaryBatchBuffer
|
false, // useSecondaryBatchBuffer
|
||||||
false, // apiSelfCleanup
|
false, // apiSelfCleanup
|
||||||
|
|||||||
@@ -427,6 +427,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
|||||||
args.requiredPartitionDim, // requiredPartitionDim
|
args.requiredPartitionDim, // requiredPartitionDim
|
||||||
args.partitionCount, // partitionCount
|
args.partitionCount, // partitionCount
|
||||||
workgroupSize, // workgroupSize
|
workgroupSize, // workgroupSize
|
||||||
|
threadGroupCount, // threadGroupCount
|
||||||
args.maxWgCountPerTile, // maxWgCountPerTile
|
args.maxWgCountPerTile, // maxWgCountPerTile
|
||||||
!(container.getFlushTaskUsedForImmediate() || container.isUsingPrimaryBuffer()), // useSecondaryBatchBuffer
|
!(container.getFlushTaskUsedForImmediate() || container.isUsingPrimaryBuffer()), // useSecondaryBatchBuffer
|
||||||
!args.isKernelDispatchedFromImmediateCmdList, // apiSelfCleanup
|
!args.isKernelDispatchedFromImmediateCmdList, // apiSelfCleanup
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (C) 2021-2024 Intel Corporation
|
* Copyright (C) 2021-2025 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
@@ -46,6 +46,7 @@ struct ImplicitScalingDispatchCommandArgs {
|
|||||||
RequiredPartitionDim requiredPartitionDim = RequiredPartitionDim::none;
|
RequiredPartitionDim requiredPartitionDim = RequiredPartitionDim::none;
|
||||||
uint32_t partitionCount = 0;
|
uint32_t partitionCount = 0;
|
||||||
uint32_t workgroupSize = 0;
|
uint32_t workgroupSize = 0;
|
||||||
|
uint32_t threadGroupCount = 0;
|
||||||
uint32_t maxWgCountPerTile = 0;
|
uint32_t maxWgCountPerTile = 0;
|
||||||
|
|
||||||
bool useSecondaryBatchBuffer = false;
|
bool useSecondaryBatchBuffer = false;
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (C) 2021-2024 Intel Corporation
|
* Copyright (C) 2021-2025 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
@@ -50,6 +50,7 @@ WalkerPartition::WalkerPartitionArgs prepareWalkerPartitionArgs(ImplicitScalingD
|
|||||||
args.blockDispatchToCommandBuffer = dispatchCommandArgs.blockDispatchToCommandBuffer;
|
args.blockDispatchToCommandBuffer = dispatchCommandArgs.blockDispatchToCommandBuffer;
|
||||||
|
|
||||||
args.workgroupSize = dispatchCommandArgs.workgroupSize;
|
args.workgroupSize = dispatchCommandArgs.workgroupSize;
|
||||||
|
args.threadGroupCount = dispatchCommandArgs.threadGroupCount;
|
||||||
args.maxWgCountPerTile = dispatchCommandArgs.maxWgCountPerTile;
|
args.maxWgCountPerTile = dispatchCommandArgs.maxWgCountPerTile;
|
||||||
args.isRequiredDispatchWorkGroupOrder = dispatchCommandArgs.isRequiredDispatchWorkGroupOrder;
|
args.isRequiredDispatchWorkGroupOrder = dispatchCommandArgs.isRequiredDispatchWorkGroupOrder;
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (C) 2021-2024 Intel Corporation
|
* Copyright (C) 2021-2025 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
@@ -18,6 +18,7 @@ struct WalkerPartitionArgs {
|
|||||||
uint32_t partitionCount = 0;
|
uint32_t partitionCount = 0;
|
||||||
uint32_t tileCount = 0;
|
uint32_t tileCount = 0;
|
||||||
uint32_t workgroupSize = 0;
|
uint32_t workgroupSize = 0;
|
||||||
|
uint32_t threadGroupCount = 0;
|
||||||
uint32_t maxWgCountPerTile = 0;
|
uint32_t maxWgCountPerTile = 0;
|
||||||
bool emitBatchBufferEnd = false;
|
bool emitBatchBufferEnd = false;
|
||||||
bool secondaryBatchBuffer = false;
|
bool secondaryBatchBuffer = false;
|
||||||
|
|||||||
@@ -526,15 +526,11 @@ void *programPartitionedWalker(void *&inputAddress, uint32_t &totalBytesProgramm
|
|||||||
inputWalker->setPartitionSize(Math::divideAndRoundUp(workgroupCount, args.partitionCount));
|
inputWalker->setPartitionSize(Math::divideAndRoundUp(workgroupCount, args.partitionCount));
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t threadGroupCount = inputWalker->getThreadGroupIdXDimension() *
|
|
||||||
inputWalker->getThreadGroupIdYDimension() *
|
|
||||||
inputWalker->getThreadGroupIdZDimension();
|
|
||||||
|
|
||||||
NEO::EncodeDispatchKernel<GfxFamily>::setWalkerRegionSettings(*inputWalker,
|
NEO::EncodeDispatchKernel<GfxFamily>::setWalkerRegionSettings(*inputWalker,
|
||||||
device,
|
device,
|
||||||
args.partitionCount,
|
args.partitionCount,
|
||||||
args.workgroupSize,
|
args.workgroupSize,
|
||||||
threadGroupCount,
|
args.threadGroupCount,
|
||||||
args.maxWgCountPerTile,
|
args.maxWgCountPerTile,
|
||||||
args.isRequiredDispatchWorkGroupOrder);
|
args.isRequiredDispatchWorkGroupOrder);
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (C) 2020-2024 Intel Corporation
|
* Copyright (C) 2020-2025 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
@@ -846,11 +846,15 @@ HWTEST2_F(CommandEncoderTests, whenAskingForImplicitScalingValuesThenAlwaysRetur
|
|||||||
&ptr, // outWalkerPtr
|
&ptr, // outWalkerPtr
|
||||||
RequiredPartitionDim::x, // requiredPartitionDim
|
RequiredPartitionDim::x, // requiredPartitionDim
|
||||||
partitionCount, // partitionCount
|
partitionCount, // partitionCount
|
||||||
|
1, // workgroupSize
|
||||||
|
1, // threadGroupCount
|
||||||
|
1, // maxWgCountPerTile
|
||||||
false, // useSecondaryBatchBuffer
|
false, // useSecondaryBatchBuffer
|
||||||
false, // apiSelfCleanup
|
false, // apiSelfCleanup
|
||||||
false, // dcFlush
|
false, // dcFlush
|
||||||
false, // forceExecutionOnSingleTile
|
false, // forceExecutionOnSingleTile
|
||||||
false}; // blockDispatchToCommandBuffer
|
false, // blockDispatchToCommandBuffer
|
||||||
|
false}; // isRequiredDispatchWorkGroupOrder
|
||||||
|
|
||||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(linearStream, walkerCmd, deviceBitField, args);
|
ImplicitScalingDispatch<FamilyType>::dispatchCommands(linearStream, walkerCmd, deviceBitField, args);
|
||||||
EXPECT_EQ(0u, linearStream.getUsed());
|
EXPECT_EQ(0u, linearStream.getUsed());
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (C) 2021-2024 Intel Corporation
|
* Copyright (C) 2021-2025 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
@@ -37,6 +37,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenGetSizeWhenDispatchingCm
|
|||||||
|
|
||||||
uint32_t partitionCount = 0;
|
uint32_t partitionCount = 0;
|
||||||
auto dispatchArgs = createDispatchCommandArgs(0, partitionCount);
|
auto dispatchArgs = createDispatchCommandArgs(0, partitionCount);
|
||||||
|
dispatchArgs.threadGroupCount = 32;
|
||||||
|
|
||||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||||
totalBytesProgrammed = commandStream.getUsed();
|
totalBytesProgrammed = commandStream.getUsed();
|
||||||
@@ -175,6 +176,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningWhenDi
|
|||||||
|
|
||||||
uint32_t partitionCount = 0;
|
uint32_t partitionCount = 0;
|
||||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||||
|
dispatchArgs.threadGroupCount = 32;
|
||||||
|
|
||||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||||
totalBytesProgrammed = commandStream.getUsed();
|
totalBytesProgrammed = commandStream.getUsed();
|
||||||
@@ -228,6 +230,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningWhenPa
|
|||||||
|
|
||||||
uint32_t partitionCount = 0;
|
uint32_t partitionCount = 0;
|
||||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||||
|
dispatchArgs.threadGroupCount = 32;
|
||||||
|
|
||||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||||
totalBytesProgrammed = commandStream.getUsed();
|
totalBytesProgrammed = commandStream.getUsed();
|
||||||
@@ -408,6 +411,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenDynamicPartitioningPrefe
|
|||||||
|
|
||||||
uint32_t partitionCount = 0;
|
uint32_t partitionCount = 0;
|
||||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||||
|
dispatchArgs.threadGroupCount = 32;
|
||||||
|
|
||||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||||
totalBytesProgrammed = commandStream.getUsed();
|
totalBytesProgrammed = commandStream.getUsed();
|
||||||
@@ -458,6 +462,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
|||||||
uint32_t partitionCount = 0;
|
uint32_t partitionCount = 0;
|
||||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||||
dispatchArgs.apiSelfCleanup = true;
|
dispatchArgs.apiSelfCleanup = true;
|
||||||
|
dispatchArgs.threadGroupCount = 32;
|
||||||
|
|
||||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||||
totalBytesProgrammed = commandStream.getUsed();
|
totalBytesProgrammed = commandStream.getUsed();
|
||||||
@@ -528,6 +533,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
|||||||
uint32_t partitionCount = 0;
|
uint32_t partitionCount = 0;
|
||||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||||
dispatchArgs.apiSelfCleanup = true;
|
dispatchArgs.apiSelfCleanup = true;
|
||||||
|
dispatchArgs.threadGroupCount = 32;
|
||||||
|
|
||||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||||
totalBytesProgrammed = commandStream.getUsed();
|
totalBytesProgrammed = commandStream.getUsed();
|
||||||
@@ -589,6 +595,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
|||||||
uint32_t partitionCount = 0;
|
uint32_t partitionCount = 0;
|
||||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||||
dispatchArgs.apiSelfCleanup = true;
|
dispatchArgs.apiSelfCleanup = true;
|
||||||
|
dispatchArgs.threadGroupCount = 32;
|
||||||
|
|
||||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||||
totalBytesProgrammed = commandStream.getUsed();
|
totalBytesProgrammed = commandStream.getUsed();
|
||||||
@@ -649,6 +656,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
|||||||
|
|
||||||
uint32_t partitionCount = 0;
|
uint32_t partitionCount = 0;
|
||||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||||
|
dispatchArgs.threadGroupCount = 32;
|
||||||
dispatchArgs.apiSelfCleanup = true;
|
dispatchArgs.apiSelfCleanup = true;
|
||||||
|
|
||||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||||
@@ -717,6 +725,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
|||||||
|
|
||||||
uint32_t partitionCount = 0;
|
uint32_t partitionCount = 0;
|
||||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||||
|
dispatchArgs.threadGroupCount = 32;
|
||||||
dispatchArgs.apiSelfCleanup = true;
|
dispatchArgs.apiSelfCleanup = true;
|
||||||
|
|
||||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||||
@@ -781,6 +790,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
|||||||
|
|
||||||
uint32_t partitionCount = 0;
|
uint32_t partitionCount = 0;
|
||||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||||
|
dispatchArgs.threadGroupCount = 32;
|
||||||
|
|
||||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||||
totalBytesProgrammed = commandStream.getUsed();
|
totalBytesProgrammed = commandStream.getUsed();
|
||||||
@@ -846,6 +856,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
|||||||
|
|
||||||
uint32_t partitionCount = 0;
|
uint32_t partitionCount = 0;
|
||||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||||
|
dispatchArgs.threadGroupCount = 32;
|
||||||
|
|
||||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||||
totalBytesProgrammed = commandStream.getUsed();
|
totalBytesProgrammed = commandStream.getUsed();
|
||||||
@@ -914,6 +925,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
|||||||
|
|
||||||
uint32_t partitionCount = 0;
|
uint32_t partitionCount = 0;
|
||||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||||
|
dispatchArgs.threadGroupCount = 32;
|
||||||
dispatchArgs.apiSelfCleanup = true;
|
dispatchArgs.apiSelfCleanup = true;
|
||||||
|
|
||||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||||
@@ -982,6 +994,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
|||||||
|
|
||||||
uint32_t partitionCount = 0;
|
uint32_t partitionCount = 0;
|
||||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||||
|
dispatchArgs.threadGroupCount = 32;
|
||||||
|
|
||||||
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
ImplicitScalingDispatch<FamilyType>::dispatchCommands(commandStream, walker, twoTile, dispatchArgs);
|
||||||
totalBytesProgrammed = commandStream.getUsed();
|
totalBytesProgrammed = commandStream.getUsed();
|
||||||
@@ -1601,6 +1614,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
|
|||||||
|
|
||||||
uint32_t partitionCount = 0;
|
uint32_t partitionCount = 0;
|
||||||
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
auto dispatchArgs = createDispatchCommandArgs(workPartitionAllocationAddress, partitionCount);
|
||||||
|
dispatchArgs.threadGroupCount = 32;
|
||||||
dispatchArgs.blockDispatchToCommandBuffer = true;
|
dispatchArgs.blockDispatchToCommandBuffer = true;
|
||||||
dispatchArgs.outWalkerPtr = &outWalkerPtr;
|
dispatchArgs.outWalkerPtr = &outWalkerPtr;
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (C) 2021-2024 Intel Corporation
|
* Copyright (C) 2021-2025 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
@@ -434,6 +434,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramComputeWalkerWhen
|
|||||||
WalkerPartition::WalkerPartitionArgs args = {};
|
WalkerPartition::WalkerPartitionArgs args = {};
|
||||||
args.partitionCount = 2;
|
args.partitionCount = 2;
|
||||||
args.tileCount = 2;
|
args.tileCount = 2;
|
||||||
|
args.threadGroupCount = 7 * 10 * 11;
|
||||||
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, args, *device);
|
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, args, *device);
|
||||||
auto walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
auto walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
||||||
|
|
||||||
@@ -448,6 +449,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramComputeWalkerWhen
|
|||||||
args = {};
|
args = {};
|
||||||
args.partitionCount = 2;
|
args.partitionCount = 2;
|
||||||
args.tileCount = 2;
|
args.tileCount = 2;
|
||||||
|
args.threadGroupCount = 7 * 10 * 11;
|
||||||
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, args, *device);
|
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, args, *device);
|
||||||
walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
||||||
|
|
||||||
@@ -460,6 +462,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramComputeWalkerWhen
|
|||||||
args = {};
|
args = {};
|
||||||
args.partitionCount = 2;
|
args.partitionCount = 2;
|
||||||
args.tileCount = 2;
|
args.tileCount = 2;
|
||||||
|
args.threadGroupCount = 7 * 10 * 11;
|
||||||
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, args, *device);
|
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, args, *device);
|
||||||
walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
||||||
|
|
||||||
@@ -473,6 +476,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramComputeWalkerWhen
|
|||||||
args = {};
|
args = {};
|
||||||
args.partitionCount = 1;
|
args.partitionCount = 1;
|
||||||
args.tileCount = 2;
|
args.tileCount = 2;
|
||||||
|
args.threadGroupCount = 7 * 10 * 11;
|
||||||
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, args, *device);
|
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, args, *device);
|
||||||
walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
||||||
|
|
||||||
@@ -1793,6 +1797,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenForceExecutionOnSingleTi
|
|||||||
WalkerPartition::WalkerPartitionArgs args = {};
|
WalkerPartition::WalkerPartitionArgs args = {};
|
||||||
args.partitionCount = 2;
|
args.partitionCount = 2;
|
||||||
args.tileCount = 2;
|
args.tileCount = 2;
|
||||||
|
args.threadGroupCount = 32;
|
||||||
args.forceExecutionOnSingleTile = forceExecutionOnSingleTile;
|
args.forceExecutionOnSingleTile = forceExecutionOnSingleTile;
|
||||||
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, args, *device);
|
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, args, *device);
|
||||||
auto walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
auto walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
||||||
@@ -1807,6 +1812,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenForceExecutionOnSingleTi
|
|||||||
args = {};
|
args = {};
|
||||||
args.partitionCount = 2;
|
args.partitionCount = 2;
|
||||||
args.tileCount = 2;
|
args.tileCount = 2;
|
||||||
|
args.threadGroupCount = 32;
|
||||||
args.forceExecutionOnSingleTile = forceExecutionOnSingleTile;
|
args.forceExecutionOnSingleTile = forceExecutionOnSingleTile;
|
||||||
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, args, *device);
|
programPartitionedWalker<FamilyType>(cmdBufferAddress, totalBytesProgrammed, &walker, args, *device);
|
||||||
walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
walkerCommand = genCmdCast<WalkerType *>(walkerCommandAddress);
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (C) 2021-2024 Intel Corporation
|
* Copyright (C) 2021-2025 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
@@ -48,6 +48,7 @@ ImplicitScalingDispatchCommandArgs ImplicitScalingFixture::createDispatchCommand
|
|||||||
NEO::RequiredPartitionDim::none, // requiredPartitionDim
|
NEO::RequiredPartitionDim::none, // requiredPartitionDim
|
||||||
partitionCount, // partitionCount
|
partitionCount, // partitionCount
|
||||||
1, // workgroupSize
|
1, // workgroupSize
|
||||||
|
1, // threadGroupCount
|
||||||
1, // maxWgCountPerTile
|
1, // maxWgCountPerTile
|
||||||
true, // useSecondaryBatchBuffer
|
true, // useSecondaryBatchBuffer
|
||||||
false, // apiSelfCleanup
|
false, // apiSelfCleanup
|
||||||
|
|||||||
Reference in New Issue
Block a user