refactor: change data fields for encoder of additional walker fields

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2025-01-21 10:48:43 +00:00
committed by Compute-Runtime-Automation
parent 912bd8591a
commit 1e89058232
9 changed files with 43 additions and 46 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2024 Intel Corporation
* Copyright (C) 2019-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -84,12 +84,12 @@ inline void HardwareInterface<GfxFamily>::programWalker(
commandQueue.getDevice());
EncodeWalkerArgs encodeWalkerArgs{
kernel.getKernelInfo().kernelDescriptor, // kernelDescriptor
kernel.getExecutionType(), // kernelExecutionType
RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder
0, // localRegionSize
0, // maxFrontEndThreads
false}; // requiredSystemFence
kernel.getExecutionType(), // kernelExecutionType
RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder
0, // localRegionSize
0, // maxFrontEndThreads
false, // requiredSystemFence
false}; // hasSample
EncodeDispatchKernel<GfxFamily>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, encodeWalkerArgs);
*walkerCmdBuf = walkerCmd;
}

View File

@ -146,12 +146,12 @@ inline void HardwareInterface<GfxFamily>::programWalker(
auto maxFrontEndThreads = device.getDeviceInfo().maxFrontEndThreads;
EncodeWalkerArgs encodeWalkerArgs{
kernelInfo.kernelDescriptor, // kernelDescriptor
kernel.getExecutionType(), // kernelExecutionType
kernelAttributes.dispatchWalkOrder, // requiredDispatchWalkOrder
kernelAttributes.localRegionSize, // localRegionSize
maxFrontEndThreads, // maxFrontEndThreads
requiredSystemFence}; // requiredSystemFence
kernel.getExecutionType(), // kernelExecutionType
kernelAttributes.dispatchWalkOrder, // requiredDispatchWalkOrder
kernelAttributes.localRegionSize, // localRegionSize
maxFrontEndThreads, // maxFrontEndThreads
requiredSystemFence, // requiredSystemFence
kernelInfo.kernelDescriptor.kernelAttributes.flags.hasSample}; // hasSample
EncodeDispatchKernel<GfxFamily>::template encodeAdditionalWalkerFields<WalkerType>(rootDeviceEnvironment, walkerCmd, encodeWalkerArgs);
EncodeDispatchKernel<GfxFamily>::template encodeWalkerPostSyncFields<WalkerType>(walkerCmd, encodeWalkerArgs);

View File

@ -25,14 +25,13 @@ XE2_HPG_CORETEST_F(WalkerDispatchTestsXe2HpGCore, whenEncodeAdditionalWalkerFiel
MockExecutionEnvironment executionEnvironment;
auto walkerCmd = FamilyType::cmdInitGpgpuWalker;
KernelDescriptor kernelDescriptor;
EncodeWalkerArgs walkerArgs{
kernelDescriptor, // kernelDescriptor
KernelExecutionType::concurrent, // kernelExecutionType
NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder
0, // localRegionSize
113, // maxFrontEndThreads
true}; // requiredSystemFence
true, // requiredSystemFence
false}; // hasSample
{
EncodeDispatchKernel<FamilyType>::encodeComputeDispatchAllWalker(walkerCmd, &walkerCmd.getInterfaceDescriptor(), *executionEnvironment.rootDeviceEnvironments[0], walkerArgs);
EXPECT_TRUE(walkerCmd.getComputeDispatchAllWalkerEnable());

View File

@ -26,14 +26,13 @@ XE3_CORETEST_F(WalkerDispatchTestsXe3Core, whenEncodeAdditionalWalkerFieldsIsCal
MockExecutionEnvironment executionEnvironment;
auto walkerCmd = FamilyType::cmdInitGpgpuWalker;
KernelDescriptor kernelDescriptor;
EncodeWalkerArgs walkerArgs{
kernelDescriptor, // kernelDescriptor
KernelExecutionType::concurrent, // kernelExecutionType
NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder
0, // localRegionSize
113, // maxFrontEndThreads
true}; // requiredSystemFence
true, // requiredSystemFence
false}; // hasSample
{
EncodeDispatchKernel<FamilyType>::encodeComputeDispatchAllWalker(walkerCmd, &walkerCmd.getInterfaceDescriptor(), *executionEnvironment.rootDeviceEnvironments[0], walkerArgs);
EXPECT_TRUE(walkerCmd.getComputeDispatchAllWalkerEnable());
@ -102,24 +101,23 @@ XE3_CORETEST_F(WalkerDispatchTestsXe3Core, givenHasSampleSetWhenEncodingExtraPar
MockExecutionEnvironment mockExecutionEnvironment{};
auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0];
KernelDescriptor kernelDescriptor;
EncodeWalkerArgs walkerArgs{
kernelDescriptor, // kernelDescriptor
KernelExecutionType::defaultType, // kernelExecutionType
NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder
0, // localRegionSize
0, // maxFrontEndThreads
false}; // requiredSystemFence
false, // requiredSystemFence
false}; // hasSample
{
kernelDescriptor.kernelAttributes.flags.hasSample = false;
walkerArgs.hasSample = false;
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, walkerArgs);
EXPECT_NE(DISPATCH_WALK_ORDER::DISPATCH_WALK_ORDER_MORTON_WALK, walkerCmd.getDispatchWalkOrder());
EXPECT_EQ(THREAD_GROUP_BATCH_SIZE::THREAD_GROUP_BATCH_SIZE_TG_BATCH_1, walkerCmd.getThreadGroupBatchSize());
}
{
kernelDescriptor.kernelAttributes.flags.hasSample = true;
walkerArgs.hasSample = true;
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, walkerArgs);
EXPECT_EQ(DISPATCH_WALK_ORDER::DISPATCH_WALK_ORDER_MORTON_WALK, walkerCmd.getDispatchWalkOrder());
EXPECT_EQ(THREAD_GROUP_BATCH_SIZE::THREAD_GROUP_BATCH_SIZE_TG_BATCH_4, walkerCmd.getThreadGroupBatchSize());

View File

@ -106,21 +106,21 @@ enum class CompareOperation : uint32_t {
// Argument bundle passed to the EncodeDispatchKernel walker helpers
// (encodeAdditionalWalkerFields, encodeWalkerPostSyncFields, encodeComputeDispatchAllWalker).
// Default construction is deleted, so every field has to be supplied by the caller.
struct EncodeWalkerArgs {
EncodeWalkerArgs() = delete;
EncodeWalkerArgs(const KernelDescriptor &kernelDescriptor, KernelExecutionType kernelExecutionType, NEO::RequiredDispatchWalkOrder requiredDispatchWalkOrder,
uint32_t localRegionSize, uint32_t maxFrontEndThreads, bool requiredSystemFence)
: kernelDescriptor(kernelDescriptor),
kernelExecutionType(kernelExecutionType),
EncodeWalkerArgs(KernelExecutionType kernelExecutionType, NEO::RequiredDispatchWalkOrder requiredDispatchWalkOrder,
uint32_t localRegionSize, uint32_t maxFrontEndThreads, bool requiredSystemFence, bool hasSample)
: kernelExecutionType(kernelExecutionType),
requiredDispatchWalkOrder(requiredDispatchWalkOrder),
localRegionSize(localRegionSize),
maxFrontEndThreads(maxFrontEndThreads),
requiredSystemFence(requiredSystemFence),
hasSample(hasSample) {}
const KernelDescriptor &kernelDescriptor;
KernelExecutionType kernelExecutionType = KernelExecutionType::defaultType;
NEO::RequiredDispatchWalkOrder requiredDispatchWalkOrder = NEO::RequiredDispatchWalkOrder::none;
uint32_t localRegionSize = NEO::localRegionSizeParamNotSet;
uint32_t maxFrontEndThreads = 0;
bool requiredSystemFence = false;
// Read by encodeAdditionalWalkerFields; in the specialization touched by this change,
// true selects Morton dispatch walk order and a thread-group batch size of 4.
bool hasSample = false;
};
template <typename GfxFamily>

View File

@ -279,12 +279,12 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
EncodeDispatchKernel<Family>::encodeThreadGroupDispatch(idd, *args.device, hwInfo, threadGroupDims, threadGroupCount, kernelDescriptor.kernelAttributes.numGrfRequired, numThreadsPerThreadGroup, cmd);
EncodeWalkerArgs walkerArgs{
kernelDescriptor, // kernelDescriptor
KernelExecutionType::defaultType, // kernelExecutionType
args.requiredDispatchWalkOrder, // requiredDispatchWalkOrder
args.localRegionSize, // localRegionSize
args.device->getDeviceInfo().maxFrontEndThreads, // maxFrontEndThreads
args.requiresSystemMemoryFence()}; // requiredSystemFence
args.requiresSystemMemoryFence(), // requiredSystemFence
// NOTE(review): hasSample is hard-coded to false on this encode path, while the other encode path
// forwards kernelDescriptor.kernelAttributes.flags.hasSample - confirm this path never needs the flag.
false}; // hasSample
EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(rootDeviceEnvironment, cmd, walkerArgs);
EncodeDispatchKernel<Family>::encodeWalkerPostSyncFields(cmd, walkerArgs);
EncodeDispatchKernel<Family>::template encodeComputeDispatchAllWalker<WalkerType, INTERFACE_DESCRIPTOR_DATA>(cmd, nullptr, rootDeviceEnvironment, walkerArgs);

View File

@ -403,12 +403,12 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
auto kernelExecutionType = args.isCooperative ? KernelExecutionType::concurrent : KernelExecutionType::defaultType;
EncodeWalkerArgs walkerArgs{
kernelDescriptor, // kernelDescriptor
kernelExecutionType, // kernelExecutionType
args.requiredDispatchWalkOrder, // requiredDispatchWalkOrder
args.localRegionSize, // localRegionSize
args.device->getDeviceInfo().maxFrontEndThreads, // maxFrontEndThreads
args.requiresSystemMemoryFence()}; // requiredSystemFence
kernelExecutionType, // kernelExecutionType
args.requiredDispatchWalkOrder, // requiredDispatchWalkOrder
args.localRegionSize, // localRegionSize
args.device->getDeviceInfo().maxFrontEndThreads, // maxFrontEndThreads
args.requiresSystemMemoryFence(), // requiredSystemFence
kernelDescriptor.kernelAttributes.flags.hasSample}; // hasSample
EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, walkerArgs);
EncodeDispatchKernel<Family>::encodeWalkerPostSyncFields(walkerCmd, walkerArgs);
EncodeDispatchKernel<Family>::encodeComputeDispatchAllWalker(walkerCmd, &idd, rootDeviceEnvironment, walkerArgs);

View File

@ -125,7 +125,7 @@ void EncodeSurfaceState<Family>::setAuxParamsForMCSCCS(R_SURFACE_STATE *surfaceS
template <>
template <typename WalkerType>
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {
if (walkerArgs.kernelDescriptor.kernelAttributes.flags.hasSample) {
if (walkerArgs.hasSample) {
walkerCmd.setDispatchWalkOrder(DefaultWalkerType::DISPATCH_WALK_ORDER::DISPATCH_WALK_ORDER_MORTON_WALK);
walkerCmd.setThreadGroupBatchSize(DefaultWalkerType::THREAD_GROUP_BATCH_SIZE::THREAD_GROUP_BATCH_SIZE_TG_BATCH_4);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022-2024 Intel Corporation
* Copyright (C) 2022-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -84,12 +84,12 @@ EncodeDispatchKernelArgs CommandEncodeStatesFixture::createDefaultDispatchKernel
// Builds the fixture's default EncodeWalkerArgs: default execution type, no required walk order,
// zero localRegionSize and maxFrontEndThreads, no system fence; hasSample is copied from the
// supplied kernel descriptor's attribute flags.
EncodeWalkerArgs CommandEncodeStatesFixture::createDefaultEncodeWalkerArgs(const KernelDescriptor &kernelDescriptor) {
EncodeWalkerArgs args{
kernelDescriptor, // kernelDescriptor
NEO::KernelExecutionType::defaultType, // kernelExecutionType
NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder
0, // localRegionSize
0, // maxFrontEndThreads
false}; // requiredSystemFence
NEO::KernelExecutionType::defaultType, // kernelExecutionType
NEO::RequiredDispatchWalkOrder::none, // requiredDispatchWalkOrder
0, // localRegionSize
0, // maxFrontEndThreads
false, // requiredSystemFence
kernelDescriptor.kernelAttributes.flags.hasSample}; // hasSample
return args;
}