refactor: Pass whole kernel descriptor to barrier programming

Signed-off-by: Chodor, Jaroslaw <jaroslaw.chodor@intel.com>
This commit is contained in:
Chodor, Jaroslaw
2025-01-24 12:14:19 +00:00
committed by Compute-Runtime-Automation
parent 6d0708098a
commit 574fe9fb29
10 changed files with 28 additions and 21 deletions

View File

@@ -185,7 +185,7 @@ size_t HardwareCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
interfaceDescriptor.setSharedLocalMemorySize(programmableIDSLMSize);
EncodeDispatchKernel<GfxFamily>::programBarrierEnable(interfaceDescriptor,
kernelDescriptor.kernelAttributes.barrierCount,
kernelDescriptor,
hardwareInfo);
PreemptionHelper::programInterfaceDescriptorDataPreemption<GfxFamily>(&interfaceDescriptor, preemptionMode);

View File

@@ -170,7 +170,7 @@ struct EncodeDispatchKernel {
static bool inlineDataProgrammingRequired(const KernelDescriptor &kernelDesc);
template <typename InterfaceDescriptorType>
static void programBarrierEnable(InterfaceDescriptorType &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo);
static void programBarrierEnable(InterfaceDescriptorType &interfaceDescriptor, const KernelDescriptor &kernelDescriptor, const HardwareInfo &hwInfo);
template <typename WalkerType, typename InterfaceDescriptorType>
static void encodeThreadGroupDispatch(InterfaceDescriptorType &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo,

View File

@@ -86,7 +86,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
idd.setNumberOfThreadsInGpgpuThreadGroup(numThreadsPerThreadGroup);
EncodeDispatchKernel<Family>::programBarrierEnable(idd,
kernelDescriptor.kernelAttributes.barrierCount,
kernelDescriptor,
hwInfo);
auto slmSize = EncodeDispatchKernel<Family>::computeSlmValues(hwInfo, args.dispatchInterface->getSlmTotalSize());
idd.setSharedLocalMemorySize(slmSize);
@@ -406,9 +406,9 @@ void EncodeDispatchKernel<Family>::encodeThreadData(WalkerType &walkerCmd,
// Programs the barrier-enable field of the given interface descriptor via
// setBarrierEnable(), using the kernel's barrier count as the value.
//
// NOTE(review): this is a diff hunk rendered without +/- markers, so the old and
// new form of each changed line appear back to back. Presumably the
// `uint32_t value` parameter and `setBarrierEnable(value)` are the removed lines,
// and the `kernelDescriptor` forms directly below them are the replacements
// (this matches the commit title) - confirm against the real file.
//
// hwInfo is accepted but not referenced by the lines shown in this hunk.
template <typename Family>
template <typename InterfaceDescriptorType>
void EncodeDispatchKernel<Family>::programBarrierEnable(InterfaceDescriptorType &interfaceDescriptor,
uint32_t value,
const KernelDescriptor &kernelDescriptor,
const HardwareInfo &hwInfo) {
interfaceDescriptor.setBarrierEnable(value);
interfaceDescriptor.setBarrierEnable(kernelDescriptor.kernelAttributes.barrierCount);
}
template <typename Family>

View File

@@ -21,7 +21,7 @@ template void NEO::EncodeDispatchKernel<Family>::setupPostSyncMocs<Family::Defau
template void NEO::EncodeDispatchKernel<Family>::encode<Family::DefaultWalkerType>(CommandContainer &container, EncodeDispatchKernelArgs &args);
template void NEO::EncodeDispatchKernel<Family>::encodeThreadData<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template void NEO::EncodeDispatchKernel<Family>::adjustWalkOrder<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template void NEO::EncodeDispatchKernel<Family>::programBarrierEnable<Family::DefaultWalkerType::InterfaceDescriptorType>(Family::DefaultWalkerType::InterfaceDescriptorType &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo);
template void NEO::EncodeDispatchKernel<Family>::programBarrierEnable<Family::DefaultWalkerType::InterfaceDescriptorType>(Family::DefaultWalkerType::InterfaceDescriptorType &interfaceDescriptor, const KernelDescriptor &kernelDescriptor, const HardwareInfo &hwInfo);
template void NEO::EncodeDispatchKernel<Family>::setScratchAddress<false>(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &submissionCsr);
template void NEO::EncodeDispatchKernel<Family>::setScratchAddress<true>(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &submissionCsr);
template void NEO::EncodeDispatchKernel<Family>::programInlineDataHeapless<false>(uint8_t *inlineDataPtr, EncodeDispatchKernelArgs &args, CommandContainer &container, uint64_t offsetThreadData, uint64_t scratchPtr);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2024 Intel Corporation
* Copyright (C) 2021-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -41,7 +41,7 @@ inline void EncodeAtomic<Family>::setMiAtomicAddress(MI_ATOMIC &atomic, uint64_t
template <typename Family>
template <typename InterfaceDescriptorType>
void EncodeDispatchKernel<Family>::programBarrierEnable(InterfaceDescriptorType &interfaceDescriptor,
uint32_t value,
const KernelDescriptor &kernelDescriptor,
const HardwareInfo &hwInfo) {
using BARRIERS = typename InterfaceDescriptorType::NUMBER_OF_BARRIERS;
static const LookupArray<uint32_t, BARRIERS, 8> barrierLookupArray({{{0, BARRIERS::NUMBER_OF_BARRIERS_NONE},
@@ -52,7 +52,7 @@ void EncodeDispatchKernel<Family>::programBarrierEnable(InterfaceDescriptorType
{16, BARRIERS::NUMBER_OF_BARRIERS_B16},
{24, BARRIERS::NUMBER_OF_BARRIERS_B24},
{32, BARRIERS::NUMBER_OF_BARRIERS_B32}}});
BARRIERS numBarriers = barrierLookupArray.lookUp(value);
BARRIERS numBarriers = barrierLookupArray.lookUp(kernelDescriptor.kernelAttributes.barrierCount);
interfaceDescriptor.setNumberOfBarriers(numBarriers);
}

View File

@@ -109,7 +109,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
idd.setNumberOfThreadsInGpgpuThreadGroup(threadsPerThreadGroup);
EncodeDispatchKernel<Family>::programBarrierEnable(idd,
kernelDescriptor.kernelAttributes.barrierCount,
kernelDescriptor,
hwInfo);
EncodeDispatchKernel<Family>::encodeEuSchedulingPolicy(&idd, kernelDescriptor, args.defaultPipelinedThreadArbitrationPolicy);

View File

@@ -49,11 +49,11 @@ void EncodeDispatchKernel<Family>::encodeThreadGroupDispatch(InterfaceDescriptor
// Full specialization of programBarrierEnable for INTERFACE_DESCRIPTOR_DATA.
// Translates a barrier count into the descriptor's NUMBER_OF_BARRIERS enum through
// a two-entry lookup table (0 -> NUMBER_OF_BARRIERS_NONE, 1 -> NUMBER_OF_BARRIERS_B1)
// and stores the result with setNumberOfBarriers().
//
// NOTE(review): this is a diff hunk rendered without +/- markers, so the old and
// new form of each changed line appear back to back. Presumably the signature and
// lookUp() call that use `uint32_t value` are the removed lines, and the ones that
// use `kernelDescriptor.kernelAttributes.barrierCount` are the replacements -
// confirm against the real file.
//
// What lookUp() does for a count that is not in the table is defined by
// LookupArray, which is not visible here. hwInfo is not referenced in this body.
template <>
template <>
void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo) {
void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const KernelDescriptor &kernelDescriptor, const HardwareInfo &hwInfo) {
using BARRIERS = INTERFACE_DESCRIPTOR_DATA::NUMBER_OF_BARRIERS;
// Function-local static: the table is built once and reused on later calls.
static const LookupArray<uint32_t, BARRIERS, 2> barrierLookupArray({{{0, BARRIERS::NUMBER_OF_BARRIERS_NONE},
{1, BARRIERS::NUMBER_OF_BARRIERS_B1}}});
BARRIERS numBarriers = barrierLookupArray.lookUp(value);
BARRIERS numBarriers = barrierLookupArray.lookUp(kernelDescriptor.kernelAttributes.barrierCount);
interfaceDescriptor.setNumberOfBarriers(numBarriers);
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2024 Intel Corporation
* Copyright (C) 2020-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -1398,14 +1398,17 @@ HWCMDTEST_F(IGFX_GEN12LP_CORE, InterfaceDescriptorDataTests, givenVariousValuesW
INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData;
MockDevice device;
auto hwInfo = device.getHardwareInfo();
EncodeDispatchKernel<FamilyType>::programBarrierEnable(idd, 0, hwInfo);
KernelDescriptor kd = {};
kd.kernelAttributes.barrierCount = 0;
EncodeDispatchKernel<FamilyType>::programBarrierEnable(idd, kd, hwInfo);
EXPECT_FALSE(idd.getBarrierEnable());
EncodeDispatchKernel<FamilyType>::programBarrierEnable(idd, 1, hwInfo);
kd.kernelAttributes.barrierCount = 1;
EncodeDispatchKernel<FamilyType>::programBarrierEnable(idd, kd, hwInfo);
EXPECT_TRUE(idd.getBarrierEnable());
EncodeDispatchKernel<FamilyType>::programBarrierEnable(idd, 2, hwInfo);
kd.kernelAttributes.barrierCount = 2;
EncodeDispatchKernel<FamilyType>::programBarrierEnable(idd, kd, hwInfo);
EXPECT_TRUE(idd.getBarrierEnable());
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2024 Intel Corporation
* Copyright (C) 2021-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -73,8 +73,10 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenVariousValuesWhenCallingSetBa
{16, 5},
{24, 6},
{32, 7}};
KernelDescriptor kd = {};
for (auto &[barrierCount, numBarriersEnum] : barriers) {
EncodeDispatchKernel<FamilyType>::programBarrierEnable(idd, barrierCount, hwInfo);
kd.kernelAttributes.barrierCount = barrierCount;
EncodeDispatchKernel<FamilyType>::programBarrierEnable(idd, kd, hwInfo);
EXPECT_EQ(numBarriersEnum, idd.getNumberOfBarriers());
}
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022-2024 Intel Corporation
* Copyright (C) 2022-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -28,8 +28,10 @@ HWTEST2_F(CommandEncodeStatesTestXeHpgCore, givenVariousValuesWhenCallingSetBarr
uint32_t barrierCounts[] = {0, 1};
KernelDescriptor kd = {};
for (auto barrierCount : barrierCounts) {
EncodeDispatchKernel<FamilyType>::programBarrierEnable(idd, barrierCount, *defaultHwInfo);
kd.kernelAttributes.barrierCount = barrierCount;
EncodeDispatchKernel<FamilyType>::programBarrierEnable(idd, kd, *defaultHwInfo);
EXPECT_EQ(barrierCount, idd.getNumberOfBarriers());
}