refactor: Pass whole kernel descriptor to barrier programming

Signed-off-by: Chodor, Jaroslaw <jaroslaw.chodor@intel.com>
This commit is contained in:
Chodor, Jaroslaw
2025-01-24 12:14:19 +00:00
committed by Compute-Runtime-Automation
parent 6d0708098a
commit 574fe9fb29
10 changed files with 28 additions and 21 deletions

View File

@@ -170,7 +170,7 @@ struct EncodeDispatchKernel {
static bool inlineDataProgrammingRequired(const KernelDescriptor &kernelDesc);
template <typename InterfaceDescriptorType>
static void programBarrierEnable(InterfaceDescriptorType &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo);
static void programBarrierEnable(InterfaceDescriptorType &interfaceDescriptor, const KernelDescriptor &kernelDescriptor, const HardwareInfo &hwInfo);
template <typename WalkerType, typename InterfaceDescriptorType>
static void encodeThreadGroupDispatch(InterfaceDescriptorType &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo,

View File

@@ -86,7 +86,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
idd.setNumberOfThreadsInGpgpuThreadGroup(numThreadsPerThreadGroup);
EncodeDispatchKernel<Family>::programBarrierEnable(idd,
kernelDescriptor.kernelAttributes.barrierCount,
kernelDescriptor,
hwInfo);
auto slmSize = EncodeDispatchKernel<Family>::computeSlmValues(hwInfo, args.dispatchInterface->getSlmTotalSize());
idd.setSharedLocalMemorySize(slmSize);
@@ -406,9 +406,9 @@ void EncodeDispatchKernel<Family>::encodeThreadData(WalkerType &walkerCmd,
// Programs the barrier setting on the given interface descriptor by calling
// interfaceDescriptor.setBarrierEnable(...).
//
// interfaceDescriptor - descriptor object that receives the barrier setting.
// kernelDescriptor    - kernel metadata; only kernelAttributes.barrierCount is read here.
// hwInfo              - not referenced in the visible body of this variant.
//
// NOTE(review): this span is a rendered diff hunk without +/- markers. The
// `uint32_t value` parameter and the `setBarrierEnable(value)` call appear to be
// the removed side of the change, and the `kernelDescriptor` lines the added
// side - confirm against the repository before relying on either signature.
template <typename Family>
template <typename InterfaceDescriptorType>
void EncodeDispatchKernel<Family>::programBarrierEnable(InterfaceDescriptorType &interfaceDescriptor,
uint32_t value,
const KernelDescriptor &kernelDescriptor,
const HardwareInfo &hwInfo) {
interfaceDescriptor.setBarrierEnable(value);
// The barrier count is taken from the kernel descriptor rather than a separate argument.
interfaceDescriptor.setBarrierEnable(kernelDescriptor.kernelAttributes.barrierCount);
}
template <typename Family>

View File

@@ -21,7 +21,7 @@ template void NEO::EncodeDispatchKernel<Family>::setupPostSyncMocs<Family::Defau
template void NEO::EncodeDispatchKernel<Family>::encode<Family::DefaultWalkerType>(CommandContainer &container, EncodeDispatchKernelArgs &args);
template void NEO::EncodeDispatchKernel<Family>::encodeThreadData<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template void NEO::EncodeDispatchKernel<Family>::adjustWalkOrder<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template void NEO::EncodeDispatchKernel<Family>::programBarrierEnable<Family::DefaultWalkerType::InterfaceDescriptorType>(Family::DefaultWalkerType::InterfaceDescriptorType &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo);
template void NEO::EncodeDispatchKernel<Family>::programBarrierEnable<Family::DefaultWalkerType::InterfaceDescriptorType>(Family::DefaultWalkerType::InterfaceDescriptorType &interfaceDescriptor, const KernelDescriptor &kernelDescriptor, const HardwareInfo &hwInfo);
template void NEO::EncodeDispatchKernel<Family>::setScratchAddress<false>(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &submissionCsr);
template void NEO::EncodeDispatchKernel<Family>::setScratchAddress<true>(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &submissionCsr);
template void NEO::EncodeDispatchKernel<Family>::programInlineDataHeapless<false>(uint8_t *inlineDataPtr, EncodeDispatchKernelArgs &args, CommandContainer &container, uint64_t offsetThreadData, uint64_t scratchPtr);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2024 Intel Corporation
* Copyright (C) 2021-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -41,7 +41,7 @@ inline void EncodeAtomic<Family>::setMiAtomicAddress(MI_ATOMIC &atomic, uint64_t
template <typename Family>
template <typename InterfaceDescriptorType>
void EncodeDispatchKernel<Family>::programBarrierEnable(InterfaceDescriptorType &interfaceDescriptor,
uint32_t value,
const KernelDescriptor &kernelDescriptor,
const HardwareInfo &hwInfo) {
using BARRIERS = typename InterfaceDescriptorType::NUMBER_OF_BARRIERS;
static const LookupArray<uint32_t, BARRIERS, 8> barrierLookupArray({{{0, BARRIERS::NUMBER_OF_BARRIERS_NONE},
@@ -52,7 +52,7 @@ void EncodeDispatchKernel<Family>::programBarrierEnable(InterfaceDescriptorType
{16, BARRIERS::NUMBER_OF_BARRIERS_B16},
{24, BARRIERS::NUMBER_OF_BARRIERS_B24},
{32, BARRIERS::NUMBER_OF_BARRIERS_B32}}});
BARRIERS numBarriers = barrierLookupArray.lookUp(value);
BARRIERS numBarriers = barrierLookupArray.lookUp(kernelDescriptor.kernelAttributes.barrierCount);
interfaceDescriptor.setNumberOfBarriers(numBarriers);
}

View File

@@ -109,7 +109,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
idd.setNumberOfThreadsInGpgpuThreadGroup(threadsPerThreadGroup);
EncodeDispatchKernel<Family>::programBarrierEnable(idd,
kernelDescriptor.kernelAttributes.barrierCount,
kernelDescriptor,
hwInfo);
EncodeDispatchKernel<Family>::encodeEuSchedulingPolicy(&idd, kernelDescriptor, args.defaultPipelinedThreadArbitrationPolicy);