refactor: change encoder for thread group over dispatch 1/n

- change method name to a more meaningful one
- add all inputs of the algorithm as explicit function arguments
- position all implementations accordingly
- rename unit tests to match the new method name
- fix unit test to have correct initial command values
- fix unit test to have input values consistent with command values
- fix unit test to change input values together with command values

Related-To: NEO-12639
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
2026-01-03 06:49:52 +08:00 · 2024-10-25 09:03:09 +00:00
parent 8c3c703ec0
commit bbdf1ac7b6
11 changed files with 343 additions and 244 deletions
--- a/shared/source/command_container/command_encoder_bdw_and_later.inl
+++ b/shared/source/command_container/command_encoder_bdw_and_later.inl
@@ -67,12 +67,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis

    LinearStream *listCmdBufferStream = container.getCommandStream();

-    auto threadDims = static_cast<const uint32_t *>(args.threadGroupDimensions);
-    const Vec3<size_t> threadStartVec{0, 0, 0};
-    Vec3<size_t> threadDimsVec{0, 0, 0};
-    if (!args.isIndirect) {
-        threadDimsVec = {threadDims[0], threadDims[1], threadDims[2]};
-    }
+    auto threadGroupDims = static_cast<const uint32_t *>(args.threadGroupDimensions);

    DefaultWalkerType cmd = Family::cmdInitGpgpuWalker;
    auto idd = Family::cmdInitInterfaceDescriptorData;
@@ -267,11 +262,11 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis

    EncodeDispatchKernel<Family>::encodeThreadData(cmd,
                                                   nullptr,
-                                                   threadDims,
+                                                   threadGroupDims,
                                                   args.dispatchInterface->getGroupSize(),
                                                   kernelDescriptor.kernelAttributes.simdSize,
                                                   kernelDescriptor.kernelAttributes.numLocalIdChannels,
-                                                   args.dispatchInterface->getNumThreadsPerThreadGroup(),
+                                                   numThreadsPerThreadGroup,
                                                   args.dispatchInterface->getThreadExecutionMask(),
                                                   true,
                                                   false,
@@ -282,7 +277,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
    cmd.setPredicateEnable(args.isPredicate);

    auto threadGroupCount = cmd.getThreadGroupIdXDimension() * cmd.getThreadGroupIdYDimension() * cmd.getThreadGroupIdZDimension();
-    EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(idd, *args.device, hwInfo, threadGroupCount, kernelDescriptor.kernelAttributes.numGrfRequired, cmd);
+    EncodeDispatchKernel<Family>::encodeThreadGroupDispatch(idd, *args.device, hwInfo, threadGroupDims, threadGroupCount, kernelDescriptor.kernelAttributes.numGrfRequired, numThreadsPerThreadGroup, cmd);

    memcpy_s(iddPtr, sizeof(idd), &idd, sizeof(idd));

@@ -635,4 +630,11 @@ template <typename WalkerType, typename InterfaceDescriptorType>
 void EncodeDispatchKernel<Family>::overrideDefaultValues(WalkerType &walkerCmd, InterfaceDescriptorType &interfaceDescriptor) {
 }

+template <typename Family>
+template <typename WalkerType, typename InterfaceDescriptorType>
+void EncodeDispatchKernel<Family>::encodeThreadGroupDispatch(InterfaceDescriptorType &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo,
+                                                             const uint32_t *threadGroupDimensions, const uint32_t threadGroupCount, const uint32_t grfCount, const uint32_t threadsPerThreadGroup,
+                                                             WalkerType &walkerCmd) { // Empty body in this file: no argument is read or modified. NOTE(review): non-empty variants presumably live in other platform-specific .inl files - confirm.
+}
+
 } // namespace NEO