Pass DispatchInfo to estimation functions

Related-To: NEO-5546
Signed-off-by: Maciej Dziuban <maciej.dziuban@intel.com>
2026-01-03 06:49:52 +08:00 · 2021-03-03 17:29:32 +00:00
parent bfd9aba350
commit 1350aa52fb
12 changed files with 43 additions and 36 deletions
--- a/shared/source/command_container/command_encoder.h
+++ b/shared/source/command_container/command_encoder.h
@@ -46,7 +46,7 @@ struct EncodeDispatchKernel {

    static void *getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset);

-    static size_t estimateEncodeDispatchKernelCmdsSize(Device *device);
+    static size_t estimateEncodeDispatchKernelCmdsSize(Device *device, Vec3<size_t> groupStart, Vec3<size_t> groupCount);

    static bool isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels,
                                                    size_t *lws,
--- a/shared/source/command_container/command_encoder_bdw_plus.inl
+++ b/shared/source/command_container/command_encoder_bdw_plus.inl
@@ -43,7 +43,13 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
    LinearStream *listCmdBufferStream = container.getCommandStream();
    size_t sshOffset = 0;

-    size_t estimatedSizeRequired = estimateEncodeDispatchKernelCmdsSize(device);
+    auto threadDims = static_cast<const uint32_t *>(pThreadGroupDimensions);
+    const Vec3<size_t> threadStartVec{0, 0, 0};
+    Vec3<size_t> threadDimsVec{0, 0, 0};
+    if (threadDims != nullptr) {
+        threadDimsVec = {threadDims[0], threadDims[1], threadDims[2]};
+    }
+    size_t estimatedSizeRequired = estimateEncodeDispatchKernelCmdsSize(device, threadStartVec, threadDimsVec);
    if (container.getCommandStream()->getAvailableSpace() < estimatedSizeRequired) {
        auto bbEnd = listCmdBufferStream->getSpaceForCmd<MI_BATCH_BUFFER_END>();
        *bbEnd = Family::cmdInitBatchBufferEnd;
@@ -191,7 +197,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,

    EncodeDispatchKernel<Family>::encodeThreadData(cmd,
                                                   nullptr,
-                                                   static_cast<const uint32_t *>(pThreadGroupDimensions),
+                                                   threadDims,
                                                   dispatchInterface->getGroupSize(),
                                                   kernelDescriptor.kernelAttributes.simdSize,
                                                   kernelDescriptor.kernelAttributes.numLocalIdChannels,
@@ -316,7 +322,7 @@ template <typename Family>
 void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {}

 template <typename Family>
-size_t EncodeDispatchKernel<Family>::estimateEncodeDispatchKernelCmdsSize(Device *device) {
+size_t EncodeDispatchKernel<Family>::estimateEncodeDispatchKernelCmdsSize(Device *device, Vec3<size_t> groupStart, Vec3<size_t> groupCount) {
    using MEDIA_STATE_FLUSH = typename Family::MEDIA_STATE_FLUSH;
    using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename Family::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
    using MI_BATCH_BUFFER_END = typename Family::MI_BATCH_BUFFER_END;
--- a/shared/test/common/gen12lp/test_command_encoder_gen12lp.cpp
+++ b/shared/test/common/gen12lp/test_command_encoder_gen12lp.cpp
@@ -105,9 +105,9 @@ GEN12LPTEST_F(CommandEncoderTest, givenVariousEngineTypesWhenEstimateCommandBuff
    using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
    using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;

-    auto sizeWA = EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(pDevice);
+    auto sizeWA = EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(pDevice, Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1));
    static_cast<MockOsContext *>(pDevice->getDefaultEngine().osContext)->engineType = aub_stream::ENGINE_CCS;
-    auto size = EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(pDevice);
+    auto size = EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(pDevice, Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1));

    auto expectedDiff = 2 * PreambleHelper<FamilyType>::getCmdSizeForPipelineSelect(pDevice->getHardwareInfo());
    auto diff = sizeWA - size;