fix: ensure payload arguments are patched before walker command is fetched

In case of an indirect kernel launch, some payload arguments are patched
just before the walker command. This change disables prefetch, performs a
batch buffer start to the next bytes, and then re-enables prefetch. All these
operations are performed between MI_STORE_REGISTER_MEM and COMPUTE_WALKER.

Related-To: NEO-14584
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski 2025-04-07 13:59:39 +00:00 committed by Compute-Runtime-Automation
parent ca45573dd3
commit 1c377dc930
2 changed files with 66 additions and 0 deletions

View File

@ -631,6 +631,13 @@ void EncodeIndirectParams<Family>::encode(CommandContainer &container, uint64_t
setWorkDimIndirect(container, numWorkDimOffsetV1, implicitArgsGpuPtr, dispatchInterface->getGroupSize(), nullptr);
}
}
if (outArgs && !outArgs->commandsToPatch.empty()) {
auto &commandStream = *container.getCommandStream();
EncodeMiArbCheck<Family>::program(commandStream, true);
auto gpuVa = commandStream.getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<Family>::getBatchBufferStartSize();
EncodeBatchBufferStartOrEnd<Family>::programBatchBufferStart(&commandStream, gpuVa, !(container.getFlushTaskUsedForImmediate() || container.isUsingPrimaryBuffer()), false, false);
EncodeMiArbCheck<Family>::program(commandStream, false);
}
}
template <typename Family>

View File

@ -10,6 +10,7 @@
#include "shared/source/indirect_heap/heap_size.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/fixtures/device_fixture.h"
#include "shared/test/common/helpers/gtest_helpers.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "shared/test/unit_test/mocks/mock_dispatch_kernel_encoder_interface.h"
@ -489,6 +490,64 @@ HWTEST_F(CommandEncoderMathTest, givenPayloadArgumentStoredInInlineDataWhenEncod
}
}
HWTEST_F(CommandEncoderMathTest, givenPayloadArgumentStoredInInlineDataWhenEncodeIndirectParamsThenPreparserMitigationIsProgrammed) {
using MI_ARB_CHECK = typename FamilyType::MI_ARB_CHECK;
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
CommandContainer cmdContainer0;
cmdContainer0.initialize(pDevice, nullptr, HeapSize::defaultHeapSize, true, false);
CommandContainer cmdContainer1;
cmdContainer1.initialize(pDevice, nullptr, HeapSize::defaultHeapSize, true, false);
uint64_t crossThreadGpuVa = 0xBADF000;
IndirectParamsInInlineDataArgs args{};
MockDispatchKernelEncoder dispatchInterface;
auto &kernelDescriptor = dispatchInterface.kernelDescriptor;
uint32_t groupSizes[3] = {1, 2, 3};
dispatchInterface.getGroupSizeResult = groupSizes;
kernelDescriptor.kernelAttributes.inlineDataPayloadSize = 0x100;
kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[0] = 0x100;
kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[1] = 0x110;
kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[2] = undefined<CrossThreadDataOffset>;
kernelDescriptor.payloadMappings.dispatchTraits.globalWorkSize[0] = undefined<CrossThreadDataOffset>;
kernelDescriptor.payloadMappings.dispatchTraits.globalWorkSize[1] = 0x120;
kernelDescriptor.payloadMappings.dispatchTraits.globalWorkSize[2] = 0x130;
kernelDescriptor.payloadMappings.dispatchTraits.workDim = 0x140;
EncodeIndirectParams<FamilyType>::encode(cmdContainer0, crossThreadGpuVa, &dispatchInterface, 0u, &args);
kernelDescriptor.payloadMappings.dispatchTraits.workDim = 0x60;
EncodeIndirectParams<FamilyType>::encode(cmdContainer1, crossThreadGpuVa, &dispatchInterface, 0u, &args);
auto used0 = cmdContainer0.getCommandStream()->getUsed();
auto used1 = cmdContainer1.getCommandStream()->getUsed();
auto expectedDiff = sizeof(MI_ARB_CHECK) * 2 + sizeof(MI_BATCH_BUFFER_START);
EXPECT_EQ(expectedDiff, used1 - used0);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer1.getCommandStream()->getCpuBase(), used0), used1 - used0);
auto itor = commands.begin();
itor = find<MI_ARB_CHECK *>(itor, commands.end());
ASSERT_NE(itor, commands.end());
itor = find<MI_BATCH_BUFFER_START *>(++itor, commands.end());
ASSERT_NE(itor, commands.end());
itor = find<MI_ARB_CHECK *>(++itor, commands.end());
ASSERT_NE(itor, commands.end());
itor = find<MI_ARB_CHECK *>(++itor, commands.end());
EXPECT_EQ(itor, commands.end());
}
using CommandEncodeAluTests = ::testing::Test;
HWTEST_F(CommandEncodeAluTests, whenAskingForIncrementOrDecrementCmdsSizeThenReturnCorrectValue) {