fix: reset all slm arguments of inactive kernel in mutable group

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2025-10-08 13:19:42 +00:00
committed by Compute-Runtime-Automation
parent 57add55b20
commit e768366eac
4 changed files with 72 additions and 9 deletions

View File

@@ -390,7 +390,8 @@ inline ze_result_t MutableCommandListCoreFamily<gfxCoreFamily>::appendLaunchKern
this->appendKernelMutableComputeWalker = (*mutableWalkerCmds.rbegin()).get();
retVal = this->parseDispatchedKernel(kernel, appendKernelMutableComputeWalker, mutableCmdlistAppendLaunchParams.extraPayloadSpaceForKernelGroup,
static_cast<L0::KernelImp *>(kernel)->getSyncBufferAllocation(),
static_cast<L0::KernelImp *>(kernel)->getRegionGroupBarrierAllocation());
static_cast<L0::KernelImp *>(kernel)->getRegionGroupBarrierAllocation(),
false);
if (retVal != ZE_RESULT_SUCCESS) {
return retVal;
}
@@ -910,7 +911,8 @@ ze_result_t MutableCommandListCoreFamily<gfxCoreFamily>::captureKernelGroupVaria
viewKernelMutableComputeWalker,
(parentMutableAppendLaunchParams.maxKernelGroupIndirectHeap - mutableKernel->getKernel()->getIndirectSize()),
nullptr,
nullptr);
nullptr,
true);
if (retVal != ZE_RESULT_SUCCESS) {
return retVal;
}
@@ -955,9 +957,6 @@ ze_result_t MutableCommandListCoreFamily<gfxCoreFamily>::captureKernelGroupVaria
if (viewKernelAppendLaunchParams.globalOffsetVariable != nullptr) {
viewKernelAppendLaunchParams.globalOffsetVariable->resetGlobalOffsetVariable();
}
if (viewKernelAppendLaunchParams.lastSlmArgumentVariable != nullptr) {
viewKernelAppendLaunchParams.lastSlmArgumentVariable->resetSlmVariable();
}
return retVal;
}

View File

@@ -261,7 +261,8 @@ KernelData *MutableCommandListImp::getKernelData(L0::Kernel *kernel) {
}
ze_result_t MutableCommandListImp::parseDispatchedKernel(L0::Kernel *kernel, MutableComputeWalker *mutableComputeWalker,
size_t extraHeapSize, NEO::GraphicsAllocation *syncBuffer, NEO::GraphicsAllocation *regionBarrier) {
size_t extraHeapSize, NEO::GraphicsAllocation *syncBuffer, NEO::GraphicsAllocation *regionBarrier,
bool resetSlmArgumentValues) {
auto kernelData = getKernelData(kernel);
auto &kernelDescriptor = kernel->getKernelDescriptor();
@@ -336,13 +337,20 @@ ze_result_t MutableCommandListImp::parseDispatchedKernel(L0::Kernel *kernel, Mut
DEBUG_BREAK_IF(args.size() != vars.size());
auto &slmArgOffsetValues = static_cast<L0::KernelImp *>(kernel)->getSlmArgOffsetValues();
auto &slmArgSizes = static_cast<L0::KernelImp *>(kernel)->getSlmArgSizes();
auto currentSlmArgSize = undefined<SlmOffset>;
auto currentSlmArgOffset = undefined<SlmOffset>;
for (size_t i = 0; i < args.size(); ++i) {
if (vars[i] == nullptr) {
continue;
}
if (resetSlmArgumentValues == false) {
currentSlmArgSize = slmArgSizes[i];
currentSlmArgOffset = slmArgOffsetValues[i];
}
auto retVal = Variable::fromHandle(vars[i])->addKernelArgUsage(args[i], kernelIohStartOffset, kernelFullOffset, kernelSshOffset,
slmArgSizes[i], slmArgOffsetValues[i],
walkerCmdOffset, mutableComputeWalker, kernelData->passInlineData);
currentSlmArgSize, currentSlmArgOffset,
walkerCmdOffset, mutableComputeWalker,
kernelData->passInlineData);
if (retVal != ZE_RESULT_SUCCESS) {
DEBUG_BREAK_IF(true);
return retVal;

View File

@@ -150,7 +150,8 @@ struct MutableCommandListImp : public MutableCommandList {
protected:
ze_result_t parseDispatchedKernel(L0::Kernel *kernel, MutableComputeWalker *mutableComputeWalker,
size_t extraHeapSize, NEO::GraphicsAllocation *syncBuffer, NEO::GraphicsAllocation *regionBarrier);
size_t extraHeapSize, NEO::GraphicsAllocation *syncBuffer, NEO::GraphicsAllocation *regionBarrier,
bool resetSlmArgumentValues);
ze_result_t addVariableDispatch(const NEO::KernelDescriptor &kernelDescriptor, KernelDispatch &kernelDispatch, Variable *groupSize, Variable *groupCount, Variable *globalOffset,
Variable *lastSlmArgumentVariable, MutableComputeWalker *mutableComputeWalker, const MutableKernelDispatchParameters &dispatchParams);

View File

@@ -2258,5 +2258,60 @@ HWCMDTEST_F(IGFX_XE_HP_CORE,
EXPECT_TRUE(isAllocationInMutableResidency(mutableCommandList.get(), kernelSlmRegularIsaAllocation));
}
// Verifies SLM variable state for a two-kernel mutable group where each kernel has two SLM arguments:
// the variables captured for `kernel` must carry the programmed SLM size and a defined offset, while
// every SLM variable captured for `kernel2` must have both size and offset left as undefined.
HWCMDTEST_F(IGFX_XE_HP_CORE,
            MutableCommandListKernelTest,
            givenKernelWithTwoSlmArgumentsWhenProvidedAsInactiveInMutableKernelGroupThenItsSlmVariablesAreUndefined) {
    // set kernel arg 0, 1 => slm, slm
    resizeKernelArg(2);
    prepareKernelArg(0, L0::MCL::VariableType::slmBuffer, kernelAllMask);
    prepareKernelArg(1, L0::MCL::VariableType::slmBuffer, kernelAllMask);

    // program both SLM arguments on both kernels with the same size
    uint32_t slmSize = 512;
    auto result = kernel->setArgBuffer(0, slmSize, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    result = kernel->setArgBuffer(1, slmSize, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    result = kernel2->setArgBuffer(0, slmSize, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    result = kernel2->setArgBuffer(1, slmSize, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    // request a command id for a group of 2 kernels, then append a single kernel launch
    // NOTE(review): kernelHandle presumably refers to `kernel`, making `kernel2` the inactive one - confirm against fixture
    mutableCommandIdDesc.flags = kernelIsaMutationFlags;
    result = mutableCommandList->getNextCommandId(&mutableCommandIdDesc, 2, kernelMutationGroup, &commandId);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    result = mutableCommandList->appendLaunchKernel(kernelHandle, this->testGroupCount, nullptr, 0, nullptr, this->testLaunchParams);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    result = mutableCommandList->close();
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    // first kernel: both SLM variables must hold the programmed size and a defined offset
    auto kernelSlmBufferVariables = getVariableList(commandId, L0::MCL::VariableType::slmBuffer, kernel.get());
    ASSERT_EQ(2u, kernelSlmBufferVariables.size());
    auto kernel1SlmBufferVariable1 = static_cast<Variable *>(kernelSlmBufferVariables[0]);
    EXPECT_EQ(slmSize, kernel1SlmBufferVariable1->slmValue.slmSize);
    EXPECT_NE(undefined<L0::MCL::SlmOffset>, kernel1SlmBufferVariable1->slmValue.slmOffsetValue);
    // index 1 - the second SLM variable must be inspected, not the first one again
    auto kernel1SlmBufferVariable2 = static_cast<Variable *>(kernelSlmBufferVariables[1]);
    EXPECT_EQ(slmSize, kernel1SlmBufferVariable2->slmValue.slmSize);
    EXPECT_NE(undefined<L0::MCL::SlmOffset>, kernel1SlmBufferVariable2->slmValue.slmOffsetValue);

    // second kernel: every SLM variable must have undefined size and offset
    kernelSlmBufferVariables = getVariableList(commandId, L0::MCL::VariableType::slmBuffer, kernel2.get());
    ASSERT_EQ(2u, kernelSlmBufferVariables.size());
    auto kernel2SlmBufferVariable1 = static_cast<Variable *>(kernelSlmBufferVariables[0]);
    EXPECT_EQ(undefined<L0::MCL::SlmOffset>, kernel2SlmBufferVariable1->slmValue.slmSize);
    EXPECT_EQ(undefined<L0::MCL::SlmOffset>, kernel2SlmBufferVariable1->slmValue.slmOffsetValue);
    // index 1 - covers the non-first SLM argument of the second kernel
    auto kernel2SlmBufferVariable2 = static_cast<Variable *>(kernelSlmBufferVariables[1]);
    EXPECT_EQ(undefined<L0::MCL::SlmOffset>, kernel2SlmBufferVariable2->slmValue.slmSize);
    EXPECT_EQ(undefined<L0::MCL::SlmOffset>, kernel2SlmBufferVariable2->slmValue.slmOffsetValue);
}
} // namespace ult
} // namespace L0