fix: check all args in checkKernelContainsStatefulAccess function

Related-To: NEO-16281, NEO-16405
Signed-off-by: Jaroslaw Warchulski <jaroslaw.warchulski@intel.com>
This commit is contained in:
Jaroslaw Warchulski
2025-11-04 17:16:04 +00:00
committed by Compute-Runtime-Automation
parent 2d3781b0d4
commit 45bbb7d6c4
6 changed files with 130 additions and 16 deletions

View File

@@ -1640,8 +1640,8 @@ bool KernelImp::checkKernelContainsStatefulAccess() {
auto moduleImp = static_cast<ModuleImp *>(this->module); auto moduleImp = static_cast<ModuleImp *>(this->module);
auto isUserKernel = (moduleImp->getModuleType() == ModuleType::user); auto isUserKernel = (moduleImp->getModuleType() == ModuleType::user);
auto isGeneratedByIgc = moduleImp->getTranslationUnit()->isGeneratedByIgc; auto isGeneratedByIgc = moduleImp->getTranslationUnit()->isGeneratedByIgc;
auto containsBufferStatefulAccess = NEO::AddressingModeHelper::containsBufferStatefulAccess(getKernelDescriptor(), false); auto containsStatefulAccess = NEO::AddressingModeHelper::containsStatefulAccess(getKernelDescriptor());
return containsBufferStatefulAccess && isUserKernel && isGeneratedByIgc; return containsStatefulAccess && isUserKernel && isGeneratedByIgc;
} }
uint8_t KernelImp::getRequiredSlmAlignment(uint32_t argIndex) const { uint8_t KernelImp::getRequiredSlmAlignment(uint32_t argIndex) const {

View File

@@ -3753,9 +3753,13 @@ TEST_F(CommandListAppendLaunchKernelWithArgumentsTests, whenAppendLaunchKernelWi
kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>; kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>().bindless = NEO::undefined<NEO::SurfaceStateHeapOffset>; kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>().bindless = NEO::undefined<NEO::SurfaceStateHeapOffset>;
kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[1].type = NEO::ArgDescriptor::argTImage; // arg image kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[1].type = NEO::ArgDescriptor::argTImage; // arg image
kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[1].as<NEO::ArgDescImage>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[1].as<NEO::ArgDescImage>().bindless = NEO::undefined<NEO::SurfaceStateHeapOffset>;
kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[2].type = NEO::ArgDescriptor::argTSampler; // arg sampler kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[2].type = NEO::ArgDescriptor::argTSampler; // arg sampler
kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[3].type = NEO::ArgDescriptor::argTValue; // arg immediate kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[2].as<NEO::ArgDescSampler>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[2].as<NEO::ArgDescSampler>().bindless = NEO::undefined<NEO::SurfaceStateHeapOffset>;
kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[3].type = NEO::ArgDescriptor::argTValue; // arg immediate
kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[3].as<NEO::ArgDescValue>().elements.resize(3); kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[3].as<NEO::ArgDescValue>().elements.resize(3);
kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[3].as<NEO::ArgDescValue>().elements[0].size = 4; kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[3].as<NEO::ArgDescValue>().elements[0].size = 4;
kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[3].as<NEO::ArgDescValue>().elements[0].sourceOffset = 0; kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[3].as<NEO::ArgDescValue>().elements[0].sourceOffset = 0;

View File

@@ -41,8 +41,8 @@ inline void HardwareInterface<GfxFamily>::dispatchWorkarounds(
CommandQueue &commandQueue, CommandQueue &commandQueue,
Kernel &kernel, Kernel &kernel,
const bool &enable) { const bool &enable) {
bool containsBufferStatefulAccess = AddressingModeHelper::containsBufferStatefulAccess(kernel.getDescriptor(), false); bool containsStatefulAccess = AddressingModeHelper::containsStatefulAccess(kernel.getDescriptor());
bool stateCacheInvalidationWaRequired = commandQueue.getDevice().getReleaseHelper()->isStateCacheInvalidationWaRequired() && containsBufferStatefulAccess; bool stateCacheInvalidationWaRequired = commandQueue.getDevice().getReleaseHelper()->isStateCacheInvalidationWaRequired() && containsStatefulAccess;
if (!enable && stateCacheInvalidationWaRequired) { if (!enable && stateCacheInvalidationWaRequired) {
PipeControlArgs args{}; PipeControlArgs args{};
args.stateCacheInvalidationEnable = true; args.stateCacheInvalidationEnable = true;

View File

@@ -55,6 +55,27 @@ bool containsBufferStatefulAccess(const std::vector<KernelInfo *> &kernelInfos,
return false; return false;
} }
// Tells whether a single explicit kernel argument carries a valid stateful
// offset (bindless or bindful). Only pointer, image and sampler arguments are
// inspected; any other argument type is reported as not stateful.
bool argIsStateful(const ArgDescriptor &arg) {
    if (arg.is<NEO::ArgDescriptor::argTPointer>()) {
        const auto &pointerArg = arg.as<NEO::ArgDescPointer>();
        return NEO::isValidOffset(pointerArg.bindless) || NEO::isValidOffset(pointerArg.bindful);
    }
    if (arg.is<NEO::ArgDescriptor::argTImage>()) {
        const auto &imageArg = arg.as<NEO::ArgDescImage>();
        return NEO::isValidOffset(imageArg.bindless) || NEO::isValidOffset(imageArg.bindful);
    }
    if (arg.is<NEO::ArgDescriptor::argTSampler>()) {
        const auto &samplerArg = arg.as<NEO::ArgDescSampler>();
        return NEO::isValidOffset(samplerArg.bindless) || NEO::isValidOffset(samplerArg.bindful);
    }
    // Remaining argument kinds are not checked for stateful offsets.
    return false;
}
// Scans every explicit argument of the kernel descriptor and returns true as
// soon as one of them is stateful according to argIsStateful(). Returns false
// when no argument qualifies, which includes an empty argument list.
bool containsStatefulAccess(const KernelDescriptor &kernelDescriptor) {
    for (const auto &explicitArg : kernelDescriptor.payloadMappings.explicitArgs) {
        if (argIsStateful(explicitArg)) {
            return true;
        }
    }
    return false;
}
bool containsBindlessKernel(const std::vector<KernelInfo *> &kernelInfos) { bool containsBindlessKernel(const std::vector<KernelInfo *> &kernelInfos) {
for (const auto &kernelInfo : kernelInfos) { for (const auto &kernelInfo : kernelInfos) {
if (NEO::KernelDescriptor::isBindlessAddressingKernel(kernelInfo->kernelDescriptor)) { if (NEO::KernelDescriptor::isBindlessAddressingKernel(kernelInfo->kernelDescriptor)) {

View File

@@ -17,6 +17,7 @@ namespace AddressingModeHelper {
bool failBuildProgramWithBufferStatefulAccess(const RootDeviceEnvironment &rootDeviceEnvironment); bool failBuildProgramWithBufferStatefulAccess(const RootDeviceEnvironment &rootDeviceEnvironment);
bool containsBufferStatefulAccess(const KernelDescriptor &kernelDescriptor, bool skipLastExplicitArg); bool containsBufferStatefulAccess(const KernelDescriptor &kernelDescriptor, bool skipLastExplicitArg);
bool containsBufferStatefulAccess(const std::vector<KernelInfo *> &kernelInfos, bool skipLastExplicitArg); bool containsBufferStatefulAccess(const std::vector<KernelInfo *> &kernelInfos, bool skipLastExplicitArg);
bool containsStatefulAccess(const KernelDescriptor &kernelDescriptor);
bool containsBindlessKernel(const std::vector<KernelInfo *> &kernelInfos); bool containsBindlessKernel(const std::vector<KernelInfo *> &kernelInfos);
} // namespace AddressingModeHelper } // namespace AddressingModeHelper

View File

@@ -12,59 +12,147 @@
using namespace NEO; using namespace NEO;
TEST(AddressingModeHelperTest, GivenArgIsNotPointerWhenCheckingForStatefulAccessThenReturnFalse) { TEST(AddressingModeHelperTest, GivenArgIsNotPointerWhenCheckingForBufferStatefulAccessThenReturnFalse) {
auto argDescriptor = ArgDescriptor(ArgDescriptor::argTValue); auto argDescriptor = ArgDescriptor(ArgDescriptor::argTValue);
KernelDescriptor kernelDescriptor; KernelDescriptor kernelDescriptor{};
kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor); kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
EXPECT_FALSE(AddressingModeHelper::containsBufferStatefulAccess(kernelDescriptor, false)); EXPECT_FALSE(AddressingModeHelper::containsBufferStatefulAccess(kernelDescriptor, false));
} }
TEST(AddressingModeHelperTest, GivenArgIsPointerWithInvalidStatefulOffsetWhenCheckingForStatefulAccessThenReturnFalse) { TEST(AddressingModeHelperTest, GivenArgIsPointerWithInvalidStatefulOffsetWhenCheckingForBufferStatefulAccessThenReturnFalse) {
auto argDescriptor = ArgDescriptor(ArgDescriptor::argTPointer); auto argDescriptor = ArgDescriptor(ArgDescriptor::argTPointer);
argDescriptor.as<ArgDescPointer>().bindful = undefined<SurfaceStateHeapOffset>; argDescriptor.as<ArgDescPointer>().bindful = undefined<SurfaceStateHeapOffset>;
argDescriptor.as<ArgDescPointer>().bindless = undefined<CrossThreadDataOffset>; argDescriptor.as<ArgDescPointer>().bindless = undefined<CrossThreadDataOffset>;
KernelDescriptor kernelDescriptor; KernelDescriptor kernelDescriptor{};
kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor); kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
EXPECT_FALSE(AddressingModeHelper::containsBufferStatefulAccess(kernelDescriptor, false)); EXPECT_FALSE(AddressingModeHelper::containsBufferStatefulAccess(kernelDescriptor, false));
} }
TEST(AddressingModeHelperTest, GivenArgIsPointerWithValidBindfulOffsetWhenCheckingForStatefulAccessThenReturnTrue) { TEST(AddressingModeHelperTest, GivenArgIsPointerWithValidBindfulOffsetWhenCheckingForBufferStatefulAccessThenReturnTrue) {
auto argDescriptor = ArgDescriptor(ArgDescriptor::argTPointer); auto argDescriptor = ArgDescriptor(ArgDescriptor::argTPointer);
argDescriptor.as<ArgDescPointer>().bindful = 0x40; argDescriptor.as<ArgDescPointer>().bindful = 0x40;
argDescriptor.as<ArgDescPointer>().bindless = undefined<CrossThreadDataOffset>; argDescriptor.as<ArgDescPointer>().bindless = undefined<CrossThreadDataOffset>;
KernelDescriptor kernelDescriptor; KernelDescriptor kernelDescriptor{};
kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor); kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
EXPECT_TRUE(AddressingModeHelper::containsBufferStatefulAccess(kernelDescriptor, false)); EXPECT_TRUE(AddressingModeHelper::containsBufferStatefulAccess(kernelDescriptor, false));
} }
TEST(AddressingModeHelperTest, GivenArgIsPointerWithValidBindlessOffsetWhenCheckingForStatefulAccessThenReturnTrue) { TEST(AddressingModeHelperTest, GivenArgIsPointerWithValidBindlessOffsetWhenCheckingForBufferStatefulAccessThenReturnTrue) {
auto argDescriptor = ArgDescriptor(ArgDescriptor::argTPointer); auto argDescriptor = ArgDescriptor(ArgDescriptor::argTPointer);
argDescriptor.as<ArgDescPointer>().bindful = undefined<SurfaceStateHeapOffset>; argDescriptor.as<ArgDescPointer>().bindful = undefined<SurfaceStateHeapOffset>;
argDescriptor.as<ArgDescPointer>().bindless = 0x40; argDescriptor.as<ArgDescPointer>().bindless = 0x40;
KernelDescriptor kernelDescriptor; KernelDescriptor kernelDescriptor{};
kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor); kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
EXPECT_TRUE(AddressingModeHelper::containsBufferStatefulAccess(kernelDescriptor, false)); EXPECT_TRUE(AddressingModeHelper::containsBufferStatefulAccess(kernelDescriptor, false));
} }
TEST(AddressingModeHelperTest, GivenLastArgIsPointerWithValidBindlessOffsetWhenIgnoreLastArgAndCheckingForStatefulAccessThenReturnFalse) { TEST(AddressingModeHelperTest, GivenLastArgIsPointerWithValidBindlessOffsetWhenIgnoreLastArgAndCheckingForBufferStatefulAccessThenReturnFalse) {
auto argDescriptor = ArgDescriptor(ArgDescriptor::argTPointer); auto argDescriptor = ArgDescriptor(ArgDescriptor::argTPointer);
argDescriptor.as<ArgDescPointer>().bindful = undefined<SurfaceStateHeapOffset>; argDescriptor.as<ArgDescPointer>().bindful = undefined<SurfaceStateHeapOffset>;
argDescriptor.as<ArgDescPointer>().bindless = 0x40; argDescriptor.as<ArgDescPointer>().bindless = 0x40;
KernelDescriptor kernelDescriptor; KernelDescriptor kernelDescriptor{};
kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor); kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
EXPECT_FALSE(AddressingModeHelper::containsBufferStatefulAccess(kernelDescriptor, true)); EXPECT_FALSE(AddressingModeHelper::containsBufferStatefulAccess(kernelDescriptor, true));
} }
TEST(AddressingModeHelperTest, GivenInvalidArgWhenCheckingForStatefulAccessThenReturnFalse) {
    // A descriptor whose only explicit argument has the unknown type.
    KernelDescriptor kernelDescriptor{};
    ArgDescriptor unknownArg(ArgDescriptor::argTUnknown);
    kernelDescriptor.payloadMappings.explicitArgs.push_back(unknownArg);

    EXPECT_FALSE(AddressingModeHelper::containsStatefulAccess(kernelDescriptor));
}
TEST(AddressingModeHelperTest, GivenValidArgWithInvalidStatefulOffsetWhenCheckingForStatefulAccessThenReturnFalse) {
    // Pointer, image and sampler arguments, each with both the bindful and the
    // bindless offset left undefined.
    auto pointerArg = ArgDescriptor(ArgDescriptor::argTPointer);
    auto &pointerDesc = pointerArg.as<ArgDescPointer>();
    pointerDesc.bindful = undefined<SurfaceStateHeapOffset>;
    pointerDesc.bindless = undefined<CrossThreadDataOffset>;

    auto imageArg = ArgDescriptor(ArgDescriptor::argTImage);
    auto &imageDesc = imageArg.as<ArgDescImage>();
    imageDesc.bindful = undefined<SurfaceStateHeapOffset>;
    imageDesc.bindless = undefined<CrossThreadDataOffset>;

    auto samplerArg = ArgDescriptor(ArgDescriptor::argTSampler);
    auto &samplerDesc = samplerArg.as<ArgDescSampler>();
    samplerDesc.bindful = undefined<SurfaceStateHeapOffset>;
    samplerDesc.bindless = undefined<CrossThreadDataOffset>;

    KernelDescriptor kernelDescriptor{};
    auto &explicitArgs = kernelDescriptor.payloadMappings.explicitArgs;
    explicitArgs.push_back(pointerArg);
    explicitArgs.push_back(imageArg);
    explicitArgs.push_back(samplerArg);

    EXPECT_FALSE(AddressingModeHelper::containsStatefulAccess(kernelDescriptor));
}
TEST(AddressingModeHelperTest, GivenValidArgWithValidBindfulOffsetWhenCheckingForStatefulAccessThenReturnTrue) {
    // Each argument kind is checked on its own: the descriptor holds exactly one
    // argument at a time, with a valid bindful offset and an undefined bindless one.
    KernelDescriptor kernelDescriptor{};
    auto &explicitArgs = kernelDescriptor.payloadMappings.explicitArgs;

    // Pointer argument.
    auto pointerArg = ArgDescriptor(ArgDescriptor::argTPointer);
    auto &pointerDesc = pointerArg.as<ArgDescPointer>();
    pointerDesc.bindful = 0x40;
    pointerDesc.bindless = undefined<CrossThreadDataOffset>;
    explicitArgs.push_back(pointerArg);
    EXPECT_TRUE(AddressingModeHelper::containsStatefulAccess(kernelDescriptor));

    // Image argument.
    auto imageArg = ArgDescriptor(ArgDescriptor::argTImage);
    auto &imageDesc = imageArg.as<ArgDescImage>();
    imageDesc.bindful = 0x40;
    imageDesc.bindless = undefined<CrossThreadDataOffset>;
    explicitArgs.clear();
    explicitArgs.push_back(imageArg);
    EXPECT_TRUE(AddressingModeHelper::containsStatefulAccess(kernelDescriptor));

    // Sampler argument.
    auto samplerArg = ArgDescriptor(ArgDescriptor::argTSampler);
    auto &samplerDesc = samplerArg.as<ArgDescSampler>();
    samplerDesc.bindful = 0x40;
    samplerDesc.bindless = undefined<CrossThreadDataOffset>;
    explicitArgs.clear();
    explicitArgs.push_back(samplerArg);
    EXPECT_TRUE(AddressingModeHelper::containsStatefulAccess(kernelDescriptor));
}
TEST(AddressingModeHelperTest, GivenValidArgWithValidBindlessOffsetWhenCheckingForStatefulAccessThenReturnTrue) {
    // Each argument kind is checked on its own: the descriptor holds exactly one
    // argument at a time, with a valid bindless offset and an undefined bindful one.
    KernelDescriptor kernelDescriptor{};
    auto &explicitArgs = kernelDescriptor.payloadMappings.explicitArgs;

    // Pointer argument.
    auto pointerArg = ArgDescriptor(ArgDescriptor::argTPointer);
    auto &pointerDesc = pointerArg.as<ArgDescPointer>();
    pointerDesc.bindful = undefined<SurfaceStateHeapOffset>;
    pointerDesc.bindless = 0x40;
    explicitArgs.push_back(pointerArg);
    EXPECT_TRUE(AddressingModeHelper::containsStatefulAccess(kernelDescriptor));

    // Image argument.
    auto imageArg = ArgDescriptor(ArgDescriptor::argTImage);
    auto &imageDesc = imageArg.as<ArgDescImage>();
    imageDesc.bindful = undefined<SurfaceStateHeapOffset>;
    imageDesc.bindless = 0x40;
    explicitArgs.clear();
    explicitArgs.push_back(imageArg);
    EXPECT_TRUE(AddressingModeHelper::containsStatefulAccess(kernelDescriptor));

    // Sampler argument.
    auto samplerArg = ArgDescriptor(ArgDescriptor::argTSampler);
    auto &samplerDesc = samplerArg.as<ArgDescSampler>();
    samplerDesc.bindful = undefined<SurfaceStateHeapOffset>;
    samplerDesc.bindless = 0x40;
    explicitArgs.clear();
    explicitArgs.push_back(samplerArg);
    EXPECT_TRUE(AddressingModeHelper::containsStatefulAccess(kernelDescriptor));
}
TEST(AddressingModeHelperTest, GivenKernelInfosWhenCheckingForBindlessKernelThenReturnCorrectValue) { TEST(AddressingModeHelperTest, GivenKernelInfosWhenCheckingForBindlessKernelThenReturnCorrectValue) {
KernelInfo kernelInfo1{}; KernelInfo kernelInfo1{};
KernelInfo kernelInfo2{}; KernelInfo kernelInfo2{};