[MLIR][SCFToGPU] Guard operands before AffineApplyOp::create to avoid crash (#167959)
This fixes a crash in SCF→GPU when building the per-dim index for a mapped scf.parallel.

**Change:**
- Map step/lb through cloningMap, then run ensureLaunchIndependent.
- If either is still unavailable at launch scope, emit a match failure; otherwise build the affine.apply.

**Why this is correct:**
- Matches how the pass already handles launch bounds; it avoids creating an op with invalid operands and replaces a segfault with a clear diagnostic.

**Tests:**
- Added two small regression tests that lower to gpu.launch and exercise the affine.apply path.

Fixes: #167654

Signed-off-by: Shashi Shankar <shashishankar1687@gmail.com>
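For context, here is a minimal sketch (not taken from the patch or the linked issue; the function name and constants are hypothetical) of the shape of input this guards against: the inner loop's lower bound depends on the outer induction variable, so it is neither a constant nor defined above the eventual gpu.launch.

```mlir
// Hypothetical repro sketch: %lb depends on the outer IV %i, so it cannot
// be made available above the gpu.launch that the conversion creates.
func.func @inner_lb_defined_inside() {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c8 = arith.constant 8 : index
  scf.parallel (%i) = (%c0) to (%c8) step (%c1) {
    %lb = arith.addi %i, %c1 : index
    scf.parallel (%j) = (%lb) to (%c8) step (%c1) {
      %x = arith.addi %j, %c0 : index
    } {
      mapping = [#gpu.loop_dim_map<processor = thread_x, map = (d0) -> (d0), bound = (d0) -> (d0)>]
    }
  } {
    mapping = [#gpu.loop_dim_map<processor = block_x, map = (d0) -> (d0), bound = (d0) -> (d0)>]
  }
  return
}
```

With the guard in place, converting a module of this shape is expected to report "lower bound / step must be constant or defined above the gpu.launch" instead of segfaulting.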
```diff
@@ -453,10 +453,24 @@ static LogicalResult processParallelLoop(
             1, 2,
             rewriter.getAffineDimExpr(0) * rewriter.getAffineSymbolExpr(0) +
                 rewriter.getAffineSymbolExpr(1));
+        // Map through cloningMap first so we use values valid at the launch
+        // scope, then ensure they are launch-independent (or cloned constants).
+        Value mappedStep = cloningMap.lookupOrDefault(step);
+        Value mappedLowerBound = cloningMap.lookupOrDefault(lowerBound);
+
+        mappedStep = ensureLaunchIndependent(mappedStep);
+        mappedLowerBound = ensureLaunchIndependent(mappedLowerBound);
+
+        // If either cannot be made available above the launch, fail gracefully.
+        if (!mappedStep || !mappedLowerBound) {
+          return rewriter.notifyMatchFailure(
+              parallelOp, "lower bound / step must be constant or defined above "
+                          "the gpu.launch");
+        }
+
         newIndex = AffineApplyOp::create(
             rewriter, loc, annotation.getMap().compose(lowerAndStep),
-            ValueRange{operand, ensureLaunchIndependent(step),
-                       ensureLaunchIndependent(lowerBound)});
+            ValueRange{operand, mappedStep, mappedLowerBound});
         // If there was also a bound, insert that, too.
         // TODO: Check that we do not assign bounds twice.
         if (annotation.getBound()) {
```
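As a reading aid for the hunk above: with an identity annotation map, annotation.getMap().compose(lowerAndStep) yields the map (d0)[s0, s1] -> (d0 * s0 + s1), i.e. processor id * step + lower bound. A sketch of the op the success path then builds (all SSA names here are illustrative, not from the patch):

```mlir
// Illustrative only: index = id * step + lower_bound, with the mapped,
// launch-independent step and lower bound passed as symbols.
%idx = affine.apply affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)>(%tid)[%step, %lb]
```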
```diff
@@ -673,3 +673,51 @@ func.func @nested_parallel_with_side_effect() {
 
 // CHECK: gpu.launch
 // CHECK-NOT: scf.parallel
+
+// -----
+
+func.func @scf2gpu_index_creation_2d() {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %c32 = arith.constant 32 : index
+
+  // Single 2-D scf.parallel mapped to block_x and thread_x.
+  // Use both IVs so the conversion must compute indices.
+  scf.parallel (%bx, %tx) = (%c0, %c0) to (%c32, %c32) step (%c1, %c1) {
+    %u = arith.addi %bx, %c0 : index
+    %v = arith.addi %tx, %c0 : index
+  } {
+    mapping = [
+      #gpu.loop_dim_map<processor = block_x, map = (d0) -> (d0), bound = (d0) -> (d0)>,
+      #gpu.loop_dim_map<processor = thread_x, map = (d0) -> (d0), bound = (d0) -> (d0)>
+    ]
+  }
+  return
+}
+
+// CHECK-LABEL: func @scf2gpu_index_creation_2d
+// CHECK: gpu.launch
+// CHECK: %[[IDX:.*]] = affine.apply
+// CHECK: arith.addi %[[IDX]],
+
+// -----
+
+func.func @scf2gpu_index_creation_1d() {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %c64 = arith.constant 64 : index
+
+  scf.parallel (%t) = (%c0) to (%c64) step (%c1) {
+    %w = arith.addi %t, %c0 : index
+  } {
+    mapping = [
+      #gpu.loop_dim_map<processor = thread_x, map = (d0) -> (d0), bound = (d0) -> (d0)>
+    ]
+  }
+  return
+}
+
+// CHECK-LABEL: func @scf2gpu_index_creation_1d
+// CHECK: gpu.launch
+// CHECK: %[[IDX:.*]] = affine.apply
+// CHECK: arith.addi %[[IDX]],
```
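The new cases follow the conventions of the surrounding tests, so they run under the file's existing RUN line; the invocation below is an assumption about that driver, not part of the patch:

```mlir
// Assumed driver (hypothetical for this excerpt):
// RUN: mlir-opt -convert-parallel-loops-to-gpu -split-input-file %s | FileCheck %s
```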