mirror of
https://github.com/intel/llvm.git
synced 2026-01-24 17:01:00 +08:00
[mlir][acc] Erase empty kernel_environment ops during canonicalization (#166633)
This change removes empty `acc.kernel_environment` operations during canonicalization. An empty `acc.kernel_environment` can arise when the acc compute construct inside it is optimized away, for example when the loop only writes to private variables. If the empty `acc.kernel_environment` has wait operands, it is still removed, but an `acc.wait` operation that takes those wait operands is created to preserve synchronization behavior.
This commit is contained in:
@@ -2184,6 +2184,8 @@ def OpenACC_KernelEnvironmentOp : OpenACC_Op<"kernel_environment",
|
||||
)
|
||||
$region attr-dict
|
||||
}];
|
||||
|
||||
let hasCanonicalizer = 1;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
@@ -1042,6 +1042,65 @@ struct RemoveConstantIfConditionWithRegion : public OpRewritePattern<OpTy> {
|
||||
}
|
||||
};
|
||||
|
||||
/// Remove empty acc.kernel_environment operations. If the operation has wait
|
||||
/// operands, create an acc.wait operation to preserve synchronization.
|
||||
struct RemoveEmptyKernelEnvironment
|
||||
: public OpRewritePattern<acc::KernelEnvironmentOp> {
|
||||
using OpRewritePattern<acc::KernelEnvironmentOp>::OpRewritePattern;
|
||||
|
||||
LogicalResult matchAndRewrite(acc::KernelEnvironmentOp op,
|
||||
PatternRewriter &rewriter) const override {
|
||||
assert(op->getNumRegions() == 1 && "expected op to have one region");
|
||||
|
||||
Block &block = op.getRegion().front();
|
||||
if (!block.empty())
|
||||
return failure();
|
||||
|
||||
// Conservatively disable canonicalization of empty acc.kernel_environment
|
||||
// operations if the wait operands in the kernel_environment cannot be fully
|
||||
// represented by acc.wait operation.
|
||||
|
||||
// Disable canonicalization if device type is not the default
|
||||
if (auto deviceTypeAttr = op.getWaitOperandsDeviceTypeAttr()) {
|
||||
for (auto attr : deviceTypeAttr) {
|
||||
if (auto dtAttr = mlir::dyn_cast<acc::DeviceTypeAttr>(attr)) {
|
||||
if (dtAttr.getValue() != mlir::acc::DeviceType::None)
|
||||
return failure();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Disable canonicalization if any wait segment has a devnum
|
||||
if (auto hasDevnumAttr = op.getHasWaitDevnumAttr()) {
|
||||
for (auto attr : hasDevnumAttr) {
|
||||
if (auto boolAttr = mlir::dyn_cast<mlir::BoolAttr>(attr)) {
|
||||
if (boolAttr.getValue())
|
||||
return failure();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Disable canonicalization if there are multiple wait segments
|
||||
if (auto segmentsAttr = op.getWaitOperandsSegmentsAttr()) {
|
||||
if (segmentsAttr.size() > 1)
|
||||
return failure();
|
||||
}
|
||||
|
||||
// Remove empty kernel environment.
|
||||
// Preserve synchronization by creating acc.wait operation if needed.
|
||||
if (!op.getWaitOperands().empty() || op.getWaitOnlyAttr())
|
||||
rewriter.replaceOpWithNewOp<acc::WaitOp>(op, op.getWaitOperands(),
|
||||
/*asyncOperand=*/Value(),
|
||||
/*waitDevnum=*/Value(),
|
||||
/*async=*/nullptr,
|
||||
/*ifCond=*/Value());
|
||||
else
|
||||
rewriter.eraseOp(op);
|
||||
|
||||
return success();
|
||||
}
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Recipe Region Helpers
|
||||
//===----------------------------------------------------------------------===//
|
||||
@@ -2690,6 +2749,15 @@ void acc::HostDataOp::getCanonicalizationPatterns(RewritePatternSet &results,
|
||||
results.add<RemoveConstantIfConditionWithRegion<HostDataOp>>(context);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// KernelEnvironmentOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// Adds the canonicalization patterns for acc.kernel_environment to
/// `results`; currently that is only RemoveEmptyKernelEnvironment.
void acc::KernelEnvironmentOp::getCanonicalizationPatterns(
    RewritePatternSet &results, MLIRContext *context) {
  results.add<RemoveEmptyKernelEnvironment>(context);
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// LoopOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
@@ -219,3 +219,30 @@ func.func @update_unnecessary_computations(%x: memref<i32>) {
|
||||
// CHECK-LABEL: func.func @update_unnecessary_computations
|
||||
// CHECK-NOT: acc.atomic.update
|
||||
// CHECK: acc.atomic.write
|
||||
|
||||
// -----
|
||||
|
||||
// Canonicalization input covering four empty acc.kernel_environment ops; the
// CHECK lines following this function state the expected result for each.
func.func @kernel_environment_canonicalization(%q1: i32, %q2: i32, %q3: i32) {
|
||||
// Empty kernel_environment (no wait) - should be removed
|
||||
acc.kernel_environment {
|
||||
}
|
||||
|
||||
// Empty kernel_environment with wait operands - expected to be replaced by
// an acc.wait on %q1 and %q2
acc.kernel_environment wait({%q1 : i32, %q2 : i32}) {
|
||||
}
|
||||
|
||||
// Empty kernel_environment with a bare wait - expected to be replaced by an
// acc.wait without operands
acc.kernel_environment wait {
|
||||
}
|
||||
|
||||
// Empty kernel_environment whose wait uses a non-default device type
// (nvidia) - expected to be left in place
acc.kernel_environment wait({%q3 : i32} [#acc.device_type<nvidia>]) {
|
||||
}
|
||||
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func.func @kernel_environment_canonicalization
|
||||
// CHECK-SAME: ([[Q1:%.*]]: i32, [[Q2:%.*]]: i32, [[Q3:%.*]]: i32)
|
||||
// CHECK-NOT: acc.kernel_environment wait({{.*}}[#acc.device_type<none>])
|
||||
// CHECK: acc.wait([[Q1]], [[Q2]] : i32, i32)
|
||||
// CHECK: acc.wait{{$}}
|
||||
// CHECK: acc.kernel_environment wait({{.*}}[#acc.device_type<nvidia>])
|
||||
// CHECK: return
|
||||
|
||||
Reference in New Issue
Block a user