[Flang][OpenMP] Fix for atomic lowering with HLFIR

Atomic update operation is modelled in OpenMP dialect as an operation that takes a reference to the operation being updated. It also contains a region that will perform the update. The block argument represents the loaded value from the update location and the Yield operation is the value that should be stored for the update. OpenMP FIR lowering binds the value loaded from the update address to the SymbolAddress. HLFIR lowering does not permit SymbolAddresses to be a value. To work around this, the lowering is now performed in two steps. First the body of the atomic update is lowered into an SCF execute_region operation. Then this is copied into the omp.atomic_update as a second step that performs the following: -> Create an omp.atomic_update with the block argument of the correct type. -> Copy the operations from the SCF execute_region. Convert the scf.yield to an omp.yield. -> Remove the loads of the update location and replace all uses with the block argument. Reviewed By: tblah, razvanlupusoru Differential Revision: https://reviews.llvm.org/D158294
2026-01-16 05:32:28 +08:00 · 2023-08-31 13:26:35 +00:00
parent a8f53098e3
commit 6163d66e73
2 changed files with 106 additions and 18 deletions
--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@@ -26,6 +26,7 @@
 #include "flang/Semantics/openmp-directive-sets.h"
 #include "flang/Semantics/tools.h"
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "mlir/Dialect/SCF/IR/SCF.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"

 using DeclareTargetCapturePair =
@@ -3094,17 +3095,6 @@ static void genOmpAtomicUpdateStatement(
  if (rightHandClauseList)
    genOmpAtomicHintAndMemoryOrderClauses(converter, *rightHandClauseList, hint,
                                          memoryOrder);
-  auto atomicUpdateOp = firOpBuilder.create<mlir::omp::AtomicUpdateOp>(
-      currentLocation, lhsAddr, hint, memoryOrder);
-
-  //// Generate body of Atomic Update operation
-  // If an argument for the region is provided then create the block with that
-  // argument. Also update the symbol's address with the argument mlir value.
-  llvm::SmallVector<mlir::Type> varTys = {varType};
-  llvm::SmallVector<mlir::Location> locs = {currentLocation};
-  firOpBuilder.createBlock(&atomicUpdateOp.getRegion(), {}, varTys, locs);
-  mlir::Value val =
-      fir::getBase(atomicUpdateOp.getRegion().front().getArgument(0));
  const auto *varDesignator =
      std::get_if<Fortran::common::Indirection<Fortran::parser::Designator>>(
          &assignmentStmtVariable.u);
@@ -3117,21 +3107,96 @@ static void genOmpAtomicUpdateStatement(
         "Array references as atomic update variable");
  assert(name && name->symbol &&
         "No symbol attached to atomic update variable");
-  converter.bindSymbol(*name->symbol, val);
-  // Set the insert for the terminator operation to go at the end of the
-  // block.
-  mlir::Block &block = atomicUpdateOp.getRegion().back();
-  firOpBuilder.setInsertionPointToEnd(&block);
+  if (Fortran::semantics::IsAllocatableOrPointer(name->symbol->GetUltimate()))
+    converter.bindSymbol(*name->symbol, lhsAddr);

+  //  Lowering is in two steps :
+  //  subroutine sb
+  //    integer :: a, b
+  //    !$omp atomic update
+  //      a = a + b
+  //  end subroutine
+  //
+  //  1. Lower to scf.execute_region_op
+  //
+  //  func.func @_QPsb() {
+  //    %0 = fir.alloca i32 {bindc_name = "a", uniq_name = "_QFsbEa"}
+  //    %1 = fir.alloca i32 {bindc_name = "b", uniq_name = "_QFsbEb"}
+  //    %2 = scf.execute_region -> i32 {
+  //      %3 = fir.load %0 : !fir.ref<i32>
+  //      %4 = fir.load %1 : !fir.ref<i32>
+  //      %5 = arith.addi %3, %4 : i32
+  //      scf.yield %5 : i32
+  //    }
+  //    return
+  //  }
+  auto tempOp =
+      firOpBuilder.create<mlir::scf::ExecuteRegionOp>(currentLocation, varType);
+  firOpBuilder.createBlock(&tempOp.getRegion());
+  mlir::Block &block = tempOp.getRegion().back();
+  firOpBuilder.setInsertionPointToEnd(&block);
  Fortran::lower::StatementContext stmtCtx;
  mlir::Value rhsExpr = fir::getBase(converter.genExprValue(
      *Fortran::semantics::GetExpr(assignmentStmtExpr), stmtCtx));
  mlir::Value convertResult =
      firOpBuilder.createConvert(currentLocation, varType, rhsExpr);
  // Insert the terminator: YieldOp.
-  firOpBuilder.create<mlir::omp::YieldOp>(currentLocation, convertResult);
-  // Reset the insert point to before the terminator.
+  firOpBuilder.create<mlir::scf::YieldOp>(currentLocation, convertResult);
  firOpBuilder.setInsertionPointToStart(&block);
+
+  //  2. Create the omp.atomic.update Operation using the Operations in the
+  //     temporary scf.execute_region Operation.
+  //
+  //  func.func @_QPsb() {
+  //    %0 = fir.alloca i32 {bindc_name = "a", uniq_name = "_QFsbEa"}
+  //    %1 = fir.alloca i32 {bindc_name = "b", uniq_name = "_QFsbEb"}
+  //    %2 = fir.load %1 : !fir.ref<i32>
+  //    omp.atomic.update   %0 : !fir.ref<i32> {
+  //    ^bb0(%arg0: i32):
+  //      %3 = fir.load %1 : !fir.ref<i32>
+  //      %4 = arith.addi %arg0, %3 : i32
+  //      omp.yield(%3 : i32)
+  //    }
+  //    return
+  //  }
+  mlir::Value updateVar = converter.getSymbolAddress(*name->symbol);
+  if (auto decl = updateVar.getDefiningOp<hlfir::DeclareOp>())
+    updateVar = decl.getBase();
+
+  firOpBuilder.setInsertionPointAfter(tempOp);
+  auto atomicUpdateOp = firOpBuilder.create<mlir::omp::AtomicUpdateOp>(
+      currentLocation, updateVar, hint, memoryOrder);
+
+  llvm::SmallVector<mlir::Type> varTys = {varType};
+  llvm::SmallVector<mlir::Location> locs = {currentLocation};
+  firOpBuilder.createBlock(&atomicUpdateOp.getRegion(), {}, varTys, locs);
+  mlir::Value val =
+      fir::getBase(atomicUpdateOp.getRegion().front().getArgument(0));
+
+  llvm::SmallVector<mlir::Operation *> ops;
+  for (mlir::Operation &op : tempOp.getRegion().getOps())
+    ops.push_back(&op);
+
+  // SCF Yield is converted to OMP Yield. All other operations are copied
+  for (mlir::Operation *op : ops) {
+    if (auto y = mlir::dyn_cast<mlir::scf::YieldOp>(op)) {
+      firOpBuilder.setInsertionPointToEnd(&atomicUpdateOp.getRegion().front());
+      firOpBuilder.create<mlir::omp::YieldOp>(currentLocation, y.getResults());
+      op->erase();
+    } else {
+      op->remove();
+      atomicUpdateOp.getRegion().front().push_back(op);
+    }
+  }
+
+  // Remove the load and replace all uses of load with the block argument
+  for (mlir::Operation &op : atomicUpdateOp.getRegion().getOps()) {
+    fir::LoadOp y = mlir::dyn_cast<fir::LoadOp>(&op);
+    if (y && y.getMemref() == updateVar)
+      y.getRes().replaceAllUsesWith(val);
+  }
+
+  tempOp.erase();
 }

 static void
--- a/flang/test/Lower/OpenMP/atomic-update-hlfir.f90
+++ b/flang/test/Lower/OpenMP/atomic-update-hlfir.f90
@@ -0,0 +1,23 @@
+! This test checks lowering of atomic and atomic update constructs with HLFIR
+! RUN: bbc -hlfir -fopenmp -emit-hlfir %s -o - | FileCheck %s
+! RUN: %flang_fc1 -flang-experimental-hlfir -emit-hlfir -fopenmp %s -o - | FileCheck %s
+
+subroutine sb
+  integer :: x, y
+
+  !$omp atomic update
+    x = x + y
+end subroutine
+
+!CHECK-LABEL: @_QPsb
+!CHECK:   %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsbEx"}
+!CHECK:   %[[X_DECL:.*]]:2 = hlfir.declare %[[X_REF]] {uniq_name = "_QFsbEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:   %[[Y_REF:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFsbEy"}
+!CHECK:   %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y_REF]] {uniq_name = "_QFsbEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK:   omp.atomic.update   %[[X_DECL]]#0 : !fir.ref<i32> {
+!CHECK:   ^bb0(%[[ARG_X:.*]]: i32):
+!CHECK:     %[[Y_VAL:.*]] = fir.load %[[Y_DECL]]#0 : !fir.ref<i32>
+!CHECK:     %[[X_UPDATE_VAL:.*]] = arith.addi %[[ARG_X]], %[[Y_VAL]] : i32
+!CHECK:     omp.yield(%[[X_UPDATE_VAL]] : i32)
+!CHECK:   }
+!CHECK:   return