Revert "[OMPIRBuilder] Don't use invalid debug loc in reduction functions." (#150832)

Reverts llvm/llvm-project#147950

I noticed some fails in the reduction tests with clang after this
change. I need to understand the failures better. Reverting this for
now.
This commit is contained in:
Abid Qadeer
2025-07-27 16:43:28 +01:00
committed by GitHub
parent f3c750f149
commit 7d7f3819e0
2 changed files with 10 additions and 137 deletions

View File

@@ -2617,7 +2617,7 @@ void OpenMPIRBuilder::emitReductionListCopy(
Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
const LocationDescription &Loc, ArrayRef<ReductionInfo> ReductionInfos,
AttributeList FuncAttrs) {
IRBuilder<>::InsertPointGuard IPG(Builder);
InsertPointTy SavedIP = Builder.saveIP();
LLVMContext &Ctx = M.getContext();
FunctionType *FuncTy = FunctionType::get(
Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},
@@ -2630,7 +2630,6 @@ Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
WcFunc->addParamAttr(1, Attribute::NoUndef);
BasicBlock *EntryBB = BasicBlock::Create(M.getContext(), "entry", WcFunc);
Builder.SetInsertPoint(EntryBB);
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
// ReduceList: thread local Reduce list.
// At the stage of the computation when this function is called, partially
@@ -2845,6 +2844,7 @@ Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
}
Builder.CreateRetVoid();
Builder.restoreIP(SavedIP);
return WcFunc;
}
@@ -2853,7 +2853,6 @@ Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
ArrayRef<ReductionInfo> ReductionInfos, Function *ReduceFn,
AttributeList FuncAttrs) {
LLVMContext &Ctx = M.getContext();
IRBuilder<>::InsertPointGuard IPG(Builder);
FunctionType *FuncTy =
FunctionType::get(Builder.getVoidTy(),
{Builder.getPtrTy(), Builder.getInt16Ty(),
@@ -2872,7 +2871,6 @@ Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
SarFunc->addParamAttr(3, Attribute::SExt);
BasicBlock *EntryBB = BasicBlock::Create(M.getContext(), "entry", SarFunc);
Builder.SetInsertPoint(EntryBB);
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
// Thread local Reduce list used to host the values of data to be reduced.
Argument *ReduceListArg = SarFunc->getArg(0);
@@ -3019,7 +3017,7 @@ Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
ArrayRef<ReductionInfo> ReductionInfos, Type *ReductionsBufferTy,
AttributeList FuncAttrs) {
IRBuilder<>::InsertPointGuard IPG(Builder);
OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
LLVMContext &Ctx = M.getContext();
FunctionType *FuncTy = FunctionType::get(
Builder.getVoidTy(),
@@ -3035,7 +3033,6 @@ Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGCFunc);
Builder.SetInsertPoint(EntryBlock);
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
// Buffer: global reduction buffer.
Argument *BufferArg = LtGCFunc->getArg(0);
@@ -3123,13 +3120,14 @@ Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
}
Builder.CreateRetVoid();
Builder.restoreIP(OldIP);
return LtGCFunc;
}
Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
ArrayRef<ReductionInfo> ReductionInfos, Function *ReduceFn,
Type *ReductionsBufferTy, AttributeList FuncAttrs) {
IRBuilder<>::InsertPointGuard IPG(Builder);
OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
LLVMContext &Ctx = M.getContext();
FunctionType *FuncTy = FunctionType::get(
Builder.getVoidTy(),
@@ -3145,7 +3143,6 @@ Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGRFunc);
Builder.SetInsertPoint(EntryBlock);
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
// Buffer: global reduction buffer.
Argument *BufferArg = LtGRFunc->getArg(0);
@@ -3206,13 +3203,14 @@ Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
Builder.CreateCall(ReduceFn, {LocalReduceListAddrCast, ReduceList})
->addFnAttr(Attribute::NoUnwind);
Builder.CreateRetVoid();
Builder.restoreIP(OldIP);
return LtGRFunc;
}
Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
ArrayRef<ReductionInfo> ReductionInfos, Type *ReductionsBufferTy,
AttributeList FuncAttrs) {
IRBuilder<>::InsertPointGuard IPG(Builder);
OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
LLVMContext &Ctx = M.getContext();
FunctionType *FuncTy = FunctionType::get(
Builder.getVoidTy(),
@@ -3228,7 +3226,6 @@ Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGCFunc);
Builder.SetInsertPoint(EntryBlock);
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
// Buffer: global reduction buffer.
Argument *BufferArg = LtGCFunc->getArg(0);
@@ -3314,13 +3311,14 @@ Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
}
Builder.CreateRetVoid();
Builder.restoreIP(OldIP);
return LtGCFunc;
}
Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
ArrayRef<ReductionInfo> ReductionInfos, Function *ReduceFn,
Type *ReductionsBufferTy, AttributeList FuncAttrs) {
IRBuilder<>::InsertPointGuard IPG(Builder);
OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
LLVMContext &Ctx = M.getContext();
auto *FuncTy = FunctionType::get(
Builder.getVoidTy(),
@@ -3336,7 +3334,6 @@ Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGRFunc);
Builder.SetInsertPoint(EntryBlock);
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
// Buffer: global reduction buffer.
Argument *BufferArg = LtGRFunc->getArg(0);
@@ -3397,6 +3394,7 @@ Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
Builder.CreateCall(ReduceFn, {ReduceList, ReductionList})
->addFnAttr(Attribute::NoUnwind);
Builder.CreateRetVoid();
Builder.restoreIP(OldIP);
return LtGRFunc;
}
@@ -3409,7 +3407,6 @@ std::string OpenMPIRBuilder::getReductionFuncName(StringRef Name) const {
Expected<Function *> OpenMPIRBuilder::createReductionFunction(
StringRef ReducerName, ArrayRef<ReductionInfo> ReductionInfos,
ReductionGenCBKind ReductionGenCBKind, AttributeList FuncAttrs) {
IRBuilder<>::InsertPointGuard IPG(Builder);
auto *FuncTy = FunctionType::get(Builder.getVoidTy(),
{Builder.getPtrTy(), Builder.getPtrTy()},
/* IsVarArg */ false);
@@ -3422,7 +3419,6 @@ Expected<Function *> OpenMPIRBuilder::createReductionFunction(
BasicBlock *EntryBB =
BasicBlock::Create(M.getContext(), "entry", ReductionFunc);
Builder.SetInsertPoint(EntryBB);
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
// Need to alloca memory here and deal with the pointers before getting
// LHS/RHS pointers out
@@ -3750,12 +3746,10 @@ static Error populateReductionFunction(
Function *ReductionFunc,
ArrayRef<OpenMPIRBuilder::ReductionInfo> ReductionInfos,
IRBuilder<> &Builder, ArrayRef<bool> IsByRef, bool IsGPU) {
IRBuilder<>::InsertPointGuard IPG(Builder);
Module *Module = ReductionFunc->getParent();
BasicBlock *ReductionFuncBlock =
BasicBlock::Create(Module->getContext(), "", ReductionFunc);
Builder.SetInsertPoint(ReductionFuncBlock);
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
Value *LHSArrayPtr = nullptr;
Value *RHSArrayPtr = nullptr;
if (IsGPU) {

View File

@@ -1,121 +0,0 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} {
omp.private {type = private} @_QFEi_private_i32 : i32 loc(#loc1)
omp.declare_reduction @add_reduction_i32 : i32 init {
^bb0(%arg0: i32 loc("test.f90":8:7)):
%0 = llvm.mlir.constant(0 : i32) : i32 loc(#loc2)
omp.yield(%0 : i32) loc(#loc2)
} combiner {
^bb0(%arg0: i32 loc("test.f90":8:7), %arg1: i32 loc("test.f90":8:7)):
%0 = llvm.add %arg0, %arg1 : i32 loc(#loc2)
omp.yield(%0 : i32) loc(#loc2)
} loc(#loc2)
llvm.func @_QQmain() {
%0 = llvm.mlir.constant(1 : i64) : i64 loc(#loc4)
%1 = llvm.alloca %0 x i32 {bindc_name = "x"} : (i64) -> !llvm.ptr<5> loc(#loc4)
%2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr loc(#loc4)
%3 = llvm.mlir.constant(1 : i64) : i64 loc(#loc1)
%4 = llvm.alloca %3 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr<5> loc(#loc1)
%5 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr loc(#loc1)
%6 = llvm.mlir.constant(8191 : index) : i64 loc(#loc5)
%7 = llvm.mlir.constant(0 : index) : i64 loc(#loc5)
%8 = llvm.mlir.constant(1 : index) : i64 loc(#loc5)
%9 = llvm.mlir.constant(0 : i32) : i32 loc(#loc5)
%10 = llvm.mlir.constant(8192 : index) : i64 loc(#loc5)
%11 = llvm.mlir.addressof @_QFEarr : !llvm.ptr<1> loc(#loc6)
%12 = llvm.addrspacecast %11 : !llvm.ptr<1> to !llvm.ptr loc(#loc6)
llvm.store %9, %2 : i32, !llvm.ptr loc(#loc7)
%15 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "x"} loc(#loc4)
%16 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"} loc(#loc7)
%17 = omp.map.bounds lower_bound(%7 : i64) upper_bound(%6 : i64) extent(%10 : i64) stride(%8 : i64) start_idx(%8 : i64) loc(#loc7)
%18 = omp.map.info var_ptr(%12 : !llvm.ptr, !llvm.array<8192 x i32>) map_clauses(implicit, tofrom) capture(ByRef) bounds(%17) -> !llvm.ptr {name = "arr"} loc(#loc7)
omp.target map_entries(%15 -> %arg0, %16 -> %arg1, %18 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
%19 = llvm.mlir.constant(8192 : i32) : i32 loc(#loc5)
%20 = llvm.mlir.constant(1 : i32) : i32 loc(#loc5)
%21 = llvm.mlir.constant(8192 : index) : i64 loc(#loc6)
omp.teams reduction(@add_reduction_i32 %arg0 -> %arg3 : !llvm.ptr) {
omp.parallel private(@_QFEi_private_i32 %arg1 -> %arg4 : !llvm.ptr) {
omp.distribute {
omp.wsloop reduction(@add_reduction_i32 %arg3 -> %arg5 : !llvm.ptr) {
omp.loop_nest (%arg6) : i32 = (%20) to (%19) inclusive step (%20) {
llvm.store %arg6, %arg4 : i32, !llvm.ptr loc(#loc2)
%22 = llvm.load %arg5 : !llvm.ptr -> i32 loc(#loc8)
%23 = llvm.load %arg4 : !llvm.ptr -> i32 loc(#loc8)
%34 = llvm.add %22, %23 : i32 loc(#loc8)
llvm.store %34, %arg5 : i32, !llvm.ptr loc(#loc8)
omp.yield loc(#loc2)
} loc(#loc2)
} {omp.composite} loc(#loc2)
} {omp.composite} loc(#loc2)
omp.terminator loc(#loc2)
} {omp.composite} loc(#loc2)
omp.terminator loc(#loc2)
} loc(#loc2)
omp.terminator loc(#loc2)
} loc(#loc13)
llvm.return loc(#loc9)
} loc(#loc12)
llvm.mlir.global internal @_QFEarr() {addr_space = 1 : i32} : !llvm.array<8192 x i32> {
%0 = llvm.mlir.zero : !llvm.array<8192 x i32> loc(#loc6)
llvm.return %0 : !llvm.array<8192 x i32> loc(#loc6)
} loc(#loc6)
} loc(#loc)
#loc = loc("test.f90":4:18)
#loc1 = loc("test.f90":4:18)
#loc2 = loc("test.f90":8:7)
#loc3 = loc("test.f90":1:7)
#loc4 = loc("test.f90":3:18)
#loc5 = loc(unknown)
#loc6 = loc("test.f90":5:18)
#loc7 = loc("test.f90":6:7)
#loc8 = loc("test.f90":10:7)
#loc9 = loc("test.f90":16:7)
#di_file = #llvm.di_file<"target7.f90" in "">
#di_null_type = #llvm.di_null_type
#di_compile_unit = #llvm.di_compile_unit<id = distinct[0]<>,
sourceLanguage = DW_LANG_Fortran95, file = #di_file, producer = "flang",
isOptimized = false, emissionKind = LineTablesOnly>
#di_subroutine_type = #llvm.di_subroutine_type<
callingConvention = DW_CC_program, types = #di_null_type>
#di_subprogram = #llvm.di_subprogram<id = distinct[1]<>,
compileUnit = #di_compile_unit, scope = #di_file, name = "main",
file = #di_file, subprogramFlags = "Definition|MainSubprogram",
type = #di_subroutine_type>
#di_subprogram1 = #llvm.di_subprogram<compileUnit = #di_compile_unit,
name = "target", file = #di_file, subprogramFlags = "Definition",
type = #di_subroutine_type>
#loc12 = loc(fused<#di_subprogram>[#loc3])
#loc13 = loc(fused<#di_subprogram1>[#loc2])
// CHECK-DAG: define internal void @_omp_reduction_shuffle_and_reduce_func
// CHECK-NOT: !dbg
// CHECK: }
// CHECK-DAG: define internal void @_omp_reduction_inter_warp_copy_func
// CHECK-NOT: !dbg
// CHECK: }
// CHECK-DAG: define internal void @"__omp_offloading_{{.*}}__QQmain_l8_omp$reduction$reduction_func.1"
// CHECK-NOT: !dbg
// CHECK: }
// CHECK-DAG: define internal void @_omp_reduction_shuffle_and_reduce_func.2
// CHECK-NOT: !dbg
// CHECK: }
// CHECK-DAG: define internal void @_omp_reduction_inter_warp_copy_func.3
// CHECK-NOT: !dbg
// CHECK: }
// CHECK-DAG: define internal void @_omp_reduction_list_to_global_copy_func
// CHECK-NOT: !dbg
// CHECK: }
// CHECK-DAG: define internal void @_omp_reduction_list_to_global_reduce_func
// CHECK-NOT: !dbg
// CHECK: }
// CHECK-DAG: define internal void @_omp_reduction_global_to_list_copy_func
// CHECK-NOT: !dbg
// CHECK: }
// CHECK-DAG: define internal void @_omp_reduction_global_to_list_reduce_func
// CHECK-NOT: !dbg
// CHECK: }