diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 8a25148f2f3c..67014bde7df7 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -4149,28 +4149,30 @@ struct AAKernelInfoFunction : AAKernelInfo { updateReachingKernelEntries(A, AllReachingKernelsKnown); UsedAssumedInformationFromReachingKernels = !AllReachingKernelsKnown; - if (!ParallelLevels.isValidState()) - SPMDCompatibilityTracker.indicatePessimisticFixpoint(); - else if (!ReachingKernelEntries.isValidState()) - SPMDCompatibilityTracker.indicatePessimisticFixpoint(); - else if (!SPMDCompatibilityTracker.empty()) { - // Check if all reaching kernels agree on the mode as we can otherwise - // not guard instructions. We might not be sure about the mode so we - // we cannot fix the internal spmd-zation state either. - int SPMD = 0, Generic = 0; - for (auto *Kernel : ReachingKernelEntries) { - auto &CBAA = A.getAAFor( - *this, IRPosition::function(*Kernel), DepClassTy::OPTIONAL); - if (CBAA.SPMDCompatibilityTracker.isValidState() && - CBAA.SPMDCompatibilityTracker.isAssumed()) - ++SPMD; - else - ++Generic; - if (!CBAA.SPMDCompatibilityTracker.isAtFixpoint()) - UsedAssumedInformationFromReachingKernels = true; - } - if (SPMD != 0 && Generic != 0) + if (!SPMDCompatibilityTracker.empty()) { + if (!ParallelLevels.isValidState()) SPMDCompatibilityTracker.indicatePessimisticFixpoint(); + else if (!ReachingKernelEntries.isValidState()) + SPMDCompatibilityTracker.indicatePessimisticFixpoint(); + else { + // Check if all reaching kernels agree on the mode as we can otherwise + // not guard instructions. We might not be sure about the mode so we + // cannot fix the internal spmd-zation state either. 
+ int SPMD = 0, Generic = 0; + for (auto *Kernel : ReachingKernelEntries) { + auto &CBAA = A.getAAFor( + *this, IRPosition::function(*Kernel), DepClassTy::OPTIONAL); + if (CBAA.SPMDCompatibilityTracker.isValidState() && + CBAA.SPMDCompatibilityTracker.isAssumed()) + ++SPMD; + else + ++Generic; + if (!CBAA.SPMDCompatibilityTracker.isAtFixpoint()) + UsedAssumedInformationFromReachingKernels = true; + } + if (SPMD != 0 && Generic != 0) + SPMDCompatibilityTracker.indicatePessimisticFixpoint(); + } } } diff --git a/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll b/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll index e1971dd42c6c..686d487ff2a9 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll @@ -43,14 +43,13 @@ target triple = "nvptx64" ;. ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" ; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 -; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 ; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external addrspace(5) global i32, align 4 ; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x ptr] [ptr @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, ptr @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata" ; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 ; CHECK: 
@[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 -; CHECK: @[[__OMP_OUTLINED___WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef ;. ; CHECK-DISABLE-SPMDIZATION: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" ; CHECK-DISABLE-SPMDIZATION: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 @@ -67,46 +66,12 @@ define weak void @__omp_offloading_2b_10393b5_spmd_l12() #0 { ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_spmd_l12 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) -; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; CHECK: is_worker_check: -; CHECK-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -; CHECK-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() -; CHECK-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] -; CHECK-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] -; CHECK-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] -; CHECK: worker_state_machine.begin: -; CHECK-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) -; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 -; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr -; CHECK-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null -; CHECK-NEXT: br i1 
[[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; CHECK: worker_state_machine.finished: -; CHECK-NEXT: ret void -; CHECK: worker_state_machine.is_active.check: -; CHECK-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; CHECK: worker_state_machine.parallel_region.check: -; CHECK-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] -; CHECK: worker_state_machine.parallel_region.execute: -; CHECK-NEXT: call void @__omp_outlined___wrapper(i16 0, i32 [[TMP0]]) -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; CHECK: worker_state_machine.parallel_region.check1: -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; CHECK: worker_state_machine.parallel_region.end: -; CHECK-NEXT: call void @__kmpc_kernel_end_parallel() -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; CHECK: worker_state_machine.done.barrier: -; CHECK-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; CHECK: thread.user_code.check: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: ; CHECK-NEXT: call void @spmd_helper() #[[ATTR6:[0-9]+]] -; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; CHECK-NEXT: ret void ; CHECK: worker.exit: ; CHECK-NEXT: ret void @@ -297,7 +262,7 @@ define internal void @spmd_helper() #1 { ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 ; CHECK-NEXT: call void @leaf() 
#[[ATTR3:[0-9]+]] ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; CHECK-NEXT: ret void ; ; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@spmd_helper