mirror of
https://github.com/intel/llvm.git
synced 2026-02-02 02:00:03 +08:00
[CUDA] Mark all CUDA device-side function defs, decls, and calls as convergent.
Summary:
This is important for e.g. the following case:
void sync() { __syncthreads(); }
void foo() {
do_something();
sync();
do_something_else():
}
Without this change, if the optimizer does not inline sync() (which it
won't because __syncthreads is also marked as noduplicate, for now
anyway), it is free to perform optimizations on sync() that it would not
be able to perform on __syncthreads(), because sync() is not marked as
convergent.
Similarly, we need a notion of convergent calls, since in the case when
we can't statically determine a call's target(s), we need to know
whether it's safe to perform optimizations around the call.
This change is conservative; the optimizer will remove these attrs where
it can, see r260318, r260319.
Reviewers: majnemer
Subscribers: cfe-commits, jhen, echristo, tra
Differential Revision: http://reviews.llvm.org/D17056
llvm-svn: 261779
This commit is contained in:
@@ -1595,6 +1595,14 @@ void CodeGenModule::ConstructAttributeList(
|
||||
}
|
||||
}
|
||||
|
||||
if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
|
||||
// Conservatively, mark all functions and calls in CUDA as convergent
|
||||
// (meaning, they may call an intrinsically convergent op, such as
|
||||
// __syncthreads(), and so can't have certain optimizations applied around
|
||||
// them). LLVM will remove this attribute where it safely can.
|
||||
FuncAttrs.addAttribute(llvm::Attribute::Convergent);
|
||||
}
|
||||
|
||||
ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI);
|
||||
|
||||
QualType RetTy = FI.getReturnType();
|
||||
|
||||
39
clang/test/CodeGenCUDA/convergent.cu
Normal file
39
clang/test/CodeGenCUDA/convergent.cu
Normal file
@@ -0,0 +1,39 @@
|
||||
// REQUIRES: x86-registered-target
|
||||
// REQUIRES: nvptx-registered-target
|
||||
|
||||
// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -emit-llvm \
|
||||
// RUN: -disable-llvm-passes -o - %s | FileCheck -check-prefix DEVICE %s
|
||||
|
||||
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \
|
||||
// RUN: -disable-llvm-passes -o - %s | \
|
||||
// RUN: FileCheck -check-prefix HOST %s
|
||||
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
// DEVICE: Function Attrs:
|
||||
// DEVICE-SAME: convergent
|
||||
// DEVICE-NEXT: define void @_Z3foov
|
||||
__device__ void foo() {}
|
||||
|
||||
// HOST: Function Attrs:
|
||||
// HOST-NOT: convergent
|
||||
// HOST-NEXT: define void @_Z3barv
|
||||
// DEVICE: Function Attrs:
|
||||
// DEVICE-SAME: convergent
|
||||
// DEVICE-NEXT: define void @_Z3barv
|
||||
__host__ __device__ void baz();
|
||||
__host__ __device__ void bar() {
|
||||
// DEVICE: call void @_Z3bazv() [[CALL_ATTR:#[0-9]+]]
|
||||
baz();
|
||||
}
|
||||
|
||||
// DEVICE: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
|
||||
// DEVICE: attributes [[BAZ_ATTR]] = {
|
||||
// DEVICE-SAME: convergent
|
||||
// DEVICE-SAME: }
|
||||
// DEVICE: attributes [[CALL_ATTR]] = { convergent }
|
||||
|
||||
// HOST: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
|
||||
// HOST: attributes [[BAZ_ATTR]] = {
|
||||
// HOST-NOT: convergent
|
||||
// NOST-SAME: }
|
||||
@@ -382,7 +382,7 @@ __device__ void df() {
|
||||
// CHECK: call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* %netc)
|
||||
// CHECK: call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* %ec_i_ec)
|
||||
// CHECK: call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* %ec_i_ec1)
|
||||
// CHECK: call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %t_v_t) #3
|
||||
// CHECK: call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %t_v_t)
|
||||
// CHECK: call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %t_b_nec)
|
||||
// CHECK: call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %t_f_nec)
|
||||
// CHECK: call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %t_fa_nec)
|
||||
|
||||
Reference in New Issue
Block a user