mirror of
https://github.com/intel/llvm.git
synced 2026-02-03 19:18:13 +08:00
[OpenMP] Avoid emitting maps for target link variables when unified memory is used
Summary: This patch avoids the emission of maps for target link variables when unified memory is present.
Reviewers: ABataev, caomhin
Reviewed By: ABataev
Subscribers: guansong, jdoerfert, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D60883
llvm-svn: 363435
This commit is contained in:
@@ -8266,7 +8266,8 @@ public:
|
||||
continue;
|
||||
llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
|
||||
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
|
||||
if (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
|
||||
if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
|
||||
!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
|
||||
continue;
|
||||
StructRangeInfoTy PartialStruct;
|
||||
generateInfoForComponentList(
|
||||
@@ -9251,6 +9252,10 @@ bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Reports whether unified_shared_memory has been specified by returning the
/// stored HasRequiresUnifiedSharedMemory value. Callers in this patch use the
/// result to skip emitting maps for declare-target "link" variables.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
|
||||
return HasRequiresUnifiedSharedMemory;
|
||||
}
|
||||
|
||||
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
|
||||
CodeGenModule &CGM)
|
||||
: CGM(CGM) {
|
||||
|
||||
@@ -1623,6 +1623,9 @@ public:
|
||||
/// the predefined allocator and translates it into the corresponding address
|
||||
/// space.
|
||||
virtual bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS);
|
||||
|
||||
/// Return whether the unified_shared_memory clause has been specified.
|
||||
bool hasRequiresUnifiedSharedMemory() const;
|
||||
};
|
||||
|
||||
/// Class supports emission of SIMD-only code.
|
||||
|
||||
@@ -2667,7 +2667,8 @@ public:
|
||||
llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
|
||||
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
|
||||
if (VD->hasGlobalStorage() && CS && !CS->capturesVariable(VD) &&
|
||||
(!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link))
|
||||
(Stack->hasRequiresDeclWithClause<OMPUnifiedSharedMemoryClause>() ||
|
||||
!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link))
|
||||
return;
|
||||
|
||||
SourceLocation ELoc = E->getExprLoc();
|
||||
|
||||
@@ -26,42 +26,35 @@ int bar(int n){
|
||||
// CHECK: [[VAR:@.+]] = global double 1.000000e+01
|
||||
// CHECK: [[VAR_DECL_TGT_LINK_PTR:@.+]] = global double* [[VAR]]
|
||||
|
||||
// CHECK: [[OFFLOAD_SIZES:@.+]] = private unnamed_addr constant [3 x i64] [i64 4, i64 8, i64 8]
|
||||
// CHECK: [[OFFLOAD_MAPTYPES:@.+]] = private unnamed_addr constant [3 x i64] [i64 800, i64 800, i64 531]
|
||||
// CHECK: [[OFFLOAD_SIZES:@.+]] = private unnamed_addr constant [2 x i64] [i64 4, i64 8]
|
||||
// CHECK: [[OFFLOAD_MAPTYPES:@.+]] = private unnamed_addr constant [2 x i64] [i64 800, i64 800]
|
||||
|
||||
// CHECK: [[N_CASTED:%.+]] = alloca i64
|
||||
// CHECK: [[SUM_CASTED:%.+]] = alloca i64
|
||||
|
||||
// CHECK: [[OFFLOAD_BASEPTRS:%.+]] = alloca [3 x i8*]
|
||||
// CHECK: [[OFFLOAD_PTRS:%.+]] = alloca [3 x i8*]
|
||||
// CHECK: [[OFFLOAD_BASEPTRS:%.+]] = alloca [2 x i8*]
|
||||
// CHECK: [[OFFLOAD_PTRS:%.+]] = alloca [2 x i8*]
|
||||
|
||||
// CHECK: [[LOAD1:%.+]] = load i64, i64* [[N_CASTED]]
|
||||
// CHECK: [[LOAD2:%.+]] = load i64, i64* [[SUM_CASTED]]
|
||||
|
||||
// CHECK: [[BPTR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 0
|
||||
// CHECK: [[BPTR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 0
|
||||
// CHECK: [[BCAST1:%.+]] = bitcast i8** [[BPTR1]] to i64*
|
||||
// CHECK: store i64 [[LOAD1]], i64* [[BCAST1]]
|
||||
// CHECK: [[BPTR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 0
|
||||
// CHECK: [[BPTR2:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 0
|
||||
// CHECK: [[BCAST2:%.+]] = bitcast i8** [[BPTR2]] to i64*
|
||||
// CHECK: store i64 [[LOAD1]], i64* [[BCAST2]]
|
||||
|
||||
// CHECK: [[BPTR3:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 1
|
||||
// CHECK: [[BPTR3:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 1
|
||||
// CHECK: [[BCAST3:%.+]] = bitcast i8** [[BPTR3]] to i64*
|
||||
// CHECK: store i64 [[LOAD2]], i64* [[BCAST3]]
|
||||
// CHECK: [[BPTR4:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 1
|
||||
// CHECK: [[BPTR4:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 1
|
||||
// CHECK: [[BCAST4:%.+]] = bitcast i8** [[BPTR4]] to i64*
|
||||
// CHECK: store i64 [[LOAD2]], i64* [[BCAST4]]
|
||||
|
||||
// CHECK: [[BPTR5:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 2
|
||||
// CHECK: [[BCAST5:%.+]] = bitcast i8** [[BPTR5]] to double***
|
||||
// CHECK: store double** [[VAR_DECL_TGT_LINK_PTR]], double*** [[BCAST5]]
|
||||
// CHECK: [[BPTR6:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 2
|
||||
// CHECK: [[BCAST6:%.+]] = bitcast i8** [[BPTR6]] to double**
|
||||
// CHECK: store double* [[VAR]], double** [[BCAST6]]
|
||||
// CHECK: [[BPTR7:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 0
|
||||
// CHECK: [[BPTR8:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 0
|
||||
|
||||
// CHECK: [[BPTR7:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 0
|
||||
// CHECK: [[BPTR8:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 0
|
||||
|
||||
// CHECK: call i32 @__tgt_target(i64 -1, i8* @{{.*}}.region_id, i32 3, i8** [[BPTR7]], i8** [[BPTR8]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[OFFLOAD_SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[OFFLOAD_MAPTYPES]], i32 0, i32 0))
|
||||
// CHECK: call i32 @__tgt_target(i64 -1, i8* @{{.*}}.region_id, i32 2, i8** [[BPTR7]], i8** [[BPTR8]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[OFFLOAD_SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[OFFLOAD_MAPTYPES]], i32 0, i32 0))
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user