[OPENMP] Simplify codegen for allocate directive on local variables.

Simplify codegen for the allocate directive on local variables and add an
initial implementation of the codegen for the NVPTX target.

llvm-svn: 356710
This commit is contained in:
Alexey Bataev
2019-03-21 20:36:16 +00:00
parent 7339e61b89
commit 084b0c2f03
4 changed files with 71 additions and 56 deletions

View File

@@ -9745,54 +9745,50 @@ public:
/// Produces the address for local variable \p VD when its canonical
/// declaration carries an OMPAllocateDeclAttr. The address is built from the
/// result of a __kmpc_alloc runtime call, cast to a pointer to the variable's
/// type. Returns Address::invalid() when \p VD is null or the canonical
/// declaration has no OMPAllocateDeclAttr.
///
/// NOTE(review): this span looks like a rendered diff hunk whose +/- markers
/// were stripped, so removed and added lines appear interleaved and the braces
/// do not balance as shown. The attribute loop and the bare
/// `return Address::invalid();` ahead of the second getThreadID call are
/// presumably the removed (pre-change) lines -- confirm against the real diff.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
const VarDecl *VD) {
if (!VD)
return Address::invalid();
const VarDecl *CVD = VD->getCanonicalDecl();
if (!CVD->hasAttr<OMPAllocateDeclAttr>())
return Address::invalid();
// NOTE(review): presumably the pre-change implementation -- it scans every
// attribute and handles the first OMPAllocateDeclAttr it finds.
for (const Attr *A: CVD->getAttrs()) {
if (const auto *AA = dyn_cast<OMPAllocateDeclAttr>(A)) {
auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
if (!Elem.second.ServiceInsertPt)
setLocThreadIdInsertPt(CGF);
// Move the builder to the service insert point recorded for the current
// function before emitting the allocation code.
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
llvm::Value *Size;
CharUnits Align = CGM.getContext().getDeclAlign(CVD);
// Variably modified types take their size from CGF.getTypeSize; otherwise the
// constant type size is rounded up to the declaration's alignment.
if (CVD->getType()->isVariablyModifiedType()) {
Size = CGF.getTypeSize(CVD->getType());
Align = CGM.getContext().getTypeAlignInChars(CVD->getType());
} else {
CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
Align = CGM.getContext().getDeclAlign(CVD);
Size = CGM.getSize(Sz.alignTo(Align));
}
llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
llvm::Value *Allocator;
if (const Expr *AllocExpr = AA->getAllocator()) {
Allocator = CGF.EmitScalarExpr(AllocExpr);
} else {
// Default allocator in libomp is nullptr.
Allocator = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
}
llvm::Value *Args[] = {ThreadID, Size, Allocator};
llvm::Value *Addr =
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
CVD->getName() + ".void.addr");
// Register a NormalAndEHCleanup carrying the __kmpc_free callee together with
// the thread id, the allocated pointer and the allocator.
llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {
ThreadID, Addr, Allocator};
llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
NormalAndEHCleanup, FiniRTLFn, llvm::makeArrayRef(FiniArgs));
// Cast the pointer returned by the runtime to a pointer to the variable's type.
Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
Addr,
CGF.ConvertTypeForMem(
CGM.getContext().getPointerType(CVD->getType())),
CVD->getName() + ".addr");
return Address(Addr, Align);
}
// NOTE(review): presumably the post-change implementation starts here; it
// fetches the attribute directly instead of looping over all attributes.
const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
// Use the default allocation.
if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc)
return Address::invalid();
auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
if (!Elem.second.ServiceInsertPt)
setLocThreadIdInsertPt(CGF);
// Move the builder to the service insert point recorded for the current
// function before emitting the allocation code.
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
llvm::Value *Size;
CharUnits Align = CGM.getContext().getDeclAlign(CVD);
// Same size/alignment computation as in the loop body above.
if (CVD->getType()->isVariablyModifiedType()) {
Size = CGF.getTypeSize(CVD->getType());
Align = CGM.getContext().getTypeAlignInChars(CVD->getType());
} else {
CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
Align = CGM.getContext().getDeclAlign(CVD);
Size = CGM.getSize(Sz.alignTo(Align));
}
// NOTE(review): as rendered, this return makes everything below unreachable;
// presumably it is a removed line of the old implementation -- confirm.
return Address::invalid();
llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
// The default allocator type returned early above, so an allocator expression
// is expected to be present here.
assert(AA->getAllocator() &&
"Expected allocator expression for non-default allocator.");
llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
llvm::Value *Args[] = {ThreadID, Size, Allocator};
llvm::Value *Addr =
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
CVD->getName() + ".void.addr");
// Register a NormalAndEHCleanup carrying the __kmpc_free callee together with
// the thread id, the allocated pointer and the allocator.
llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
Allocator};
llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
llvm::makeArrayRef(FiniArgs));
// Cast the pointer returned by the runtime to a pointer to the variable's type.
Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
Addr,
CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
CVD->getName() + ".addr");
return Address(Addr, Align);
}
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(

View File

@@ -4725,6 +4725,28 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF,
Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
const VarDecl *VD) {
bool UseDefaultAllocator = true;
if (VD && VD->hasAttr<OMPAllocateDeclAttr>()) {
const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
switch (A->getAllocatorType()) {
// Use the default allocator here as by default local vars are
// threadlocal.
case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
case OMPAllocateDeclAttr::OMPThreadMemAlloc:
// Just pass-through to check if the globalization is required.
break;
case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
case OMPAllocateDeclAttr::OMPConstMemAlloc:
case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
UseDefaultAllocator = false;
break;
}
}
if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic)
return Address::invalid();
@@ -4746,7 +4768,9 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
return VDI->second.PrivateAddr;
}
}
// TODO: once the NVPTX libomp supports __kmpc_alloc/__kmpc_free, replace the
// return below with:
//   return UseDefaultAllocator
//              ? Address::invalid()
//              : CGOpenMPRuntime::getAddressOfLocalVariable(CGF, VD);
return Address::invalid();

View File

@@ -67,25 +67,15 @@ int main () {
static int a;
#pragma omp allocate(a) allocator(omp_thread_mem_alloc)
a=2;
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @{{.+}})
// CHECK-NEXT: [[B_VOID_ADDR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 8, i8** null)
// CHECK-NEXT: [[B_ADDR:%.+]] = bitcast i8* [[B_VOID_ADDR]] to double*
// CHECK-NOT: {{__kmpc_alloc|__kmpc_free}}
// CHECK: store double 3.000000e+00, double* [[B_ADDR]],
// CHECK: [[RES:%.+]] = call i32 [[FOO:@.+]]()
// CHECK: store i32 [[RES]], i32* [[RET:%.+]],
// CHECK-NEXT: call void @__kmpc_free(i32 [[GTID]], i8* [[B_VOID_ADDR]], i8** null)
// CHECK: alloca double,
// CHECK-NOT: {{__kmpc_alloc|__kmpc_free}}
double b = 3;
#pragma omp allocate(b)
// CHECK: [[RETVAL:%.+]] = load i32, i32* [[RET]],
// CHECK: ret i32 [[RETVAL]]
return (foo<int>());
}
// CHECK-NOT: call {{.+}} {{__kmpc_alloc|__kmpc_free}}
// CHECK: define {{.*}}i32 [[FOO]]()
// CHECK: define {{.*}}i32 @{{.+}}foo{{.+}}()
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @{{.+}})
// CHECK-NEXT: [[OMP_CGROUP_MEM_ALLOC:%.+]] = load i8**, i8*** @omp_cgroup_mem_alloc,
// CHECK-NEXT: [[V_VOID_ADDR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 4, i8** [[OMP_CGROUP_MEM_ALLOC]])

View File

@@ -57,7 +57,9 @@ namespace ns{
}
#pragma omp allocate(ns::a) allocator(omp_pteam_mem_alloc)
// CHECK-LABEL: @main
int main () {
// CHECK: alloca double,
static int a;
#pragma omp allocate(a) allocator(omp_thread_mem_alloc)
a=2;
@@ -66,6 +68,9 @@ int main () {
return (foo<int>());
}
// CHECK: define {{.*}}i32 @{{.+}}foo{{.+}}()
// CHECK: alloca i32,
extern template int ST<int>::m;
#pragma omp end declare target
#endif