mirror of
https://github.com/intel/llvm.git
synced 2026-01-26 12:26:52 +08:00
[OPENMP] Simplify codegen for allocate directive on local variables.
Simplified codegen for the allocate directive on local variables and added an initial implementation of the codegen for the NVPTX target.
llvm-svn: 356710
This commit is contained in:
@@ -9745,54 +9745,50 @@ public:
|
||||
|
||||
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
|
||||
const VarDecl *VD) {
|
||||
if (!VD)
|
||||
return Address::invalid();
|
||||
const VarDecl *CVD = VD->getCanonicalDecl();
|
||||
if (!CVD->hasAttr<OMPAllocateDeclAttr>())
|
||||
return Address::invalid();
|
||||
for (const Attr *A: CVD->getAttrs()) {
|
||||
if (const auto *AA = dyn_cast<OMPAllocateDeclAttr>(A)) {
|
||||
auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
|
||||
if (!Elem.second.ServiceInsertPt)
|
||||
setLocThreadIdInsertPt(CGF);
|
||||
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
|
||||
CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
|
||||
llvm::Value *Size;
|
||||
CharUnits Align = CGM.getContext().getDeclAlign(CVD);
|
||||
if (CVD->getType()->isVariablyModifiedType()) {
|
||||
Size = CGF.getTypeSize(CVD->getType());
|
||||
Align = CGM.getContext().getTypeAlignInChars(CVD->getType());
|
||||
} else {
|
||||
CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
|
||||
Align = CGM.getContext().getDeclAlign(CVD);
|
||||
Size = CGM.getSize(Sz.alignTo(Align));
|
||||
}
|
||||
llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
|
||||
llvm::Value *Allocator;
|
||||
if (const Expr *AllocExpr = AA->getAllocator()) {
|
||||
Allocator = CGF.EmitScalarExpr(AllocExpr);
|
||||
} else {
|
||||
// Default allocator in libomp is nullptr.
|
||||
Allocator = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
|
||||
}
|
||||
llvm::Value *Args[] = {ThreadID, Size, Allocator};
|
||||
|
||||
llvm::Value *Addr =
|
||||
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
|
||||
CVD->getName() + ".void.addr");
|
||||
llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {
|
||||
ThreadID, Addr, Allocator};
|
||||
llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
|
||||
|
||||
CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
|
||||
NormalAndEHCleanup, FiniRTLFn, llvm::makeArrayRef(FiniArgs));
|
||||
Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
||||
Addr,
|
||||
CGF.ConvertTypeForMem(
|
||||
CGM.getContext().getPointerType(CVD->getType())),
|
||||
CVD->getName() + ".addr");
|
||||
return Address(Addr, Align);
|
||||
}
|
||||
const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
|
||||
// Use the default allocation.
|
||||
if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc)
|
||||
return Address::invalid();
|
||||
auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
|
||||
if (!Elem.second.ServiceInsertPt)
|
||||
setLocThreadIdInsertPt(CGF);
|
||||
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
|
||||
CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
|
||||
llvm::Value *Size;
|
||||
CharUnits Align = CGM.getContext().getDeclAlign(CVD);
|
||||
if (CVD->getType()->isVariablyModifiedType()) {
|
||||
Size = CGF.getTypeSize(CVD->getType());
|
||||
Align = CGM.getContext().getTypeAlignInChars(CVD->getType());
|
||||
} else {
|
||||
CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
|
||||
Align = CGM.getContext().getDeclAlign(CVD);
|
||||
Size = CGM.getSize(Sz.alignTo(Align));
|
||||
}
|
||||
return Address::invalid();
|
||||
llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
|
||||
assert(AA->getAllocator() &&
|
||||
"Expected allocator expression for non-default allocator.");
|
||||
llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
|
||||
llvm::Value *Args[] = {ThreadID, Size, Allocator};
|
||||
|
||||
llvm::Value *Addr =
|
||||
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
|
||||
CVD->getName() + ".void.addr");
|
||||
llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
|
||||
Allocator};
|
||||
llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
|
||||
|
||||
CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
|
||||
llvm::makeArrayRef(FiniArgs));
|
||||
Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
||||
Addr,
|
||||
CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
|
||||
CVD->getName() + ".addr");
|
||||
return Address(Addr, Align);
|
||||
}
|
||||
|
||||
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
|
||||
|
||||
@@ -4725,6 +4725,28 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF,
|
||||
|
||||
Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
|
||||
const VarDecl *VD) {
|
||||
bool UseDefaultAllocator = true;
|
||||
if (VD && VD->hasAttr<OMPAllocateDeclAttr>()) {
|
||||
const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
|
||||
switch (A->getAllocatorType()) {
|
||||
// Use the default allocator here as by default local vars are
|
||||
// threadlocal.
|
||||
case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
|
||||
case OMPAllocateDeclAttr::OMPThreadMemAlloc:
|
||||
// Just pass-through to check if the globalization is required.
|
||||
break;
|
||||
case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
|
||||
case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
|
||||
case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
|
||||
case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
|
||||
case OMPAllocateDeclAttr::OMPConstMemAlloc:
|
||||
case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
|
||||
case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
|
||||
UseDefaultAllocator = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic)
|
||||
return Address::invalid();
|
||||
|
||||
@@ -4746,7 +4768,9 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
|
||||
return VDI->second.PrivateAddr;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: once the NVPTX libomp supports __kmpc_alloc and __kmpc_free,
|
||||
// replace the return below with:
|
||||
//   return UseDefaultAllocator ? Address::invalid()
|
||||
//       : CGOpenMPRuntime::getAddressOfLocalVariable(CGF, VD);
|
||||
return Address::invalid();
|
||||
|
||||
@@ -67,25 +67,15 @@ int main () {
|
||||
static int a;
|
||||
#pragma omp allocate(a) allocator(omp_thread_mem_alloc)
|
||||
a=2;
|
||||
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @{{.+}})
|
||||
// CHECK-NEXT: [[B_VOID_ADDR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 8, i8** null)
|
||||
// CHECK-NEXT: [[B_ADDR:%.+]] = bitcast i8* [[B_VOID_ADDR]] to double*
|
||||
// CHECK-NOT: {{__kmpc_alloc|__kmpc_free}}
|
||||
// CHECK: store double 3.000000e+00, double* [[B_ADDR]],
|
||||
// CHECK: [[RES:%.+]] = call i32 [[FOO:@.+]]()
|
||||
// CHECK: store i32 [[RES]], i32* [[RET:%.+]],
|
||||
// CHECK-NEXT: call void @__kmpc_free(i32 [[GTID]], i8* [[B_VOID_ADDR]], i8** null)
|
||||
// CHECK: alloca double,
|
||||
// CHECK-NOT: {{__kmpc_alloc|__kmpc_free}}
|
||||
double b = 3;
|
||||
#pragma omp allocate(b)
|
||||
// CHECK: [[RETVAL:%.+]] = load i32, i32* [[RET]],
|
||||
// CHECK: ret i32 [[RETVAL]]
|
||||
return (foo<int>());
|
||||
}
|
||||
|
||||
// CHECK-NOT: call {{.+}} {{__kmpc_alloc|__kmpc_free}}
|
||||
|
||||
// CHECK: define {{.*}}i32 [[FOO]]()
|
||||
// CHECK: define {{.*}}i32 @{{.+}}foo{{.+}}()
|
||||
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @{{.+}})
|
||||
// CHECK-NEXT: [[OMP_CGROUP_MEM_ALLOC:%.+]] = load i8**, i8*** @omp_cgroup_mem_alloc,
|
||||
// CHECK-NEXT: [[V_VOID_ADDR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 4, i8** [[OMP_CGROUP_MEM_ALLOC]])
|
||||
|
||||
@@ -57,7 +57,9 @@ namespace ns{
|
||||
}
|
||||
#pragma omp allocate(ns::a) allocator(omp_pteam_mem_alloc)
|
||||
|
||||
// CHECK-LABEL: @main
|
||||
int main () {
|
||||
// CHECK: alloca double,
|
||||
static int a;
|
||||
#pragma omp allocate(a) allocator(omp_thread_mem_alloc)
|
||||
a=2;
|
||||
@@ -66,6 +68,9 @@ int main () {
|
||||
return (foo<int>());
|
||||
}
|
||||
|
||||
// CHECK: define {{.*}}i32 @{{.+}}foo{{.+}}()
|
||||
// CHECK: alloca i32,
|
||||
|
||||
extern template int ST<int>::m;
|
||||
#pragma omp end declare target
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user