[OPENMP][NVPTX]Fixed processing of memory management directives.

Added special processing of the memory management directives/clauses for
NVPTX target. For private locals, omp_default_mem_alloc and
omp_thread_mem_alloc result in allocation in local memory.
omp_const_mem_alloc allocates const memory, omp_teams_mem_alloc
allocates shared memory, and omp_cgroup_mem_alloc and
omp_large_cap_mem_alloc allocate global memory.

llvm-svn: 357923
This commit is contained in:
Alexey Bataev
2019-04-08 16:53:57 +00:00
parent 10d6008f85
commit 1db9bfeba5
2 changed files with 74 additions and 15 deletions

View File

@@ -318,6 +318,9 @@ class CheckVarsEscapingDeclContext final
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
return;
VD = cast<ValueDecl>(VD->getCanonicalDecl());
// Use user-specified allocation.
if (VD->hasAttrs() && VD->hasAttr<OMPAllocateDeclAttr>())
return;
// Variables captured by value must be globalized.
if (auto *CSI = CGF.CapturedStmtInfo) {
if (const FieldDecl *FD = CSI->lookup(cast<VarDecl>(VD))) {
@@ -4725,7 +4728,6 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF,
Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
const VarDecl *VD) {
bool UseDefaultAllocator = true;
if (VD && VD->hasAttr<OMPAllocateDeclAttr>()) {
const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
switch (A->getAllocatorType()) {
@@ -4733,17 +4735,48 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
// threadlocal.
case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
case OMPAllocateDeclAttr::OMPThreadMemAlloc:
// Just pass-through to check if the globalization is required.
break;
case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
case OMPAllocateDeclAttr::OMPConstMemAlloc:
case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
// Follow the user decision - use default allocation.
return Address::invalid();
case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
UseDefaultAllocator = false;
break;
// TODO: implement aupport for user-defined allocators.
return Address::invalid();
case OMPAllocateDeclAttr::OMPConstMemAlloc: {
llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType());
auto *GV = new llvm::GlobalVariable(
CGM.getModule(), VarTy, /*isConstant=*/false,
llvm::GlobalValue::InternalLinkage,
llvm::Constant::getNullValue(VarTy), VD->getName(),
/*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
CGM.getContext().getTargetAddressSpace(LangAS::cuda_constant));
CharUnits Align = CGM.getContext().getDeclAlign(VD);
GV->setAlignment(Align.getQuantity());
return Address(GV, Align);
}
case OMPAllocateDeclAttr::OMPPTeamMemAlloc: {
llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType());
auto *GV = new llvm::GlobalVariable(
CGM.getModule(), VarTy, /*isConstant=*/false,
llvm::GlobalValue::InternalLinkage,
llvm::Constant::getNullValue(VarTy), VD->getName(),
/*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared));
CharUnits Align = CGM.getContext().getDeclAlign(VD);
GV->setAlignment(Align.getQuantity());
return Address(GV, Align);
}
case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
case OMPAllocateDeclAttr::OMPCGroupMemAlloc: {
llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType());
auto *GV = new llvm::GlobalVariable(
CGM.getModule(), VarTy, /*isConstant=*/false,
llvm::GlobalValue::InternalLinkage,
llvm::Constant::getNullValue(VarTy), VD->getName());
CharUnits Align = CGM.getContext().getDeclAlign(VD);
GV->setAlignment(Align.getQuantity());
return Address(GV, Align);
}
}
}
@@ -4769,11 +4802,6 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
}
}
// TODO: replace it with return
// UseDefaultAllocator ? Address::invalid :
// CGOpenMPRuntime::getAddressOfLocalVariable(CGF, VD); when NVPTX libomp
// supports __kmpc_alloc|__kmpc_free.
(void)UseDefaultAllocator; // Prevent a warning.
return Address::invalid();
}

View File

@@ -24,6 +24,8 @@ extern const omp_allocator_handle_t omp_thread_mem_alloc;
// CHECK-DAG: @{{.+}}ns{{.+}}a{{.+}} = addrspace(3) global i32 0,
// CHECK-DAG: @{{.+}}main{{.+}}a{{.*}} = internal global i32 0,
// CHECK-DAG: @{{.+}}ST{{.+}}m{{.+}} = external global i32,
// CHECK-DAG: @bar_c = internal global i32 0,
// CHECK-DAG: @bar_b = internal addrspace(3) global double 0.000000e+00,
struct St{
int a;
};
@@ -64,13 +66,42 @@ int main () {
#pragma omp allocate(a) allocator(omp_thread_mem_alloc)
a=2;
double b = 3;
float c;
#pragma omp allocate(b) allocator(omp_default_mem_alloc)
#pragma omp allocate(c) allocator(omp_cgroup_mem_alloc)
return (foo<int>());
}
// CHECK: define {{.*}}i32 @{{.+}}foo{{.+}}()
// CHECK: alloca i32,
// CHECK-NOT: alloca i32,
extern template int ST<int>::m;
void baz(float &);
// CHECK: define void @{{.+}}bar{{.+}}()
void bar() {
// CHECK: alloca float,
float bar_a;
// CHECK: alloca double,
double bar_b;
int bar_c;
#pragma omp allocate(bar_c) allocator(omp_cgroup_mem_alloc)
// CHECK: call void [[OUTLINED:@.+]](i32* %{{.+}}, i32* %{{.+}})
#pragma omp parallel private(bar_a, bar_b) allocate(omp_thread_mem_alloc \
: bar_a) allocate(omp_pteam_mem_alloc \
: bar_b)
{
bar_b = bar_a;
baz(bar_a);
}
// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}})
// CHECK-NOT: alloca double,
// CHECK: alloca float,
// CHECK-NOT: alloca double,
// CHECK: load float, float* %
// CHECK: store double {{.+}}, double addrspace(3)* @bar_b,
}
#pragma omp end declare target
#endif