mirror of
https://github.com/intel/llvm.git
synced 2026-01-26 12:26:52 +08:00
[OPENMP][NVPTX]Fixed processing of memory management directives.
Added special processing of the memory management directives/clauses for NVPTX target. For private locals, omp_default_mem_alloc and omp_thread_mem_alloc result in allocation in local memory. omp_const_mem_alloc allocates const memory, omp_teams_mem_alloc allocates shared memory, and omp_cgroup_mem_alloc and omp_large_cap_mem_alloc allocate global memory. llvm-svn: 357923
This commit is contained in:
@@ -318,6 +318,9 @@ class CheckVarsEscapingDeclContext final
|
||||
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
|
||||
return;
|
||||
VD = cast<ValueDecl>(VD->getCanonicalDecl());
|
||||
// Use user-specified allocation.
|
||||
if (VD->hasAttrs() && VD->hasAttr<OMPAllocateDeclAttr>())
|
||||
return;
|
||||
// Variables captured by value must be globalized.
|
||||
if (auto *CSI = CGF.CapturedStmtInfo) {
|
||||
if (const FieldDecl *FD = CSI->lookup(cast<VarDecl>(VD))) {
|
||||
@@ -4725,7 +4728,6 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF,
|
||||
|
||||
Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
|
||||
const VarDecl *VD) {
|
||||
bool UseDefaultAllocator = true;
|
||||
if (VD && VD->hasAttr<OMPAllocateDeclAttr>()) {
|
||||
const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
|
||||
switch (A->getAllocatorType()) {
|
||||
@@ -4733,17 +4735,48 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
|
||||
// threadlocal.
|
||||
case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
|
||||
case OMPAllocateDeclAttr::OMPThreadMemAlloc:
|
||||
// Just pass-through to check if the globalization is required.
|
||||
break;
|
||||
case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
|
||||
case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
|
||||
case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
|
||||
case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
|
||||
case OMPAllocateDeclAttr::OMPConstMemAlloc:
|
||||
case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
|
||||
// Follow the user decision - use default allocation.
|
||||
return Address::invalid();
|
||||
case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
|
||||
UseDefaultAllocator = false;
|
||||
break;
|
||||
// TODO: implement aupport for user-defined allocators.
|
||||
return Address::invalid();
|
||||
case OMPAllocateDeclAttr::OMPConstMemAlloc: {
|
||||
llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType());
|
||||
auto *GV = new llvm::GlobalVariable(
|
||||
CGM.getModule(), VarTy, /*isConstant=*/false,
|
||||
llvm::GlobalValue::InternalLinkage,
|
||||
llvm::Constant::getNullValue(VarTy), VD->getName(),
|
||||
/*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
|
||||
CGM.getContext().getTargetAddressSpace(LangAS::cuda_constant));
|
||||
CharUnits Align = CGM.getContext().getDeclAlign(VD);
|
||||
GV->setAlignment(Align.getQuantity());
|
||||
return Address(GV, Align);
|
||||
}
|
||||
case OMPAllocateDeclAttr::OMPPTeamMemAlloc: {
|
||||
llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType());
|
||||
auto *GV = new llvm::GlobalVariable(
|
||||
CGM.getModule(), VarTy, /*isConstant=*/false,
|
||||
llvm::GlobalValue::InternalLinkage,
|
||||
llvm::Constant::getNullValue(VarTy), VD->getName(),
|
||||
/*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
|
||||
CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared));
|
||||
CharUnits Align = CGM.getContext().getDeclAlign(VD);
|
||||
GV->setAlignment(Align.getQuantity());
|
||||
return Address(GV, Align);
|
||||
}
|
||||
case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
|
||||
case OMPAllocateDeclAttr::OMPCGroupMemAlloc: {
|
||||
llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType());
|
||||
auto *GV = new llvm::GlobalVariable(
|
||||
CGM.getModule(), VarTy, /*isConstant=*/false,
|
||||
llvm::GlobalValue::InternalLinkage,
|
||||
llvm::Constant::getNullValue(VarTy), VD->getName());
|
||||
CharUnits Align = CGM.getContext().getDeclAlign(VD);
|
||||
GV->setAlignment(Align.getQuantity());
|
||||
return Address(GV, Align);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4769,11 +4802,6 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: replace it with return
|
||||
// UseDefaultAllocator ? Address::invalid :
|
||||
// CGOpenMPRuntime::getAddressOfLocalVariable(CGF, VD); when NVPTX libomp
|
||||
// supports __kmpc_alloc|__kmpc_free.
|
||||
(void)UseDefaultAllocator; // Prevent a warning.
|
||||
return Address::invalid();
|
||||
}
|
||||
|
||||
|
||||
@@ -24,6 +24,8 @@ extern const omp_allocator_handle_t omp_thread_mem_alloc;
|
||||
// CHECK-DAG: @{{.+}}ns{{.+}}a{{.+}} = addrspace(3) global i32 0,
|
||||
// CHECK-DAG: @{{.+}}main{{.+}}a{{.*}} = internal global i32 0,
|
||||
// CHECK-DAG: @{{.+}}ST{{.+}}m{{.+}} = external global i32,
|
||||
// CHECK-DAG: @bar_c = internal global i32 0,
|
||||
// CHECK-DAG: @bar_b = internal addrspace(3) global double 0.000000e+00,
|
||||
struct St{
|
||||
int a;
|
||||
};
|
||||
@@ -64,13 +66,42 @@ int main () {
|
||||
#pragma omp allocate(a) allocator(omp_thread_mem_alloc)
|
||||
a=2;
|
||||
double b = 3;
|
||||
float c;
|
||||
#pragma omp allocate(b) allocator(omp_default_mem_alloc)
|
||||
#pragma omp allocate(c) allocator(omp_cgroup_mem_alloc)
|
||||
return (foo<int>());
|
||||
}
|
||||
|
||||
// CHECK: define {{.*}}i32 @{{.+}}foo{{.+}}()
|
||||
// CHECK: alloca i32,
|
||||
// CHECK-NOT: alloca i32,
|
||||
|
||||
extern template int ST<int>::m;
|
||||
|
||||
void baz(float &);
|
||||
|
||||
// CHECK: define void @{{.+}}bar{{.+}}()
|
||||
void bar() {
|
||||
// CHECK: alloca float,
|
||||
float bar_a;
|
||||
// CHECK: alloca double,
|
||||
double bar_b;
|
||||
int bar_c;
|
||||
#pragma omp allocate(bar_c) allocator(omp_cgroup_mem_alloc)
|
||||
// CHECK: call void [[OUTLINED:@.+]](i32* %{{.+}}, i32* %{{.+}})
|
||||
#pragma omp parallel private(bar_a, bar_b) allocate(omp_thread_mem_alloc \
|
||||
: bar_a) allocate(omp_pteam_mem_alloc \
|
||||
: bar_b)
|
||||
{
|
||||
bar_b = bar_a;
|
||||
baz(bar_a);
|
||||
}
|
||||
// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}})
|
||||
// CHECK-NOT: alloca double,
|
||||
// CHECK: alloca float,
|
||||
// CHECK-NOT: alloca double,
|
||||
// CHECK: load float, float* %
|
||||
// CHECK: store double {{.+}}, double addrspace(3)* @bar_b,
|
||||
}
|
||||
|
||||
#pragma omp end declare target
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user