[OpenMP]Emit captured decls for target data if no devices were specified.

If use_device_ptr/use_device_addr clauses are used on target data
directive and no device was specified during the compilation, only host
part should be emitted. But it still required to emit captured decls for
partially mapped data fields.

Differential Revision: https://reviews.llvm.org/D144993
This commit is contained in:
Alexey Bataev
2023-02-28 11:17:21 -08:00
parent 37216b4b3a
commit acc30a169e
2 changed files with 83 additions and 6 deletions

View File

@@ -7260,16 +7260,13 @@ void CodeGenFunction::EmitOMPTargetDataDirective(
};
DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
CodeGenFunction &CGF, PrePostActionTy &Action) {
auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
};
// Codegen that selects whether to generate the privatization code or not.
auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
&InnermostCodeGen](CodeGenFunction &CGF,
PrePostActionTy &Action) {
auto &&PrivCodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
RegionCodeGenTy RCG(InnermostCodeGen);
PrivatizeDevicePointers = false;
@@ -7289,7 +7286,28 @@ void CodeGenFunction::EmitOMPTargetDataDirective(
(void)PrivateScope.Privatize();
RCG(CGF);
} else {
OMPLexicalScope Scope(CGF, S, OMPD_unknown);
// If we don't have target devices, don't bother emitting the data
// mapping code.
std::optional<OpenMPDirectiveKind> CaptureRegion;
if (CGM.getLangOpts().OMPTargetTriples.empty()) {
// Emit helper decls of the use_device_ptr/use_device_addr clauses.
for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
for (const Expr *E : C->varlists()) {
const Decl *D = cast<DeclRefExpr>(E)->getDecl();
if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
CGF.EmitVarDecl(*OED);
}
for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
for (const Expr *E : C->varlists()) {
const Decl *D = getBaseDecl(E);
if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
CGF.EmitVarDecl(*OED);
}
} else {
CaptureRegion = OMPD_unknown;
}
OMPLexicalScope Scope(CGF, S, CaptureRegion);
RCG(CGF);
}
};

View File

@@ -0,0 +1,59 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fopenmp -triple x86_64-apple-darwin10 -x c++ -std=c++11 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -emit-llvm -o - %s | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -triple x86_64-apple-darwin10 -x c++ -std=c++11 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
template <int T> class A {
double *ptr = nullptr;
public:
void foo() {
#pragma omp target data use_device_ptr(ptr)
{ double *capture = ptr; }
}
};
template class A<0>;
#endif // HEADER
// CHECK-LABEL: define {{[^@]+}}@_ZN1AILi0EE3fooEv
// CHECK-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) #[[ATTR0:[0-9]+]] align 2 {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[PTR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[CAPTURE:%.*]] = alloca ptr, align 8
// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds [[CLASS_A:%.*]], ptr [[THIS1]], i32 0, i32 0
// CHECK-NEXT: store ptr [[PTR2]], ptr [[PTR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8
// CHECK-NEXT: store ptr [[TMP1]], ptr [[CAPTURE]], align 8
// CHECK-NEXT: ret void
//
//
// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1AILi0EE3fooEv
// SIMD-ONLY0-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) #[[ATTR0:[0-9]+]] align 2 {
// SIMD-ONLY0-NEXT: entry:
// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
// SIMD-ONLY0-NEXT: [[PTR:%.*]] = alloca ptr, align 8
// SIMD-ONLY0-NEXT: [[TMP:%.*]] = alloca ptr, align 8
// SIMD-ONLY0-NEXT: [[CAPTURE:%.*]] = alloca ptr, align 8
// SIMD-ONLY0-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// SIMD-ONLY0-NEXT: [[PTR2:%.*]] = getelementptr inbounds [[CLASS_A:%.*]], ptr [[THIS1]], i32 0, i32 0
// SIMD-ONLY0-NEXT: store ptr [[PTR2]], ptr [[PTR]], align 8
// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR]], align 8
// SIMD-ONLY0-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8
// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8
// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
// SIMD-ONLY0-NEXT: store ptr [[TMP2]], ptr [[CAPTURE]], align 8
// SIMD-ONLY0-NEXT: ret void
//