[Flang][OpenMP] Add -fopenmp-force-usm option to flang (#94359)

This patch enables the `-fopenmp-force-usm` option to be passed to the
flang driver, which forwards it to the compiler frontend. This flag,
when set, results in the introduction of the `unified_shared_memory` bit
to the `omp.requires` attribute of the top-level module operation.

This is later combined with any other target device-related REQUIRES
clauses that may have been explicitly set in the compilation unit.
This commit is contained in:
Sergio Afonso
2024-06-05 14:43:58 +01:00
committed by GitHub
parent 79e09b1555
commit b9549261e2
10 changed files with 80 additions and 13 deletions

View File

@@ -3592,7 +3592,7 @@ def fopenmp_offload_mandatory : Flag<["-"], "fopenmp-offload-mandatory">, Group<
HelpText<"Do not create a host fallback if offloading to the device fails.">,
MarshallingInfoFlag<LangOpts<"OpenMPOffloadMandatory">>;
// Forces the behavior of `requires unified_shared_memory` for OpenMP offloading.
def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group<f_Group>,
Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CC1Option]>,
Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>,
HelpText<"Force behavior as if the user specified pragma omp requires unified_shared_memory.">,
MarshallingInfoFlag<LangOpts<"OpenMPForceUSM">>;
def fopenmp_target_jit : Flag<["-"], "fopenmp-target-jit">, Group<f_Group>,

View File

@@ -765,6 +765,9 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-fopenmp");
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ);
if (Args.hasArg(options::OPT_fopenmp_force_usm))
CmdArgs.push_back("-fopenmp-force-usm");
// FIXME: Clang supports a whole bunch more flags here.
break;
default:

View File

@@ -42,6 +42,8 @@ LANGOPT(OpenMPVersion, 32, 0)
LANGOPT(OpenMPIsTargetDevice, 1, false)
/// Generate OpenMP target code only for GPUs
LANGOPT(OpenMPIsGPU, 1, false)
/// Force unified shared memory mode, as if `requires unified_shared_memory` had been specified
LANGOPT(OpenMPForceUSM, 1, false)
/// Enable debugging in the OpenMP offloading device RTL
LANGOPT(OpenMPTargetDebug, 32, 0)
/// Assume work-shared loops do not have more iterations than participating

View File

@@ -130,16 +130,16 @@ struct OffloadModuleOpts {
OffloadModuleOpts(uint32_t OpenMPTargetDebug, bool OpenMPTeamSubscription,
bool OpenMPThreadSubscription, bool OpenMPNoThreadState,
bool OpenMPNoNestedParallelism, bool OpenMPIsTargetDevice,
bool OpenMPIsGPU, uint32_t OpenMPVersion, std::string OMPHostIRFile = {},
bool NoGPULib = false)
bool OpenMPIsGPU, bool OpenMPForceUSM, uint32_t OpenMPVersion,
std::string OMPHostIRFile = {}, bool NoGPULib = false)
: OpenMPTargetDebug(OpenMPTargetDebug),
OpenMPTeamSubscription(OpenMPTeamSubscription),
OpenMPThreadSubscription(OpenMPThreadSubscription),
OpenMPNoThreadState(OpenMPNoThreadState),
OpenMPNoNestedParallelism(OpenMPNoNestedParallelism),
OpenMPIsTargetDevice(OpenMPIsTargetDevice), OpenMPIsGPU(OpenMPIsGPU),
OpenMPVersion(OpenMPVersion), OMPHostIRFile(OMPHostIRFile),
NoGPULib(NoGPULib) {}
OpenMPForceUSM(OpenMPForceUSM), OpenMPVersion(OpenMPVersion),
OMPHostIRFile(OMPHostIRFile), NoGPULib(NoGPULib) {}
OffloadModuleOpts(Fortran::frontend::LangOptions &Opts)
: OpenMPTargetDebug(Opts.OpenMPTargetDebug),
@@ -148,8 +148,9 @@ struct OffloadModuleOpts {
OpenMPNoThreadState(Opts.OpenMPNoThreadState),
OpenMPNoNestedParallelism(Opts.OpenMPNoNestedParallelism),
OpenMPIsTargetDevice(Opts.OpenMPIsTargetDevice),
OpenMPIsGPU(Opts.OpenMPIsGPU), OpenMPVersion(Opts.OpenMPVersion),
OMPHostIRFile(Opts.OMPHostIRFile), NoGPULib(Opts.NoGPULib) {}
OpenMPIsGPU(Opts.OpenMPIsGPU), OpenMPForceUSM(Opts.OpenMPForceUSM),
OpenMPVersion(Opts.OpenMPVersion), OMPHostIRFile(Opts.OMPHostIRFile),
NoGPULib(Opts.NoGPULib) {}
uint32_t OpenMPTargetDebug = 0;
bool OpenMPTeamSubscription = false;
@@ -158,6 +159,7 @@ struct OffloadModuleOpts {
bool OpenMPNoNestedParallelism = false;
bool OpenMPIsTargetDevice = false;
bool OpenMPIsGPU = false;
bool OpenMPForceUSM = false;
uint32_t OpenMPVersion = 11;
std::string OMPHostIRFile = {};
bool NoGPULib = false;
@@ -172,6 +174,9 @@ struct OffloadModuleOpts {
module.getOperation())) {
offloadMod.setIsTargetDevice(Opts.OpenMPIsTargetDevice);
offloadMod.setIsGPU(Opts.OpenMPIsGPU);
if (Opts.OpenMPForceUSM) {
offloadMod.setRequires(mlir::omp::ClauseRequires::unified_shared_memory);
}
if (Opts.OpenMPIsTargetDevice) {
offloadMod.setFlags(Opts.OpenMPTargetDebug, Opts.OpenMPTeamSubscription,
Opts.OpenMPThreadSubscription, Opts.OpenMPNoThreadState,

View File

@@ -906,6 +906,9 @@ static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args,
res.getLangOpts().OpenMPVersion, diags)) {
res.getLangOpts().OpenMPVersion = Version;
}
if (args.hasArg(clang::driver::options::OPT_fopenmp_force_usm)) {
res.getLangOpts().OpenMPForceUSM = 1;
}
if (args.hasArg(clang::driver::options::OPT_fopenmp_is_target_device)) {
res.getLangOpts().OpenMPIsTargetDevice = 1;

View File

@@ -2608,7 +2608,9 @@ void Fortran::lower::genOpenMPRequires(mlir::Operation *mod,
symbol->details());
}
MlirRequires mlirFlags = MlirRequires::none;
// Use pre-populated omp.requires module attribute if it was set, so that
// the "-fopenmp-force-usm" compiler option is honored.
MlirRequires mlirFlags = offloadMod.getRequires();
if (semaFlags.test(SemaRequires::ReverseOffload))
mlirFlags = mlirFlags | MlirRequires::reverse_offload;
if (semaFlags.test(SemaRequires::UnifiedAddress))

View File

@@ -207,3 +207,23 @@
! RUN: --rocm-path=%S/Inputs/rocm %s 2>&1 \
! RUN: | FileCheck --check-prefix=ROCM-PATH %s
! ROCM-PATH: Found HIP installation: {{.*Inputs.*rocm}}, version 3.6.20214-a2917cd
! Test -fopenmp-force-usm option without offload
! RUN: %flang -S -### %s -o %t 2>&1 \
! RUN: -fopenmp -fopenmp-force-usm \
! RUN: --target=aarch64-unknown-linux-gnu \
! RUN: | FileCheck %s --check-prefix=FORCE-USM-NO-OFFLOAD
! FORCE-USM-NO-OFFLOAD: "{{[^"]*}}flang-new" "-fc1" "-triple" "aarch64-unknown-linux-gnu"
! FORCE-USM-NO-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm"
! Test -fopenmp-force-usm option with offload
! RUN: %flang -S -### %s -o %t 2>&1 \
! RUN: -fopenmp -fopenmp-force-usm --offload-arch=gfx90a \
! RUN: --target=aarch64-unknown-linux-gnu \
! RUN: | FileCheck %s --check-prefix=FORCE-USM-OFFLOAD
! FORCE-USM-OFFLOAD: "{{[^"]*}}flang-new" "-fc1" "-triple" "aarch64-unknown-linux-gnu"
! FORCE-USM-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm"
! FORCE-USM-OFFLOAD-NEXT: "{{[^"]*}}flang-new" "-fc1" "-triple" "amdgcn-amd-amdhsa"
! FORCE-USM-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm"

View File

@@ -0,0 +1,12 @@
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-force-usm %s -o - | FileCheck %s
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-target-device -fopenmp-force-usm %s -o - | FileCheck %s
! RUN: bbc -fopenmp -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
! RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
! This test checks the addition of requires unified_shared_memory when
! -fopenmp-force-usm is set
!CHECK: module attributes {
!CHECK-SAME: omp.requires = #omp<clause_requires unified_shared_memory>
! Intentionally empty program: no REQUIRES directive appears in the source, so
! the expected omp.requires attribute can only come from -fopenmp-force-usm.
program requires
end program requires

View File

@@ -0,0 +1,15 @@
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-force-usm %s -o - | FileCheck %s
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-target-device -fopenmp-force-usm %s -o - | FileCheck %s
! RUN: bbc -fopenmp -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
! RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
! This test checks the addition of requires unified_shared_memory when
! -fopenmp-force-usm is set, even when other requires directives are present
!CHECK: module attributes {
!CHECK-SAME: omp.requires = #omp<clause_requires reverse_offload|unified_shared_memory>
! The explicit reverse_offload clause below is expected to be combined with the
! unified_shared_memory bit introduced by -fopenmp-force-usm.
program requires
!$omp requires reverse_offload
!$omp target
!$omp end target
end program requires

View File

@@ -144,6 +144,11 @@ static llvm::cl::opt<bool>
llvm::cl::desc("enable openmp GPU target codegen"),
llvm::cl::init(false));
// Command-line switch `-fopenmp-force-usm` (off by default). Its value is
// forwarded to OffloadModuleOpts as the OpenMPForceUSM argument.
static llvm::cl::opt<bool> enableOpenMPForceUSM(
"fopenmp-force-usm",
llvm::cl::desc("force openmp unified shared memory mode"),
llvm::cl::init(false));
// A simplified subset of the OpenMP RTL Flags from Flang, only the primary
// positive options are available, no negative options e.g. fopen_assume* vs
// fno_open_assume*
@@ -374,11 +379,11 @@ static mlir::LogicalResult convertFortranSourceToMLIR(
"-fopenmp-is-target-device is also set";
return mlir::failure();
}
auto offloadModuleOpts =
OffloadModuleOpts(setOpenMPTargetDebug, setOpenMPTeamSubscription,
setOpenMPThreadSubscription, setOpenMPNoThreadState,
setOpenMPNoNestedParallelism, enableOpenMPDevice,
enableOpenMPGPU, setOpenMPVersion, "", setNoGPULib);
auto offloadModuleOpts = OffloadModuleOpts(
setOpenMPTargetDebug, setOpenMPTeamSubscription,
setOpenMPThreadSubscription, setOpenMPNoThreadState,
setOpenMPNoNestedParallelism, enableOpenMPDevice, enableOpenMPGPU,
enableOpenMPForceUSM, setOpenMPVersion, "", setNoGPULib);
setOffloadModuleInterfaceAttributes(mlirModule, offloadModuleOpts);
setOpenMPVersionAttribute(mlirModule, setOpenMPVersion);
}