mirror of
https://github.com/intel/llvm.git
synced 2026-01-17 22:54:50 +08:00
[Flang][OpenMP] Add -fopenmp-force-usm option to flang (#94359)
This patch enables the `-fopenmp-force-usm` option to be passed to the flang driver, which forwards it to the compiler frontend. This flag, when set, results in the introduction of the `unified_shared_memory` bit to the `omp.requires` attribute of the top-level module operation. This is later combined with any other target device-related REQUIRES clauses that may have been explicitly set in the compilation unit.
This commit is contained in:
@@ -3592,7 +3592,7 @@ def fopenmp_offload_mandatory : Flag<["-"], "fopenmp-offload-mandatory">, Group<
|
||||
HelpText<"Do not create a host fallback if offloading to the device fails.">,
|
||||
MarshallingInfoFlag<LangOpts<"OpenMPOffloadMandatory">>;
|
||||
def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group<f_Group>,
|
||||
Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CC1Option]>,
|
||||
Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>,
|
||||
HelpText<"Force behavior as if the user specified pragma omp requires unified_shared_memory.">,
|
||||
MarshallingInfoFlag<LangOpts<"OpenMPForceUSM">>;
|
||||
def fopenmp_target_jit : Flag<["-"], "fopenmp-target-jit">, Group<f_Group>,
|
||||
|
||||
@@ -765,6 +765,9 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA,
|
||||
CmdArgs.push_back("-fopenmp");
|
||||
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ);
|
||||
|
||||
if (Args.hasArg(options::OPT_fopenmp_force_usm))
|
||||
CmdArgs.push_back("-fopenmp-force-usm");
|
||||
|
||||
// FIXME: Clang supports a whole bunch more flags here.
|
||||
break;
|
||||
default:
|
||||
|
||||
@@ -42,6 +42,8 @@ LANGOPT(OpenMPVersion, 32, 0)
|
||||
LANGOPT(OpenMPIsTargetDevice, 1, false)
|
||||
/// Generate OpenMP target code only for GPUs
|
||||
LANGOPT(OpenMPIsGPU, 1, false)
|
||||
/// Force behavior as if `requires unified_shared_memory` had been specified
|
||||
LANGOPT(OpenMPForceUSM, 1, false)
|
||||
/// Enable debugging in the OpenMP offloading device RTL
|
||||
LANGOPT(OpenMPTargetDebug, 32, 0)
|
||||
/// Assume work-shared loops do not have more iterations than participating
|
||||
|
||||
@@ -130,16 +130,16 @@ struct OffloadModuleOpts {
|
||||
OffloadModuleOpts(uint32_t OpenMPTargetDebug, bool OpenMPTeamSubscription,
|
||||
bool OpenMPThreadSubscription, bool OpenMPNoThreadState,
|
||||
bool OpenMPNoNestedParallelism, bool OpenMPIsTargetDevice,
|
||||
bool OpenMPIsGPU, uint32_t OpenMPVersion, std::string OMPHostIRFile = {},
|
||||
bool NoGPULib = false)
|
||||
bool OpenMPIsGPU, bool OpenMPForceUSM, uint32_t OpenMPVersion,
|
||||
std::string OMPHostIRFile = {}, bool NoGPULib = false)
|
||||
: OpenMPTargetDebug(OpenMPTargetDebug),
|
||||
OpenMPTeamSubscription(OpenMPTeamSubscription),
|
||||
OpenMPThreadSubscription(OpenMPThreadSubscription),
|
||||
OpenMPNoThreadState(OpenMPNoThreadState),
|
||||
OpenMPNoNestedParallelism(OpenMPNoNestedParallelism),
|
||||
OpenMPIsTargetDevice(OpenMPIsTargetDevice), OpenMPIsGPU(OpenMPIsGPU),
|
||||
OpenMPVersion(OpenMPVersion), OMPHostIRFile(OMPHostIRFile),
|
||||
NoGPULib(NoGPULib) {}
|
||||
OpenMPForceUSM(OpenMPForceUSM), OpenMPVersion(OpenMPVersion),
|
||||
OMPHostIRFile(OMPHostIRFile), NoGPULib(NoGPULib) {}
|
||||
|
||||
OffloadModuleOpts(Fortran::frontend::LangOptions &Opts)
|
||||
: OpenMPTargetDebug(Opts.OpenMPTargetDebug),
|
||||
@@ -148,8 +148,9 @@ struct OffloadModuleOpts {
|
||||
OpenMPNoThreadState(Opts.OpenMPNoThreadState),
|
||||
OpenMPNoNestedParallelism(Opts.OpenMPNoNestedParallelism),
|
||||
OpenMPIsTargetDevice(Opts.OpenMPIsTargetDevice),
|
||||
OpenMPIsGPU(Opts.OpenMPIsGPU), OpenMPVersion(Opts.OpenMPVersion),
|
||||
OMPHostIRFile(Opts.OMPHostIRFile), NoGPULib(Opts.NoGPULib) {}
|
||||
OpenMPIsGPU(Opts.OpenMPIsGPU), OpenMPForceUSM(Opts.OpenMPForceUSM),
|
||||
OpenMPVersion(Opts.OpenMPVersion), OMPHostIRFile(Opts.OMPHostIRFile),
|
||||
NoGPULib(Opts.NoGPULib) {}
|
||||
|
||||
uint32_t OpenMPTargetDebug = 0;
|
||||
bool OpenMPTeamSubscription = false;
|
||||
@@ -158,6 +159,7 @@ struct OffloadModuleOpts {
|
||||
bool OpenMPNoNestedParallelism = false;
|
||||
bool OpenMPIsTargetDevice = false;
|
||||
bool OpenMPIsGPU = false;
|
||||
bool OpenMPForceUSM = false;
|
||||
uint32_t OpenMPVersion = 11;
|
||||
std::string OMPHostIRFile = {};
|
||||
bool NoGPULib = false;
|
||||
@@ -172,6 +174,9 @@ struct OffloadModuleOpts {
|
||||
module.getOperation())) {
|
||||
offloadMod.setIsTargetDevice(Opts.OpenMPIsTargetDevice);
|
||||
offloadMod.setIsGPU(Opts.OpenMPIsGPU);
|
||||
if (Opts.OpenMPForceUSM) {
|
||||
offloadMod.setRequires(mlir::omp::ClauseRequires::unified_shared_memory);
|
||||
}
|
||||
if (Opts.OpenMPIsTargetDevice) {
|
||||
offloadMod.setFlags(Opts.OpenMPTargetDebug, Opts.OpenMPTeamSubscription,
|
||||
Opts.OpenMPThreadSubscription, Opts.OpenMPNoThreadState,
|
||||
|
||||
@@ -906,6 +906,9 @@ static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args,
|
||||
res.getLangOpts().OpenMPVersion, diags)) {
|
||||
res.getLangOpts().OpenMPVersion = Version;
|
||||
}
|
||||
if (args.hasArg(clang::driver::options::OPT_fopenmp_force_usm)) {
|
||||
res.getLangOpts().OpenMPForceUSM = 1;
|
||||
}
|
||||
if (args.hasArg(clang::driver::options::OPT_fopenmp_is_target_device)) {
|
||||
res.getLangOpts().OpenMPIsTargetDevice = 1;
|
||||
|
||||
|
||||
@@ -2608,7 +2608,9 @@ void Fortran::lower::genOpenMPRequires(mlir::Operation *mod,
|
||||
symbol->details());
|
||||
}
|
||||
|
||||
MlirRequires mlirFlags = MlirRequires::none;
|
||||
// Use pre-populated omp.requires module attribute if it was set, so that
|
||||
// the "-fopenmp-force-usm" compiler option is honored.
|
||||
MlirRequires mlirFlags = offloadMod.getRequires();
|
||||
if (semaFlags.test(SemaRequires::ReverseOffload))
|
||||
mlirFlags = mlirFlags | MlirRequires::reverse_offload;
|
||||
if (semaFlags.test(SemaRequires::UnifiedAddress))
|
||||
|
||||
@@ -207,3 +207,23 @@
|
||||
! RUN: --rocm-path=%S/Inputs/rocm %s 2>&1 \
|
||||
! RUN: | FileCheck --check-prefix=ROCM-PATH %s
|
||||
! ROCM-PATH: Found HIP installation: {{.*Inputs.*rocm}}, version 3.6.20214-a2917cd
|
||||
|
||||
! Test -fopenmp-force-usm option without offload
|
||||
! RUN: %flang -S -### %s -o %t 2>&1 \
|
||||
! RUN: -fopenmp -fopenmp-force-usm \
|
||||
! RUN: --target=aarch64-unknown-linux-gnu \
|
||||
! RUN: | FileCheck %s --check-prefix=FORCE-USM-NO-OFFLOAD
|
||||
|
||||
! FORCE-USM-NO-OFFLOAD: "{{[^"]*}}flang-new" "-fc1" "-triple" "aarch64-unknown-linux-gnu"
|
||||
! FORCE-USM-NO-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm"
|
||||
|
||||
! Test -fopenmp-force-usm option with offload
|
||||
! RUN: %flang -S -### %s -o %t 2>&1 \
|
||||
! RUN: -fopenmp -fopenmp-force-usm --offload-arch=gfx90a \
|
||||
! RUN: --target=aarch64-unknown-linux-gnu \
|
||||
! RUN: | FileCheck %s --check-prefix=FORCE-USM-OFFLOAD
|
||||
|
||||
! FORCE-USM-OFFLOAD: "{{[^"]*}}flang-new" "-fc1" "-triple" "aarch64-unknown-linux-gnu"
|
||||
! FORCE-USM-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm"
|
||||
! FORCE-USM-OFFLOAD-NEXT: "{{[^"]*}}flang-new" "-fc1" "-triple" "amdgcn-amd-amdhsa"
|
||||
! FORCE-USM-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm"
|
||||
|
||||
12
flang/test/Lower/OpenMP/force-usm.f90
Normal file
12
flang/test/Lower/OpenMP/force-usm.f90
Normal file
@@ -0,0 +1,12 @@
|
||||
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-force-usm %s -o - | FileCheck %s
|
||||
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-target-device -fopenmp-force-usm %s -o - | FileCheck %s
|
||||
! RUN: bbc -fopenmp -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
|
||||
! RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
|
||||
|
||||
! This test checks the addition of requires unified_shared_memory when
|
||||
! -fopenmp-force-usm is set
|
||||
|
||||
!CHECK: module attributes {
|
||||
!CHECK-SAME: omp.requires = #omp<clause_requires unified_shared_memory>
|
||||
program requires
|
||||
end program requires
|
||||
15
flang/test/Lower/OpenMP/requires-force-usm.f90
Normal file
15
flang/test/Lower/OpenMP/requires-force-usm.f90
Normal file
@@ -0,0 +1,15 @@
|
||||
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-force-usm %s -o - | FileCheck %s
|
||||
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-target-device -fopenmp-force-usm %s -o - | FileCheck %s
|
||||
! RUN: bbc -fopenmp -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
|
||||
! RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
|
||||
|
||||
! This test checks the addition of requires unified_shared_memory when
|
||||
! -fopenmp-force-usm is set, even when other requires directives are present
|
||||
|
||||
!CHECK: module attributes {
|
||||
!CHECK-SAME: omp.requires = #omp<clause_requires reverse_offload|unified_shared_memory>
|
||||
program requires
|
||||
!$omp requires reverse_offload
|
||||
!$omp target
|
||||
!$omp end target
|
||||
end program requires
|
||||
@@ -144,6 +144,11 @@ static llvm::cl::opt<bool>
|
||||
llvm::cl::desc("enable openmp GPU target codegen"),
|
||||
llvm::cl::init(false));
|
||||
|
||||
static llvm::cl::opt<bool> enableOpenMPForceUSM(
|
||||
"fopenmp-force-usm",
|
||||
llvm::cl::desc("force openmp unified shared memory mode"),
|
||||
llvm::cl::init(false));
|
||||
|
||||
// A simplified subset of the OpenMP RTL Flags from Flang, only the primary
|
||||
// positive options are available, no negative options e.g. fopenmp_assume* vs
|
||||
// fno_openmp_assume*
|
||||
@@ -374,11 +379,11 @@ static mlir::LogicalResult convertFortranSourceToMLIR(
|
||||
"-fopenmp-is-target-device is also set";
|
||||
return mlir::failure();
|
||||
}
|
||||
auto offloadModuleOpts =
|
||||
OffloadModuleOpts(setOpenMPTargetDebug, setOpenMPTeamSubscription,
|
||||
setOpenMPThreadSubscription, setOpenMPNoThreadState,
|
||||
setOpenMPNoNestedParallelism, enableOpenMPDevice,
|
||||
enableOpenMPGPU, setOpenMPVersion, "", setNoGPULib);
|
||||
auto offloadModuleOpts = OffloadModuleOpts(
|
||||
setOpenMPTargetDebug, setOpenMPTeamSubscription,
|
||||
setOpenMPThreadSubscription, setOpenMPNoThreadState,
|
||||
setOpenMPNoNestedParallelism, enableOpenMPDevice, enableOpenMPGPU,
|
||||
enableOpenMPForceUSM, setOpenMPVersion, "", setNoGPULib);
|
||||
setOffloadModuleInterfaceAttributes(mlirModule, offloadModuleOpts);
|
||||
setOpenMPVersionAttribute(mlirModule, setOpenMPVersion);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user