mirror of
https://github.com/intel/llvm.git
synced 2026-01-17 23:45:25 +08:00
[flang][cuda] Extends matching distance computation (#91810)
Extends the computation of the matching distance in generic resolution to support the options described in this table (https://docs.nvidia.com/hpc-sdk/archive/24.3/compilers/cuda-fortran-prog-guide/index.html#cfref-var-attr-unified-data). The options are added as language features in the `SemanticsContext`, and a flag is added to bbc for testing purposes.
This commit is contained in:
committed by
GitHub
parent
be7c9e3957
commit
e8eb52d167
@@ -49,7 +49,7 @@ ENUM_CLASS(LanguageFeature, BackslashEscapes, OldDebugLines,
|
||||
IndistinguishableSpecifics, SubroutineAndFunctionSpecifics,
|
||||
EmptySequenceType, NonSequenceCrayPointee, BranchIntoConstruct,
|
||||
BadBranchTarget, ConvertedArgument, HollerithPolymorphic, ListDirectedSize,
|
||||
NonBindCInteroperability)
|
||||
NonBindCInteroperability, CudaManaged, CudaUnified)
|
||||
|
||||
// Portability and suspicious usage warnings
|
||||
ENUM_CLASS(UsageWarning, Portability, PointerToUndefinable,
|
||||
@@ -81,6 +81,8 @@ public:
|
||||
disable_.set(LanguageFeature::OpenACC);
|
||||
disable_.set(LanguageFeature::OpenMP);
|
||||
disable_.set(LanguageFeature::CUDA); // !@cuf
|
||||
disable_.set(LanguageFeature::CudaManaged);
|
||||
disable_.set(LanguageFeature::CudaUnified);
|
||||
disable_.set(LanguageFeature::ImplicitNoneTypeNever);
|
||||
disable_.set(LanguageFeature::ImplicitNoneTypeAlways);
|
||||
disable_.set(LanguageFeature::DefaultSave);
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
#include <string>
|
||||
|
||||
namespace Fortran::common {
|
||||
class LanguageFeatureControl;
|
||||
|
||||
// Fortran has five kinds of intrinsic data types, plus the derived types.
|
||||
ENUM_CLASS(TypeCategory, Integer, Real, Complex, Character, Logical, Derived)
|
||||
@@ -115,7 +116,8 @@ static constexpr IgnoreTKRSet ignoreTKRAll{IgnoreTKR::Type, IgnoreTKR::Kind,
|
||||
std::string AsFortran(IgnoreTKRSet);
|
||||
|
||||
bool AreCompatibleCUDADataAttrs(std::optional<CUDADataAttr>,
|
||||
std::optional<CUDADataAttr>, IgnoreTKRSet, bool allowUnifiedMatchingRule);
|
||||
std::optional<CUDADataAttr>, IgnoreTKRSet, bool allowUnifiedMatchingRule,
|
||||
const LanguageFeatureControl *features = nullptr);
|
||||
|
||||
static constexpr char blankCommonObjectName[] = "__BLNK__";
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "flang/Common/Fortran.h"
|
||||
#include "flang/Common/Fortran-features.h"
|
||||
|
||||
namespace Fortran::common {
|
||||
|
||||
@@ -102,7 +103,13 @@ std::string AsFortran(IgnoreTKRSet tkr) {
|
||||
/// dummy argument attribute while `y` represents the actual argument attribute.
|
||||
bool AreCompatibleCUDADataAttrs(std::optional<CUDADataAttr> x,
|
||||
std::optional<CUDADataAttr> y, IgnoreTKRSet ignoreTKR,
|
||||
bool allowUnifiedMatchingRule) {
|
||||
bool allowUnifiedMatchingRule, const LanguageFeatureControl *features) {
|
||||
bool isCudaManaged{features
|
||||
? features->IsEnabled(common::LanguageFeature::CudaManaged)
|
||||
: false};
|
||||
bool isCudaUnified{features
|
||||
? features->IsEnabled(common::LanguageFeature::CudaUnified)
|
||||
: false};
|
||||
if (!x && !y) {
|
||||
return true;
|
||||
} else if (x && y && *x == *y) {
|
||||
@@ -120,19 +127,27 @@ bool AreCompatibleCUDADataAttrs(std::optional<CUDADataAttr> x,
|
||||
return true;
|
||||
} else if (allowUnifiedMatchingRule) {
|
||||
if (!x) { // Dummy argument has no attribute -> host
|
||||
if (y && (*y == CUDADataAttr::Managed || *y == CUDADataAttr::Unified)) {
|
||||
if ((y && (*y == CUDADataAttr::Managed || *y == CUDADataAttr::Unified)) ||
|
||||
(!y && (isCudaUnified || isCudaManaged))) {
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
if (*x == CUDADataAttr::Device && y &&
|
||||
(*y == CUDADataAttr::Managed || *y == CUDADataAttr::Unified)) {
|
||||
return true;
|
||||
} else if (*x == CUDADataAttr::Managed && y &&
|
||||
*y == CUDADataAttr::Unified) {
|
||||
return true;
|
||||
} else if (*x == CUDADataAttr::Unified && y &&
|
||||
*y == CUDADataAttr::Managed) {
|
||||
return true;
|
||||
if (*x == CUDADataAttr::Device) {
|
||||
if ((y &&
|
||||
(*y == CUDADataAttr::Managed || *y == CUDADataAttr::Unified)) ||
|
||||
(!y && (isCudaUnified || isCudaManaged))) {
|
||||
return true;
|
||||
}
|
||||
} else if (*x == CUDADataAttr::Managed) {
|
||||
if ((y && *y == CUDADataAttr::Unified) ||
|
||||
(!y && (isCudaUnified || isCudaManaged))) {
|
||||
return true;
|
||||
}
|
||||
} else if (*x == CUDADataAttr::Unified) {
|
||||
if ((y && *y == CUDADataAttr::Managed) ||
|
||||
(!y && (isCudaUnified || isCudaManaged))) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
|
||||
@@ -914,7 +914,7 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy,
|
||||
}
|
||||
if (!common::AreCompatibleCUDADataAttrs(dummyDataAttr, actualDataAttr,
|
||||
dummy.ignoreTKR,
|
||||
/*allowUnifiedMatchingRule=*/true)) {
|
||||
/*allowUnifiedMatchingRule=*/true, &context.languageFeatures())) {
|
||||
auto toStr{[](std::optional<common::CUDADataAttr> x) {
|
||||
return x ? "ATTRIBUTES("s +
|
||||
parser::ToUpperCaseLetters(common::EnumToString(*x)) + ")"s
|
||||
|
||||
@@ -2501,8 +2501,13 @@ static constexpr int cudaInfMatchingValue{std::numeric_limits<int>::max()};
|
||||
|
||||
// Compute the matching distance as described in section 3.2.3 of the CUDA
|
||||
// Fortran references.
|
||||
static int GetMatchingDistance(const characteristics::DummyArgument &dummy,
|
||||
static int GetMatchingDistance(const common::LanguageFeatureControl &features,
|
||||
const characteristics::DummyArgument &dummy,
|
||||
const std::optional<ActualArgument> &actual) {
|
||||
bool isCudaManaged{features.IsEnabled(common::LanguageFeature::CudaManaged)};
|
||||
bool isCudaUnified{features.IsEnabled(common::LanguageFeature::CudaUnified)};
|
||||
CHECK(!(isCudaUnified && isCudaManaged) && "expect only one enabled.");
|
||||
|
||||
std::optional<common::CUDADataAttr> actualDataAttr, dummyDataAttr;
|
||||
if (actual) {
|
||||
if (auto *expr{actual->UnwrapExpr()}) {
|
||||
@@ -2529,6 +2534,9 @@ static int GetMatchingDistance(const characteristics::DummyArgument &dummy,
|
||||
|
||||
if (!dummyDataAttr) {
|
||||
if (!actualDataAttr) {
|
||||
if (isCudaUnified || isCudaManaged) {
|
||||
return 3;
|
||||
}
|
||||
return 0;
|
||||
} else if (*actualDataAttr == common::CUDADataAttr::Device) {
|
||||
return cudaInfMatchingValue;
|
||||
@@ -2538,6 +2546,9 @@ static int GetMatchingDistance(const characteristics::DummyArgument &dummy,
|
||||
}
|
||||
} else if (*dummyDataAttr == common::CUDADataAttr::Device) {
|
||||
if (!actualDataAttr) {
|
||||
if (isCudaUnified || isCudaManaged) {
|
||||
return 2;
|
||||
}
|
||||
return cudaInfMatchingValue;
|
||||
} else if (*actualDataAttr == common::CUDADataAttr::Device) {
|
||||
return 0;
|
||||
@@ -2546,7 +2557,10 @@ static int GetMatchingDistance(const characteristics::DummyArgument &dummy,
|
||||
return 2;
|
||||
}
|
||||
} else if (*dummyDataAttr == common::CUDADataAttr::Managed) {
|
||||
if (!actualDataAttr || *actualDataAttr == common::CUDADataAttr::Device) {
|
||||
if (!actualDataAttr) {
|
||||
return isCudaUnified ? 1 : isCudaManaged ? 0 : cudaInfMatchingValue;
|
||||
}
|
||||
if (*actualDataAttr == common::CUDADataAttr::Device) {
|
||||
return cudaInfMatchingValue;
|
||||
} else if (*actualDataAttr == common::CUDADataAttr::Managed) {
|
||||
return 0;
|
||||
@@ -2554,7 +2568,10 @@ static int GetMatchingDistance(const characteristics::DummyArgument &dummy,
|
||||
return 1;
|
||||
}
|
||||
} else if (*dummyDataAttr == common::CUDADataAttr::Unified) {
|
||||
if (!actualDataAttr || *actualDataAttr == common::CUDADataAttr::Device) {
|
||||
if (!actualDataAttr) {
|
||||
return isCudaUnified ? 0 : isCudaManaged ? 1 : cudaInfMatchingValue;
|
||||
}
|
||||
if (*actualDataAttr == common::CUDADataAttr::Device) {
|
||||
return cudaInfMatchingValue;
|
||||
} else if (*actualDataAttr == common::CUDADataAttr::Managed) {
|
||||
return 1;
|
||||
@@ -2566,6 +2583,7 @@ static int GetMatchingDistance(const characteristics::DummyArgument &dummy,
|
||||
}
|
||||
|
||||
static int ComputeCudaMatchingDistance(
|
||||
const common::LanguageFeatureControl &features,
|
||||
const characteristics::Procedure &procedure,
|
||||
const ActualArguments &actuals) {
|
||||
const auto &dummies{procedure.dummyArguments};
|
||||
@@ -2574,7 +2592,7 @@ static int ComputeCudaMatchingDistance(
|
||||
for (std::size_t i{0}; i < dummies.size(); ++i) {
|
||||
const characteristics::DummyArgument &dummy{dummies[i]};
|
||||
const std::optional<ActualArgument> &actual{actuals[i]};
|
||||
int d{GetMatchingDistance(dummy, actual)};
|
||||
int d{GetMatchingDistance(features, dummy, actual)};
|
||||
if (d == cudaInfMatchingValue)
|
||||
return d;
|
||||
distance += d;
|
||||
@@ -2666,7 +2684,9 @@ std::pair<const Symbol *, bool> ExpressionAnalyzer::ResolveGeneric(
|
||||
CheckCompatibleArguments(*procedure, localActuals)) {
|
||||
if ((procedure->IsElemental() && elemental) ||
|
||||
(!procedure->IsElemental() && nonElemental)) {
|
||||
int d{ComputeCudaMatchingDistance(*procedure, localActuals)};
|
||||
int d{ComputeCudaMatchingDistance(
|
||||
context_.languageFeatures(), *procedure, localActuals)};
|
||||
llvm::errs() << "matching distance: " << d << "\n";
|
||||
if (d != crtMatchingDistance) {
|
||||
if (d > crtMatchingDistance) {
|
||||
continue;
|
||||
@@ -2688,8 +2708,8 @@ std::pair<const Symbol *, bool> ExpressionAnalyzer::ResolveGeneric(
|
||||
} else {
|
||||
elemental = &specific;
|
||||
}
|
||||
crtMatchingDistance =
|
||||
ComputeCudaMatchingDistance(*procedure, localActuals);
|
||||
crtMatchingDistance = ComputeCudaMatchingDistance(
|
||||
context_.languageFeatures(), *procedure, localActuals);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
55
flang/test/Semantics/cuf14.cuf
Normal file
55
flang/test/Semantics/cuf14.cuf
Normal file
@@ -0,0 +1,55 @@
|
||||
! RUN: bbc -emit-hlfir -fcuda -gpu=unified %s -o - | FileCheck %s
|
||||
|
||||
module matching
|
||||
interface host_and_device
|
||||
module procedure sub_host
|
||||
module procedure sub_device
|
||||
end interface
|
||||
|
||||
interface all
|
||||
module procedure sub_host
|
||||
module procedure sub_device
|
||||
module procedure sub_managed
|
||||
module procedure sub_unified
|
||||
end interface
|
||||
|
||||
interface all_without_unified
|
||||
module procedure sub_host
|
||||
module procedure sub_device
|
||||
module procedure sub_managed
|
||||
end interface
|
||||
|
||||
contains
|
||||
subroutine sub_host(a)
|
||||
integer :: a(:)
|
||||
end
|
||||
|
||||
subroutine sub_device(a)
|
||||
integer, device :: a(:)
|
||||
end
|
||||
|
||||
subroutine sub_managed(a)
|
||||
integer, managed :: a(:)
|
||||
end
|
||||
|
||||
subroutine sub_unified(a)
|
||||
integer, unified :: a(:)
|
||||
end
|
||||
end module
|
||||
|
||||
! Driver for the generic-resolution test: passes a host array (no CUDA data
! attribute) to each generic interface of module `matching`. The RUN line of
! this file compiles with -gpu=unified.
program m
|
||||
use matching
|
||||
|
||||
integer, allocatable :: actual_host(:)
|
||||
|
||||
allocate(actual_host(10))
|
||||
|
||||
call host_and_device(actual_host) ! Should resolve to sub_device
|
||||
call all(actual_host) ! Should resolve to sub_unified
|
||||
call all_without_unified(actual_host) ! Should resolve to sub_managed
|
||||
end
|
||||
|
||||
! CHECK: fir.call @_QMmatchingPsub_device
|
||||
! CHECK: fir.call @_QMmatchingPsub_unified
|
||||
! CHECK: fir.call @_QMmatchingPsub_managed
|
||||
|
||||
55
flang/test/Semantics/cuf15.cuf
Normal file
55
flang/test/Semantics/cuf15.cuf
Normal file
@@ -0,0 +1,55 @@
|
||||
! RUN: bbc -emit-hlfir -fcuda -gpu=managed %s -o - | FileCheck %s
|
||||
|
||||
module matching
|
||||
interface host_and_device
|
||||
module procedure sub_host
|
||||
module procedure sub_device
|
||||
end interface
|
||||
|
||||
interface all
|
||||
module procedure sub_host
|
||||
module procedure sub_device
|
||||
module procedure sub_managed
|
||||
module procedure sub_unified
|
||||
end interface
|
||||
|
||||
interface all_without_managed
|
||||
module procedure sub_host
|
||||
module procedure sub_device
|
||||
module procedure sub_unified
|
||||
end interface
|
||||
|
||||
contains
|
||||
subroutine sub_host(a)
|
||||
integer :: a(:)
|
||||
end
|
||||
|
||||
subroutine sub_device(a)
|
||||
integer, device :: a(:)
|
||||
end
|
||||
|
||||
subroutine sub_managed(a)
|
||||
integer, managed :: a(:)
|
||||
end
|
||||
|
||||
subroutine sub_unified(a)
|
||||
integer, unified :: a(:)
|
||||
end
|
||||
end module
|
||||
|
||||
! Driver for the generic-resolution test: passes a host array (no CUDA data
! attribute) to each generic interface of module `matching`. The RUN line of
! this file compiles with -gpu=managed, so the expected specifics below match
! the FileCheck directives at the end of the file (device, managed, unified).
program m
|
||||
use matching
|
||||
|
||||
integer, allocatable :: actual_host(:)
|
||||
|
||||
allocate(actual_host(10))
|
||||
|
||||
call host_and_device(actual_host) ! Should resolve to sub_device
|
||||
call all(actual_host) ! Should resolve to sub_managed
|
||||
call all_without_managed(actual_host) ! Should resolve to sub_unified
|
||||
end
|
||||
|
||||
! CHECK: fir.call @_QMmatchingPsub_device
|
||||
! CHECK: fir.call @_QMmatchingPsub_managed
|
||||
! CHECK: fir.call @_QMmatchingPsub_unified
|
||||
|
||||
@@ -204,6 +204,10 @@ static llvm::cl::opt<bool> enableCUDA("fcuda",
|
||||
llvm::cl::desc("enable CUDA Fortran"),
|
||||
llvm::cl::init(false));
|
||||
|
||||
static llvm::cl::opt<std::string>
|
||||
enableGPUMode("gpu", llvm::cl::desc("Enable GPU Mode managed|unified"),
|
||||
llvm::cl::init(""));
|
||||
|
||||
static llvm::cl::opt<bool> fixedForm("ffixed-form",
|
||||
llvm::cl::desc("enable fixed form"),
|
||||
llvm::cl::init(false));
|
||||
@@ -495,6 +499,12 @@ int main(int argc, char **argv) {
|
||||
options.features.Enable(Fortran::common::LanguageFeature::CUDA);
|
||||
}
|
||||
|
||||
if (enableGPUMode == "managed") {
|
||||
options.features.Enable(Fortran::common::LanguageFeature::CudaManaged);
|
||||
} else if (enableGPUMode == "unified") {
|
||||
options.features.Enable(Fortran::common::LanguageFeature::CudaUnified);
|
||||
}
|
||||
|
||||
if (fixedForm) {
|
||||
options.isFixedForm = fixedForm;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user