mirror of
https://github.com/intel/llvm.git
synced 2026-01-23 07:58:23 +08:00
Clang currently does not allow using CTAD in CUDA/HIP device functions
since deduction guides are treated as host-only. This patch fixes that
by treating deduction guides as host+device. The rationale is that
deduction guides do not actually generate code in IR, and there is an
existing check for device/host correctness for constructors.
The patch also suppresses duplicate implicit deduction guides from
host/device constructors with identical signatures and constraints to
prevent ambiguity.
For CUDA/HIP, deduction guides are now always implicitly enabled for
both host and device, which matches nvcc's effective behavior. Unlike
nvcc, which silently ignores explicit CUDA/HIP target attributes on
deduction guides, Clang diagnoses device- and host-only attributes as
errors to keep the syntax clean and avoid confusion.
This ensures CTAD works correctly in CUDA/HIP for constructors with
different target attributes and provides clearer diagnostics when users
attempt to annotate deduction guides with CUDA/HIP target attributes.
Example:
```
#include <tuple>
__host__ __device__ void func()
{
std::tuple<int, int> t = std::tuple(1, 1);
}
```
This compiles with nvcc but fails with clang for CUDA/HIP without this
fix.
Reference: https://godbolt.org/z/WhT1GrhWE
Fixes: https://github.com/ROCm/ROCm/issues/5646
Fixes: https://github.com/llvm/llvm-project/issues/146646
This commit is contained in:
@@ -412,6 +412,57 @@ Example Usage
|
||||
__host__ __device__ int Four(void) __attribute__((weak, alias("_Z6__Fourv")));
|
||||
__host__ __device__ float Four(float f) __attribute__((weak, alias("_Z6__Fourf")));
|
||||
|
||||
C++17 Class Template Argument Deduction (CTAD) Support
|
||||
======================================================
|
||||
|
||||
Clang supports C++17 Class Template Argument Deduction (CTAD) in both host and
|
||||
device code for HIP. This allows you to omit template arguments when creating
|
||||
class template instances, letting the compiler deduce them from constructor
|
||||
arguments.
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
#include <tuple>
|
||||
|
||||
__host__ __device__ void func() {
|
||||
std::tuple<int, int> t = std::tuple(1, 1);
|
||||
}
|
||||
|
||||
In the above example, ``std::tuple(1, 1)`` automatically deduces the type to be
|
||||
``std::tuple<int, int>``.
|
||||
|
||||
Deduction Guides
|
||||
----------------
|
||||
|
||||
User-defined deduction guides are also supported. Since deduction guides are not
|
||||
executable code and only participate in type deduction, they semantically behave
|
||||
as ``__host__ __device__``. This ensures they are available for deduction in both
|
||||
host and device contexts, and CTAD continues to respect any constraints on the
|
||||
corresponding constructors in the usual C++ way.
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
template <typename T>
|
||||
struct MyType {
|
||||
T value;
|
||||
__device__ MyType(T v) : value(v) {}
|
||||
};
|
||||
|
||||
MyType(float) -> MyType<double>;
|
||||
|
||||
__device__ void deviceFunc() {
|
||||
MyType m(1.0f); // Deduces MyType<double>
|
||||
}
|
||||
|
||||
.. note::
|
||||
|
||||
Explicit HIP target attributes such as ``__host__`` or ``__device__``
|
||||
are currently only permitted on deduction guides when both are present
|
||||
(``__host__ __device__``). This usage is deprecated and will be rejected
|
||||
in a future version of Clang; prefer omitting HIP target attributes on
|
||||
deduction guides entirely. Clang treats all deduction guides as if they
|
||||
were ``__host__ __device__``, so ``__host__``-only, ``__device__``-only,
|
||||
or ``__global__`` deduction guides are rejected as ill-formed.
|
||||
|
||||
Host and Device Attributes of Default Destructors
|
||||
===================================================
|
||||
|
||||
@@ -663,6 +663,23 @@ RISC-V Support
|
||||
CUDA/HIP Language Changes
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
- Clang now supports C++17 Class Template Argument Deduction (CTAD) in CUDA/HIP
|
||||
device code by treating deduction guides as if they were ``__host__ __device__``.
|
||||
|
||||
- Clang avoids ambiguous CTAD in CUDA/HIP by not synthesizing duplicate implicit
|
||||
deduction guides when ``__host__`` and ``__device__`` constructors differ only
|
||||
in CUDA target attributes (same signature and constraints).
|
||||
|
||||
- Clang diagnoses CUDA/HIP deduction guides that are annotated as host-only,
|
||||
device-only, or ``__global__`` as errors. Explicit ``__host__ __device__``
|
||||
deduction guides remain accepted for now but are deprecated and will be
|
||||
rejected in a future version of Clang; deduction guides do not participate
|
||||
in code generation and are treated as implicitly host+device.
|
||||
|
||||
- Clang preserves distinct implicit deduction guides for constructors that differ
|
||||
by constraints, so constraint-based CTAD works in CUDA/HIP device code as in
|
||||
standard C++.
|
||||
|
||||
CUDA Support
|
||||
^^^^^^^^^^^^
|
||||
|
||||
|
||||
@@ -2769,6 +2769,14 @@ def err_deduction_guide_name_not_class_template : Error<
|
||||
"cannot specify deduction guide for "
|
||||
"%select{<error>|function template|variable template|alias template|"
|
||||
"template template parameter|concept|dependent template name}0 %1">;
|
||||
def err_deduction_guide_target_attr : Error<
|
||||
"in CUDA/HIP, deduction guides may only be annotated with "
|
||||
"'__host__ __device__'; '__host__'-only, '__device__'-only, or "
|
||||
"'__global__' deduction guides are not allowed">;
|
||||
def warn_deduction_guide_target_attr_deprecated : Warning<
|
||||
"use of CUDA/HIP target attributes on deduction guides is deprecated; "
|
||||
"they will be rejected in a future version of Clang">,
|
||||
InGroup<DeprecatedAttributes>;
|
||||
def err_deduction_guide_wrong_scope : Error<
|
||||
"deduction guide must be declared in the same scope as template %q0">;
|
||||
def err_deduction_guide_defines_function : Error<
|
||||
|
||||
@@ -215,6 +215,12 @@ CUDAFunctionTarget SemaCUDA::IdentifyTarget(const FunctionDecl *D,
|
||||
if (D == nullptr)
|
||||
return CurCUDATargetCtx.Target;
|
||||
|
||||
// C++ deduction guides are never codegen'ed and only participate in template
|
||||
// argument deduction. Treat them as if they were always host+device so that
|
||||
// CUDA/HIP target checking never rejects their use based solely on target.
|
||||
if (isa<CXXDeductionGuideDecl>(D))
|
||||
return CUDAFunctionTarget::HostDevice;
|
||||
|
||||
if (D->hasAttr<CUDAInvalidTargetAttr>())
|
||||
return CUDAFunctionTarget::InvalidTarget;
|
||||
|
||||
@@ -986,6 +992,12 @@ bool SemaCUDA::CheckCall(SourceLocation Loc, FunctionDecl *Callee) {
|
||||
if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
|
||||
return true;
|
||||
|
||||
// C++ deduction guides participate in overload resolution but are not
|
||||
// callable functions and are never codegen'ed. Treat them as always
|
||||
// allowed for CUDA/HIP compatibility checking.
|
||||
if (isa<CXXDeductionGuideDecl>(Callee))
|
||||
return true;
|
||||
|
||||
// FIXME: Is bailing out early correct here? Should we instead assume that
|
||||
// the caller is a global initializer?
|
||||
FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
|
||||
|
||||
@@ -8056,6 +8056,30 @@ void Sema::ProcessDeclAttributeList(
|
||||
}
|
||||
}
|
||||
|
||||
// CUDA/HIP: restrict explicit CUDA target attributes on deduction guides.
|
||||
//
|
||||
// Deduction guides are not callable functions and never participate in
|
||||
// codegen; they are always treated as host+device for CUDA/HIP semantic
|
||||
// checks. We therefore allow either no CUDA target attributes or an explicit
|
||||
// '__host__ __device__' annotation, but reject guides that are host-only,
|
||||
// device-only, or marked '__global__'. The use of explicit CUDA/HIP target
|
||||
// attributes on deduction guides is deprecated and will be rejected in a
|
||||
// future Clang version.
|
||||
if (getLangOpts().CUDA)
|
||||
if (auto *Guide = dyn_cast<CXXDeductionGuideDecl>(D)) {
|
||||
bool HasHost = Guide->hasAttr<CUDAHostAttr>();
|
||||
bool HasDevice = Guide->hasAttr<CUDADeviceAttr>();
|
||||
bool HasGlobal = Guide->hasAttr<CUDAGlobalAttr>();
|
||||
|
||||
if (HasGlobal || HasHost != HasDevice) {
|
||||
Diag(Guide->getLocation(), diag::err_deduction_guide_target_attr);
|
||||
Guide->setInvalidDecl();
|
||||
} else if (HasHost && HasDevice) {
|
||||
Diag(Guide->getLocation(),
|
||||
diag::warn_deduction_guide_target_attr_deprecated);
|
||||
}
|
||||
}
|
||||
|
||||
// Do not permit 'constructor' or 'destructor' attributes on __device__ code.
|
||||
if (getLangOpts().CUDAIsDevice && D->hasAttr<CUDADeviceAttr>() &&
|
||||
(D->hasAttr<ConstructorAttr>() || D->hasAttr<DestructorAttr>()) &&
|
||||
|
||||
@@ -54,6 +54,26 @@ using namespace clang;
|
||||
using namespace sema;
|
||||
|
||||
namespace {
|
||||
|
||||
/// Return true if two associated-constraint sets are semantically equal.
|
||||
static bool HaveSameAssociatedConstraints(
|
||||
Sema &SemaRef, const NamedDecl *Old, ArrayRef<AssociatedConstraint> OldACs,
|
||||
const NamedDecl *New, ArrayRef<AssociatedConstraint> NewACs) {
|
||||
if (OldACs.size() != NewACs.size())
|
||||
return false;
|
||||
if (OldACs.empty())
|
||||
return true;
|
||||
|
||||
// General case: pairwise compare each associated constraint expression.
|
||||
Sema::TemplateCompareNewDeclInfo NewInfo(New);
|
||||
for (size_t I = 0, E = OldACs.size(); I != E; ++I)
|
||||
if (!SemaRef.AreConstraintExpressionsEqual(
|
||||
Old, OldACs[I].ConstraintExpr, NewInfo, NewACs[I].ConstraintExpr))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Tree transform to "extract" a transformed type from a class template's
|
||||
/// constructor to a deduction guide.
|
||||
class ExtractTypeForDeductionGuide
|
||||
@@ -218,9 +238,51 @@ buildDeductionGuide(Sema &SemaRef, TemplateDecl *OriginalTemplate,
|
||||
TInfo->getTypeLoc().castAs<FunctionProtoTypeLoc>().getParams();
|
||||
|
||||
// Build the implicit deduction guide template.
|
||||
QualType GuideType = TInfo->getType();
|
||||
|
||||
// In CUDA/HIP mode, avoid duplicate implicit guides that differ only in CUDA
|
||||
// target attributes (same constructor signature and constraints).
|
||||
if (IsImplicit && Ctor && SemaRef.getLangOpts().CUDA) {
|
||||
SmallVector<AssociatedConstraint, 4> NewACs;
|
||||
Ctor->getAssociatedConstraints(NewACs);
|
||||
|
||||
for (NamedDecl *Existing : DC->lookup(DeductionGuideName)) {
|
||||
auto *ExistingFT = dyn_cast<FunctionTemplateDecl>(Existing);
|
||||
auto *ExistingGuide =
|
||||
ExistingFT
|
||||
? dyn_cast<CXXDeductionGuideDecl>(ExistingFT->getTemplatedDecl())
|
||||
: dyn_cast<CXXDeductionGuideDecl>(Existing);
|
||||
if (!ExistingGuide)
|
||||
continue;
|
||||
|
||||
// Only consider guides that were also synthesized from a constructor.
|
||||
auto *ExistingCtor = ExistingGuide->getCorrespondingConstructor();
|
||||
if (!ExistingCtor)
|
||||
continue;
|
||||
|
||||
// If the underlying constructors are overloads (different signatures once
|
||||
// CUDA attributes are ignored), they should each get their own guides.
|
||||
if (SemaRef.IsOverload(Ctor, ExistingCtor,
|
||||
/*UseMemberUsingDeclRules=*/false,
|
||||
/*ConsiderCudaAttrs=*/false))
|
||||
continue;
|
||||
|
||||
// At this point, the constructors have the same signature ignoring CUDA
|
||||
// attributes. Decide whether their associated constraints are also the
|
||||
// same; only in that case do we treat one guide as a duplicate of the
|
||||
// other.
|
||||
SmallVector<AssociatedConstraint, 4> ExistingACs;
|
||||
ExistingCtor->getAssociatedConstraints(ExistingACs);
|
||||
|
||||
if (HaveSameAssociatedConstraints(SemaRef, ExistingCtor, ExistingACs,
|
||||
Ctor, NewACs))
|
||||
return Existing;
|
||||
}
|
||||
}
|
||||
|
||||
auto *Guide = CXXDeductionGuideDecl::Create(
|
||||
SemaRef.Context, DC, LocStart, ES, Name, TInfo->getType(), TInfo, LocEnd,
|
||||
Ctor, DeductionCandidate::Normal, FunctionTrailingRC);
|
||||
SemaRef.Context, DC, LocStart, ES, Name, GuideType, TInfo, LocEnd, Ctor,
|
||||
DeductionCandidate::Normal, FunctionTrailingRC);
|
||||
Guide->setImplicit(IsImplicit);
|
||||
Guide->setParams(Params);
|
||||
|
||||
|
||||
32
clang/test/SemaCUDA/deduction-guide-attrs.cu
Normal file
32
clang/test/SemaCUDA/deduction-guide-attrs.cu
Normal file
@@ -0,0 +1,32 @@
|
||||
// RUN: %clang_cc1 -std=c++17 -triple nvptx64-nvidia-cuda -fsyntax-only \
|
||||
// RUN: -fcuda-is-device -verify %s
|
||||
// RUN: %clang_cc1 -std=c++17 -triple nvptx64-nvidia-cuda -fsyntax-only \
|
||||
// RUN: -verify %s
|
||||
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
template <typename T>
|
||||
struct S {
|
||||
__host__ __device__ S(T);
|
||||
};
|
||||
|
||||
// A host+device deduction guide is allowed and participates in CTAD, but its
|
||||
// explicit target attributes are deprecated and will be rejected in a future
|
||||
// Clang version.
|
||||
template <typename T>
|
||||
__host__ __device__ S(T) -> S<T>; // expected-warning {{use of CUDA/HIP target attributes on deduction guides is deprecated; they will be rejected in a future version of Clang}}
|
||||
|
||||
__host__ __device__ void use_hd_guide() {
|
||||
S s(42); // uses the explicit __host__ __device__ deduction guide above
|
||||
}
|
||||
|
||||
// CUDA/HIP target attributes on deduction guides are rejected when they make
|
||||
// the guide host-only, device-only, or a kernel.
|
||||
template <typename U>
|
||||
__host__ S(U) -> S<U>; // expected-error {{in CUDA/HIP, deduction guides may only be annotated with '__host__ __device__'; '__host__'-only, '__device__'-only, or '__global__' deduction guides are not allowed}}
|
||||
|
||||
template <typename V>
|
||||
__device__ S(V) -> S<V>; // expected-error {{in CUDA/HIP, deduction guides may only be annotated with '__host__ __device__'; '__host__'-only, '__device__'-only, or '__global__' deduction guides are not allowed}}
|
||||
|
||||
template <typename W>
|
||||
__global__ S(W) -> S<W>; // expected-error {{in CUDA/HIP, deduction guides may only be annotated with '__host__ __device__'; '__host__'-only, '__device__'-only, or '__global__' deduction guides are not allowed}}
|
||||
111
clang/test/SemaCUDA/deduction-guide-overload.cu
Normal file
111
clang/test/SemaCUDA/deduction-guide-overload.cu
Normal file
@@ -0,0 +1,111 @@
|
||||
// RUN: %clang_cc1 -std=c++20 -triple nvptx64-nvidia-cuda -fsyntax-only \
|
||||
// RUN: -fcuda-is-device -verify %s
|
||||
// RUN: %clang_cc1 -std=c++20 -triple nvptx64-nvidia-cuda -fsyntax-only \
|
||||
// RUN: -verify %s
|
||||
// expected-no-diagnostics
|
||||
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
// This test exercises class template argument deduction (CTAD) when there are
|
||||
// multiple constructors that differ only by constraints. In CUDA/HIP mode, the
|
||||
// implementation must *not* collapse implicit deduction guides that have the
|
||||
// same function type but different constraints; otherwise, CTAD can lose viable
|
||||
// candidates.
|
||||
|
||||
template <typename T>
|
||||
concept Signed = __is_signed(T);
|
||||
|
||||
template <typename T>
|
||||
concept NotSigned = !Signed<T>;
|
||||
|
||||
// 1) Constrained ctors with different constraints: ensure we keep
|
||||
// deduction guides that differ only by constraints.
|
||||
|
||||
template <typename T>
|
||||
struct OverloadCTAD {
|
||||
__host__ __device__ OverloadCTAD(T) requires Signed<T>;
|
||||
__host__ __device__ OverloadCTAD(T) requires NotSigned<T>;
|
||||
};
|
||||
|
||||
__host__ __device__ void use_overload_ctad_hd() {
|
||||
OverloadCTAD a(1); // T = int, uses Signed-constrained guide
|
||||
OverloadCTAD b(1u); // T = unsigned int, uses NotSigned-constrained guide
|
||||
}
|
||||
|
||||
__device__ void use_overload_ctad_dev() {
|
||||
OverloadCTAD c(1);
|
||||
OverloadCTAD d(1u);
|
||||
}
|
||||
|
||||
__global__ void use_overload_ctad_global() {
|
||||
OverloadCTAD e(1);
|
||||
OverloadCTAD f(1u);
|
||||
}
|
||||
|
||||
// 2) Add a pair of constructors that have the same signature and the same
|
||||
// constraint but differ only by CUDA target attributes. This exercises the
|
||||
// case where two implicit deduction guides would be identical except for
|
||||
// their originating constructor's CUDA target.
|
||||
|
||||
template <typename T>
|
||||
struct OverloadCTADTargets {
|
||||
__host__ OverloadCTADTargets(T) requires Signed<T>;
|
||||
__device__ OverloadCTADTargets(T) requires Signed<T>;
|
||||
};
|
||||
|
||||
__host__ void use_overload_ctad_targets_host() {
|
||||
OverloadCTADTargets g(1);
|
||||
}
|
||||
|
||||
__device__ void use_overload_ctad_targets_device() {
|
||||
OverloadCTADTargets h(1);
|
||||
}
|
||||
|
||||
// 3) Unconstrained host/device duplicates: identical signatures and no
|
||||
// constraints, differing only by CUDA target attributes.
|
||||
|
||||
template <typename T>
|
||||
struct UnconstrainedHD {
|
||||
__host__ UnconstrainedHD(T);
|
||||
__device__ UnconstrainedHD(T);
|
||||
};
|
||||
|
||||
__host__ __device__ void use_unconstrained_hd_hd() {
|
||||
UnconstrainedHD u1(1);
|
||||
}
|
||||
|
||||
__device__ void use_unconstrained_hd_dev() {
|
||||
UnconstrainedHD u2(1);
|
||||
}
|
||||
|
||||
__global__ void use_unconstrained_hd_global() {
|
||||
UnconstrainedHD u3(1);
|
||||
}
|
||||
|
||||
// 4) Constrained vs unconstrained ctors with the same signature: guides
|
||||
// must not be collapsed away when constraints differ.
|
||||
|
||||
template <typename T>
|
||||
concept IsInt = __is_same(T, int);
|
||||
|
||||
template <typename T>
|
||||
struct ConstrainedVsUnconstrained {
|
||||
__host__ __device__ ConstrainedVsUnconstrained(T);
|
||||
__host__ __device__ ConstrainedVsUnconstrained(T) requires IsInt<T>;
|
||||
};
|
||||
|
||||
__host__ __device__ void use_constrained_vs_unconstrained_hd() {
|
||||
ConstrainedVsUnconstrained a(1); // T = int, constrained guide viable
|
||||
ConstrainedVsUnconstrained b(1u); // T = unsigned, only unconstrained guide
|
||||
}
|
||||
|
||||
__device__ void use_constrained_vs_unconstrained_dev() {
|
||||
ConstrainedVsUnconstrained c(1);
|
||||
ConstrainedVsUnconstrained d(1u);
|
||||
}
|
||||
|
||||
__global__ void use_constrained_vs_unconstrained_global() {
|
||||
ConstrainedVsUnconstrained e(1);
|
||||
ConstrainedVsUnconstrained f(1u);
|
||||
}
|
||||
|
||||
47
clang/test/SemaCUDA/deduction-guide.cu
Normal file
47
clang/test/SemaCUDA/deduction-guide.cu
Normal file
@@ -0,0 +1,47 @@
|
||||
// RUN: %clang_cc1 -std=c++17 -triple nvptx64-nvidia-cuda -fsyntax-only \
|
||||
// RUN: -fcuda-is-device -verify=expected,dev %s
|
||||
// RUN: %clang_cc1 -std=c++17 -triple nvptx64-nvidia-cuda -fsyntax-only \
|
||||
// RUN: -verify %s
|
||||
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
template <class T>
|
||||
struct CTADType { // expected-note 2{{candidate constructor (the implicit copy constructor) not viable: requires 1 argument, but 3 were provided}}
|
||||
// expected-note@-1 2{{candidate constructor (the implicit move constructor) not viable: requires 1 argument, but 3 were provided}}
|
||||
T first;
|
||||
T second;
|
||||
|
||||
CTADType(T x) : first(x), second(x) {} // expected-note 2{{candidate constructor not viable: requires single argument 'x', but 3 arguments were provided}}
|
||||
__device__ CTADType(T x) : first(x), second(x) {} // expected-note 2{{candidate constructor not viable: requires single argument 'x', but 3 arguments were provided}}
|
||||
__host__ __device__ CTADType(T x, T y) : first(x), second(y) {} // expected-note 2{{candidate constructor not viable: requires 2 arguments, but 3 were provided}}
|
||||
CTADType(T x, T y, T z) : first(x), second(z) {} // dev-note {{'CTADType' declared here}}
|
||||
// expected-note@-1 {{candidate constructor not viable: call to __host__ function from __device__ function}}
|
||||
// expected-note@-2 {{candidate constructor not viable: call to __host__ function from __global__ function}}
|
||||
};
|
||||
|
||||
template <class T>
|
||||
CTADType(T, T) -> CTADType<T>;
|
||||
|
||||
__host__ __device__ void use_ctad_host_device() {
|
||||
CTADType ctad_from_two_args(1, 1);
|
||||
CTADType ctad_from_one_arg(1);
|
||||
CTADType ctad_from_three_args(1, 2, 3); // dev-error {{reference to __host__ function 'CTADType' in __host__ __device__ function}}
|
||||
}
|
||||
|
||||
__host__ void use_ctad_host() {
|
||||
CTADType ctad_from_two_args(1, 1);
|
||||
CTADType ctad_from_one_arg(1);
|
||||
CTADType ctad_from_three_args(1, 2, 3);
|
||||
}
|
||||
|
||||
__device__ void use_ctad_device() {
|
||||
CTADType ctad_from_two_args(1, 1);
|
||||
CTADType ctad_from_one_arg(1);
|
||||
CTADType<int> ctad_from_three_args(1, 2, 3); // expected-error {{no matching constructor for initialization of 'CTADType<int>'}}
|
||||
}
|
||||
|
||||
__global__ void use_ctad_global() {
|
||||
CTADType ctad_from_two_args(1, 1);
|
||||
CTADType ctad_from_one_arg(1);
|
||||
CTADType<int> ctad_from_three_args(1, 2, 3); // expected-error {{no matching constructor for initialization of 'CTADType<int>'}}
|
||||
}
|
||||
Reference in New Issue
Block a user