mirror of
https://github.com/intel/llvm.git
synced 2026-01-23 07:58:23 +08:00
X86: add F16C support in Clang
Support the following intrinsics: _mm_cvtph_ps, _mm256_cvtph_ps, _mm_cvtps_ph, _mm256_cvtps_ph. rdar://12407875. llvm-svn: 165685
This commit is contained in:
@@ -613,6 +613,12 @@ BUILTIN(__builtin_ia32_gatherd_d256, "V8iV8iV8iC*V8iV8iIc", "")
|
||||
BUILTIN(__builtin_ia32_gatherq_d, "V4iV4iV4iC*V2LLiV4iIc", "")
|
||||
BUILTIN(__builtin_ia32_gatherq_d256, "V4iV4iV4iC*V4LLiV4iIc", "")
|
||||
|
||||
// F16C
|
||||
// 128-bit vcvtps2ph builtin, used by the _mm_cvtps_ph macro. Type string:
// returns V8s (vector of 8 shorts); takes V4f (vector of 4 floats) and Ii (an
// int that must be an integer constant expression).
BUILTIN(__builtin_ia32_vcvtps2ph, "V8sV4fIi", "")
|
||||
// 256-bit vcvtps2ph builtin, used by the _mm256_cvtps_ph macro. Type string:
// returns V8s (vector of 8 shorts); takes V8f (vector of 8 floats) and Ii (an
// int that must be an integer constant expression).
BUILTIN(__builtin_ia32_vcvtps2ph256, "V8sV8fIi", "")
|
||||
// 128-bit vcvtph2ps builtin, used by _mm_cvtph_ps. Type string: returns V4f
// (vector of 4 floats); takes V8s (vector of 8 shorts).
BUILTIN(__builtin_ia32_vcvtph2ps, "V4fV8s", "")
|
||||
// 256-bit vcvtph2ps builtin, used by _mm256_cvtph_ps. Type string: returns V8f
// (vector of 8 floats); takes V8s (vector of 8 shorts).
BUILTIN(__builtin_ia32_vcvtph2ps256, "V8fV8s", "")
|
||||
|
||||
// RDRAND
|
||||
BUILTIN(__builtin_ia32_rdrand16_step, "UiUs*", "")
|
||||
BUILTIN(__builtin_ia32_rdrand32_step, "UiUi*", "")
|
||||
|
||||
@@ -823,6 +823,7 @@ def mno_popcnt : Flag<"-mno-popcnt">, Group<m_x86_Features_Group>;
|
||||
def mno_fma4 : Flag<"-mno-fma4">, Group<m_x86_Features_Group>;
|
||||
def mno_fma : Flag<"-mno-fma">, Group<m_x86_Features_Group>;
|
||||
def mno_xop : Flag<"-mno-xop">, Group<m_x86_Features_Group>;
|
||||
def mno_f16c : Flag<"-mno-f16c">, Group<m_x86_Features_Group>;
|
||||
|
||||
def mno_thumb : Flag<"-mno-thumb">, Group<m_Group>;
|
||||
def marm : Flag<"-marm">, Alias<mno_thumb>;
|
||||
@@ -863,6 +864,7 @@ def mpopcnt : Flag<"-mpopcnt">, Group<m_x86_Features_Group>;
|
||||
def mfma4 : Flag<"-mfma4">, Group<m_x86_Features_Group>;
|
||||
def mfma : Flag<"-mfma">, Group<m_x86_Features_Group>;
|
||||
def mxop : Flag<"-mxop">, Group<m_x86_Features_Group>;
|
||||
def mf16c : Flag<"-mf16c">, Group<m_x86_Features_Group>;
|
||||
def mips16 : Flag<"-mips16">, Group<m_Group>;
|
||||
def mno_mips16 : Flag<"-mno-mips16">, Group<m_Group>;
|
||||
def mdsp : Flag<"-mdsp">, Group<m_Group>;
|
||||
|
||||
@@ -1360,6 +1360,7 @@ class X86TargetInfo : public TargetInfo {
|
||||
bool HasFMA4;
|
||||
bool HasFMA;
|
||||
bool HasXOP;
|
||||
bool HasF16C;
|
||||
|
||||
/// \brief Enumeration of all of the X86 CPUs supported by Clang.
|
||||
///
|
||||
@@ -1506,7 +1507,8 @@ public:
|
||||
: TargetInfo(triple), SSELevel(NoSSE), MMX3DNowLevel(NoMMX3DNow),
|
||||
HasAES(false), HasPCLMUL(false), HasLZCNT(false), HasRDRND(false),
|
||||
HasBMI(false), HasBMI2(false), HasPOPCNT(false), HasSSE4a(false),
|
||||
HasFMA4(false), HasFMA(false), HasXOP(false), CPU(CK_Generic) {
|
||||
HasFMA4(false), HasFMA(false), HasXOP(false), HasF16C(false),
|
||||
CPU(CK_Generic) {
|
||||
BigEndian = false;
|
||||
LongDoubleFormat = &llvm::APFloat::x87DoubleExtended;
|
||||
}
|
||||
@@ -1712,6 +1714,7 @@ void X86TargetInfo::getDefaultFeatures(llvm::StringMap<bool> &Features) const {
|
||||
Features["fma4"] = false;
|
||||
Features["fma"] = false;
|
||||
Features["xop"] = false;
|
||||
Features["f16c"] = false;
|
||||
|
||||
// FIXME: This *really* should not be here.
|
||||
|
||||
@@ -1922,6 +1925,8 @@ bool X86TargetInfo::setFeatureEnabled(llvm::StringMap<bool> &Features,
|
||||
Features["bmi2"] = true;
|
||||
else if (Name == "popcnt")
|
||||
Features["popcnt"] = true;
|
||||
else if (Name == "f16c")
|
||||
Features["f16c"] = true;
|
||||
} else {
|
||||
if (Name == "mmx")
|
||||
Features["mmx"] = Features["3dnow"] = Features["3dnowa"] = false;
|
||||
@@ -1982,6 +1987,8 @@ bool X86TargetInfo::setFeatureEnabled(llvm::StringMap<bool> &Features,
|
||||
Features["fma4"] = Features["xop"] = false;
|
||||
else if (Name == "xop")
|
||||
Features["xop"] = false;
|
||||
else if (Name == "f16c")
|
||||
Features["f16c"] = false;
|
||||
}
|
||||
|
||||
return true;
|
||||
@@ -2053,6 +2060,11 @@ void X86TargetInfo::HandleTargetFeatures(std::vector<std::string> &Features) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (Feature == "f16c") {
|
||||
HasF16C = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
assert(Features[i][0] == '+' && "Invalid target feature!");
|
||||
X86SSEEnum Level = llvm::StringSwitch<X86SSEEnum>(Feature)
|
||||
.Case("avx2", AVX2)
|
||||
@@ -2261,6 +2273,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
|
||||
if (HasXOP)
|
||||
Builder.defineMacro("__XOP__");
|
||||
|
||||
if (HasF16C)
|
||||
Builder.defineMacro("__F16C__");
|
||||
|
||||
// Each case falls through to the previous one here.
|
||||
switch (SSELevel) {
|
||||
case AVX2:
|
||||
@@ -2344,6 +2359,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
|
||||
.Case("x86_32", PointerWidth == 32)
|
||||
.Case("x86_64", PointerWidth == 64)
|
||||
.Case("xop", HasXOP)
|
||||
.Case("f16c", HasF16C)
|
||||
.Default(false);
|
||||
}
|
||||
|
||||
|
||||
58
clang/lib/Headers/f16cintrin.h
Normal file
58
clang/lib/Headers/f16cintrin.h
Normal file
@@ -0,0 +1,58 @@
|
||||
/*===---- f16cintrin.h - F16C intrinsics ---------------------------------===
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
|
||||
#error "Never use <f16cintrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __F16C__
|
||||
# error "F16C instruction is not enabled"
|
||||
#endif /* __F16C__ */
|
||||
|
||||
#ifndef __F16CINTRIN_H
|
||||
#define __F16CINTRIN_H
|
||||
|
||||
/* Internal 32-byte vector of floats; the operand type that _mm256_cvtps_ph
 * casts to before calling __builtin_ia32_vcvtps2ph256. */
typedef float __v8sf __attribute__ ((__vector_size__ (32)));
|
||||
/* 32-byte vector of floats: the argument type of _mm256_cvtps_ph and the
 * return type of _mm256_cvtph_ps. */
typedef float __m256 __attribute__ ((__vector_size__ (32)));
|
||||
|
||||
/* Converts the __m128 value a with __builtin_ia32_vcvtps2ph and yields the
 * result as an __m128i.
 *
 * a is evaluated exactly once (it is copied into a local first). imm is
 * forwarded unchanged to the builtin; the builtin's declared signature marks
 * that operand as an integer constant, which is presumably why this is a
 * macro rather than an inline function. */
#define _mm_cvtps_ph(a, imm) \
  __extension__ ({ \
    __m128 __a = (a); \
    (__m128i)__builtin_ia32_vcvtps2ph((__v4sf)__a, (imm)); \
  })
|
||||
|
||||
/* Converts the __m256 value a with __builtin_ia32_vcvtps2ph256 and yields the
 * result as an __m128i.
 *
 * a is evaluated exactly once (it is copied into a local first). imm is
 * forwarded unchanged to the builtin; the builtin's declared signature marks
 * that operand as an integer constant, which is presumably why this is a
 * macro rather than an inline function. */
#define _mm256_cvtps_ph(a, imm) \
  __extension__ ({ \
    __m256 __a = (a); \
    (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)__a, (imm)); \
  })
|
||||
|
||||
static __inline __m128 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_cvtph_ps(__m128i a)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)a);
|
||||
}
|
||||
|
||||
static __inline __m256 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_cvtph_ps(__m128i a)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)a);
|
||||
}
|
||||
|
||||
#endif /* __F16CINTRIN_H */
|
||||
@@ -58,6 +58,10 @@
|
||||
#include <xopintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef __F16C__
|
||||
#include <f16cintrin.h>
|
||||
#endif
|
||||
|
||||
// FIXME: LWP
|
||||
|
||||
#endif /* __X86INTRIN_H */
|
||||
|
||||
26
clang/test/CodeGen/f16c-builtins.c
Normal file
26
clang/test/CodeGen/f16c-builtins.c
Normal file
@@ -0,0 +1,26 @@
|
||||
// RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +f16c -emit-llvm -o - | FileCheck %s
|
||||
|
||||
// Don't include mm_malloc.h, it's system specific.
|
||||
#define __MM_MALLOC_H
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
// Exercises _mm_cvtph_ps; the directive inside the body expects the 128-bit
// vcvtph2ps intrinsic to appear in the emitted IR.
__m128 test_mm_cvtph_ps(__m128i a) {
  // CHECK: @llvm.x86.vcvtph2ps.128
  __m128 widened = _mm_cvtph_ps(a);
  return widened;
}
|
||||
|
||||
// Exercises _mm256_cvtph_ps; the directive inside the body expects the 256-bit
// vcvtph2ps intrinsic to appear in the emitted IR.
__m256 test_mm256_cvtph_ps(__m128i a) {
  // CHECK: @llvm.x86.vcvtph2ps.256
  __m256 widened = _mm256_cvtph_ps(a);
  return widened;
}
|
||||
|
||||
// Exercises the _mm_cvtps_ph macro with an immediate of 0; the directive
// inside the body expects the 128-bit vcvtps2ph intrinsic in the emitted IR.
__m128i test_mm_cvtps_ph(__m128 a) {
  // CHECK: @llvm.x86.vcvtps2ph.128
  __m128i narrowed = _mm_cvtps_ph(a, 0);
  return narrowed;
}
|
||||
|
||||
// Exercises the _mm256_cvtps_ph macro with an immediate of 0; the directive
// inside the body expects the 256-bit vcvtps2ph intrinsic in the emitted IR.
__m128i test_mm256_cvtps_ph(__m256 a) {
  // CHECK: @llvm.x86.vcvtps2ph.256
  __m128i narrowed = _mm256_cvtps_ph(a, 0);
  return narrowed;
}
|
||||
Reference in New Issue
Block a user