mirror of
https://github.com/intel/llvm.git
synced 2026-01-27 06:06:34 +08:00
[X86] Add CRC32 feature.
Commit d8faf03807 implemented general-regs-only for X86 by disabling all features
with vector instructions. As a side effect, the CRC32 instruction from the SSE4.2 ISA,
which uses only GPRs, also became unavailable. This patch adds a separate CRC32 feature
for this instruction so that it can be used together with general-regs-only.
Reviewed By: pengfei
Differential Revision: https://reviews.llvm.org/D105462
This commit is contained in:
@@ -3590,6 +3590,8 @@ X86
|
||||
|
||||
.. option:: -mclzero, -mno-clzero
|
||||
|
||||
.. option:: -mcrc32, -mno-crc32
|
||||
|
||||
.. option:: -mcx16, -mno-cx16
|
||||
|
||||
.. option:: -menqcmd, -mno-enqcmd
|
||||
|
||||
@@ -421,9 +421,9 @@ TARGET_BUILTIN(__builtin_ia32_pcmpestrio128, "iV16ciV16ciIc","ncV:128:", "sse4.2
|
||||
TARGET_BUILTIN(__builtin_ia32_pcmpestris128, "iV16ciV16ciIc","ncV:128:", "sse4.2")
|
||||
TARGET_BUILTIN(__builtin_ia32_pcmpestriz128, "iV16ciV16ciIc","ncV:128:", "sse4.2")
|
||||
|
||||
TARGET_BUILTIN(__builtin_ia32_crc32qi, "UiUiUc", "nc", "sse4.2")
|
||||
TARGET_BUILTIN(__builtin_ia32_crc32hi, "UiUiUs", "nc", "sse4.2")
|
||||
TARGET_BUILTIN(__builtin_ia32_crc32si, "UiUiUi", "nc", "sse4.2")
|
||||
TARGET_BUILTIN(__builtin_ia32_crc32qi, "UiUiUc", "nc", "crc32")
|
||||
TARGET_BUILTIN(__builtin_ia32_crc32hi, "UiUiUs", "nc", "crc32")
|
||||
TARGET_BUILTIN(__builtin_ia32_crc32si, "UiUiUi", "nc", "crc32")
|
||||
|
||||
// SSE4a
|
||||
TARGET_BUILTIN(__builtin_ia32_extrqi, "V2OiV2OiIcIc", "ncV:128:", "sse4a")
|
||||
|
||||
@@ -44,7 +44,7 @@ TARGET_BUILTIN(__builtin_ia32_cvttsd2si64, "OiV2d", "ncV:128:", "sse2")
|
||||
TARGET_BUILTIN(__builtin_ia32_movnti64, "vOi*Oi", "n", "sse2")
|
||||
TARGET_BUILTIN(__builtin_ia32_vec_ext_v2di, "OiV2OiIi", "ncV:128:", "sse2")
|
||||
TARGET_BUILTIN(__builtin_ia32_vec_set_v2di, "V2OiV2OiOiIi", "ncV:128:", "sse4.1")
|
||||
TARGET_BUILTIN(__builtin_ia32_crc32di, "UOiUOiUOi", "nc", "sse4.2")
|
||||
TARGET_BUILTIN(__builtin_ia32_crc32di, "UOiUOiUOi", "nc", "crc32")
|
||||
TARGET_BUILTIN(__builtin_ia32_vec_ext_v4di, "OiV4OiIi", "ncV:256:", "avx")
|
||||
TARGET_BUILTIN(__builtin_ia32_vec_set_v4di, "V4OiV4OiOiIi", "ncV:256:", "avx")
|
||||
TARGET_BUILTIN(__builtin_ia32_rdfsbase32, "Ui", "n", "fsgsbase")
|
||||
|
||||
@@ -4206,6 +4206,8 @@ def mwbnoinvd : Flag<["-"], "mwbnoinvd">, Group<m_x86_Features_Group>;
|
||||
def mno_wbnoinvd : Flag<["-"], "mno-wbnoinvd">, Group<m_x86_Features_Group>;
|
||||
def mclzero : Flag<["-"], "mclzero">, Group<m_x86_Features_Group>;
|
||||
def mno_clzero : Flag<["-"], "mno-clzero">, Group<m_x86_Features_Group>;
|
||||
def mcrc32 : Flag<["-"], "mcrc32">, Group<m_x86_Features_Group>;
|
||||
def mno_crc32 : Flag<["-"], "mno-crc32">, Group<m_x86_Features_Group>;
|
||||
def mcx16 : Flag<["-"], "mcx16">, Group<m_x86_Features_Group>;
|
||||
def mno_cx16 : Flag<["-"], "mno-cx16">, Group<m_x86_Features_Group>;
|
||||
def menqcmd : Flag<["-"], "menqcmd">, Group<m_x86_Features_Group>;
|
||||
|
||||
@@ -155,6 +155,12 @@ bool X86TargetInfo::initFeatureMap(
|
||||
llvm::find(UpdatedFeaturesVec, "-xsave") == UpdatedFeaturesVec.end())
|
||||
Features["xsave"] = true;
|
||||
|
||||
// Enable CRC32 if SSE4.2 is enabled and CRC32 is not explicitly disabled.
|
||||
I = Features.find("sse4.2");
|
||||
if (I != Features.end() && I->getValue() &&
|
||||
llvm::find(UpdatedFeaturesVec, "-crc32") == UpdatedFeaturesVec.end())
|
||||
Features["crc32"] = true;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -330,6 +336,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
|
||||
HasTSXLDTRK = true;
|
||||
} else if (Feature == "+uintr") {
|
||||
HasUINTR = true;
|
||||
} else if (Feature == "+crc32") {
|
||||
HasCRC32 = true;
|
||||
}
|
||||
|
||||
X86SSEEnum Level = llvm::StringSwitch<X86SSEEnum>(Feature)
|
||||
@@ -758,6 +766,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
|
||||
Builder.defineMacro("__TSXLDTRK__");
|
||||
if (HasUINTR)
|
||||
Builder.defineMacro("__UINTR__");
|
||||
if (HasCRC32)
|
||||
Builder.defineMacro("__CRC32__");
|
||||
|
||||
// Each case falls through to the previous one here.
|
||||
switch (SSELevel) {
|
||||
@@ -878,6 +888,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
|
||||
.Case("clflushopt", true)
|
||||
.Case("clwb", true)
|
||||
.Case("clzero", true)
|
||||
.Case("crc32", true)
|
||||
.Case("cx16", true)
|
||||
.Case("enqcmd", true)
|
||||
.Case("f16c", true)
|
||||
@@ -970,6 +981,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
|
||||
.Case("clflushopt", HasCLFLUSHOPT)
|
||||
.Case("clwb", HasCLWB)
|
||||
.Case("clzero", HasCLZERO)
|
||||
.Case("crc32", HasCRC32)
|
||||
.Case("cx8", HasCX8)
|
||||
.Case("cx16", HasCX16)
|
||||
.Case("enqcmd", HasENQCMD)
|
||||
|
||||
@@ -143,6 +143,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
|
||||
bool HasSERIALIZE = false;
|
||||
bool HasTSXLDTRK = false;
|
||||
bool HasUINTR = false;
|
||||
bool HasCRC32 = false;
|
||||
|
||||
protected:
|
||||
llvm::X86::CPUKind CPU = llvm::X86::CK_None;
|
||||
|
||||
@@ -58,6 +58,7 @@ set(files
|
||||
cet.h
|
||||
cldemoteintrin.h
|
||||
clzerointrin.h
|
||||
crc32intrin.h
|
||||
cpuid.h
|
||||
clflushoptintrin.h
|
||||
clwbintrin.h
|
||||
|
||||
100
clang/lib/Headers/crc32intrin.h
Normal file
100
clang/lib/Headers/crc32intrin.h
Normal file
@@ -0,0 +1,100 @@
|
||||
/*===---- crc32intrin.h - SSE4.2 Accumulate CRC32 intrinsics ---------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef __CRC32INTRIN_H
|
||||
#define __CRC32INTRIN_H
|
||||
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("crc32")))
|
||||
|
||||
/// Adds the unsigned integer operand to the CRC-32C checksum of the
|
||||
/// unsigned char operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> CRC32B </c> instruction.
|
||||
///
|
||||
/// \param __C
|
||||
/// An unsigned integer operand to add to the CRC-32C checksum of operand
|
||||
/// \a __D.
|
||||
/// \param __D
|
||||
/// An unsigned 8-bit integer operand used to compute the CRC-32C checksum.
|
||||
/// \returns The result of adding operand \a __C to the CRC-32C checksum of
|
||||
/// operand \a __D.
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS
|
||||
_mm_crc32_u8(unsigned int __C, unsigned char __D)
|
||||
{
|
||||
return __builtin_ia32_crc32qi(__C, __D);
|
||||
}
|
||||
|
||||
/// Adds the unsigned integer operand to the CRC-32C checksum of the
|
||||
/// unsigned short operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> CRC32W </c> instruction.
|
||||
///
|
||||
/// \param __C
|
||||
/// An unsigned integer operand to add to the CRC-32C checksum of operand
|
||||
/// \a __D.
|
||||
/// \param __D
|
||||
/// An unsigned 16-bit integer operand used to compute the CRC-32C checksum.
|
||||
/// \returns The result of adding operand \a __C to the CRC-32C checksum of
|
||||
/// operand \a __D.
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS
|
||||
_mm_crc32_u16(unsigned int __C, unsigned short __D)
|
||||
{
|
||||
return __builtin_ia32_crc32hi(__C, __D);
|
||||
}
|
||||
|
||||
/// Adds the first unsigned integer operand to the CRC-32C checksum of
|
||||
/// the second unsigned integer operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> CRC32L </c> instruction.
|
||||
///
|
||||
/// \param __C
|
||||
/// An unsigned integer operand to add to the CRC-32C checksum of operand
|
||||
/// \a __D.
|
||||
/// \param __D
|
||||
/// An unsigned 32-bit integer operand used to compute the CRC-32C checksum.
|
||||
/// \returns The result of adding operand \a __C to the CRC-32C checksum of
|
||||
/// operand \a __D.
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS
|
||||
_mm_crc32_u32(unsigned int __C, unsigned int __D)
|
||||
{
|
||||
return __builtin_ia32_crc32si(__C, __D);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/// Adds the unsigned integer operand to the CRC-32C checksum of the
|
||||
/// unsigned 64-bit integer operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> CRC32Q </c> instruction.
|
||||
///
|
||||
/// \param __C
|
||||
/// An unsigned integer operand to add to the CRC-32C checksum of operand
|
||||
/// \a __D.
|
||||
/// \param __D
|
||||
/// An unsigned 64-bit integer operand used to compute the CRC-32C checksum.
|
||||
/// \returns The result of adding operand \a __C to the CRC-32C checksum of
|
||||
/// operand \a __D.
|
||||
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
|
||||
_mm_crc32_u64(unsigned long long __C, unsigned long long __D)
|
||||
{
|
||||
return __builtin_ia32_crc32di(__C, __D);
|
||||
}
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif /* __CRC32INTRIN_H */
|
||||
@@ -16,7 +16,7 @@
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
|
||||
#define __DEFAULT_FN_ATTRS_SSE42 __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
|
||||
#define __DEFAULT_FN_ATTRS_CRC32 __attribute__((__always_inline__, __nodebug__, __target__("crc32")))
|
||||
|
||||
#if defined(__cplusplus) && (__cplusplus >= 201103L)
|
||||
#define __DEFAULT_FN_ATTRS_CAST __attribute__((__always_inline__)) constexpr
|
||||
@@ -282,7 +282,7 @@ _castu64_f64(unsigned long long __A) {
|
||||
* \returns The result of adding operand \a __C to the CRC-32C checksum of
|
||||
* operand \a __D.
|
||||
*/
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS_SSE42
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32
|
||||
__crc32b(unsigned int __C, unsigned char __D)
|
||||
{
|
||||
return __builtin_ia32_crc32qi(__C, __D);
|
||||
@@ -303,7 +303,7 @@ __crc32b(unsigned int __C, unsigned char __D)
|
||||
* \returns The result of adding operand \a __C to the CRC-32C checksum of
|
||||
* operand \a __D.
|
||||
*/
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS_SSE42
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32
|
||||
__crc32w(unsigned int __C, unsigned short __D)
|
||||
{
|
||||
return __builtin_ia32_crc32hi(__C, __D);
|
||||
@@ -324,7 +324,7 @@ __crc32w(unsigned int __C, unsigned short __D)
|
||||
* \returns The result of adding operand \a __C to the CRC-32C checksum of
|
||||
* operand \a __D.
|
||||
*/
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS_SSE42
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32
|
||||
__crc32d(unsigned int __C, unsigned int __D)
|
||||
{
|
||||
return __builtin_ia32_crc32si(__C, __D);
|
||||
@@ -346,7 +346,7 @@ __crc32d(unsigned int __C, unsigned int __D)
|
||||
* \returns The result of adding operand \a __C to the CRC-32C checksum of
|
||||
* operand \a __D.
|
||||
*/
|
||||
static __inline__ unsigned long long __DEFAULT_FN_ATTRS_SSE42
|
||||
static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CRC32
|
||||
__crc32q(unsigned long long __C, unsigned long long __D)
|
||||
{
|
||||
return __builtin_ia32_crc32di(__C, __D);
|
||||
@@ -435,7 +435,7 @@ __rorq(unsigned long long __X, int __C) {
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
#undef __DEFAULT_FN_ATTRS_CAST
|
||||
#undef __DEFAULT_FN_ATTRS_SSE42
|
||||
#undef __DEFAULT_FN_ATTRS_CRC32
|
||||
#undef __DEFAULT_FN_ATTRS_CONSTEXPR
|
||||
|
||||
#endif /* __IA32INTRIN_H */
|
||||
|
||||
@@ -2338,91 +2338,10 @@ _mm_cmpgt_epi64(__m128i __V1, __m128i __V2)
|
||||
return (__m128i)((__v2di)__V1 > (__v2di)__V2);
|
||||
}
|
||||
|
||||
/* SSE4.2 Accumulate CRC32. */
|
||||
/// Adds the unsigned integer operand to the CRC-32C checksum of the
|
||||
/// unsigned char operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> CRC32B </c> instruction.
|
||||
///
|
||||
/// \param __C
|
||||
/// An unsigned integer operand to add to the CRC-32C checksum of operand
|
||||
/// \a __D.
|
||||
/// \param __D
|
||||
/// An unsigned 8-bit integer operand used to compute the CRC-32C checksum.
|
||||
/// \returns The result of adding operand \a __C to the CRC-32C checksum of
|
||||
/// operand \a __D.
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS
|
||||
_mm_crc32_u8(unsigned int __C, unsigned char __D)
|
||||
{
|
||||
return __builtin_ia32_crc32qi(__C, __D);
|
||||
}
|
||||
|
||||
/// Adds the unsigned integer operand to the CRC-32C checksum of the
|
||||
/// unsigned short operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> CRC32W </c> instruction.
|
||||
///
|
||||
/// \param __C
|
||||
/// An unsigned integer operand to add to the CRC-32C checksum of operand
|
||||
/// \a __D.
|
||||
/// \param __D
|
||||
/// An unsigned 16-bit integer operand used to compute the CRC-32C checksum.
|
||||
/// \returns The result of adding operand \a __C to the CRC-32C checksum of
|
||||
/// operand \a __D.
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS
|
||||
_mm_crc32_u16(unsigned int __C, unsigned short __D)
|
||||
{
|
||||
return __builtin_ia32_crc32hi(__C, __D);
|
||||
}
|
||||
|
||||
/// Adds the first unsigned integer operand to the CRC-32C checksum of
|
||||
/// the second unsigned integer operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> CRC32L </c> instruction.
|
||||
///
|
||||
/// \param __C
|
||||
/// An unsigned integer operand to add to the CRC-32C checksum of operand
|
||||
/// \a __D.
|
||||
/// \param __D
|
||||
/// An unsigned 32-bit integer operand used to compute the CRC-32C checksum.
|
||||
/// \returns The result of adding operand \a __C to the CRC-32C checksum of
|
||||
/// operand \a __D.
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS
|
||||
_mm_crc32_u32(unsigned int __C, unsigned int __D)
|
||||
{
|
||||
return __builtin_ia32_crc32si(__C, __D);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/// Adds the unsigned integer operand to the CRC-32C checksum of the
|
||||
/// unsigned 64-bit integer operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> CRC32Q </c> instruction.
|
||||
///
|
||||
/// \param __C
|
||||
/// An unsigned integer operand to add to the CRC-32C checksum of operand
|
||||
/// \a __D.
|
||||
/// \param __D
|
||||
/// An unsigned 64-bit integer operand used to compute the CRC-32C checksum.
|
||||
/// \returns The result of adding operand \a __C to the CRC-32C checksum of
|
||||
/// operand \a __D.
|
||||
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
|
||||
_mm_crc32_u64(unsigned long long __C, unsigned long long __D)
|
||||
{
|
||||
return __builtin_ia32_crc32di(__C, __D);
|
||||
}
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#include <popcntintrin.h>
|
||||
|
||||
#include <crc32intrin.h>
|
||||
|
||||
#endif /* __SMMINTRIN_H */
|
||||
|
||||
@@ -20,6 +20,11 @@
|
||||
#include <uintrintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__CRC32__)
|
||||
#include <crc32intrin.h>
|
||||
#endif
|
||||
|
||||
#define __SSC_MARK(Tag) \
|
||||
__asm__ __volatile__("movl %%ebx, %%eax; movl %0, %%ebx; .byte 0x64, 0x67, " \
|
||||
"0x90; movl %%eax, %%ebx;" ::"i"(Tag) \
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK64
|
||||
// RUN: %clang_cc1 -ffreestanding %s -triple=i686-apple-darwin -target-feature +sse4.2 -emit-llvm -o - -Wall -Werror | FileCheck %s
|
||||
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +crc32 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK64
|
||||
// RUN: %clang_cc1 -ffreestanding %s -triple=i686-apple-darwin -target-feature +crc32 -emit-llvm -o - -Wall -Werror | FileCheck %s
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
@@ -28,3 +30,29 @@ unsigned long long test__crc32q(unsigned long long CRC, unsigned long long V) {
|
||||
return __crc32q(CRC, V);
|
||||
}
|
||||
#endif
|
||||
|
||||
unsigned int test_mm_crc32_u8(unsigned int CRC, unsigned char V) {
|
||||
// CHECK-LABEL: test_mm_crc32_u8
|
||||
// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}})
|
||||
return _mm_crc32_u8(CRC, V);
|
||||
}
|
||||
|
||||
unsigned int test_mm_crc32_u16(unsigned int CRC, unsigned short V) {
|
||||
// CHECK-LABEL: test_mm_crc32_u16
|
||||
// CHECK: call i32 @llvm.x86.sse42.crc32.32.16(i32 %{{.*}}, i16 %{{.*}})
|
||||
return _mm_crc32_u16(CRC, V);
|
||||
}
|
||||
|
||||
unsigned int test_mm_crc32_u32(unsigned int CRC, unsigned int V) {
|
||||
// CHECK-LABEL: test_mm_crc32_u32
|
||||
// CHECK: call i32 @llvm.x86.sse42.crc32.32.32(i32 %{{.*}}, i32 %{{.*}})
|
||||
return _mm_crc32_u32(CRC, V);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
unsigned long long test_mm_crc32_u64(unsigned long long CRC, unsigned long long V) {
|
||||
// CHECK64-LABEL: test_mm_crc32_u64
|
||||
// CHECK64: call i64 @llvm.x86.sse42.crc32.64.64(i64 %{{.*}}, i64 %{{.*}})
|
||||
return _mm_crc32_u64(CRC, V);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -270,6 +270,6 @@ int DispatchFirst(void) {return 1;}
|
||||
// WINDOWS: define dso_local i32 @DispatchFirst.B
|
||||
// WINDOWS: ret i32 1
|
||||
|
||||
// CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+cx8,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
|
||||
// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+cx8,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
|
||||
// CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+crc32,+cx8,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
|
||||
// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+crc32,+cx8,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
|
||||
// CHECK: attributes #[[O]] = {{.*}}"target-features"="+cmov,+cx8,+mmx,+movbe,+sse,+sse2,+sse3,+ssse3,+x87"
|
||||
|
||||
55
clang/test/CodeGen/attr-target-crc32-x86.c
Normal file
55
clang/test/CodeGen/attr-target-crc32-x86.c
Normal file
@@ -0,0 +1,55 @@
|
||||
// Test crc32 target attribute on x86
|
||||
|
||||
// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s
|
||||
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s
|
||||
|
||||
// CHECK: define{{.*}} i32 @test1({{.*}}) [[TEST1_ATTRS:#[0-9]+]]
|
||||
// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}})
|
||||
|
||||
#define __MM_MALLOC_H
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
unsigned int __attribute__((target("crc32"))) test1(unsigned int CRC, unsigned char V) {
|
||||
return __builtin_ia32_crc32qi(CRC, V);
|
||||
}
|
||||
|
||||
// CHECK: define{{.*}} i32 @test2({{.*}}) [[GPR_ONLY_ATTRS:#[0-9]+]]
|
||||
// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}})
|
||||
unsigned int __attribute__((target("general-regs-only,crc32"))) test2(unsigned int CRC, unsigned char V) {
|
||||
return __builtin_ia32_crc32qi(CRC, V);
|
||||
}
|
||||
|
||||
// CHECK: define{{.*}} i32 @test3({{.*}}) [[GPR_ONLY_ATTRS:#[0-9]+]]
|
||||
// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}})
|
||||
unsigned int __attribute__((target("crc32,general-regs-only"))) test3(unsigned int CRC, unsigned char V) {
|
||||
return __builtin_ia32_crc32qi(CRC, V);
|
||||
}
|
||||
|
||||
// CHECK: define{{.*}} i32 @test4({{.*}}) [[TEST4_ATTRS:#[0-9]+]]
|
||||
// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}})
|
||||
unsigned int __attribute__((target("sse4.2"))) test4(unsigned int CRC, unsigned char V) {
|
||||
return __builtin_ia32_crc32qi(CRC, V);
|
||||
}
|
||||
|
||||
// CHECK: define{{.*}} i32 @test5({{.*}}) [[GPR_ONLY_ATTRS:#[0-9]+]]
|
||||
// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}})
|
||||
unsigned int __attribute__((target("sse4.2,general-regs-only,crc32"))) test5(unsigned int CRC, unsigned char V) {
|
||||
return __builtin_ia32_crc32qi(CRC, V);
|
||||
}
|
||||
|
||||
// CHECK: define{{.*}} i32 @test6({{.*}}) [[TEST4_ATTRS:#[0-9]+]]
|
||||
// CHECK: call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %{{.*}}, i32 %{{.*}}, <16 x i8> %{{.*}}, i32 %{{.*}}, i8 7)
|
||||
int __attribute__((target("sse4.2,no-crc32,crc32"))) test6(__m128i A, int LA, __m128i B, int LB) {
|
||||
return _mm_cmpestra(A, LA, B, LB, 7);
|
||||
}
|
||||
|
||||
// CHECK: define{{.*}} i32 @test7({{.*}}) [[TEST4_ATTRS:#[0-9]+]]
|
||||
// CHECK: call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %{{.*}}, i32 %{{.*}}, <16 x i8> %{{.*}}, i32 %{{.*}}, i8 7)
|
||||
int __attribute__((target("no-crc32,crc32,sse4.2"))) test7(__m128i A, int LA, __m128i B, int LB) {
|
||||
return _mm_cmpestra(A, LA, B, LB, 7);
|
||||
}
|
||||
|
||||
// CHECK: attributes [[TEST1_ATTRS]] = { {{.*}} "target-features"="{{.*}}+crc32{{.*}}"
|
||||
// CHECK: attributes [[GPR_ONLY_ATTRS]] = { {{.*}} "target-features"="{{.*}}+crc32{{.*}}-avx{{.*}}-avx2{{.*}}-avx512f{{.*}}-sse{{.*}}-sse2{{.*}}-ssse3{{.*}}-x87{{.*}}"
|
||||
// CHECK: attributes [[TEST4_ATTRS]] = { {{.*}} "target-features"="{{.*}}+crc32{{.*}}+sse4.2{{.*}}"
|
||||
@@ -52,12 +52,12 @@ void __attribute__((target("arch=x86-64-v4"))) x86_64_v4() {}
|
||||
// CHECK: use_before_def{{.*}} #7
|
||||
// CHECK: walrus{{.*}} #8
|
||||
// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87" "tune-cpu"="i686"
|
||||
// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
|
||||
// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
|
||||
// CHECK-NOT: tune-cpu
|
||||
// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
|
||||
// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686"
|
||||
// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686"
|
||||
// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
|
||||
// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes"
|
||||
// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes"
|
||||
// CHECK-NOT: tune-cpu
|
||||
// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-3dnow,-3dnowa,-mmx"
|
||||
// CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx"
|
||||
@@ -65,8 +65,8 @@ void __attribute__((target("arch=x86-64-v4"))) x86_64_v4() {}
|
||||
// CHECK: #8 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87" "tune-cpu"="sandybridge"
|
||||
|
||||
// CHECK: "target-cpu"="x86-64-v2"
|
||||
// CHECK-SAME: "target-features"="+cx16,+cx8,+fxsr,+mmx,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
|
||||
// CHECK-SAME: "target-features"="+crc32,+cx16,+cx8,+fxsr,+mmx,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
|
||||
// CHECK: "target-cpu"="x86-64-v3"
|
||||
// CHECK-SAME: "target-features"="+avx,+avx2,+bmi,+bmi2,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
|
||||
// CHECK-SAME: "target-features"="+avx,+avx2,+bmi,+bmi2,+crc32,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
|
||||
// CHECK: "target-cpu"="x86-64-v4"
|
||||
// CHECK-SAME: "target-features"="+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
|
||||
// CHECK-SAME: "target-features"="+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+crc32,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
|
||||
|
||||
41
clang/test/Driver/x86-mcrc32.c
Normal file
41
clang/test/Driver/x86-mcrc32.c
Normal file
@@ -0,0 +1,41 @@
|
||||
// Test interaction between -mcrc32/-mno-crc32 and the SSE4.2 ISA options (-msse4.2/-mno-sse4.2) on x86
|
||||
|
||||
// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s
|
||||
// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s
|
||||
|
||||
// RUN: %clang -target i386-unknown-linux-gnu -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s
|
||||
// RUN: %clang -target x86_64-unknown-linux-gnu -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s
|
||||
|
||||
// RUN: %clang -target i386-unknown-linux-gnu -msse4.2 -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s
|
||||
// RUN: %clang -target x86_64-unknown-linux-gnu -msse4.2 -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s
|
||||
|
||||
// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s
|
||||
// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s
|
||||
|
||||
// RUN: not %clang -target i386-unknown-linux-gnu -mno-crc32 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
|
||||
// RUN: not %clang -target x86_64-unknown-linux-gnu -mno-crc32 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
|
||||
|
||||
// RUN: not %clang -target i386-unknown-linux-gnu -msse4.2 -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
|
||||
// RUN: not %clang -target x86_64-unknown-linux-gnu -msse4.2 -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
|
||||
|
||||
// RUN: not %clang -target i386-unknown-linux-gnu -mcrc32 -mno-crc32 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
|
||||
// RUN: not %clang -target x86_64-unknown-linux-gnu -mcrc32 -mno-crc32 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
|
||||
|
||||
// RUN: not %clang -target i386-unknown-linux-gnu -mcrc32 -msse4.2 -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
|
||||
// RUN: not %clang -target x86_64-unknown-linux-gnu -mcrc32 -msse4.2 -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
|
||||
|
||||
// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -mno-sse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s
|
||||
// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -mno-sse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s
|
||||
|
||||
// RUN: %clang -target i386-unknown-linux-gnu -mno-sse4.2 -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s
|
||||
// RUN: %clang -target x86_64-unknown-linux-gnu -mno-sse4.2 -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s
|
||||
|
||||
unsigned int test__crc32b(unsigned int CRC, unsigned char V) {
|
||||
// CHECK-LABEL: test__crc32b
|
||||
// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}})
|
||||
return __builtin_ia32_crc32qi(CRC, V);
|
||||
}
|
||||
|
||||
// ERROR: error: '__builtin_ia32_crc32qi' needs target feature crc32
|
||||
|
||||
// IR-CRC32: attributes {{.*}} = { {{.*}} "target-features"="{{.*}}+crc32{{.*}}"
|
||||
54
clang/test/Driver/x86-mgeneral-regs-only-crc32.c
Normal file
54
clang/test/Driver/x86-mgeneral-regs-only-crc32.c
Normal file
@@ -0,0 +1,54 @@
|
||||
// Test the -mgeneral-regs-only option in combination with -mcrc32 on x86
|
||||
|
||||
// RUN: %clang -target i386-unknown-linux-gnu -mgeneral-regs-only %s -### 2>&1 | FileCheck --check-prefix=CMD %s
|
||||
// RUN: %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only %s -### 2>&1 | FileCheck --check-prefix=CMD %s
|
||||
// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -mavx2 -mgeneral-regs-only %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-BEFORE %s
|
||||
// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -mavx2 -mgeneral-regs-only %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-BEFORE %s
|
||||
// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -mgeneral-regs-only -mavx2 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-BEFORE %s
|
||||
// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -mgeneral-regs-only -mavx2 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-BEFORE %s
|
||||
// RUN: %clang -target i386-unknown-linux-gnu -mavx2 -mgeneral-regs-only -mcrc32 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-AFTER %s
|
||||
// RUN: %clang -target x86_64-unknown-linux-gnu -mavx2 -mgeneral-regs-only -mcrc32 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-AFTER %s
|
||||
// RUN: %clang -target i386-unknown-linux-gnu -mgeneral-regs-only -mavx2 -mcrc32 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-AFTER %s
|
||||
// RUN: %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only -mavx2 -mcrc32 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-AFTER %s
|
||||
|
||||
// RUN: not %clang -target i386-unknown-linux-gnu -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
|
||||
// RUN: not %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
|
||||
// RUN: not %clang -target i386-unknown-linux-gnu -mgeneral-regs-only -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
|
||||
// RUN: not %clang -target x86_64-unknown-linux-gnu -msse4.2 -mgeneral-regs-only -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
|
||||
// RUN: not %clang -target i386-unknown-linux-gnu -msse4.2 -mgeneral-regs-only -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
|
||||
// RUN: not %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
|
||||
// RUN: not %clang -target i386-unknown-linux-gnu -msse4.2 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
|
||||
// RUN: not %clang -target x86_64-unknown-linux-gnu -msse4.2 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
|
||||
// RUN: %clang -target i386-unknown-linux-gnu -msse4.2 -mgeneral-regs-only -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s
|
||||
// RUN: %clang -target x86_64-unknown-linux-gnu -msse4.2 -mgeneral-regs-only -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s
|
||||
// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s
|
||||
// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s
|
||||
// RUN: %clang -target i386-unknown-linux-gnu -mgeneral-regs-only -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s
|
||||
// RUN: %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s
|
||||
// RUN: not %clang -target i386-unknown-linux-gnu -mavx2 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
|
||||
// RUN: not %clang -target x86_64-unknown-linux-gnu -mavx2 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
|
||||
// RUN: %clang -target i386-unknown-linux-gnu -mavx2 -mgeneral-regs-only -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s
|
||||
// RUN: %clang -target x86_64-unknown-linux-gnu -mavx2 -mgeneral-regs-only -S -mcrc32 -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s
|
||||
// RUN: %clang -target i386-unknown-linux-gnu -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-AVX2 %s
|
||||
// RUN: %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-AVX2 %s
|
||||
// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-AVX2 %s
|
||||
// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-AVX2 %s
|
||||
// RUN: not %clang -target i386-unknown-linux-gnu -mno-crc32 -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
|
||||
// RUN: not %clang -target x86_64-unknown-linux-gnu -mno-crc32 -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
|
||||
|
||||
// CMD-BEFORE: "-target-feature" "+crc32"
|
||||
// CMD: "-target-feature" "-x87"
|
||||
// CMD: "-target-feature" "-mmx"
|
||||
// CMD: "-target-feature" "-sse"
|
||||
// CMD-AFTER: "-target-feature" "+crc32"
|
||||
|
||||
unsigned int test__crc32b(unsigned int CRC, unsigned char V) {
|
||||
// CHECK-LABEL: test__crc32b
|
||||
// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}})
|
||||
return __builtin_ia32_crc32qi(CRC, V);
|
||||
}
|
||||
|
||||
// ERROR: error: '__builtin_ia32_crc32qi' needs target feature crc32
|
||||
|
||||
// IR-GPR: attributes {{.*}} = { {{.*}} "target-features"="{{.*}}+crc32{{.*}}-avx{{.*}}-avx2{{.*}}-avx512f{{.*}}-sse{{.*}}-sse2{{.*}}-ssse3{{.*}}-x87{{.*}}"
|
||||
// IR-AVX2: attributes {{.*}} = { {{.*}} "target-features"="{{.*}}+avx{{.*}}+avx2{{.*}}+crc32{{.*}}+sse{{.*}}+sse2{{.*}}+ssse3{{.*}}-avx512f{{.*}}-x87{{.*}}"
|
||||
@@ -298,3 +298,8 @@
|
||||
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx512fp16 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-AVX512FP16 %s
|
||||
// AVX512FP16: "-target-feature" "+avx512fp16"
|
||||
// NO-AVX512FP16: "-target-feature" "-avx512fp16"
|
||||
|
||||
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mcrc32 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=CRC32 %s
|
||||
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-crc32 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-CRC32 %s
|
||||
// CRC32: "-target-feature" "+crc32"
|
||||
// NO-CRC32: "-target-feature" "-crc32"
|
||||
|
||||
@@ -580,3 +580,11 @@
|
||||
|
||||
// AVX512FP16NOAVX512DQ-NOT: #define __AVX512DQ__ 1
|
||||
// AVX512FP16NOAVX512DQ-NOT: #define __AVX512FP16__ 1
|
||||
|
||||
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mcrc32 -x c -E -dM -o - %s | FileCheck -check-prefix=CRC32 %s
|
||||
|
||||
// CRC32: #define __CRC32__ 1
|
||||
|
||||
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-crc32 -x c -E -dM -o - %s | FileCheck -check-prefix=NOCRC32 %s
|
||||
|
||||
// NOCRC32-NOT: #define __CRC32__ 1
|
||||
|
||||
@@ -158,6 +158,7 @@ X86_FEATURE (CLWB, "clwb")
|
||||
X86_FEATURE (CLZERO, "clzero")
|
||||
X86_FEATURE (CMPXCHG16B, "cx16")
|
||||
X86_FEATURE (CMPXCHG8B, "cx8")
|
||||
X86_FEATURE (CRC32, "crc32")
|
||||
X86_FEATURE (ENQCMD, "enqcmd")
|
||||
X86_FEATURE (F16C, "f16c")
|
||||
X86_FEATURE (FSGSBASE, "fsgsbase")
|
||||
|
||||
@@ -1071,8 +1071,10 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
|
||||
setFeature(X86::FEATURE_FMA);
|
||||
if ((ECX >> 19) & 1)
|
||||
setFeature(X86::FEATURE_SSE4_1);
|
||||
if ((ECX >> 20) & 1)
|
||||
if ((ECX >> 20) & 1) {
|
||||
setFeature(X86::FEATURE_SSE4_2);
|
||||
setFeature(X86::FEATURE_CRC32);
|
||||
}
|
||||
if ((ECX >> 23) & 1)
|
||||
setFeature(X86::FEATURE_POPCNT);
|
||||
if ((ECX >> 25) & 1)
|
||||
@@ -1518,6 +1520,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
|
||||
Features["cx16"] = (ECX >> 13) & 1;
|
||||
Features["sse4.1"] = (ECX >> 19) & 1;
|
||||
Features["sse4.2"] = (ECX >> 20) & 1;
|
||||
Features["crc32"] = Features["sse4.2"];
|
||||
Features["movbe"] = (ECX >> 22) & 1;
|
||||
Features["popcnt"] = (ECX >> 23) & 1;
|
||||
Features["aes"] = (ECX >> 25) & 1;
|
||||
|
||||
@@ -139,8 +139,8 @@ constexpr FeatureBitset FeaturesNocona =
|
||||
// Basic 64-bit capable CPU.
|
||||
constexpr FeatureBitset FeaturesX86_64 = FeaturesPentium4 | Feature64BIT;
|
||||
constexpr FeatureBitset FeaturesX86_64_V2 = FeaturesX86_64 | FeatureSAHF |
|
||||
FeaturePOPCNT | FeatureSSE4_2 |
|
||||
FeatureCMPXCHG16B;
|
||||
FeaturePOPCNT | FeatureCRC32 |
|
||||
FeatureSSE4_2 | FeatureCMPXCHG16B;
|
||||
constexpr FeatureBitset FeaturesX86_64_V3 =
|
||||
FeaturesX86_64_V2 | FeatureAVX2 | FeatureBMI | FeatureBMI2 | FeatureF16C |
|
||||
FeatureFMA | FeatureLZCNT | FeatureMOVBE | FeatureXSAVE;
|
||||
@@ -153,7 +153,7 @@ constexpr FeatureBitset FeaturesCore2 =
|
||||
FeaturesNocona | FeatureSAHF | FeatureSSSE3;
|
||||
constexpr FeatureBitset FeaturesPenryn = FeaturesCore2 | FeatureSSE4_1;
|
||||
constexpr FeatureBitset FeaturesNehalem =
|
||||
FeaturesPenryn | FeaturePOPCNT | FeatureSSE4_2;
|
||||
FeaturesPenryn | FeaturePOPCNT | FeatureCRC32 | FeatureSSE4_2;
|
||||
constexpr FeatureBitset FeaturesWestmere = FeaturesNehalem | FeaturePCLMUL;
|
||||
constexpr FeatureBitset FeaturesSandyBridge =
|
||||
FeaturesWestmere | FeatureAVX | FeatureXSAVE | FeatureXSAVEOPT;
|
||||
@@ -256,16 +256,17 @@ constexpr FeatureBitset FeaturesBTVER1 =
|
||||
FeatureSSE | FeatureSSE2 | FeatureSSE3 | FeatureSSSE3 | FeatureSSE4_A |
|
||||
FeatureSAHF;
|
||||
constexpr FeatureBitset FeaturesBTVER2 =
|
||||
FeaturesBTVER1 | FeatureAES | FeatureAVX | FeatureBMI | FeatureF16C |
|
||||
FeatureMOVBE | FeaturePCLMUL | FeatureXSAVE | FeatureXSAVEOPT;
|
||||
FeaturesBTVER1 | FeatureAES | FeatureAVX | FeatureBMI | FeatureCRC32 |
|
||||
FeatureF16C | FeatureMOVBE | FeaturePCLMUL | FeatureXSAVE | FeatureXSAVEOPT;
|
||||
|
||||
// AMD Bulldozer architecture processors.
|
||||
constexpr FeatureBitset FeaturesBDVER1 =
|
||||
FeatureX87 | FeatureAES | FeatureAVX | FeatureCMPXCHG8B |
|
||||
FeatureCMPXCHG16B | Feature64BIT | FeatureFMA4 | FeatureFXSR | FeatureLWP |
|
||||
FeatureLZCNT | FeatureMMX | FeaturePCLMUL | FeaturePOPCNT | FeaturePRFCHW |
|
||||
FeatureSAHF | FeatureSSE | FeatureSSE2 | FeatureSSE3 | FeatureSSSE3 |
|
||||
FeatureSSE4_1 | FeatureSSE4_2 | FeatureSSE4_A | FeatureXOP | FeatureXSAVE;
|
||||
FeatureCMPXCHG16B | FeatureCRC32 | Feature64BIT | FeatureFMA4 |
|
||||
FeatureFXSR | FeatureLWP | FeatureLZCNT | FeatureMMX | FeaturePCLMUL |
|
||||
FeaturePOPCNT | FeaturePRFCHW | FeatureSAHF | FeatureSSE | FeatureSSE2 |
|
||||
FeatureSSE3 | FeatureSSSE3 | FeatureSSE4_1 | FeatureSSE4_2 | FeatureSSE4_A |
|
||||
FeatureXOP | FeatureXSAVE;
|
||||
constexpr FeatureBitset FeaturesBDVER2 =
|
||||
FeaturesBDVER1 | FeatureBMI | FeatureFMA | FeatureF16C | FeatureTBM;
|
||||
constexpr FeatureBitset FeaturesBDVER3 =
|
||||
@@ -278,9 +279,9 @@ constexpr FeatureBitset FeaturesBDVER4 = FeaturesBDVER3 | FeatureAVX2 |
|
||||
constexpr FeatureBitset FeaturesZNVER1 =
|
||||
FeatureX87 | FeatureADX | FeatureAES | FeatureAVX | FeatureAVX2 |
|
||||
FeatureBMI | FeatureBMI2 | FeatureCLFLUSHOPT | FeatureCLZERO |
|
||||
FeatureCMPXCHG8B | FeatureCMPXCHG16B | Feature64BIT | FeatureF16C |
|
||||
FeatureFMA | FeatureFSGSBASE | FeatureFXSR | FeatureLZCNT | FeatureMMX |
|
||||
FeatureMOVBE | FeatureMWAITX | FeaturePCLMUL | FeaturePOPCNT |
|
||||
FeatureCMPXCHG8B | FeatureCMPXCHG16B | FeatureCRC32 | Feature64BIT |
|
||||
FeatureF16C | FeatureFMA | FeatureFSGSBASE | FeatureFXSR | FeatureLZCNT |
|
||||
FeatureMMX | FeatureMOVBE | FeatureMWAITX | FeaturePCLMUL | FeaturePOPCNT |
|
||||
FeaturePRFCHW | FeatureRDRND | FeatureRDSEED | FeatureSAHF | FeatureSHA |
|
||||
FeatureSSE | FeatureSSE2 | FeatureSSE3 | FeatureSSSE3 | FeatureSSE4_1 |
|
||||
FeatureSSE4_2 | FeatureSSE4_A | FeatureXSAVE | FeatureXSAVEC |
|
||||
@@ -472,6 +473,7 @@ constexpr FeatureBitset ImpliedFeaturesCLZERO = {};
|
||||
constexpr FeatureBitset ImpliedFeaturesCMOV = {};
|
||||
constexpr FeatureBitset ImpliedFeaturesCMPXCHG16B = {};
|
||||
constexpr FeatureBitset ImpliedFeaturesCMPXCHG8B = {};
|
||||
constexpr FeatureBitset ImpliedFeaturesCRC32 = {};
|
||||
constexpr FeatureBitset ImpliedFeaturesENQCMD = {};
|
||||
constexpr FeatureBitset ImpliedFeaturesFSGSBASE = {};
|
||||
constexpr FeatureBitset ImpliedFeaturesFXSR = {};
|
||||
|
||||
@@ -42,6 +42,9 @@ def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
|
||||
def FeatureCMPXCHG8B : SubtargetFeature<"cx8", "HasCmpxchg8b", "true",
|
||||
"Support CMPXCHG8B instructions">;
|
||||
|
||||
def FeatureCRC32 : SubtargetFeature<"crc32", "HasCRC32", "true",
|
||||
"Enable SSE 4.2 CRC32 instruction">;
|
||||
|
||||
def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
|
||||
"Support POPCNT instruction">;
|
||||
|
||||
@@ -624,9 +627,10 @@ def ProcessorFeatures {
|
||||
FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSE2,
|
||||
FeatureFXSR, FeatureNOPL, Feature64Bit
|
||||
];
|
||||
list<SubtargetFeature> X86_64V2Features = !listconcat(
|
||||
X86_64V1Features,
|
||||
[FeatureCMPXCHG16B, FeatureLAHFSAHF, FeaturePOPCNT, FeatureSSE42]);
|
||||
list<SubtargetFeature> X86_64V2Features = !listconcat(X86_64V1Features, [
|
||||
FeatureCMPXCHG16B, FeatureLAHFSAHF, FeatureCRC32, FeaturePOPCNT,
|
||||
FeatureSSE42
|
||||
]);
|
||||
list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [
|
||||
FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA, FeatureLZCNT,
|
||||
FeatureMOVBE, FeatureXSAVE
|
||||
@@ -867,6 +871,7 @@ def ProcessorFeatures {
|
||||
|
||||
// Silvermont
|
||||
list<SubtargetFeature> SLMAdditionalFeatures = [FeatureSSE42,
|
||||
FeatureCRC32,
|
||||
FeaturePOPCNT,
|
||||
FeaturePCLMUL,
|
||||
FeaturePRFCHW,
|
||||
@@ -957,6 +962,7 @@ def ProcessorFeatures {
|
||||
FeatureNOPL,
|
||||
Feature64Bit,
|
||||
FeatureCMPXCHG16B,
|
||||
FeatureCRC32,
|
||||
FeaturePOPCNT,
|
||||
FeaturePCLMUL,
|
||||
FeatureXSAVE,
|
||||
@@ -1033,6 +1039,7 @@ def ProcessorFeatures {
|
||||
// Jaguar
|
||||
list<SubtargetFeature> BtVer2AdditionalFeatures = [FeatureAVX,
|
||||
FeatureAES,
|
||||
FeatureCRC32,
|
||||
FeaturePCLMUL,
|
||||
FeatureBMI,
|
||||
FeatureF16C,
|
||||
@@ -1058,6 +1065,7 @@ def ProcessorFeatures {
|
||||
Feature64Bit,
|
||||
FeatureCMPXCHG16B,
|
||||
FeatureAES,
|
||||
FeatureCRC32,
|
||||
FeaturePRFCHW,
|
||||
FeaturePCLMUL,
|
||||
FeatureMMX,
|
||||
@@ -1115,6 +1123,7 @@ def ProcessorFeatures {
|
||||
FeatureCMOV,
|
||||
Feature64Bit,
|
||||
FeatureCMPXCHG16B,
|
||||
FeatureCRC32,
|
||||
FeatureF16C,
|
||||
FeatureFMA,
|
||||
FeatureFSGSBase,
|
||||
|
||||
@@ -750,18 +750,19 @@ class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
|
||||
Requires<[UseSSE42]>;
|
||||
|
||||
// SS42FI - SSE 4.2 instructions with T8XD prefix.
|
||||
// NOTE: 'HasSSE42' is used as SS42FI is only used for CRC32 insns.
|
||||
class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: I<o, F, outs, ins, asm, pattern>, T8XD, Requires<[HasSSE42]>;
|
||||
|
||||
// SS42AI = SSE 4.2 instructions with TA prefix
|
||||
class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
|
||||
Requires<[UseSSE42]>;
|
||||
|
||||
// CRC32I - SSE 4.2 CRC32 instructions.
|
||||
// NOTE: 'HasCRC32' is used as CRC32 instructions are GPR only and not directly
|
||||
// controlled by the SSE42 flag.
|
||||
class CRC32I<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: I<o, F, outs, ins, asm, pattern>, T8XD, Requires<[HasCRC32]>;
|
||||
|
||||
// AVX Instruction Templates:
|
||||
// Instructions introduced in AVX (no SSE equivalent forms)
|
||||
//
|
||||
|
||||
@@ -995,6 +995,7 @@ def HasAMXTILE : Predicate<"Subtarget->hasAMXTILE()">;
|
||||
def HasAMXBF16 : Predicate<"Subtarget->hasAMXBF16()">;
|
||||
def HasAMXINT8 : Predicate<"Subtarget->hasAMXINT8()">;
|
||||
def HasUINTR : Predicate<"Subtarget->hasUINTR()">;
|
||||
def HasCRC32 : Predicate<"Subtarget->hasCRC32()">;
|
||||
def Not64BitMode : Predicate<"!Subtarget->is64Bit()">,
|
||||
AssemblerPredicate<(all_of (not Mode64Bit)), "Not 64-bit mode">;
|
||||
def In64BitMode : Predicate<"Subtarget->is64Bit()">,
|
||||
|
||||
@@ -6588,14 +6588,14 @@ let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
|
||||
// of r and m.
|
||||
class SS42I_crc32r<bits<8> opc, string asm, RegisterClass RCOut,
|
||||
RegisterClass RCIn, SDPatternOperator Int> :
|
||||
SS42FI<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2),
|
||||
CRC32I<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2),
|
||||
!strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
|
||||
[(set RCOut:$dst, (Int RCOut:$src1, RCIn:$src2))]>,
|
||||
Sched<[WriteCRC32]>;
|
||||
|
||||
class SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut,
|
||||
X86MemOperand x86memop, SDPatternOperator Int> :
|
||||
SS42FI<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
|
||||
CRC32I<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
|
||||
!strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
|
||||
[(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))]>,
|
||||
Sched<[WriteCRC32.Folded, WriteCRC32.ReadAfterFold]>;
|
||||
|
||||
@@ -428,6 +428,10 @@ class X86Subtarget final : public X86GenSubtargetInfo {
|
||||
/// Processor supports User Level Interrupt instructions
|
||||
bool HasUINTR = false;
|
||||
|
||||
/// Processor supports the CRC32 instruction. Tracked separately from SSE4.2
|
||||
/// because it uses only GPRs and remains usable with -mgeneral-regs-only.
|
||||
bool HasCRC32 = false;
|
||||
|
||||
/// Processor has a single uop BEXTR implementation.
|
||||
bool HasFastBEXTR = false;
|
||||
|
||||
@@ -767,6 +771,7 @@ public:
|
||||
bool hasSERIALIZE() const { return HasSERIALIZE; }
|
||||
bool hasTSXLDTRK() const { return HasTSXLDTRK; }
|
||||
bool hasUINTR() const { return HasUINTR; }
|
||||
bool hasCRC32() const { return HasCRC32; }
|
||||
bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; }
|
||||
bool useRetpolineIndirectBranches() const {
|
||||
return UseRetpolineIndirectBranches;
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s
|
||||
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
|
||||
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s
|
||||
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s
|
||||
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+crc32 | FileCheck %s
|
||||
|
||||
; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse42-builtins.c
|
||||
|
||||
54
llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll
Normal file
54
llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll
Normal file
@@ -0,0 +1,54 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s --check-prefixes=X86
|
||||
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s --check-prefixes=X64
|
||||
|
||||
define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
|
||||
; X86-LABEL: crc32_32_8:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: crc32b {{[0-9]+}}(%esp), %eax ## encoding: [0xf2,0x0f,0x38,0xf0,0x44,0x24,0x08]
|
||||
; X86-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: crc32_32_8:
|
||||
; X64: ## %bb.0:
|
||||
; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
|
||||
; X64-NEXT: crc32b %sil, %eax ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6]
|
||||
; X64-NEXT: retq ## encoding: [0xc3]
|
||||
%tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
|
||||
ret i32 %tmp
|
||||
}
|
||||
declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
|
||||
|
||||
define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
|
||||
; X86-LABEL: crc32_32_16:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: crc32w {{[0-9]+}}(%esp), %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0x44,0x24,0x08]
|
||||
; X86-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: crc32_32_16:
|
||||
; X64: ## %bb.0:
|
||||
; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
|
||||
; X64-NEXT: crc32w %si, %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6]
|
||||
; X64-NEXT: retq ## encoding: [0xc3]
|
||||
%tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
|
||||
ret i32 %tmp
|
||||
}
|
||||
declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
|
||||
|
||||
define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
|
||||
; X86-LABEL: crc32_32_32:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: crc32l {{[0-9]+}}(%esp), %eax ## encoding: [0xf2,0x0f,0x38,0xf1,0x44,0x24,0x08]
|
||||
; X86-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: crc32_32_32:
|
||||
; X64: ## %bb.0:
|
||||
; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
|
||||
; X64-NEXT: crc32l %esi, %eax ## encoding: [0xf2,0x0f,0x38,0xf1,0xc6]
|
||||
; X64-NEXT: retq ## encoding: [0xc3]
|
||||
%tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
|
||||
ret i32 %tmp
|
||||
}
|
||||
declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
|
||||
@@ -1,7 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx,+sse4.2 -show-mc-encoding | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s
|
||||
|
||||
declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind
|
||||
declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind
|
||||
@@ -25,4 +23,3 @@ define i64 @crc32_64_64(i64 %a, i64 %b) nounwind {
|
||||
%tmp = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a, i64 %b)
|
||||
ret i64 %tmp
|
||||
}
|
||||
|
||||
29
llvm/test/CodeGen/X86/crc32-target-feature.ll
Normal file
29
llvm/test/CodeGen/X86/crc32-target-feature.ll
Normal file
@@ -0,0 +1,29 @@
|
||||
; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
|
||||
|
||||
define i32 @test1(i32 %a, i8 %b) nounwind #0 {
|
||||
; CHECK-LABEL: test1:
|
||||
; CHECK: crc32b
|
||||
%tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
|
||||
ret i32 %tmp
|
||||
}
|
||||
|
||||
define i32 @test2(i32 %a, i8 %b) nounwind #1 {
|
||||
; CHECK-LABEL: test2:
|
||||
; CHECK: crc32b
|
||||
%tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
|
||||
ret i32 %tmp
|
||||
}
|
||||
|
||||
define i32 @test3(i32 %a, i8 %b) nounwind #2 {
|
||||
; CHECK-LABEL: test3:
|
||||
; CHECK: crc32b
|
||||
%tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
|
||||
ret i32 %tmp
|
||||
}
|
||||
|
||||
declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
|
||||
|
||||
attributes #0 = { "target-features"="+crc32" }
|
||||
attributes #1 = { "target-features"="+cx8,+fxsr,-3dnow,-3dnowa,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-mmx,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-x87,-xop,+crc32" }
|
||||
attributes #2 = { "target-features"="+crc32,+cx8,+fxsr,-3dnow,-3dnowa,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-mmx,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-x87,-xop" }
|
||||
@@ -78,4 +78,4 @@ declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8)
|
||||
attributes #0 = { "target-cpu"="x86-64" "target-features"="+avx2" }
|
||||
attributes #1 = { "target-cpu"="x86-64" }
|
||||
attributes #2 = { "target-cpu"="corei7" "target-features"="+sse4.2" }
|
||||
attributes #3 = { "target-cpu"="x86-64" "target-features"="+avx2,+aes" }
|
||||
attributes #3 = { "target-cpu"="x86-64" "target-features"="+avx2,+aes,+crc32" }
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=X86,SSE,X86-SSE
|
||||
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86,AVX,X86-AVX,AVX1
|
||||
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X86,AVX,X86-AVX,AVX512
|
||||
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=X64,SSE,X64-SSE
|
||||
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64,AVX,X64-AVX,AVX1
|
||||
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64,AVX,X64-AVX,AVX512
|
||||
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,X86-SSE
|
||||
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse4.2,-crc32 | FileCheck %s --check-prefixes=SSE,X86-SSE
|
||||
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,X86-AVX,AVX1
|
||||
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX,X86-AVX,AVX512
|
||||
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,X64-SSE
|
||||
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse4.2,-crc32 | FileCheck %s --check-prefixes=SSE,X64-SSE
|
||||
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,X64-AVX,AVX1
|
||||
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX,X64-AVX,AVX512
|
||||
|
||||
; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse42-builtins.c
|
||||
|
||||
@@ -500,54 +502,3 @@ define i32 @test_mm_cmpistrz(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
ret i32 %res
|
||||
}
|
||||
declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone
|
||||
|
||||
define i32 @test_mm_crc32_u8(i32 %a0, i8 %a1) {
|
||||
; X86-LABEL: test_mm_crc32_u8:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: crc32b {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm_crc32_u8:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movl %edi, %eax
|
||||
; X64-NEXT: crc32b %sil, %eax
|
||||
; X64-NEXT: retq
|
||||
%res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %a1)
|
||||
ret i32 %res
|
||||
}
|
||||
declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind readnone
|
||||
|
||||
define i32 @test_mm_crc32_u16(i32 %a0, i16 %a1) {
|
||||
; X86-LABEL: test_mm_crc32_u16:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: crc32w {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm_crc32_u16:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movl %edi, %eax
|
||||
; X64-NEXT: crc32w %si, %eax
|
||||
; X64-NEXT: retq
|
||||
%res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %a1)
|
||||
ret i32 %res
|
||||
}
|
||||
declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind readnone
|
||||
|
||||
define i32 @test_mm_crc32_u32(i32 %a0, i32 %a1) {
|
||||
; X86-LABEL: test_mm_crc32_u32:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: crc32l {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm_crc32_u32:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movl %edi, %eax
|
||||
; X64-NEXT: crc32l %esi, %eax
|
||||
; X64-NEXT: retq
|
||||
%res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1)
|
||||
ret i32 %res
|
||||
}
|
||||
declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind readnone
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+sse4.2 -show-mc-encoding | FileCheck %s --check-prefixes=X86,SSE,X86-SSE
|
||||
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=X86,AVX,X86-AVX,X86-AVX1
|
||||
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=X86,AVX,X86-AVX,X86-AVX512
|
||||
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+sse4.2 -show-mc-encoding | FileCheck %s --check-prefixes=X64,SSE,X64-SSE
|
||||
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=X64,AVX,X64-AVX,X64-AVX1
|
||||
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=X64,AVX,X64-AVX,X64-AVX512
|
||||
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+sse4.2 -show-mc-encoding | FileCheck %s --check-prefixes=SSE,X86-SSE
|
||||
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+sse4.2,-crc32 -show-mc-encoding | FileCheck %s --check-prefixes=SSE,X86-SSE
|
||||
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=AVX,X86-AVX,X86-AVX1
|
||||
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=AVX,X86-AVX,X86-AVX512
|
||||
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+sse4.2 -show-mc-encoding | FileCheck %s --check-prefixes=SSE,X64-SSE
|
||||
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+sse4.2,-crc32 -show-mc-encoding | FileCheck %s --check-prefixes=SSE,X64-SSE
|
||||
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=AVX,X64-AVX,X64-AVX1
|
||||
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=AVX,X64-AVX,X64-AVX512
|
||||
|
||||
define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) {
|
||||
; SSE-LABEL: test_x86_sse42_pcmpestri128:
|
||||
@@ -616,54 +618,3 @@ define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1
|
||||
%res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7) ; <<16 x i8>> [#uses=1]
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
|
||||
; X86-LABEL: crc32_32_8:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: crc32b {{[0-9]+}}(%esp), %eax ## encoding: [0xf2,0x0f,0x38,0xf0,0x44,0x24,0x08]
|
||||
; X86-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: crc32_32_8:
|
||||
; X64: ## %bb.0:
|
||||
; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
|
||||
; X64-NEXT: crc32b %sil, %eax ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6]
|
||||
; X64-NEXT: retq ## encoding: [0xc3]
|
||||
%tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
|
||||
ret i32 %tmp
|
||||
}
|
||||
declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
|
||||
|
||||
define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
|
||||
; X86-LABEL: crc32_32_16:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: crc32w {{[0-9]+}}(%esp), %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0x44,0x24,0x08]
|
||||
; X86-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: crc32_32_16:
|
||||
; X64: ## %bb.0:
|
||||
; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
|
||||
; X64-NEXT: crc32w %si, %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6]
|
||||
; X64-NEXT: retq ## encoding: [0xc3]
|
||||
%tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
|
||||
ret i32 %tmp
|
||||
}
|
||||
declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
|
||||
|
||||
define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
|
||||
; X86-LABEL: crc32_32_32:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: crc32l {{[0-9]+}}(%esp), %eax ## encoding: [0xf2,0x0f,0x38,0xf1,0x44,0x24,0x08]
|
||||
; X86-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: crc32_32_32:
|
||||
; X64: ## %bb.0:
|
||||
; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
|
||||
; X64-NEXT: crc32l %esi, %eax ## encoding: [0xf2,0x0f,0x38,0xf1,0xc6]
|
||||
; X64-NEXT: retq ## encoding: [0xc3]
|
||||
%tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
|
||||
ret i32 %tmp
|
||||
}
|
||||
declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.2,+aes,+pclmul < %s | FileCheck %s
|
||||
; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.2,+aes,+crc32,+pclmul < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-unknown"
|
||||
|
||||
Reference in New Issue
Block a user