[X86] Add CRC32 feature.

d8faf03807 implemented general-regs-only for X86 by disabling all features
with vector instructions. But the CRC32 instruction in SSE4.2 ISA, which uses
only GPRs, also becomes unavailable. This patch adds a CRC32 feature for this
instruction and allows it to be used with general-regs-only.

Reviewed By: pengfei

Differential Revision: https://reviews.llvm.org/D105462
This commit is contained in:
Tianqing Wang
2021-09-06 13:55:17 +08:00
parent cc9260a0fb
commit 12fa608af4
35 changed files with 484 additions and 248 deletions

View File

@@ -3590,6 +3590,8 @@ X86
.. option:: -mclzero, -mno-clzero
.. option:: -mcrc32, -mno-crc32
.. option:: -mcx16, -mno-cx16
.. option:: -menqcmd, -mno-enqcmd

View File

@@ -421,9 +421,9 @@ TARGET_BUILTIN(__builtin_ia32_pcmpestrio128, "iV16ciV16ciIc","ncV:128:", "sse4.2
TARGET_BUILTIN(__builtin_ia32_pcmpestris128, "iV16ciV16ciIc","ncV:128:", "sse4.2")
TARGET_BUILTIN(__builtin_ia32_pcmpestriz128, "iV16ciV16ciIc","ncV:128:", "sse4.2")
TARGET_BUILTIN(__builtin_ia32_crc32qi, "UiUiUc", "nc", "sse4.2")
TARGET_BUILTIN(__builtin_ia32_crc32hi, "UiUiUs", "nc", "sse4.2")
TARGET_BUILTIN(__builtin_ia32_crc32si, "UiUiUi", "nc", "sse4.2")
TARGET_BUILTIN(__builtin_ia32_crc32qi, "UiUiUc", "nc", "crc32")
TARGET_BUILTIN(__builtin_ia32_crc32hi, "UiUiUs", "nc", "crc32")
TARGET_BUILTIN(__builtin_ia32_crc32si, "UiUiUi", "nc", "crc32")
// SSE4a
TARGET_BUILTIN(__builtin_ia32_extrqi, "V2OiV2OiIcIc", "ncV:128:", "sse4a")

View File

@@ -44,7 +44,7 @@ TARGET_BUILTIN(__builtin_ia32_cvttsd2si64, "OiV2d", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_movnti64, "vOi*Oi", "n", "sse2")
TARGET_BUILTIN(__builtin_ia32_vec_ext_v2di, "OiV2OiIi", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_vec_set_v2di, "V2OiV2OiOiIi", "ncV:128:", "sse4.1")
TARGET_BUILTIN(__builtin_ia32_crc32di, "UOiUOiUOi", "nc", "sse4.2")
TARGET_BUILTIN(__builtin_ia32_crc32di, "UOiUOiUOi", "nc", "crc32")
TARGET_BUILTIN(__builtin_ia32_vec_ext_v4di, "OiV4OiIi", "ncV:256:", "avx")
TARGET_BUILTIN(__builtin_ia32_vec_set_v4di, "V4OiV4OiOiIi", "ncV:256:", "avx")
TARGET_BUILTIN(__builtin_ia32_rdfsbase32, "Ui", "n", "fsgsbase")

View File

@@ -4206,6 +4206,8 @@ def mwbnoinvd : Flag<["-"], "mwbnoinvd">, Group<m_x86_Features_Group>;
def mno_wbnoinvd : Flag<["-"], "mno-wbnoinvd">, Group<m_x86_Features_Group>;
def mclzero : Flag<["-"], "mclzero">, Group<m_x86_Features_Group>;
def mno_clzero : Flag<["-"], "mno-clzero">, Group<m_x86_Features_Group>;
def mcrc32 : Flag<["-"], "mcrc32">, Group<m_x86_Features_Group>;
def mno_crc32 : Flag<["-"], "mno-crc32">, Group<m_x86_Features_Group>;
def mcx16 : Flag<["-"], "mcx16">, Group<m_x86_Features_Group>;
def mno_cx16 : Flag<["-"], "mno-cx16">, Group<m_x86_Features_Group>;
def menqcmd : Flag<["-"], "menqcmd">, Group<m_x86_Features_Group>;

View File

@@ -155,6 +155,12 @@ bool X86TargetInfo::initFeatureMap(
llvm::find(UpdatedFeaturesVec, "-xsave") == UpdatedFeaturesVec.end())
Features["xsave"] = true;
// Enable CRC32 if SSE4.2 is enabled and CRC32 is not explicitly disabled.
I = Features.find("sse4.2");
if (I != Features.end() && I->getValue() &&
llvm::find(UpdatedFeaturesVec, "-crc32") == UpdatedFeaturesVec.end())
Features["crc32"] = true;
return true;
}
@@ -330,6 +336,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasTSXLDTRK = true;
} else if (Feature == "+uintr") {
HasUINTR = true;
} else if (Feature == "+crc32") {
HasCRC32 = true;
}
X86SSEEnum Level = llvm::StringSwitch<X86SSEEnum>(Feature)
@@ -758,6 +766,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__TSXLDTRK__");
if (HasUINTR)
Builder.defineMacro("__UINTR__");
if (HasCRC32)
Builder.defineMacro("__CRC32__");
// Each case falls through to the previous one here.
switch (SSELevel) {
@@ -878,6 +888,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("clflushopt", true)
.Case("clwb", true)
.Case("clzero", true)
.Case("crc32", true)
.Case("cx16", true)
.Case("enqcmd", true)
.Case("f16c", true)
@@ -970,6 +981,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("clflushopt", HasCLFLUSHOPT)
.Case("clwb", HasCLWB)
.Case("clzero", HasCLZERO)
.Case("crc32", HasCRC32)
.Case("cx8", HasCX8)
.Case("cx16", HasCX16)
.Case("enqcmd", HasENQCMD)

View File

@@ -143,6 +143,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasSERIALIZE = false;
bool HasTSXLDTRK = false;
bool HasUINTR = false;
bool HasCRC32 = false;
protected:
llvm::X86::CPUKind CPU = llvm::X86::CK_None;

View File

@@ -58,6 +58,7 @@ set(files
cet.h
cldemoteintrin.h
clzerointrin.h
crc32intrin.h
cpuid.h
clflushoptintrin.h
clwbintrin.h

View File

@@ -0,0 +1,100 @@
/*===---- crc32intrin.h - SSE4.2 Accumulate CRC32 intrinsics ---------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __CRC32INTRIN_H
#define __CRC32INTRIN_H
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("crc32")))
/// Adds the unsigned integer operand to the CRC-32C checksum of the
/// unsigned char operand.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> CRC32B </c> instruction.
///
/// \param __C
/// An unsigned integer operand to add to the CRC-32C checksum of operand
/// \a __D.
/// \param __D
/// An unsigned 8-bit integer operand used to compute the CRC-32C checksum.
/// \returns The result of adding operand \a __C to the CRC-32C checksum of
/// operand \a __D.
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_mm_crc32_u8(unsigned int __C, unsigned char __D)
{
return __builtin_ia32_crc32qi(__C, __D);
}
/// Adds the unsigned integer operand to the CRC-32C checksum of the
/// unsigned short operand.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> CRC32W </c> instruction.
///
/// \param __C
/// An unsigned integer operand to add to the CRC-32C checksum of operand
/// \a __D.
/// \param __D
/// An unsigned 16-bit integer operand used to compute the CRC-32C checksum.
/// \returns The result of adding operand \a __C to the CRC-32C checksum of
/// operand \a __D.
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_mm_crc32_u16(unsigned int __C, unsigned short __D)
{
return __builtin_ia32_crc32hi(__C, __D);
}
/// Adds the first unsigned integer operand to the CRC-32C checksum of
/// the second unsigned integer operand.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> CRC32L </c> instruction.
///
/// \param __C
/// An unsigned integer operand to add to the CRC-32C checksum of operand
/// \a __D.
/// \param __D
/// An unsigned 32-bit integer operand used to compute the CRC-32C checksum.
/// \returns The result of adding operand \a __C to the CRC-32C checksum of
/// operand \a __D.
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_mm_crc32_u32(unsigned int __C, unsigned int __D)
{
return __builtin_ia32_crc32si(__C, __D);
}
#ifdef __x86_64__
/// Adds the unsigned integer operand to the CRC-32C checksum of the
/// unsigned 64-bit integer operand.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> CRC32Q </c> instruction.
///
/// \param __C
/// An unsigned integer operand to add to the CRC-32C checksum of operand
/// \a __D.
/// \param __D
/// An unsigned 64-bit integer operand used to compute the CRC-32C checksum.
/// \returns The result of adding operand \a __C to the CRC-32C checksum of
/// operand \a __D.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm_crc32_u64(unsigned long long __C, unsigned long long __D)
{
return __builtin_ia32_crc32di(__C, __D);
}
#endif /* __x86_64__ */
#undef __DEFAULT_FN_ATTRS
#endif /* __CRC32INTRIN_H */

View File

@@ -16,7 +16,7 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
#define __DEFAULT_FN_ATTRS_SSE42 __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
#define __DEFAULT_FN_ATTRS_CRC32 __attribute__((__always_inline__, __nodebug__, __target__("crc32")))
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS_CAST __attribute__((__always_inline__)) constexpr
@@ -282,7 +282,7 @@ _castu64_f64(unsigned long long __A) {
* \returns The result of adding operand \a __C to the CRC-32C checksum of
* operand \a __D.
*/
static __inline__ unsigned int __DEFAULT_FN_ATTRS_SSE42
static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32
__crc32b(unsigned int __C, unsigned char __D)
{
return __builtin_ia32_crc32qi(__C, __D);
@@ -303,7 +303,7 @@ __crc32b(unsigned int __C, unsigned char __D)
* \returns The result of adding operand \a __C to the CRC-32C checksum of
* operand \a __D.
*/
static __inline__ unsigned int __DEFAULT_FN_ATTRS_SSE42
static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32
__crc32w(unsigned int __C, unsigned short __D)
{
return __builtin_ia32_crc32hi(__C, __D);
@@ -324,7 +324,7 @@ __crc32w(unsigned int __C, unsigned short __D)
* \returns The result of adding operand \a __C to the CRC-32C checksum of
* operand \a __D.
*/
static __inline__ unsigned int __DEFAULT_FN_ATTRS_SSE42
static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32
__crc32d(unsigned int __C, unsigned int __D)
{
return __builtin_ia32_crc32si(__C, __D);
@@ -346,7 +346,7 @@ __crc32d(unsigned int __C, unsigned int __D)
* \returns The result of adding operand \a __C to the CRC-32C checksum of
* operand \a __D.
*/
static __inline__ unsigned long long __DEFAULT_FN_ATTRS_SSE42
static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CRC32
__crc32q(unsigned long long __C, unsigned long long __D)
{
return __builtin_ia32_crc32di(__C, __D);
@@ -435,7 +435,7 @@ __rorq(unsigned long long __X, int __C) {
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS_CAST
#undef __DEFAULT_FN_ATTRS_SSE42
#undef __DEFAULT_FN_ATTRS_CRC32
#undef __DEFAULT_FN_ATTRS_CONSTEXPR
#endif /* __IA32INTRIN_H */

View File

@@ -2338,91 +2338,10 @@ _mm_cmpgt_epi64(__m128i __V1, __m128i __V2)
return (__m128i)((__v2di)__V1 > (__v2di)__V2);
}
/* SSE4.2 Accumulate CRC32. */
/// Adds the unsigned integer operand to the CRC-32C checksum of the
/// unsigned char operand.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> CRC32B </c> instruction.
///
/// \param __C
/// An unsigned integer operand to add to the CRC-32C checksum of operand
/// \a __D.
/// \param __D
/// An unsigned 8-bit integer operand used to compute the CRC-32C checksum.
/// \returns The result of adding operand \a __C to the CRC-32C checksum of
/// operand \a __D.
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_mm_crc32_u8(unsigned int __C, unsigned char __D)
{
return __builtin_ia32_crc32qi(__C, __D);
}
/// Adds the unsigned integer operand to the CRC-32C checksum of the
/// unsigned short operand.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> CRC32W </c> instruction.
///
/// \param __C
/// An unsigned integer operand to add to the CRC-32C checksum of operand
/// \a __D.
/// \param __D
/// An unsigned 16-bit integer operand used to compute the CRC-32C checksum.
/// \returns The result of adding operand \a __C to the CRC-32C checksum of
/// operand \a __D.
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_mm_crc32_u16(unsigned int __C, unsigned short __D)
{
return __builtin_ia32_crc32hi(__C, __D);
}
/// Adds the first unsigned integer operand to the CRC-32C checksum of
/// the second unsigned integer operand.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> CRC32L </c> instruction.
///
/// \param __C
/// An unsigned integer operand to add to the CRC-32C checksum of operand
/// \a __D.
/// \param __D
/// An unsigned 32-bit integer operand used to compute the CRC-32C checksum.
/// \returns The result of adding operand \a __C to the CRC-32C checksum of
/// operand \a __D.
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_mm_crc32_u32(unsigned int __C, unsigned int __D)
{
return __builtin_ia32_crc32si(__C, __D);
}
#ifdef __x86_64__
/// Adds the unsigned integer operand to the CRC-32C checksum of the
/// unsigned 64-bit integer operand.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> CRC32Q </c> instruction.
///
/// \param __C
/// An unsigned integer operand to add to the CRC-32C checksum of operand
/// \a __D.
/// \param __D
/// An unsigned 64-bit integer operand used to compute the CRC-32C checksum.
/// \returns The result of adding operand \a __C to the CRC-32C checksum of
/// operand \a __D.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm_crc32_u64(unsigned long long __C, unsigned long long __D)
{
return __builtin_ia32_crc32di(__C, __D);
}
#endif /* __x86_64__ */
#undef __DEFAULT_FN_ATTRS
#include <popcntintrin.h>
#include <crc32intrin.h>
#endif /* __SMMINTRIN_H */

View File

@@ -20,6 +20,11 @@
#include <uintrintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__CRC32__)
#include <crc32intrin.h>
#endif
#define __SSC_MARK(Tag) \
__asm__ __volatile__("movl %%ebx, %%eax; movl %0, %%ebx; .byte 0x64, 0x67, " \
"0x90; movl %%eax, %%ebx;" ::"i"(Tag) \

View File

@@ -1,5 +1,7 @@
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK64
// RUN: %clang_cc1 -ffreestanding %s -triple=i686-apple-darwin -target-feature +sse4.2 -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +crc32 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK64
// RUN: %clang_cc1 -ffreestanding %s -triple=i686-apple-darwin -target-feature +crc32 -emit-llvm -o - -Wall -Werror | FileCheck %s
#include <x86intrin.h>
@@ -28,3 +30,29 @@ unsigned long long test__crc32q(unsigned long long CRC, unsigned long long V) {
return __crc32q(CRC, V);
}
#endif
unsigned int test_mm_crc32_u8(unsigned int CRC, unsigned char V) {
// CHECK-LABEL: test_mm_crc32_u8
// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}})
return _mm_crc32_u8(CRC, V);
}
unsigned int test_mm_crc32_u16(unsigned int CRC, unsigned short V) {
// CHECK-LABEL: test_mm_crc32_u16
// CHECK: call i32 @llvm.x86.sse42.crc32.32.16(i32 %{{.*}}, i16 %{{.*}})
return _mm_crc32_u16(CRC, V);
}
unsigned int test_mm_crc32_u32(unsigned int CRC, unsigned int V) {
// CHECK-LABEL: test_mm_crc32_u32
// CHECK: call i32 @llvm.x86.sse42.crc32.32.32(i32 %{{.*}}, i32 %{{.*}})
return _mm_crc32_u32(CRC, V);
}
#ifdef __x86_64__
unsigned long long test_mm_crc32_u64(unsigned long long CRC, unsigned long long V) {
// CHECK64-LABEL: test_mm_crc32_u64
// CHECK64: call i64 @llvm.x86.sse42.crc32.64.64(i64 %{{.*}}, i64 %{{.*}})
return _mm_crc32_u64(CRC, V);
}
#endif

View File

@@ -270,6 +270,6 @@ int DispatchFirst(void) {return 1;}
// WINDOWS: define dso_local i32 @DispatchFirst.B
// WINDOWS: ret i32 1
// CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+cx8,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+cx8,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
// CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+crc32,+cx8,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+crc32,+cx8,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
// CHECK: attributes #[[O]] = {{.*}}"target-features"="+cmov,+cx8,+mmx,+movbe,+sse,+sse2,+sse3,+ssse3,+x87"

View File

@@ -0,0 +1,55 @@
// Test crc32 target attribute on x86
// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s
// CHECK: define{{.*}} i32 @test1({{.*}}) [[TEST1_ATTRS:#[0-9]+]]
// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}})
#define __MM_MALLOC_H
#include <x86intrin.h>
unsigned int __attribute__((target("crc32"))) test1(unsigned int CRC, unsigned char V) {
return __builtin_ia32_crc32qi(CRC, V);
}
// CHECK: define{{.*}} i32 @test2({{.*}}) [[GPR_ONLY_ATTRS:#[0-9]+]]
// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}})
unsigned int __attribute__((target("general-regs-only,crc32"))) test2(unsigned int CRC, unsigned char V) {
return __builtin_ia32_crc32qi(CRC, V);
}
// CHECK: define{{.*}} i32 @test3({{.*}}) [[GPR_ONLY_ATTRS:#[0-9]+]]
// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}})
unsigned int __attribute__((target("crc32,general-regs-only"))) test3(unsigned int CRC, unsigned char V) {
return __builtin_ia32_crc32qi(CRC, V);
}
// CHECK: define{{.*}} i32 @test4({{.*}}) [[TEST4_ATTRS:#[0-9]+]]
// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}})
unsigned int __attribute__((target("sse4.2"))) test4(unsigned int CRC, unsigned char V) {
return __builtin_ia32_crc32qi(CRC, V);
}
// CHECK: define{{.*}} i32 @test5({{.*}}) [[GPR_ONLY_ATTRS:#[0-9]+]]
// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}})
unsigned int __attribute__((target("sse4.2,general-regs-only,crc32"))) test5(unsigned int CRC, unsigned char V) {
return __builtin_ia32_crc32qi(CRC, V);
}
// CHECK: define{{.*}} i32 @test6({{.*}}) [[TEST4_ATTRS:#[0-9]+]]
// CHECK: call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %{{.*}}, i32 %{{.*}}, <16 x i8> %{{.*}}, i32 %{{.*}}, i8 7)
int __attribute__((target("sse4.2,no-crc32,crc32"))) test6(__m128i A, int LA, __m128i B, int LB) {
return _mm_cmpestra(A, LA, B, LB, 7);
}
// CHECK: define{{.*}} i32 @test7({{.*}}) [[TEST4_ATTRS:#[0-9]+]]
// CHECK: call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %{{.*}}, i32 %{{.*}}, <16 x i8> %{{.*}}, i32 %{{.*}}, i8 7)
int __attribute__((target("no-crc32,crc32,sse4.2"))) test7(__m128i A, int LA, __m128i B, int LB) {
return _mm_cmpestra(A, LA, B, LB, 7);
}
// CHECK: attributes [[TEST1_ATTRS]] = { {{.*}} "target-features"="{{.*}}+crc32{{.*}}"
// CHECK: attributes [[GPR_ONLY_ATTRS]] = { {{.*}} "target-features"="{{.*}}+crc32{{.*}}-avx{{.*}}-avx2{{.*}}-avx512f{{.*}}-sse{{.*}}-sse2{{.*}}-ssse3{{.*}}-x87{{.*}}"
// CHECK: attributes [[TEST4_ATTRS]] = { {{.*}} "target-features"="{{.*}}+crc32{{.*}}+sse4.2{{.*}}"

View File

@@ -52,12 +52,12 @@ void __attribute__((target("arch=x86-64-v4"))) x86_64_v4() {}
// CHECK: use_before_def{{.*}} #7
// CHECK: walrus{{.*}} #8
// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87" "tune-cpu"="i686"
// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
// CHECK-NOT: tune-cpu
// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686"
// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686"
// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes"
// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes"
// CHECK-NOT: tune-cpu
// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-3dnow,-3dnowa,-mmx"
// CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx"
@@ -65,8 +65,8 @@ void __attribute__((target("arch=x86-64-v4"))) x86_64_v4() {}
// CHECK: #8 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87" "tune-cpu"="sandybridge"
// CHECK: "target-cpu"="x86-64-v2"
// CHECK-SAME: "target-features"="+cx16,+cx8,+fxsr,+mmx,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
// CHECK-SAME: "target-features"="+crc32,+cx16,+cx8,+fxsr,+mmx,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
// CHECK: "target-cpu"="x86-64-v3"
// CHECK-SAME: "target-features"="+avx,+avx2,+bmi,+bmi2,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
// CHECK-SAME: "target-features"="+avx,+avx2,+bmi,+bmi2,+crc32,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
// CHECK: "target-cpu"="x86-64-v4"
// CHECK-SAME: "target-features"="+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
// CHECK-SAME: "target-features"="+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+crc32,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"

View File

@@ -0,0 +1,41 @@
// Test interaction between -mcrc32 and other SIMD ISA options on x86
// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s
// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s
// RUN: %clang -target i386-unknown-linux-gnu -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s
// RUN: %clang -target x86_64-unknown-linux-gnu -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s
// RUN: %clang -target i386-unknown-linux-gnu -msse4.2 -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s
// RUN: %clang -target x86_64-unknown-linux-gnu -msse4.2 -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s
// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s
// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s
// RUN: not %clang -target i386-unknown-linux-gnu -mno-crc32 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
// RUN: not %clang -target x86_64-unknown-linux-gnu -mno-crc32 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
// RUN: not %clang -target i386-unknown-linux-gnu -msse4.2 -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
// RUN: not %clang -target x86_64-unknown-linux-gnu -msse4.2 -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
// RUN: not %clang -target i386-unknown-linux-gnu -mcrc32 -mno-crc32 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
// RUN: not %clang -target x86_64-unknown-linux-gnu -mcrc32 -mno-crc32 -msse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
// RUN: not %clang -target i386-unknown-linux-gnu -mcrc32 -msse4.2 -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
// RUN: not %clang -target x86_64-unknown-linux-gnu -mcrc32 -msse4.2 -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -mno-sse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s
// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -mno-sse4.2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s
// RUN: %clang -target i386-unknown-linux-gnu -mno-sse4.2 -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s
// RUN: %clang -target x86_64-unknown-linux-gnu -mno-sse4.2 -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-CRC32 %s
unsigned int test__crc32b(unsigned int CRC, unsigned char V) {
// CHECK-LABEL: test__crc32b
// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}})
return __builtin_ia32_crc32qi(CRC, V);
}
// ERROR: error: '__builtin_ia32_crc32qi' needs target feature crc32
// IR-CRC32: attributes {{.*}} = { {{.*}} "target-features"="{{.*}}+crc32{{.*}}"

View File

@@ -0,0 +1,54 @@
// Test the -mgeneral-regs-only with -mcrc32 option on x86
// RUN: %clang -target i386-unknown-linux-gnu -mgeneral-regs-only %s -### 2>&1 | FileCheck --check-prefix=CMD %s
// RUN: %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only %s -### 2>&1 | FileCheck --check-prefix=CMD %s
// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -mavx2 -mgeneral-regs-only %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-BEFORE %s
// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -mavx2 -mgeneral-regs-only %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-BEFORE %s
// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -mgeneral-regs-only -mavx2 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-BEFORE %s
// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -mgeneral-regs-only -mavx2 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-BEFORE %s
// RUN: %clang -target i386-unknown-linux-gnu -mavx2 -mgeneral-regs-only -mcrc32 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-AFTER %s
// RUN: %clang -target x86_64-unknown-linux-gnu -mavx2 -mgeneral-regs-only -mcrc32 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-AFTER %s
// RUN: %clang -target i386-unknown-linux-gnu -mgeneral-regs-only -mavx2 -mcrc32 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-AFTER %s
// RUN: %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only -mavx2 -mcrc32 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-AFTER %s
// RUN: not %clang -target i386-unknown-linux-gnu -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
// RUN: not %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
// RUN: not %clang -target i386-unknown-linux-gnu -mgeneral-regs-only -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
// RUN: not %clang -target x86_64-unknown-linux-gnu -msse4.2 -mgeneral-regs-only -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
// RUN: not %clang -target i386-unknown-linux-gnu -msse4.2 -mgeneral-regs-only -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
// RUN: not %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
// RUN: not %clang -target i386-unknown-linux-gnu -msse4.2 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
// RUN: not %clang -target x86_64-unknown-linux-gnu -msse4.2 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
// RUN: %clang -target i386-unknown-linux-gnu -msse4.2 -mgeneral-regs-only -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s
// RUN: %clang -target x86_64-unknown-linux-gnu -msse4.2 -mgeneral-regs-only -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s
// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s
// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s
// RUN: %clang -target i386-unknown-linux-gnu -mgeneral-regs-only -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s
// RUN: %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s
// RUN: not %clang -target i386-unknown-linux-gnu -mavx2 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
// RUN: not %clang -target x86_64-unknown-linux-gnu -mavx2 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
// RUN: %clang -target i386-unknown-linux-gnu -mavx2 -mgeneral-regs-only -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s
// RUN: %clang -target x86_64-unknown-linux-gnu -mavx2 -mgeneral-regs-only -S -mcrc32 -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s
// RUN: %clang -target i386-unknown-linux-gnu -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-AVX2 %s
// RUN: %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-AVX2 %s
// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-AVX2 %s
// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-AVX2 %s
// RUN: not %clang -target i386-unknown-linux-gnu -mno-crc32 -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
// RUN: not %clang -target x86_64-unknown-linux-gnu -mno-crc32 -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
// CMD-BEFORE: "-target-feature" "+crc32"
// CMD: "-target-feature" "-x87"
// CMD: "-target-feature" "-mmx"
// CMD: "-target-feature" "-sse"
// CMD-AFTER: "-target-feature" "+crc32"
unsigned int test__crc32b(unsigned int CRC, unsigned char V) {
// CHECK-LABEL: test__crc32b
// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}})
return __builtin_ia32_crc32qi(CRC, V);
}
// ERROR: error: '__builtin_ia32_crc32qi' needs target feature crc32
// IR-GPR: attributes {{.*}} = { {{.*}} "target-features"="{{.*}}+crc32{{.*}}-avx{{.*}}-avx2{{.*}}-avx512f{{.*}}-sse{{.*}}-sse2{{.*}}-ssse3{{.*}}-x87{{.*}}"
// IR-AVX2: attributes {{.*}} = { {{.*}} "target-features"="{{.*}}+avx{{.*}}+avx2{{.*}}+crc32{{.*}}+sse{{.*}}+sse2{{.*}}+ssse3{{.*}}-avx512f{{.*}}-x87{{.*}}"

View File

@@ -298,3 +298,8 @@
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx512fp16 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-AVX512FP16 %s
// AVX512FP16: "-target-feature" "+avx512fp16"
// NO-AVX512FP16: "-target-feature" "-avx512fp16"
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mcrc32 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=CRC32 %s
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-crc32 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-CRC32 %s
// CRC32: "-target-feature" "+crc32"
// NO-CRC32: "-target-feature" "-crc32"

View File

@@ -580,3 +580,11 @@
// AVX512FP16NOAVX512DQ-NOT: #define __AVX512DQ__ 1
// AVX512FP16NOAVX512DQ-NOT: #define __AVX512FP16__ 1
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mcrc32 -x c -E -dM -o - %s | FileCheck -check-prefix=CRC32 %s
// CRC32: #define __CRC32__ 1
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-crc32 -x c -E -dM -o - %s | FileCheck -check-prefix=NOCRC32 %s
// NOCRC32-NOT: #define __CRC32__ 1

View File

@@ -158,6 +158,7 @@ X86_FEATURE (CLWB, "clwb")
X86_FEATURE (CLZERO, "clzero")
X86_FEATURE (CMPXCHG16B, "cx16")
X86_FEATURE (CMPXCHG8B, "cx8")
X86_FEATURE (CRC32, "crc32")
X86_FEATURE (ENQCMD, "enqcmd")
X86_FEATURE (F16C, "f16c")
X86_FEATURE (FSGSBASE, "fsgsbase")

View File

@@ -1071,8 +1071,10 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(X86::FEATURE_FMA);
if ((ECX >> 19) & 1)
setFeature(X86::FEATURE_SSE4_1);
if ((ECX >> 20) & 1)
if ((ECX >> 20) & 1) {
setFeature(X86::FEATURE_SSE4_2);
setFeature(X86::FEATURE_CRC32);
}
if ((ECX >> 23) & 1)
setFeature(X86::FEATURE_POPCNT);
if ((ECX >> 25) & 1)
@@ -1518,6 +1520,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["cx16"] = (ECX >> 13) & 1;
Features["sse4.1"] = (ECX >> 19) & 1;
Features["sse4.2"] = (ECX >> 20) & 1;
Features["crc32"] = Features["sse4.2"];
Features["movbe"] = (ECX >> 22) & 1;
Features["popcnt"] = (ECX >> 23) & 1;
Features["aes"] = (ECX >> 25) & 1;

View File

@@ -139,8 +139,8 @@ constexpr FeatureBitset FeaturesNocona =
// Basic 64-bit capable CPU.
constexpr FeatureBitset FeaturesX86_64 = FeaturesPentium4 | Feature64BIT;
constexpr FeatureBitset FeaturesX86_64_V2 = FeaturesX86_64 | FeatureSAHF |
FeaturePOPCNT | FeatureSSE4_2 |
FeatureCMPXCHG16B;
FeaturePOPCNT | FeatureCRC32 |
FeatureSSE4_2 | FeatureCMPXCHG16B;
constexpr FeatureBitset FeaturesX86_64_V3 =
FeaturesX86_64_V2 | FeatureAVX2 | FeatureBMI | FeatureBMI2 | FeatureF16C |
FeatureFMA | FeatureLZCNT | FeatureMOVBE | FeatureXSAVE;
@@ -153,7 +153,7 @@ constexpr FeatureBitset FeaturesCore2 =
FeaturesNocona | FeatureSAHF | FeatureSSSE3;
constexpr FeatureBitset FeaturesPenryn = FeaturesCore2 | FeatureSSE4_1;
constexpr FeatureBitset FeaturesNehalem =
FeaturesPenryn | FeaturePOPCNT | FeatureSSE4_2;
FeaturesPenryn | FeaturePOPCNT | FeatureCRC32 | FeatureSSE4_2;
constexpr FeatureBitset FeaturesWestmere = FeaturesNehalem | FeaturePCLMUL;
constexpr FeatureBitset FeaturesSandyBridge =
FeaturesWestmere | FeatureAVX | FeatureXSAVE | FeatureXSAVEOPT;
@@ -256,16 +256,17 @@ constexpr FeatureBitset FeaturesBTVER1 =
FeatureSSE | FeatureSSE2 | FeatureSSE3 | FeatureSSSE3 | FeatureSSE4_A |
FeatureSAHF;
constexpr FeatureBitset FeaturesBTVER2 =
FeaturesBTVER1 | FeatureAES | FeatureAVX | FeatureBMI | FeatureF16C |
FeatureMOVBE | FeaturePCLMUL | FeatureXSAVE | FeatureXSAVEOPT;
FeaturesBTVER1 | FeatureAES | FeatureAVX | FeatureBMI | FeatureCRC32 |
FeatureF16C | FeatureMOVBE | FeaturePCLMUL | FeatureXSAVE | FeatureXSAVEOPT;
// AMD Bulldozer architecture processors.
constexpr FeatureBitset FeaturesBDVER1 =
FeatureX87 | FeatureAES | FeatureAVX | FeatureCMPXCHG8B |
FeatureCMPXCHG16B | Feature64BIT | FeatureFMA4 | FeatureFXSR | FeatureLWP |
FeatureLZCNT | FeatureMMX | FeaturePCLMUL | FeaturePOPCNT | FeaturePRFCHW |
FeatureSAHF | FeatureSSE | FeatureSSE2 | FeatureSSE3 | FeatureSSSE3 |
FeatureSSE4_1 | FeatureSSE4_2 | FeatureSSE4_A | FeatureXOP | FeatureXSAVE;
FeatureCMPXCHG16B | FeatureCRC32 | Feature64BIT | FeatureFMA4 |
FeatureFXSR | FeatureLWP | FeatureLZCNT | FeatureMMX | FeaturePCLMUL |
FeaturePOPCNT | FeaturePRFCHW | FeatureSAHF | FeatureSSE | FeatureSSE2 |
FeatureSSE3 | FeatureSSSE3 | FeatureSSE4_1 | FeatureSSE4_2 | FeatureSSE4_A |
FeatureXOP | FeatureXSAVE;
constexpr FeatureBitset FeaturesBDVER2 =
FeaturesBDVER1 | FeatureBMI | FeatureFMA | FeatureF16C | FeatureTBM;
constexpr FeatureBitset FeaturesBDVER3 =
@@ -278,9 +279,9 @@ constexpr FeatureBitset FeaturesBDVER4 = FeaturesBDVER3 | FeatureAVX2 |
constexpr FeatureBitset FeaturesZNVER1 =
FeatureX87 | FeatureADX | FeatureAES | FeatureAVX | FeatureAVX2 |
FeatureBMI | FeatureBMI2 | FeatureCLFLUSHOPT | FeatureCLZERO |
FeatureCMPXCHG8B | FeatureCMPXCHG16B | Feature64BIT | FeatureF16C |
FeatureFMA | FeatureFSGSBASE | FeatureFXSR | FeatureLZCNT | FeatureMMX |
FeatureMOVBE | FeatureMWAITX | FeaturePCLMUL | FeaturePOPCNT |
FeatureCMPXCHG8B | FeatureCMPXCHG16B | FeatureCRC32 | Feature64BIT |
FeatureF16C | FeatureFMA | FeatureFSGSBASE | FeatureFXSR | FeatureLZCNT |
FeatureMMX | FeatureMOVBE | FeatureMWAITX | FeaturePCLMUL | FeaturePOPCNT |
FeaturePRFCHW | FeatureRDRND | FeatureRDSEED | FeatureSAHF | FeatureSHA |
FeatureSSE | FeatureSSE2 | FeatureSSE3 | FeatureSSSE3 | FeatureSSE4_1 |
FeatureSSE4_2 | FeatureSSE4_A | FeatureXSAVE | FeatureXSAVEC |
@@ -472,6 +473,7 @@ constexpr FeatureBitset ImpliedFeaturesCLZERO = {};
constexpr FeatureBitset ImpliedFeaturesCMOV = {};
constexpr FeatureBitset ImpliedFeaturesCMPXCHG16B = {};
constexpr FeatureBitset ImpliedFeaturesCMPXCHG8B = {};
constexpr FeatureBitset ImpliedFeaturesCRC32 = {};
constexpr FeatureBitset ImpliedFeaturesENQCMD = {};
constexpr FeatureBitset ImpliedFeaturesFSGSBASE = {};
constexpr FeatureBitset ImpliedFeaturesFXSR = {};

View File

@@ -42,6 +42,9 @@ def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
def FeatureCMPXCHG8B : SubtargetFeature<"cx8", "HasCmpxchg8b", "true",
"Support CMPXCHG8B instructions">;
def FeatureCRC32 : SubtargetFeature<"crc32", "HasCRC32", "true",
"Enable SSE 4.2 CRC32 instruction">;
def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
"Support POPCNT instruction">;
@@ -624,9 +627,10 @@ def ProcessorFeatures {
FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSE2,
FeatureFXSR, FeatureNOPL, Feature64Bit
];
list<SubtargetFeature> X86_64V2Features = !listconcat(
X86_64V1Features,
[FeatureCMPXCHG16B, FeatureLAHFSAHF, FeaturePOPCNT, FeatureSSE42]);
list<SubtargetFeature> X86_64V2Features = !listconcat(X86_64V1Features, [
FeatureCMPXCHG16B, FeatureLAHFSAHF, FeatureCRC32, FeaturePOPCNT,
FeatureSSE42
]);
list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [
FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA, FeatureLZCNT,
FeatureMOVBE, FeatureXSAVE
@@ -867,6 +871,7 @@ def ProcessorFeatures {
// Silvermont
list<SubtargetFeature> SLMAdditionalFeatures = [FeatureSSE42,
FeatureCRC32,
FeaturePOPCNT,
FeaturePCLMUL,
FeaturePRFCHW,
@@ -957,6 +962,7 @@ def ProcessorFeatures {
FeatureNOPL,
Feature64Bit,
FeatureCMPXCHG16B,
FeatureCRC32,
FeaturePOPCNT,
FeaturePCLMUL,
FeatureXSAVE,
@@ -1033,6 +1039,7 @@ def ProcessorFeatures {
// Jaguar
list<SubtargetFeature> BtVer2AdditionalFeatures = [FeatureAVX,
FeatureAES,
FeatureCRC32,
FeaturePCLMUL,
FeatureBMI,
FeatureF16C,
@@ -1058,6 +1065,7 @@ def ProcessorFeatures {
Feature64Bit,
FeatureCMPXCHG16B,
FeatureAES,
FeatureCRC32,
FeaturePRFCHW,
FeaturePCLMUL,
FeatureMMX,
@@ -1115,6 +1123,7 @@ def ProcessorFeatures {
FeatureCMOV,
Feature64Bit,
FeatureCMPXCHG16B,
FeatureCRC32,
FeatureF16C,
FeatureFMA,
FeatureFSGSBase,

View File

@@ -750,18 +750,19 @@ class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[UseSSE42]>;
// SS42FI - SSE 4.2 instructions with T8XD prefix.
// NOTE: 'HasSSE42' is used as SS42FI is only used for CRC32 insns.
class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, T8XD, Requires<[HasSSE42]>;
// SS42AI = SSE 4.2 instructions with TA prefix
class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[UseSSE42]>;
// CRC32I - SSE 4.2 CRC32 instructions.
// NOTE: 'HasCRC32' is used as CRC32 instructions are GPR only and not directly
// controlled by the SSE42 flag.
class CRC32I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, T8XD, Requires<[HasCRC32]>;
// AVX Instruction Templates:
// Instructions introduced in AVX (no SSE equivalent forms)
//

View File

@@ -995,6 +995,7 @@ def HasAMXTILE : Predicate<"Subtarget->hasAMXTILE()">;
def HasAMXBF16 : Predicate<"Subtarget->hasAMXBF16()">;
def HasAMXINT8 : Predicate<"Subtarget->hasAMXINT8()">;
def HasUINTR : Predicate<"Subtarget->hasUINTR()">;
def HasCRC32 : Predicate<"Subtarget->hasCRC32()">;
def Not64BitMode : Predicate<"!Subtarget->is64Bit()">,
AssemblerPredicate<(all_of (not Mode64Bit)), "Not 64-bit mode">;
def In64BitMode : Predicate<"Subtarget->is64Bit()">,

View File

@@ -6588,14 +6588,14 @@ let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
// of r and m.
class SS42I_crc32r<bits<8> opc, string asm, RegisterClass RCOut,
RegisterClass RCIn, SDPatternOperator Int> :
SS42FI<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2),
CRC32I<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2),
!strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
[(set RCOut:$dst, (Int RCOut:$src1, RCIn:$src2))]>,
Sched<[WriteCRC32]>;
class SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut,
X86MemOperand x86memop, SDPatternOperator Int> :
SS42FI<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
CRC32I<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
!strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
[(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))]>,
Sched<[WriteCRC32.Folded, WriteCRC32.ReadAfterFold]>;

View File

@@ -428,6 +428,10 @@ class X86Subtarget final : public X86GenSubtargetInfo {
/// Processor supports User Level Interrupt instructions
bool HasUINTR = false;
/// Enable SSE4.2 CRC32 instruction (Used when SSE4.2 is supported but
/// function is GPR only)
bool HasCRC32 = false;
/// Processor has a single uop BEXTR implementation.
bool HasFastBEXTR = false;
@@ -767,6 +771,7 @@ public:
bool hasSERIALIZE() const { return HasSERIALIZE; }
bool hasTSXLDTRK() const { return HasTSXLDTRK; }
bool hasUINTR() const { return HasUINTR; }
bool hasCRC32() const { return HasCRC32; }
bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; }
bool useRetpolineIndirectBranches() const {
return UseRetpolineIndirectBranches;

View File

@@ -1,7 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+crc32 | FileCheck %s
; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse42-builtins.c

View File

@@ -0,0 +1,54 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s --check-prefixes=X64
define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
; X86-LABEL: crc32_32_8:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: crc32b {{[0-9]+}}(%esp), %eax ## encoding: [0xf2,0x0f,0x38,0xf0,0x44,0x24,0x08]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: crc32_32_8:
; X64: ## %bb.0:
; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
; X64-NEXT: crc32b %sil, %eax ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6]
; X64-NEXT: retq ## encoding: [0xc3]
%tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
ret i32 %tmp
}
declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
; X86-LABEL: crc32_32_16:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: crc32w {{[0-9]+}}(%esp), %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0x44,0x24,0x08]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: crc32_32_16:
; X64: ## %bb.0:
; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
; X64-NEXT: crc32w %si, %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6]
; X64-NEXT: retq ## encoding: [0xc3]
%tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
ret i32 %tmp
}
declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
; X86-LABEL: crc32_32_32:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: crc32l {{[0-9]+}}(%esp), %eax ## encoding: [0xf2,0x0f,0x38,0xf1,0x44,0x24,0x08]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: crc32_32_32:
; X64: ## %bb.0:
; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
; X64-NEXT: crc32l %esi, %eax ## encoding: [0xf2,0x0f,0x38,0xf1,0xc6]
; X64-NEXT: retq ## encoding: [0xc3]
%tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
ret i32 %tmp
}
declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind

View File

@@ -1,7 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx,+sse4.2 -show-mc-encoding | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s
declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind
declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind
@@ -25,4 +23,3 @@ define i64 @crc32_64_64(i64 %a, i64 %b) nounwind {
%tmp = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a, i64 %b)
ret i64 %tmp
}

View File

@@ -0,0 +1,29 @@
; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
define i32 @test1(i32 %a, i8 %b) nounwind #0 {
; CHECK-LABEL: test1:
; CHECK: crc32b
%tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
ret i32 %tmp
}
define i32 @test2(i32 %a, i8 %b) nounwind #1 {
; CHECK-LABEL: test2:
; CHECK: crc32b
%tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
ret i32 %tmp
}
define i32 @test3(i32 %a, i8 %b) nounwind #2 {
; CHECK-LABEL: test3:
; CHECK: crc32b
%tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
ret i32 %tmp
}
declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
attributes #0 = { "target-features"="+crc32" }
attributes #1 = { "target-features"="+cx8,+fxsr,-3dnow,-3dnowa,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-mmx,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-x87,-xop,+crc32" }
attributes #2 = { "target-features"="+crc32,+cx8,+fxsr,-3dnow,-3dnowa,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-mmx,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-x87,-xop" }

View File

@@ -78,4 +78,4 @@ declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8)
attributes #0 = { "target-cpu"="x86-64" "target-features"="+avx2" }
attributes #1 = { "target-cpu"="x86-64" }
attributes #2 = { "target-cpu"="corei7" "target-features"="+sse4.2" }
attributes #3 = { "target-cpu"="x86-64" "target-features"="+avx2,+aes" }
attributes #3 = { "target-cpu"="x86-64" "target-features"="+avx2,+aes,+crc32" }

View File

@@ -1,10 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=X86,SSE,X86-SSE
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86,AVX,X86-AVX,AVX1
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X86,AVX,X86-AVX,AVX512
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=X64,SSE,X64-SSE
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64,AVX,X64-AVX,AVX1
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64,AVX,X64-AVX,AVX512
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,X86-SSE
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse4.2,-crc32 | FileCheck %s --check-prefixes=SSE,X86-SSE
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,X86-AVX,AVX1
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX,X86-AVX,AVX512
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,X64-SSE
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse4.2,-crc32 | FileCheck %s --check-prefixes=SSE,X64-SSE
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,X64-AVX,AVX1
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX,X64-AVX,AVX512
; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse42-builtins.c
@@ -500,54 +502,3 @@ define i32 @test_mm_cmpistrz(<2 x i64> %a0, <2 x i64> %a1) {
ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone
define i32 @test_mm_crc32_u8(i32 %a0, i8 %a1) {
; X86-LABEL: test_mm_crc32_u8:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: crc32b {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: test_mm_crc32_u8:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: crc32b %sil, %eax
; X64-NEXT: retq
%res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %a1)
ret i32 %res
}
declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind readnone
define i32 @test_mm_crc32_u16(i32 %a0, i16 %a1) {
; X86-LABEL: test_mm_crc32_u16:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: crc32w {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: test_mm_crc32_u16:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: crc32w %si, %eax
; X64-NEXT: retq
%res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %a1)
ret i32 %res
}
declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind readnone
define i32 @test_mm_crc32_u32(i32 %a0, i32 %a1) {
; X86-LABEL: test_mm_crc32_u32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: crc32l {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: test_mm_crc32_u32:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: crc32l %esi, %eax
; X64-NEXT: retq
%res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1)
ret i32 %res
}
declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind readnone

View File

@@ -1,10 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+sse4.2 -show-mc-encoding | FileCheck %s --check-prefixes=X86,SSE,X86-SSE
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=X86,AVX,X86-AVX,X86-AVX1
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=X86,AVX,X86-AVX,X86-AVX512
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+sse4.2 -show-mc-encoding | FileCheck %s --check-prefixes=X64,SSE,X64-SSE
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=X64,AVX,X64-AVX,X64-AVX1
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=X64,AVX,X64-AVX,X64-AVX512
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+sse4.2 -show-mc-encoding | FileCheck %s --check-prefixes=SSE,X86-SSE
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+sse4.2,-crc32 -show-mc-encoding | FileCheck %s --check-prefixes=SSE,X86-SSE
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=AVX,X86-AVX,X86-AVX1
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=AVX,X86-AVX,X86-AVX512
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+sse4.2 -show-mc-encoding | FileCheck %s --check-prefixes=SSE,X64-SSE
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+sse4.2,-crc32 -show-mc-encoding | FileCheck %s --check-prefixes=SSE,X64-SSE
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=AVX,X64-AVX,X64-AVX1
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=AVX,X64-AVX,X64-AVX512
define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) {
; SSE-LABEL: test_x86_sse42_pcmpestri128:
@@ -616,54 +618,3 @@ define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1
%res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
}
define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
; X86-LABEL: crc32_32_8:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: crc32b {{[0-9]+}}(%esp), %eax ## encoding: [0xf2,0x0f,0x38,0xf0,0x44,0x24,0x08]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: crc32_32_8:
; X64: ## %bb.0:
; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
; X64-NEXT: crc32b %sil, %eax ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6]
; X64-NEXT: retq ## encoding: [0xc3]
%tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
ret i32 %tmp
}
declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
; X86-LABEL: crc32_32_16:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: crc32w {{[0-9]+}}(%esp), %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0x44,0x24,0x08]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: crc32_32_16:
; X64: ## %bb.0:
; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
; X64-NEXT: crc32w %si, %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6]
; X64-NEXT: retq ## encoding: [0xc3]
%tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
ret i32 %tmp
}
declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
; X86-LABEL: crc32_32_32:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: crc32l {{[0-9]+}}(%esp), %eax ## encoding: [0xf2,0x0f,0x38,0xf1,0x44,0x24,0x08]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: crc32_32_32:
; X64: ## %bb.0:
; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
; X64-NEXT: crc32l %esi, %eax ## encoding: [0xf2,0x0f,0x38,0xf1,0xc6]
; X64-NEXT: retq ## encoding: [0xc3]
%tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
ret i32 %tmp
}
declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind

View File

@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.2,+aes,+pclmul < %s | FileCheck %s
; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.2,+aes,+crc32,+pclmul < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"