[OpenCL] Add cl_khr_extended_bit_ops

Add the builtins defined by Section 40 "Extended Bit Operations" in
the OpenCL Extension Specification.

Differential Revision: https://reviews.llvm.org/D106267
This commit is contained in:
Sven van Haastregt
2021-07-21 10:01:19 +01:00
parent e22a599672
commit 724f0e2abb
4 changed files with 218 additions and 0 deletions

View File

@@ -21,6 +21,7 @@
#define cl_khr_subgroup_shuffle 1
#define cl_khr_subgroup_shuffle_relative 1
#define cl_khr_subgroup_clustered_reduce 1
// Extension macro for cl_khr_extended_bit_ops; defined to 1 alongside the other
// cl_khr_* macros in this group.
#define cl_khr_extended_bit_ops 1
#endif // defined(__SPIR__)
#endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)

View File

@@ -16051,6 +16051,206 @@ double __ovld sub_group_clustered_reduce_max( double value, uint clustersize );
#endif // cl_khr_subgroup_clustered_reduce
#if defined(cl_khr_extended_bit_ops)
// bitfield_insert: one overload for each integer element type (char, uchar,
// short, ushort, int, uint, long, ulong) as a scalar and as a 2-, 3-, 4-, 8-
// and 16-element vector. The result and both data operands share one type;
// the two trailing operands are always scalar uint.
// Parameter names follow the cl_khr_extended_bit_ops specification.
char __ovld __cnfn bitfield_insert(char base, char insert, uint offset, uint count);
uchar __ovld __cnfn bitfield_insert(uchar base, uchar insert, uint offset, uint count);
short __ovld __cnfn bitfield_insert(short base, short insert, uint offset, uint count);
ushort __ovld __cnfn bitfield_insert(ushort base, ushort insert, uint offset, uint count);
int __ovld __cnfn bitfield_insert(int base, int insert, uint offset, uint count);
uint __ovld __cnfn bitfield_insert(uint base, uint insert, uint offset, uint count);
long __ovld __cnfn bitfield_insert(long base, long insert, uint offset, uint count);
ulong __ovld __cnfn bitfield_insert(ulong base, ulong insert, uint offset, uint count);
char2 __ovld __cnfn bitfield_insert(char2 base, char2 insert, uint offset, uint count);
uchar2 __ovld __cnfn bitfield_insert(uchar2 base, uchar2 insert, uint offset, uint count);
short2 __ovld __cnfn bitfield_insert(short2 base, short2 insert, uint offset, uint count);
ushort2 __ovld __cnfn bitfield_insert(ushort2 base, ushort2 insert, uint offset, uint count);
int2 __ovld __cnfn bitfield_insert(int2 base, int2 insert, uint offset, uint count);
uint2 __ovld __cnfn bitfield_insert(uint2 base, uint2 insert, uint offset, uint count);
long2 __ovld __cnfn bitfield_insert(long2 base, long2 insert, uint offset, uint count);
ulong2 __ovld __cnfn bitfield_insert(ulong2 base, ulong2 insert, uint offset, uint count);
char3 __ovld __cnfn bitfield_insert(char3 base, char3 insert, uint offset, uint count);
uchar3 __ovld __cnfn bitfield_insert(uchar3 base, uchar3 insert, uint offset, uint count);
short3 __ovld __cnfn bitfield_insert(short3 base, short3 insert, uint offset, uint count);
ushort3 __ovld __cnfn bitfield_insert(ushort3 base, ushort3 insert, uint offset, uint count);
int3 __ovld __cnfn bitfield_insert(int3 base, int3 insert, uint offset, uint count);
uint3 __ovld __cnfn bitfield_insert(uint3 base, uint3 insert, uint offset, uint count);
long3 __ovld __cnfn bitfield_insert(long3 base, long3 insert, uint offset, uint count);
ulong3 __ovld __cnfn bitfield_insert(ulong3 base, ulong3 insert, uint offset, uint count);
char4 __ovld __cnfn bitfield_insert(char4 base, char4 insert, uint offset, uint count);
uchar4 __ovld __cnfn bitfield_insert(uchar4 base, uchar4 insert, uint offset, uint count);
short4 __ovld __cnfn bitfield_insert(short4 base, short4 insert, uint offset, uint count);
ushort4 __ovld __cnfn bitfield_insert(ushort4 base, ushort4 insert, uint offset, uint count);
int4 __ovld __cnfn bitfield_insert(int4 base, int4 insert, uint offset, uint count);
uint4 __ovld __cnfn bitfield_insert(uint4 base, uint4 insert, uint offset, uint count);
long4 __ovld __cnfn bitfield_insert(long4 base, long4 insert, uint offset, uint count);
ulong4 __ovld __cnfn bitfield_insert(ulong4 base, ulong4 insert, uint offset, uint count);
char8 __ovld __cnfn bitfield_insert(char8 base, char8 insert, uint offset, uint count);
uchar8 __ovld __cnfn bitfield_insert(uchar8 base, uchar8 insert, uint offset, uint count);
short8 __ovld __cnfn bitfield_insert(short8 base, short8 insert, uint offset, uint count);
ushort8 __ovld __cnfn bitfield_insert(ushort8 base, ushort8 insert, uint offset, uint count);
int8 __ovld __cnfn bitfield_insert(int8 base, int8 insert, uint offset, uint count);
uint8 __ovld __cnfn bitfield_insert(uint8 base, uint8 insert, uint offset, uint count);
long8 __ovld __cnfn bitfield_insert(long8 base, long8 insert, uint offset, uint count);
ulong8 __ovld __cnfn bitfield_insert(ulong8 base, ulong8 insert, uint offset, uint count);
char16 __ovld __cnfn bitfield_insert(char16 base, char16 insert, uint offset, uint count);
uchar16 __ovld __cnfn bitfield_insert(uchar16 base, uchar16 insert, uint offset, uint count);
short16 __ovld __cnfn bitfield_insert(short16 base, short16 insert, uint offset, uint count);
ushort16 __ovld __cnfn bitfield_insert(ushort16 base, ushort16 insert, uint offset, uint count);
int16 __ovld __cnfn bitfield_insert(int16 base, int16 insert, uint offset, uint count);
uint16 __ovld __cnfn bitfield_insert(uint16 base, uint16 insert, uint offset, uint count);
long16 __ovld __cnfn bitfield_insert(long16 base, long16 insert, uint offset, uint count);
ulong16 __ovld __cnfn bitfield_insert(ulong16 base, ulong16 insert, uint offset, uint count);
// bitfield_extract_signed: always returns the signed integer type whose
// element width and vector size match the source operand. The two trailing
// operands are always scalar uint.
// Parameter names follow the cl_khr_extended_bit_ops specification.
//
// Signed source operand.
char __ovld __cnfn bitfield_extract_signed(char base, uint offset, uint count);
short __ovld __cnfn bitfield_extract_signed(short base, uint offset, uint count);
int __ovld __cnfn bitfield_extract_signed(int base, uint offset, uint count);
long __ovld __cnfn bitfield_extract_signed(long base, uint offset, uint count);
char2 __ovld __cnfn bitfield_extract_signed(char2 base, uint offset, uint count);
short2 __ovld __cnfn bitfield_extract_signed(short2 base, uint offset, uint count);
int2 __ovld __cnfn bitfield_extract_signed(int2 base, uint offset, uint count);
long2 __ovld __cnfn bitfield_extract_signed(long2 base, uint offset, uint count);
char3 __ovld __cnfn bitfield_extract_signed(char3 base, uint offset, uint count);
short3 __ovld __cnfn bitfield_extract_signed(short3 base, uint offset, uint count);
int3 __ovld __cnfn bitfield_extract_signed(int3 base, uint offset, uint count);
long3 __ovld __cnfn bitfield_extract_signed(long3 base, uint offset, uint count);
char4 __ovld __cnfn bitfield_extract_signed(char4 base, uint offset, uint count);
short4 __ovld __cnfn bitfield_extract_signed(short4 base, uint offset, uint count);
int4 __ovld __cnfn bitfield_extract_signed(int4 base, uint offset, uint count);
long4 __ovld __cnfn bitfield_extract_signed(long4 base, uint offset, uint count);
char8 __ovld __cnfn bitfield_extract_signed(char8 base, uint offset, uint count);
short8 __ovld __cnfn bitfield_extract_signed(short8 base, uint offset, uint count);
int8 __ovld __cnfn bitfield_extract_signed(int8 base, uint offset, uint count);
long8 __ovld __cnfn bitfield_extract_signed(long8 base, uint offset, uint count);
char16 __ovld __cnfn bitfield_extract_signed(char16 base, uint offset, uint count);
short16 __ovld __cnfn bitfield_extract_signed(short16 base, uint offset, uint count);
int16 __ovld __cnfn bitfield_extract_signed(int16 base, uint offset, uint count);
long16 __ovld __cnfn bitfield_extract_signed(long16 base, uint offset, uint count);
// Unsigned source operand.
char __ovld __cnfn bitfield_extract_signed(uchar base, uint offset, uint count);
short __ovld __cnfn bitfield_extract_signed(ushort base, uint offset, uint count);
int __ovld __cnfn bitfield_extract_signed(uint base, uint offset, uint count);
long __ovld __cnfn bitfield_extract_signed(ulong base, uint offset, uint count);
char2 __ovld __cnfn bitfield_extract_signed(uchar2 base, uint offset, uint count);
short2 __ovld __cnfn bitfield_extract_signed(ushort2 base, uint offset, uint count);
int2 __ovld __cnfn bitfield_extract_signed(uint2 base, uint offset, uint count);
long2 __ovld __cnfn bitfield_extract_signed(ulong2 base, uint offset, uint count);
char3 __ovld __cnfn bitfield_extract_signed(uchar3 base, uint offset, uint count);
short3 __ovld __cnfn bitfield_extract_signed(ushort3 base, uint offset, uint count);
int3 __ovld __cnfn bitfield_extract_signed(uint3 base, uint offset, uint count);
long3 __ovld __cnfn bitfield_extract_signed(ulong3 base, uint offset, uint count);
char4 __ovld __cnfn bitfield_extract_signed(uchar4 base, uint offset, uint count);
short4 __ovld __cnfn bitfield_extract_signed(ushort4 base, uint offset, uint count);
int4 __ovld __cnfn bitfield_extract_signed(uint4 base, uint offset, uint count);
long4 __ovld __cnfn bitfield_extract_signed(ulong4 base, uint offset, uint count);
char8 __ovld __cnfn bitfield_extract_signed(uchar8 base, uint offset, uint count);
short8 __ovld __cnfn bitfield_extract_signed(ushort8 base, uint offset, uint count);
int8 __ovld __cnfn bitfield_extract_signed(uint8 base, uint offset, uint count);
long8 __ovld __cnfn bitfield_extract_signed(ulong8 base, uint offset, uint count);
char16 __ovld __cnfn bitfield_extract_signed(uchar16 base, uint offset, uint count);
short16 __ovld __cnfn bitfield_extract_signed(ushort16 base, uint offset, uint count);
int16 __ovld __cnfn bitfield_extract_signed(uint16 base, uint offset, uint count);
long16 __ovld __cnfn bitfield_extract_signed(ulong16 base, uint offset, uint count);
// bitfield_extract_unsigned: always returns the unsigned integer type whose
// element width and vector size match the source operand. The two trailing
// operands are always scalar uint.
// Parameter names follow the cl_khr_extended_bit_ops specification.
//
// Signed source operand.
uchar __ovld __cnfn bitfield_extract_unsigned(char base, uint offset, uint count);
ushort __ovld __cnfn bitfield_extract_unsigned(short base, uint offset, uint count);
uint __ovld __cnfn bitfield_extract_unsigned(int base, uint offset, uint count);
ulong __ovld __cnfn bitfield_extract_unsigned(long base, uint offset, uint count);
uchar2 __ovld __cnfn bitfield_extract_unsigned(char2 base, uint offset, uint count);
ushort2 __ovld __cnfn bitfield_extract_unsigned(short2 base, uint offset, uint count);
uint2 __ovld __cnfn bitfield_extract_unsigned(int2 base, uint offset, uint count);
ulong2 __ovld __cnfn bitfield_extract_unsigned(long2 base, uint offset, uint count);
uchar3 __ovld __cnfn bitfield_extract_unsigned(char3 base, uint offset, uint count);
ushort3 __ovld __cnfn bitfield_extract_unsigned(short3 base, uint offset, uint count);
uint3 __ovld __cnfn bitfield_extract_unsigned(int3 base, uint offset, uint count);
ulong3 __ovld __cnfn bitfield_extract_unsigned(long3 base, uint offset, uint count);
uchar4 __ovld __cnfn bitfield_extract_unsigned(char4 base, uint offset, uint count);
ushort4 __ovld __cnfn bitfield_extract_unsigned(short4 base, uint offset, uint count);
uint4 __ovld __cnfn bitfield_extract_unsigned(int4 base, uint offset, uint count);
ulong4 __ovld __cnfn bitfield_extract_unsigned(long4 base, uint offset, uint count);
uchar8 __ovld __cnfn bitfield_extract_unsigned(char8 base, uint offset, uint count);
ushort8 __ovld __cnfn bitfield_extract_unsigned(short8 base, uint offset, uint count);
uint8 __ovld __cnfn bitfield_extract_unsigned(int8 base, uint offset, uint count);
ulong8 __ovld __cnfn bitfield_extract_unsigned(long8 base, uint offset, uint count);
uchar16 __ovld __cnfn bitfield_extract_unsigned(char16 base, uint offset, uint count);
ushort16 __ovld __cnfn bitfield_extract_unsigned(short16 base, uint offset, uint count);
uint16 __ovld __cnfn bitfield_extract_unsigned(int16 base, uint offset, uint count);
ulong16 __ovld __cnfn bitfield_extract_unsigned(long16 base, uint offset, uint count);
// Unsigned source operand.
uchar __ovld __cnfn bitfield_extract_unsigned(uchar base, uint offset, uint count);
ushort __ovld __cnfn bitfield_extract_unsigned(ushort base, uint offset, uint count);
uint __ovld __cnfn bitfield_extract_unsigned(uint base, uint offset, uint count);
ulong __ovld __cnfn bitfield_extract_unsigned(ulong base, uint offset, uint count);
uchar2 __ovld __cnfn bitfield_extract_unsigned(uchar2 base, uint offset, uint count);
ushort2 __ovld __cnfn bitfield_extract_unsigned(ushort2 base, uint offset, uint count);
uint2 __ovld __cnfn bitfield_extract_unsigned(uint2 base, uint offset, uint count);
ulong2 __ovld __cnfn bitfield_extract_unsigned(ulong2 base, uint offset, uint count);
uchar3 __ovld __cnfn bitfield_extract_unsigned(uchar3 base, uint offset, uint count);
ushort3 __ovld __cnfn bitfield_extract_unsigned(ushort3 base, uint offset, uint count);
uint3 __ovld __cnfn bitfield_extract_unsigned(uint3 base, uint offset, uint count);
ulong3 __ovld __cnfn bitfield_extract_unsigned(ulong3 base, uint offset, uint count);
uchar4 __ovld __cnfn bitfield_extract_unsigned(uchar4 base, uint offset, uint count);
ushort4 __ovld __cnfn bitfield_extract_unsigned(ushort4 base, uint offset, uint count);
uint4 __ovld __cnfn bitfield_extract_unsigned(uint4 base, uint offset, uint count);
ulong4 __ovld __cnfn bitfield_extract_unsigned(ulong4 base, uint offset, uint count);
uchar8 __ovld __cnfn bitfield_extract_unsigned(uchar8 base, uint offset, uint count);
ushort8 __ovld __cnfn bitfield_extract_unsigned(ushort8 base, uint offset, uint count);
uint8 __ovld __cnfn bitfield_extract_unsigned(uint8 base, uint offset, uint count);
ulong8 __ovld __cnfn bitfield_extract_unsigned(ulong8 base, uint offset, uint count);
uchar16 __ovld __cnfn bitfield_extract_unsigned(uchar16 base, uint offset, uint count);
ushort16 __ovld __cnfn bitfield_extract_unsigned(ushort16 base, uint offset, uint count);
uint16 __ovld __cnfn bitfield_extract_unsigned(uint16 base, uint offset, uint count);
ulong16 __ovld __cnfn bitfield_extract_unsigned(ulong16 base, uint offset, uint count);
// bit_reverse: one overload for each integer element type (char, uchar,
// short, ushort, int, uint, long, ulong) as a scalar and as a 2-, 3-, 4-, 8-
// and 16-element vector. The result type always equals the operand type.
// The parameter name follows the cl_khr_extended_bit_ops specification.
char __ovld __cnfn bit_reverse(char base);
uchar __ovld __cnfn bit_reverse(uchar base);
short __ovld __cnfn bit_reverse(short base);
ushort __ovld __cnfn bit_reverse(ushort base);
int __ovld __cnfn bit_reverse(int base);
uint __ovld __cnfn bit_reverse(uint base);
long __ovld __cnfn bit_reverse(long base);
ulong __ovld __cnfn bit_reverse(ulong base);
char2 __ovld __cnfn bit_reverse(char2 base);
uchar2 __ovld __cnfn bit_reverse(uchar2 base);
short2 __ovld __cnfn bit_reverse(short2 base);
ushort2 __ovld __cnfn bit_reverse(ushort2 base);
int2 __ovld __cnfn bit_reverse(int2 base);
uint2 __ovld __cnfn bit_reverse(uint2 base);
long2 __ovld __cnfn bit_reverse(long2 base);
ulong2 __ovld __cnfn bit_reverse(ulong2 base);
char3 __ovld __cnfn bit_reverse(char3 base);
uchar3 __ovld __cnfn bit_reverse(uchar3 base);
short3 __ovld __cnfn bit_reverse(short3 base);
ushort3 __ovld __cnfn bit_reverse(ushort3 base);
int3 __ovld __cnfn bit_reverse(int3 base);
uint3 __ovld __cnfn bit_reverse(uint3 base);
long3 __ovld __cnfn bit_reverse(long3 base);
ulong3 __ovld __cnfn bit_reverse(ulong3 base);
char4 __ovld __cnfn bit_reverse(char4 base);
uchar4 __ovld __cnfn bit_reverse(uchar4 base);
short4 __ovld __cnfn bit_reverse(short4 base);
ushort4 __ovld __cnfn bit_reverse(ushort4 base);
int4 __ovld __cnfn bit_reverse(int4 base);
uint4 __ovld __cnfn bit_reverse(uint4 base);
long4 __ovld __cnfn bit_reverse(long4 base);
ulong4 __ovld __cnfn bit_reverse(ulong4 base);
char8 __ovld __cnfn bit_reverse(char8 base);
uchar8 __ovld __cnfn bit_reverse(uchar8 base);
short8 __ovld __cnfn bit_reverse(short8 base);
ushort8 __ovld __cnfn bit_reverse(ushort8 base);
int8 __ovld __cnfn bit_reverse(int8 base);
uint8 __ovld __cnfn bit_reverse(uint8 base);
long8 __ovld __cnfn bit_reverse(long8 base);
ulong8 __ovld __cnfn bit_reverse(ulong8 base);
char16 __ovld __cnfn bit_reverse(char16 base);
uchar16 __ovld __cnfn bit_reverse(uchar16 base);
short16 __ovld __cnfn bit_reverse(short16 base);
ushort16 __ovld __cnfn bit_reverse(ushort16 base);
int16 __ovld __cnfn bit_reverse(int16 base);
uint16 __ovld __cnfn bit_reverse(uint16 base);
long16 __ovld __cnfn bit_reverse(long16 base);
ulong16 __ovld __cnfn bit_reverse(ulong16 base);
#endif // cl_khr_extended_bit_ops
#if defined(cl_intel_subgroups)
// Intel-Specific Sub Group Functions
float __ovld __conv intel_sub_group_shuffle( float x, uint c );

View File

@@ -72,6 +72,7 @@ def FuncExtKhrSubgroupNonUniformArithmetic: FunctionExtension<"cl_khr_subgroup_n
def FuncExtKhrSubgroupShuffle : FunctionExtension<"cl_khr_subgroup_shuffle">;
def FuncExtKhrSubgroupShuffleRelative : FunctionExtension<"cl_khr_subgroup_shuffle_relative">;
def FuncExtKhrSubgroupClusteredReduce : FunctionExtension<"cl_khr_subgroup_clustered_reduce">;
// Function-extension record for cl_khr_extended_bit_ops.
def FuncExtKhrExtendedBitOps : FunctionExtension<"cl_khr_extended_bit_ops">;
def FuncExtKhrGlobalInt32BaseAtomics : FunctionExtension<"cl_khr_global_int32_base_atomics">;
def FuncExtKhrGlobalInt32ExtendedAtomics : FunctionExtension<"cl_khr_global_int32_extended_atomics">;
def FuncExtKhrLocalInt32BaseAtomics : FunctionExtension<"cl_khr_local_int32_base_atomics">;
@@ -1738,6 +1739,16 @@ let Extension = FuncExtKhrSubgroupClusteredReduce in {
}
}
// Section 40.3.1 - cl_khr_extended_bit_ops
// Every builtin below is defined with Extension = FuncExtKhrExtendedBitOps and
// carries Attr.Const.
// NOTE(review): each signature list reads as "return type, then parameter
// types" -- confirm against the Builtin class definition.
let Extension = FuncExtKhrExtendedBitOps in {
// Same generic integer type for the result and both data operands, followed by
// two UInt operands.
def : Builtin<"bitfield_insert", [AIGenTypeN, AIGenTypeN, AIGenTypeN, UInt, UInt], Attr.Const>;
// SGenTypeN result from either an SGenTypeN or a UGenTypeN source operand.
def : Builtin<"bitfield_extract_signed", [SGenTypeN, SGenTypeN, UInt, UInt], Attr.Const>;
def : Builtin<"bitfield_extract_signed", [SGenTypeN, UGenTypeN, UInt, UInt], Attr.Const>;
// UGenTypeN result from either an SGenTypeN or a UGenTypeN source operand.
def : Builtin<"bitfield_extract_unsigned", [UGenTypeN, SGenTypeN, UInt, UInt], Attr.Const>;
def : Builtin<"bitfield_extract_unsigned", [UGenTypeN, UGenTypeN, UInt, UInt], Attr.Const>;
// Single operand; the result uses the same generic integer type.
def : Builtin<"bit_reverse", [AIGenTypeN, AIGenTypeN], Attr.Const>;
}
//--------------------------------------------------------------------
// Arm extensions.
let Extension = ArmIntegerDotProductInt8 in {

View File

@@ -123,6 +123,9 @@ global atomic_int z = ATOMIC_VAR_INIT(99);
#if cl_khr_subgroup_clustered_reduce != 1
#error "Incorrectly defined cl_khr_subgroup_clustered_reduce"
#endif
// Fail the build unless cl_khr_extended_bit_ops evaluates to 1 here; an
// undefined macro evaluates to 0 in #if and therefore also triggers the error.
#if cl_khr_extended_bit_ops != 1
#error "Incorrectly defined cl_khr_extended_bit_ops"
#endif
#else
@@ -147,6 +150,9 @@ global atomic_int z = ATOMIC_VAR_INIT(99);
#ifdef cl_khr_subgroup_clustered_reduce
#error "Incorrect cl_khr_subgroup_clustered_reduce define"
#endif
// Fail the build if cl_khr_extended_bit_ops is defined at all in this branch.
#if defined(cl_khr_extended_bit_ops)
#error "Incorrect cl_khr_extended_bit_ops define"
#endif
#endif //(defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)