mirror of
https://github.com/intel/llvm.git
synced 2026-01-13 02:38:07 +08:00
[libclc] Remove __attribute__((always_inline)) (#158791)
always_inline doesn't guarantee a performance improvement. Target-specific optimizations decide whether inlining is profitable.

Changes to amdgcn--amdhsa.bc:
* _Z9__clc_logDv16_f and _Z15__clc_remainderDv16_fS_ are not inlined.
* sincos vector function code size has doubled due to apparent duplication.

Also replace the typo _CLC_DECL with _CLC_DEF for function definitions.
This commit is contained in:
@@ -11,17 +11,13 @@
|
||||
|
||||
#define _CLC_OVERLOAD __attribute__((overloadable))
|
||||
#define _CLC_DECL
|
||||
#define _CLC_INLINE __attribute__((always_inline)) inline
|
||||
#define _CLC_INLINE inline
|
||||
#define _CLC_CONST __attribute__((const))
|
||||
|
||||
// avoid inlines for SPIR-V related targets since we'll optimise later in the
|
||||
// chain
|
||||
#if defined(CLC_SPIRV)
|
||||
#define _CLC_DEF
|
||||
#elif defined(CLC_CLSPV)
|
||||
#if defined(CLC_CLSPV)
|
||||
#define _CLC_DEF __attribute__((noinline)) __attribute__((clspv_libclc_builtin))
|
||||
#else
|
||||
#define _CLC_DEF __attribute__((always_inline))
|
||||
#define _CLC_DEF
|
||||
#endif
|
||||
|
||||
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
|
||||
|
||||
@@ -18,22 +18,22 @@
|
||||
// The return type is the same base type as the input type, with the same vector
|
||||
// size as the mask. Elements in the mask must be the same size (number of bits)
|
||||
// as the input value, e.g. char8 ret = shuffle(char2 x, uchar8 mask);
|
||||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
|
||||
__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 2) x,
|
||||
__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 2) y, __CLC_U_GENTYPE mask) {
|
||||
return __CLC_IMPL_FUNCTION(__CLC_FUNCTION)(x, y, mask);
|
||||
}
|
||||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
|
||||
__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 4) x,
|
||||
__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 4) y, __CLC_U_GENTYPE mask) {
|
||||
return __CLC_IMPL_FUNCTION(__CLC_FUNCTION)(x, y, mask);
|
||||
}
|
||||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
|
||||
__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 8) x,
|
||||
__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 8) y, __CLC_U_GENTYPE mask) {
|
||||
return __CLC_IMPL_FUNCTION(__CLC_FUNCTION)(x, y, mask);
|
||||
}
|
||||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(
|
||||
__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 16) x,
|
||||
__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 16) y, __CLC_U_GENTYPE mask) {
|
||||
return __CLC_IMPL_FUNCTION(__CLC_FUNCTION)(x, y, mask);
|
||||
|
||||
@@ -18,19 +18,19 @@
|
||||
// The return type is the same base type as the input type, with the same vector
|
||||
// size as the mask. Elements in the mask must be the same size (number of bits)
|
||||
// as the input value, e.g. char8 ret = shuffle(char2 x, uchar8 mask);
|
||||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
|
||||
__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 2) x, __CLC_U_GENTYPE mask) {
|
||||
return __CLC_IMPL_FUNCTION(__CLC_FUNCTION)(x, mask);
|
||||
}
|
||||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
|
||||
__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 4) x, __CLC_U_GENTYPE mask) {
|
||||
return __CLC_IMPL_FUNCTION(__CLC_FUNCTION)(x, mask);
|
||||
}
|
||||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
|
||||
__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 8) x, __CLC_U_GENTYPE mask) {
|
||||
return __CLC_IMPL_FUNCTION(__CLC_FUNCTION)(x, mask);
|
||||
}
|
||||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(
|
||||
__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 16) x, __CLC_U_GENTYPE mask) {
|
||||
return __CLC_IMPL_FUNCTION(__CLC_FUNCTION)(x, mask);
|
||||
}
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
#ifdef __CLC_FPSIZE
|
||||
|
||||
#define __CLC_DEFINE_ATOMIC(ADDRSPACE) \
|
||||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_atomic_compare_exchange( \
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_atomic_compare_exchange( \
|
||||
volatile ADDRSPACE __CLC_GENTYPE *Ptr, __CLC_GENTYPE Comparator, \
|
||||
__CLC_GENTYPE Value, int MemoryOrderEqual, int MemoryOrderUnequal, \
|
||||
int MemoryScope) { \
|
||||
@@ -38,7 +38,7 @@
|
||||
#else
|
||||
|
||||
#define __CLC_DEFINE_ATOMIC(ADDRSPACE) \
|
||||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_atomic_compare_exchange( \
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_atomic_compare_exchange( \
|
||||
volatile ADDRSPACE __CLC_GENTYPE *Ptr, __CLC_GENTYPE Comparator, \
|
||||
__CLC_GENTYPE Value, int MemoryOrderEqual, int MemoryOrderUnequal, \
|
||||
int MemoryScope) { \
|
||||
|
||||
@@ -31,7 +31,7 @@
|
||||
|
||||
#ifdef __CLC_NO_VALUE_ARG
|
||||
#define __CLC_DEFINE_ATOMIC(ADDRSPACE) \
|
||||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION( \
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION( \
|
||||
volatile ADDRSPACE __CLC_GENTYPE *Ptr, int MemoryOrder, \
|
||||
int MemoryScope) { \
|
||||
return __CLC_AS_RETTYPE(__CLC_IMPL_FUNCTION( \
|
||||
@@ -39,7 +39,7 @@
|
||||
}
|
||||
#elif defined(__CLC_INC_DEC)
|
||||
#define __CLC_DEFINE_ATOMIC(ADDRSPACE) \
|
||||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION( \
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION( \
|
||||
volatile ADDRSPACE __CLC_GENTYPE *Ptr, int MemoryOrder, \
|
||||
int MemoryScope) { \
|
||||
return __CLC_AS_RETTYPE( \
|
||||
@@ -48,7 +48,7 @@
|
||||
}
|
||||
#elif defined(__CLC_RETURN_VOID)
|
||||
#define __CLC_DEFINE_ATOMIC(ADDRSPACE) \
|
||||
_CLC_OVERLOAD _CLC_DECL void __CLC_FUNCTION( \
|
||||
_CLC_OVERLOAD _CLC_DEF void __CLC_FUNCTION( \
|
||||
volatile ADDRSPACE __CLC_GENTYPE *Ptr, __CLC_GENTYPE Value, \
|
||||
int MemoryOrder, int MemoryScope) { \
|
||||
__CLC_IMPL_FUNCTION((ADDRSPACE __CLC_PTR_CASTTYPE *)Ptr, Value, \
|
||||
@@ -56,7 +56,7 @@
|
||||
}
|
||||
#else
|
||||
#define __CLC_DEFINE_ATOMIC(ADDRSPACE) \
|
||||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION( \
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION( \
|
||||
volatile ADDRSPACE __CLC_GENTYPE *Ptr, __CLC_GENTYPE Value, \
|
||||
int MemoryOrder, int MemoryScope) { \
|
||||
return __CLC_AS_RETTYPE( \
|
||||
|
||||
@@ -74,8 +74,8 @@ _CLC_DEF _CLC_OVERLOAD __CLC_FLOATN __clc_cosf_piby4(__CLC_FLOATN x,
|
||||
return ret;
|
||||
}
|
||||
|
||||
_CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_tanf_piby4(__CLC_FLOATN x,
|
||||
__CLC_INTN regn) {
|
||||
_CLC_DEF _CLC_OVERLOAD __CLC_FLOATN __clc_tanf_piby4(__CLC_FLOATN x,
|
||||
__CLC_INTN regn) {
|
||||
// Core Remez [1,2] approximation to tan(x) on the interval [0,pi/4].
|
||||
__CLC_FLOATN r = x * x;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user