mirror of
https://github.com/intel/llvm.git
synced 2026-02-02 02:00:03 +08:00
[libclc] Move copysign to CLC library; fix & optimize (#124598)
This commit moves the implementation of the copysign builtin to the CLC library. It simultaneously optimizes it for vector types by avoiding scalarization. It does so by using the __builtin_elementwise_copysign Clang builtin, which can handle vector types. It also fixes a bug in the half/fp16 implementation of the builtin. The previous version was using an incorrect mask (0x7FFFF instead of 0x7FFF) and was thus preserving the original sign bit, rather than masking it out.
This commit is contained in:
12
libclc/clc/include/clc/math/clc_copysign.h
Normal file
12
libclc/clc/include/clc/math/clc_copysign.h
Normal file
@@ -0,0 +1,12 @@
|
||||
// Declaration header for __clc_copysign.
//
// The header does not spell out any prototype itself. It names
// <clc/shared/binary_decl.inc> as __CLC_BODY and __clc_copysign as
// __CLC_FUNCTION, then includes <clc/math/gentype.inc>. Presumably
// gentype.inc expands __CLC_BODY once per supported floating-point type to
// produce the two-operand declarations -- confirm against gentype.inc and
// binary_decl.inc, which are not shown here.
#ifndef __CLC_MATH_CLC_COPYSIGN_H__
|
||||
#define __CLC_MATH_CLC_COPYSIGN_H__
|
||||
|
||||
// Inputs consumed by the gentype.inc include below.
#define __CLC_BODY <clc/shared/binary_decl.inc>
|
||||
#define __CLC_FUNCTION __clc_copysign
|
||||
|
||||
#include <clc/math/gentype.inc>
|
||||
|
||||
// Both helper macros are removed again so they do not leak into the includer.
#undef __CLC_BODY
|
||||
#undef __CLC_FUNCTION
|
||||
|
||||
#endif // __CLC_MATH_CLC_COPYSIGN_H__
|
||||
10
libclc/clc/include/clc/shared/binary_def.inc
Normal file
10
libclc/clc/include/clc/shared/binary_def.inc
Normal file
@@ -0,0 +1,10 @@
|
||||
#include <clc/utils.h>
|
||||
|
||||
// Default name mapping: unless the includer has already defined
// __CLC_FUNCTION, a builtin name x is mapped to __CLC_CONCAT(__clc_, x).
// __CLC_CONCAT is not defined in this file (presumably it comes from
// <clc/utils.h> and pastes the two tokens into __clc_<x> -- confirm).
#ifndef __CLC_FUNCTION
|
||||
#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x)
|
||||
#endif
|
||||
|
||||
// Two-operand forwarding wrapper.
//
// The includer must define FUNCTION (the name of the function being defined)
// and __CLC_GENTYPE (the operand and result type). The wrapper passes both
// arguments unchanged to __CLC_FUNCTION(FUNCTION) and returns its result.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE a,
|
||||
__CLC_GENTYPE b) {
|
||||
return __CLC_FUNCTION(FUNCTION)(a, b);
|
||||
}
|
||||
@@ -1,4 +1,5 @@
|
||||
../generic/math/clc_ceil.cl
|
||||
../generic/math/clc_copysign.cl
|
||||
../generic/math/clc_fabs.cl
|
||||
../generic/math/clc_floor.cl
|
||||
../generic/math/clc_mad.cl
|
||||
|
||||
@@ -5,6 +5,7 @@ geometric/clc_dot.cl
|
||||
integer/clc_abs.cl
|
||||
integer/clc_abs_diff.cl
|
||||
math/clc_ceil.cl
|
||||
math/clc_copysign.cl
|
||||
math/clc_fabs.cl
|
||||
math/clc_floor.cl
|
||||
math/clc_mad.cl
|
||||
|
||||
27
libclc/clc/lib/generic/math/clc_copysign.cl
Normal file
27
libclc/clc/lib/generic/math/clc_copysign.cl
Normal file
@@ -0,0 +1,27 @@
|
||||
#include <clc/clcmacro.h>
|
||||
#include <clc/internal/clc.h>
|
||||
|
||||
// Definitions of __clc_copysign for float, double and half.
//
// Each type is handled by one _CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE
// invocation naming __builtin_elementwise_copysign as the implementation.
// The macro is not defined in this file (presumably it comes from
// <clc/clcmacro.h>); going by its name and the commit description, it hands
// vector operands to the builtin directly rather than splitting them into
// scalar calls -- confirm against the macro definition.
_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(float, __clc_copysign,
|
||||
__builtin_elementwise_copysign, float,
|
||||
float)
|
||||
|
||||
// double overloads are only emitted when the cl_khr_fp64 macro is defined.
#ifdef cl_khr_fp64
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
|
||||
_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(double, __clc_copysign,
|
||||
__builtin_elementwise_copysign, double,
|
||||
double)
|
||||
|
||||
#endif
|
||||
|
||||
// half overloads are only emitted when the cl_khr_fp16 macro is defined.
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(half, __clc_copysign,
|
||||
__builtin_elementwise_copysign, half,
|
||||
half)
|
||||
|
||||
#endif
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
../generic/common/clc_smoothstep.cl
|
||||
../generic/geometric/clc_dot.cl
|
||||
../generic/math/clc_ceil.cl
|
||||
../generic/math/clc_copysign.cl
|
||||
../generic/math/clc_fabs.cl
|
||||
../generic/math/clc_floor.cl
|
||||
../generic/math/clc_mad.cl
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
../generic/common/clc_smoothstep.cl
|
||||
../generic/geometric/clc_dot.cl
|
||||
../generic/math/clc_ceil.cl
|
||||
../generic/math/clc_copysign.cl
|
||||
../generic/math/clc_fabs.cl
|
||||
../generic/math/clc_floor.cl
|
||||
../generic/math/clc_mad.cl
|
||||
|
||||
@@ -1,27 +1,7 @@
|
||||
#include <clc/clc.h>
|
||||
#include <clc/clcmacro.h>
|
||||
#include <clc/math/clc_copysign.h>
|
||||
|
||||
_CLC_DEFINE_BINARY_BUILTIN(float, copysign, __builtin_copysignf, float, float)
|
||||
#define FUNCTION copysign
|
||||
#define __CLC_BODY <clc/shared/binary_def.inc>
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
|
||||
_CLC_DEFINE_BINARY_BUILTIN(double, copysign, __builtin_copysign, double, double)
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
// Scalar half-precision copysign.
//
// Returns a value with the magnitude of x and the sign of y, which is the
// OpenCL C definition of copysign(x, y).
//
// The result is assembled from the raw 16-bit encodings: bit 15 is the sign
// and bits 0-14 hold the exponent and mantissa. The magnitude mask must be
// exactly 0x7fff; a wider constant such as 0x7ffff clears nothing on a
// 16-bit value and lets the unwanted sign bit leak into the result.
_CLC_DEF _CLC_OVERLOAD half copysign(half x, half y) {
  // Exponent and mantissa come from x, with its own sign bit stripped.
  ushort magnitude_x = as_ushort(x) & 0x7fffu;
  // Only the sign bit is taken from y.
  ushort sign_y = as_ushort(y) & 0x8000u;

  return as_half((ushort)(sign_y | magnitude_x));
}
|
||||
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, copysign, half, half)
|
||||
|
||||
#endif
|
||||
#include <clc/math/gentype.inc>
|
||||
|
||||
Reference in New Issue
Block a user