[libclc] Move copysign to CLC library; fix & optimize (#124598)

This commit moves the implementation of the copysign builtin to the CLC
library.

It simultaneously optimizes it for vector types by avoiding
scalarization. It does so by using the __builtin_elementwise_copysign
clang builtin, which can handle vector types.

It also fixes a bug in the half/fp16 implementation of the builtin. The
previous version used an incorrect mask (0x7FFFF instead of 0x7FFF) and
thus preserved the original sign bit rather than masking it out.
This commit is contained in:
Fraser Cormack
2025-01-28 09:18:34 +00:00
committed by GitHub
parent 4a00c84fbb
commit cfc8ef0ad8
8 changed files with 57 additions and 24 deletions

View File

@@ -0,0 +1,12 @@
// Header for the CLC-internal copysign function, __clc_copysign.
// It contains no code of its own: it configures two macros and hands off to
// gentype.inc.
#ifndef __CLC_MATH_CLC_COPYSIGN_H__
#define __CLC_MATH_CLC_COPYSIGN_H__
// Inputs for the gentype.inc include below: the body file to use and the
// function name it should apply to.
// NOTE(review): presumably gentype.inc includes __CLC_BODY once per
// floating-point gentype and binary_decl.inc emits a two-argument
// declaration each time — confirm against those files.
#define __CLC_BODY <clc/shared/binary_decl.inc>
#define __CLC_FUNCTION __clc_copysign
#include <clc/math/gentype.inc>
// Drop both macros again so they do not leak into whatever includes this
// header.
#undef __CLC_BODY
#undef __CLC_FUNCTION
#endif // __CLC_MATH_CLC_COPYSIGN_H__

View File

@@ -0,0 +1,10 @@
#include <clc/utils.h>
// Fallback name mapping: when the including file has not supplied its own
// __CLC_FUNCTION, map a name `x` to `__clc_` followed by `x` through
// __CLC_CONCAT.
#if !defined(__CLC_FUNCTION)
#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x)
#endif

// Two-argument wrapper. The including file must define FUNCTION (the name
// being defined) and __CLC_GENTYPE (the argument/return type). Both operands
// are passed unchanged to __CLC_FUNCTION(FUNCTION) and its result is returned.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x,
                                              __CLC_GENTYPE y) {
  const __CLC_GENTYPE result = __CLC_FUNCTION(FUNCTION)(x, y);
  return result;
}

View File

@@ -1,4 +1,5 @@
../generic/math/clc_ceil.cl
../generic/math/clc_copysign.cl
../generic/math/clc_fabs.cl
../generic/math/clc_floor.cl
../generic/math/clc_mad.cl

View File

@@ -5,6 +5,7 @@ geometric/clc_dot.cl
integer/clc_abs.cl
integer/clc_abs_diff.cl
math/clc_ceil.cl
math/clc_copysign.cl
math/clc_fabs.cl
math/clc_floor.cl
math/clc_mad.cl

View File

@@ -0,0 +1,27 @@
#include <clc/clcmacro.h>
#include <clc/internal/clc.h>
// Definitions of __clc_copysign, all expressed through
// __builtin_elementwise_copysign.
// NOTE(review): the NO_SCALARIZE macro variant presumably makes the vector
// overloads call the builtin on the whole vector rather than per element —
// confirm against clcmacro.h.

// float: always available.
_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(float, __clc_copysign,
__builtin_elementwise_copysign, float,
float)
// double: only when the cl_khr_fp64 extension is defined.
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(double, __clc_copysign,
__builtin_elementwise_copysign, double,
double)
#endif
// half: only when the cl_khr_fp16 extension is defined.
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(half, __clc_copysign,
__builtin_elementwise_copysign, half,
half)
#endif

View File

@@ -3,6 +3,7 @@
../generic/common/clc_smoothstep.cl
../generic/geometric/clc_dot.cl
../generic/math/clc_ceil.cl
../generic/math/clc_copysign.cl
../generic/math/clc_fabs.cl
../generic/math/clc_floor.cl
../generic/math/clc_mad.cl

View File

@@ -3,6 +3,7 @@
../generic/common/clc_smoothstep.cl
../generic/geometric/clc_dot.cl
../generic/math/clc_ceil.cl
../generic/math/clc_copysign.cl
../generic/math/clc_fabs.cl
../generic/math/clc_floor.cl
../generic/math/clc_mad.cl

View File

@@ -1,27 +1,7 @@
#include <clc/clc.h>
#include <clc/clcmacro.h>
#include <clc/math/clc_copysign.h>
// NOTE(review): the hunk header for this file (-1,27 +1,7) shrinks it to 7
// lines, so removed and added lines appear interleaved here without +/-
// markers — confirm which lines survive before editing.

// float copysign defined through the scalar builtin __builtin_copysignf.
_CLC_DEFINE_BINARY_BUILTIN(float, copysign, __builtin_copysignf, float, float)
// Inputs for the generic wrapper: the public function name and the body file
// that provides its definition.
#define FUNCTION copysign
#define __CLC_BODY <clc/shared/binary_def.inc>
// double copysign through __builtin_copysign, only when cl_khr_fp64 is
// defined.
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
_CLC_DEFINE_BINARY_BUILTIN(double, copysign, __builtin_copysign, double, double)
#endif
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// Hand-written half copysign assembled from the 16-bit patterns of x and y.
// NOTE(review): 0x7ffffu has bits 0-18 set, so the AND below does not clear
// bit 15 of y; the result's sign bit ends up as sign(x) | sign(y). A mask of
// 0x7fffu would strip y's sign.
// NOTE(review): the sign is taken from x and the remaining bits from y, while
// OpenCL describes copysign(x, y) as x with the sign of y — the operands look
// swapped; confirm against the specification.
_CLC_DEF _CLC_OVERLOAD half copysign(half x, half y)
{
// Keep only bit 15 (the sign bit) of x.
ushort sign_x = as_ushort(x) & 0x8000u;
// Intended to keep the 15 non-sign bits of y (see the mask note above).
ushort unsigned_y = as_ushort(y) & 0x7ffffu;
// Merge the two bit fields and reinterpret the 16-bit pattern as a half.
return as_half((ushort)(sign_x | unsigned_y));
}
// NOTE(review): presumably generates the vector overloads by applying the
// scalar function element by element — confirm against clcmacro.h.
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, copysign, half, half)
#endif
#include <clc/math/gentype.inc>