[libclc] Move copysign to CLC library; fix & optimize (#124598)

This commit moves the implementation of the copysign builtin to the CLC library. It simultaneously optimizes it for vector types by avoiding scalarization: it uses the __builtin_elementwise_copysign Clang builtin, which can handle vector types directly. It also fixes a bug in the half/fp16 implementation of the builtin. The previous version used an incorrect mask (0x7FFFF instead of 0x7FFF) and thus preserved the original sign bit rather than masking it out.
2026-02-02 02:00:03 +08:00 · 2025-01-28 09:18:34 +00:00
parent 4a00c84fbb
commit cfc8ef0ad8
8 changed files with 57 additions and 24 deletions
--- a/libclc/clc/include/clc/math/clc_copysign.h
+++ b/libclc/clc/include/clc/math/clc_copysign.h
@@ -0,0 +1,12 @@
+#ifndef __CLC_MATH_CLC_COPYSIGN_H__
+#define __CLC_MATH_CLC_COPYSIGN_H__
+
+#define __CLC_BODY <clc/shared/binary_decl.inc>
+#define __CLC_FUNCTION __clc_copysign
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_COPYSIGN_H__
--- a/libclc/clc/include/clc/shared/binary_def.inc
+++ b/libclc/clc/include/clc/shared/binary_def.inc
@@ -0,0 +1,10 @@
+#include <clc/utils.h>
+
+#ifndef __CLC_FUNCTION
+#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x)
+#endif
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE a,
+                                              __CLC_GENTYPE b) {
+  return __CLC_FUNCTION(FUNCTION)(a, b);
+}
--- a/libclc/clc/lib/clspv/SOURCES
+++ b/libclc/clc/lib/clspv/SOURCES
@@ -1,4 +1,5 @@
 ../generic/math/clc_ceil.cl
+../generic/math/clc_copysign.cl
 ../generic/math/clc_fabs.cl
 ../generic/math/clc_floor.cl
 ../generic/math/clc_mad.cl
--- a/libclc/clc/lib/generic/SOURCES
+++ b/libclc/clc/lib/generic/SOURCES
@@ -5,6 +5,7 @@ geometric/clc_dot.cl
 integer/clc_abs.cl
 integer/clc_abs_diff.cl
 math/clc_ceil.cl
+math/clc_copysign.cl
 math/clc_fabs.cl
 math/clc_floor.cl
 math/clc_mad.cl
--- a/libclc/clc/lib/generic/math/clc_copysign.cl
+++ b/libclc/clc/lib/generic/math/clc_copysign.cl
@@ -0,0 +1,27 @@
+#include <clc/clcmacro.h>
+#include <clc/internal/clc.h>
+
+_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(float, __clc_copysign,
+                                        __builtin_elementwise_copysign, float,
+                                        float)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(double, __clc_copysign,
+                                        __builtin_elementwise_copysign, double,
+                                        double)
+
+#endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(half, __clc_copysign,
+                                        __builtin_elementwise_copysign, half,
+                                        half)
+
+#endif
+
--- a/libclc/clc/lib/spirv/SOURCES
+++ b/libclc/clc/lib/spirv/SOURCES
@@ -3,6 +3,7 @@
 ../generic/common/clc_smoothstep.cl
 ../generic/geometric/clc_dot.cl
 ../generic/math/clc_ceil.cl
+../generic/math/clc_copysign.cl
 ../generic/math/clc_fabs.cl
 ../generic/math/clc_floor.cl
 ../generic/math/clc_mad.cl
--- a/libclc/clc/lib/spirv64/SOURCES
+++ b/libclc/clc/lib/spirv64/SOURCES
@@ -3,6 +3,7 @@
 ../generic/common/clc_smoothstep.cl
 ../generic/geometric/clc_dot.cl
 ../generic/math/clc_ceil.cl
+../generic/math/clc_copysign.cl
 ../generic/math/clc_fabs.cl
 ../generic/math/clc_floor.cl
 ../generic/math/clc_mad.cl
--- a/libclc/generic/lib/math/copysign.cl
+++ b/libclc/generic/lib/math/copysign.cl
@@ -1,27 +1,7 @@
 #include <clc/clc.h>
-#include <clc/clcmacro.h>
+#include <clc/math/clc_copysign.h>

-_CLC_DEFINE_BINARY_BUILTIN(float, copysign, __builtin_copysignf, float, float)
+#define FUNCTION copysign
+#define __CLC_BODY <clc/shared/binary_def.inc>

-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_DEFINE_BINARY_BUILTIN(double, copysign, __builtin_copysign, double, double)
-
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEF _CLC_OVERLOAD half copysign(half x, half y)
-{
-   ushort sign_x = as_ushort(x) & 0x8000u;
-   ushort unsigned_y = as_ushort(y) & 0x7ffffu;
-
-   return as_half((ushort)(sign_x | unsigned_y));
-}
-_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, copysign, half, half)
-
-#endif
+#include <clc/math/gentype.inc>