[libclc] Optimize generic CLC fmin/fmax (#128506)

With this commit, the CLC fmin/fmax builtins use clang's __builtin_elementwise_(min|max)imumnum, which helps us generate LLVM minimumnum/maximumnum intrinsics directly. These intrinsics uniformly select the non-NaN input over the (quiet or signalling) NaN input, which corresponds to what the OpenCL CTS tests. These intrinsics maintain the vector types, as opposed to scalarizing, which was previously happening. This commit therefore helps to optimize codegen for targets that use the generic implementation. Note that there is ongoing discussion [1] regarding how these builtins should handle signalling NaNs in the OpenCL specification and whether they should be able to return a quiet NaN as per the IEEE behaviour. If the specification and/or CTS is ever updated to allow or mandate returning a qNaN, these builtins could/should be updated to use __builtin_elementwise_(min|max)num instead, which would lower to LLVM minnum/maxnum intrinsics. The SPIR-V targets maintain the old implementations, as the LLVM -> SPIR-V translator can't currently handle the LLVM intrinsics. The implementation has been simplified to consistently use clang builtins, as opposed to before, where the half version was explicitly defined. [1] https://github.com/KhronosGroup/OpenCL-CTS/pull/2285
2026-01-14 03:50:17 +08:00 · 2025-07-29 13:21:42 +01:00
parent 315e2e28b1
commit 586cacdbdd
9 changed files with 11 additions and 216 deletions
--- a/libclc/clc/lib/amdgcn/SOURCES
+++ b/libclc/clc/lib/amdgcn/SOURCES
@@ -1,5 +1,3 @@
-math/clc_fmax.cl
-math/clc_fmin.cl
 math/clc_ldexp_override.cl
 workitem/clc_get_global_offset.cl
 workitem/clc_get_global_size.cl
--- a/libclc/clc/lib/generic/math/clc_fmax.cl
+++ b/libclc/clc/lib/generic/math/clc_fmax.cl
@@ -6,53 +6,10 @@
 //
 //===----------------------------------------------------------------------===//

-#include <clc/clcmacro.h>
 #include <clc/internal/clc.h>
-#include <clc/relational/clc_isnan.h>

-#define __FLOAT_ONLY
-#define __CLC_MIN_VECSIZE 1
 #define FUNCTION __clc_fmax
-#define __IMPL_FUNCTION __builtin_fmaxf
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
+#define __IMPL_FUNCTION(x) __builtin_elementwise_maximumnum
+#define __CLC_BODY <clc/shared/binary_def.inc>
+
 #include <clc/math/gentype.inc>
-#undef __CLC_MIN_VECSIZE
-#undef FUNCTION
-#undef __IMPL_FUNCTION
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-#define __DOUBLE_ONLY
-#define __CLC_MIN_VECSIZE 1
-#define FUNCTION __clc_fmax
-#define __IMPL_FUNCTION __builtin_fmax
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
-#undef __CLC_MIN_VECSIZE
-#undef FUNCTION
-#undef __IMPL_FUNCTION
-
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEF _CLC_OVERLOAD half __clc_fmax(half x, half y) {
-  if (__clc_isnan(x))
-    return y;
-  if (__clc_isnan(y))
-    return x;
-  return (x < y) ? y : x;
-}
-
-#define __HALF_ONLY
-#define __CLC_SUPPORTED_VECSIZE_OR_1 2
-#define FUNCTION __clc_fmax
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
-#undef FUNCTION
-
-#endif
--- a/libclc/clc/lib/generic/math/clc_fmin.cl
+++ b/libclc/clc/lib/generic/math/clc_fmin.cl
@@ -6,52 +6,10 @@
 //
 //===----------------------------------------------------------------------===//

-#include <clc/clcmacro.h>
 #include <clc/internal/clc.h>
-#include <clc/relational/clc_isnan.h>

-#define __FLOAT_ONLY
-#define __CLC_MIN_VECSIZE 1
 #define FUNCTION __clc_fmin
-#define __IMPL_FUNCTION __builtin_fminf
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
+#define __IMPL_FUNCTION(x) __builtin_elementwise_minimumnum
+#define __CLC_BODY <clc/shared/binary_def.inc>
+
 #include <clc/math/gentype.inc>
-#undef __CLC_MIN_VECSIZE
-#undef FUNCTION
-#undef __IMPL_FUNCTION
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-#define __DOUBLE_ONLY
-#define __CLC_MIN_VECSIZE 1
-#define FUNCTION __clc_fmin
-#define __IMPL_FUNCTION __builtin_fmin
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
-#undef __CLC_MIN_VECSIZE
-#undef FUNCTION
-#undef __IMPL_FUNCTION
-
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEF _CLC_OVERLOAD half __clc_fmin(half x, half y) {
-  if (__clc_isnan(x))
-    return y;
-  if (__clc_isnan(y))
-    return x;
-  return (y < x) ? y : x;
-}
-
-#define __HALF_ONLY
-#define __CLC_SUPPORTED_VECSIZE_OR_1 2
-#define FUNCTION __clc_fmin
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
-
-#endif
--- a/libclc/clc/lib/r600/SOURCES
+++ b/libclc/clc/lib/r600/SOURCES
@@ -1,4 +1,2 @@
-math/clc_fmax.cl
-math/clc_fmin.cl
 math/clc_native_rsqrt.cl
 math/clc_rsqrt_override.cl
--- a/libclc/clc/lib/r600/math/clc_fmax.cl
+++ b/libclc/clc/lib/r600/math/clc_fmax.cl
@@ -1,41 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include <clc/clcmacro.h>
-#include <clc/internal/clc.h>
-#include <clc/math/math.h>
-
-_CLC_DEF _CLC_OVERLOAD float __clc_fmax(float x, float y) {
-  // Flush denormals if not enabled. Otherwise fmax instruction flushes the
-  // values for comparison, but outputs original denormal
-  x = __clc_flush_denormal_if_not_supported(x);
-  y = __clc_flush_denormal_if_not_supported(y);
-  return __builtin_fmaxf(x, y);
-}
-
-#define __FLOAT_ONLY
-#define FUNCTION __clc_fmax
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
-#undef FUNCTION
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_DEF _CLC_OVERLOAD double __clc_fmax(double x, double y) {
-  return __builtin_fmax(x, y);
-}
-
-#define __DOUBLE_ONLY
-#define FUNCTION __clc_fmax
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
-#undef FUNCTION
-
-#endif
--- a/libclc/clc/lib/r600/math/clc_fmin.cl
+++ b/libclc/clc/lib/r600/math/clc_fmin.cl
@@ -1,42 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include <clc/clcmacro.h>
-#include <clc/internal/clc.h>
-#include <clc/math/math.h>
-
-_CLC_DEF _CLC_OVERLOAD float __clc_fmin(float x, float y) {
-  // fcanonicalize removes sNaNs and flushes denormals if not enabled. Otherwise
-  // fmin instruction flushes the values for comparison, but outputs original
-  // denormal
-  x = __clc_flush_denormal_if_not_supported(x);
-  y = __clc_flush_denormal_if_not_supported(y);
-  return __builtin_fminf(x, y);
-}
-
-#define __FLOAT_ONLY
-#define FUNCTION __clc_fmin
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
-#undef FUNCTION
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_DEF _CLC_OVERLOAD double __clc_fmin(double x, double y) {
-  return __builtin_fmin(x, y);
-}
-
-#define __DOUBLE_ONLY
-#define FUNCTION __clc_fmin
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
-#undef FUNCTION
-
-#endif
--- a/libclc/clc/lib/spirv/SOURCES
+++ b/libclc/clc/lib/spirv/SOURCES
@@ -1 +1,3 @@
+math/clc_fmax.cl
+math/clc_fmin.cl
 math/clc_runtime_has_hw_fma32.cl
--- a/libclc/clc/lib/amdgcn/math/clc_fmax.cl
+++ b/libclc/clc/lib/amdgcn/math/clc_fmax.cl
@@ -8,40 +8,23 @@

 #include <clc/clcmacro.h>
 #include <clc/internal/clc.h>
-#include <clc/relational/clc_isnan.h>

 _CLC_DEF _CLC_OVERLOAD float __clc_fmax(float x, float y) {
-  // fcanonicalize removes sNaNs and flushes denormals if not enabled. Otherwise
-  // fmax instruction flushes the values for comparison, but outputs original
-  // denormal
-  x = __builtin_canonicalizef(x);
-  y = __builtin_canonicalizef(y);
  return __builtin_fmaxf(x, y);
 }

 #ifdef cl_khr_fp64
-
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
 _CLC_DEF _CLC_OVERLOAD double __clc_fmax(double x, double y) {
-  x = __builtin_canonicalize(x);
-  y = __builtin_canonicalize(y);
  return __builtin_fmax(x, y);
 }
-
 #endif
+
 #ifdef cl_khr_fp16
-
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
 _CLC_DEF _CLC_OVERLOAD half __clc_fmax(half x, half y) {
-  if (__clc_isnan(x))
-    return y;
-  if (__clc_isnan(y))
-    return x;
-  return (y < x) ? x : y;
+  return __builtin_fmaxf16(x, y);
 }
-
 #endif

 #define FUNCTION __clc_fmax
--- a/libclc/clc/lib/amdgcn/math/clc_fmin.cl
+++ b/libclc/clc/lib/amdgcn/math/clc_fmin.cl
@@ -8,41 +8,23 @@

 #include <clc/clcmacro.h>
 #include <clc/internal/clc.h>
-#include <clc/relational/clc_isnan.h>

 _CLC_DEF _CLC_OVERLOAD float __clc_fmin(float x, float y) {
-  // fcanonicalize removes sNaNs and flushes denormals if not enabled. Otherwise
-  // fmin instruction flushes the values for comparison, but outputs original
-  // denormal
-  x = __builtin_canonicalizef(x);
-  y = __builtin_canonicalizef(y);
  return __builtin_fminf(x, y);
 }

 #ifdef cl_khr_fp64
-
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
 _CLC_DEF _CLC_OVERLOAD double __clc_fmin(double x, double y) {
-  x = __builtin_canonicalize(x);
-  y = __builtin_canonicalize(y);
  return __builtin_fmin(x, y);
 }
-
 #endif

 #ifdef cl_khr_fp16
-
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
 _CLC_DEF _CLC_OVERLOAD half __clc_fmin(half x, half y) {
-  if (__clc_isnan(x))
-    return y;
-  if (__clc_isnan(y))
-    return x;
-  return (y < x) ? y : x;
+  return __builtin_fminf16(x, y);
 }
-
 #endif

 #define FUNCTION __clc_fmin