mirror of
https://github.com/intel/llvm.git
synced 2026-01-16 21:55:39 +08:00
[libclc] Implement erf/erfc vector functions with a loop since the scalar functions are large (#157055)
This PR reduces the size of amdgcn--amdhsa.bc by 1.8% and the size of nvptx64--nvidiacl.bc by 4%. The loop trip count is constant, so the backend can decide whether to unroll it. --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
28
libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc
Normal file
28
libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc
Normal file
@@ -0,0 +1,28 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/utils.h>
|
||||
|
||||
#if __CLC_VECSIZE_OR_1 >= 2
|
||||
|
||||
#ifndef __CLC_IMPL_FUNCTION
|
||||
#define __CLC_IMPL_FUNCTION __CLC_FUNCTION
|
||||
#endif
|
||||
|
||||
// Vector overload of __CLC_FUNCTION: calls the scalar __CLC_IMPL_FUNCTION on
// each element of x in turn and returns the per-element results as a vector.
// The loop bound __CLC_VECSIZE_OR_1 is a preprocessor constant.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x) {
  // Overlay the vector with a scalar array so that elements can be addressed
  // with a runtime loop index.
  union {
    __CLC_GENTYPE vec;
    __CLC_SCALAR_GENTYPE arr[__CLC_VECSIZE_OR_1];
  } src, dst;

  src.vec = x;
  for (int lane = 0; lane < __CLC_VECSIZE_OR_1; ++lane) {
    dst.arr[lane] = __CLC_IMPL_FUNCTION(src.arr[lane]);
  }
  return dst.vec;
}
|
||||
|
||||
#endif // __CLC_VECSIZE_OR_1 >= 2
|
||||
@@ -507,5 +507,5 @@ _CLC_OVERLOAD _CLC_DEF half __clc_erf(half x) {
|
||||
#endif
|
||||
|
||||
#define __CLC_FUNCTION __clc_erf
|
||||
-#define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
|
||||
+#define __CLC_BODY <clc/shared/unary_def_scalarize_loop.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
|
||||
@@ -518,5 +518,5 @@ _CLC_OVERLOAD _CLC_DEF half __clc_erfc(half x) {
|
||||
#endif
|
||||
|
||||
#define __CLC_FUNCTION __clc_erfc
|
||||
-#define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
|
||||
+#define __CLC_BODY <clc/shared/unary_def_scalarize_loop.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
|
||||
Reference in New Issue
Block a user