[Clang][SVE2p1] Add svpsel builtins

As described in: https://github.com/ARM-software/acle/pull/257

Patch by: Sander de Smalen <sander.desmalen@arm.com>

Reviewed By: kmclaughlin

Differential Revision: https://reviews.llvm.org/D151197
This commit is contained in:
Caroline Concatto
2023-10-18 13:00:12 +00:00
parent 5a600c23f9
commit 1b93e15bcd
3 changed files with 202 additions and 0 deletions

View File

@@ -1865,10 +1865,21 @@ def SVPTRUE_COUNT : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, "aarch64_
def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext", [], [ImmCheck<1, ImmCheck0_3>]>;
def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext_x2", [], [ImmCheck<1, ImmCheck0_1>]>;
// svpsel_lane_c{8,16,32,64}: the svpsel forms whose result and first operand
// use the '}' prototype modifier (prototype "}}Pm"). The intrinsic-name field
// is deliberately empty: these builtins are lowered by the explicit
// svpsel_lane_c* cases in CodeGenFunction::EmitAArch64SVEBuiltinExpr rather
// than by a direct intrinsic mapping.
def SVPSEL_COUNT_ALIAS_B : SInst<"svpsel_lane_c8", "}}Pm", "Pc", MergeNone, "", [], []>;
def SVPSEL_COUNT_ALIAS_H : SInst<"svpsel_lane_c16", "}}Pm", "Ps", MergeNone, "", [], []>;
def SVPSEL_COUNT_ALIAS_S : SInst<"svpsel_lane_c32", "}}Pm", "Pi", MergeNone, "", [], []>;
def SVPSEL_COUNT_ALIAS_D : SInst<"svpsel_lane_c64", "}}Pm", "Pl", MergeNone, "", [], []>;
}
// Every builtin defined in this block carries the "sve2p1" target guard.
let TargetGuard = "sve2p1" in {
// svclamp: the "csil" type list maps to aarch64_sve_sclamp and the
// "U"-prefixed type list maps to aarch64_sve_uclamp.
def SVSCLAMP : SInst<"svclamp[_{d}]", "dddd", "csil", MergeNone, "aarch64_sve_sclamp", [], []>;
def SVUCLAMP : SInst<"svclamp[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp", [], []>;
// svpsel_lane_b{8,16,32,64}: prototype "PPPm" with one def per element size.
// The intrinsic-name field is empty; lowering is done by the explicit
// svpsel_lane_b* cases in CodeGenFunction::EmitAArch64SVEBuiltinExpr.
def SVPSEL_B : SInst<"svpsel_lane_b8", "PPPm", "Pc", MergeNone, "", [], []>;
def SVPSEL_H : SInst<"svpsel_lane_b16", "PPPm", "Ps", MergeNone, "", [], []>;
def SVPSEL_S : SInst<"svpsel_lane_b32", "PPPm", "Pi", MergeNone, "", [], []>;
def SVPSEL_D : SInst<"svpsel_lane_b64", "PPPm", "Pl", MergeNone, "", [], []>;
// svcntp_{d}: not overloaded (IsOverloadNone); operand 1 is an immediate
// validated with ImmCheck2_4_Mul2.
def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone], [ImmCheck<1, ImmCheck2_4_Mul2>]>;
}

View File

@@ -10007,7 +10007,33 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
switch (BuiltinID) {
default:
return nullptr;
case SVE::BI__builtin_sve_svpsel_lane_b8:
case SVE::BI__builtin_sve_svpsel_lane_b16:
case SVE::BI__builtin_sve_svpsel_lane_b32:
case SVE::BI__builtin_sve_svpsel_lane_b64:
case SVE::BI__builtin_sve_svpsel_lane_c8:
case SVE::BI__builtin_sve_svpsel_lane_c16:
case SVE::BI__builtin_sve_svpsel_lane_c32:
case SVE::BI__builtin_sve_svpsel_lane_c64: {
  // Lower svpsel_lane_{b,c}{8,16,32,64} to llvm.aarch64.sve.psel.
  //
  //   Ops[0]  governing operand: a predicate value for the _b forms, or an
  //           "aarch64.svcount" target-extension value for the _c forms.
  //   Ops[1]  predicate operand; cast to the element-sized predicate type
  //           that the intrinsic is overloaded on.
  //   Ops[2]  lane index, forwarded unchanged.
  //
  // For the _c forms the svcount operand is converted to an svbool before
  // the call and the result is converted back afterwards, so the builtin
  // returns the same kind of value it was given.
  bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
  assert((!IsSVCount ||
          cast<TargetExtType>(Ops[0]->getType())->getName() ==
              "aarch64.svcount") &&
         "Unexpected TargetExtType");

  auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);

  // The svcount conversion intrinsics are only requested when they are
  // actually called; asking for them unconditionally would leave unused
  // declarations in the module for the svbool_t variants.
  llvm::Type *SVCountTy = nullptr;
  llvm::Value *Ops0 = Ops[0];
  if (IsSVCount) {
    SVCountTy = llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
    Function *CastFromSVCountF =
        CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
    Ops0 = Builder.CreateCall(CastFromSVCountF, Ops[0]);
  }

  llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
  llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
  if (!IsSVCount)
    return PSel;

  // Convert the svbool result back to the svcount type the caller expects.
  Function *CastToSVCountF =
      CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
  return Builder.CreateCall(CastToSVCountF, PSel);
}
case SVE::BI__builtin_sve_svmov_b_z: {
// svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
SVETypeFlags TypeFlags(Builtin->TypeModifier);

View File

@@ -0,0 +1,165 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: aarch64-registered-target
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \
// RUN: -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \
// RUN: -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
#include <arm_sve.h>
// CHECK-LABEL: @test_svpsel_lane_b8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 15
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv16i1(<vscale x 16 x i1> [[P1:%.*]], <vscale x 16 x i1> [[P2:%.*]], i32 [[ADD]])
// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z19test_svpsel_lane_b8u10__SVBool_tu10__SVBool_tj(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 15
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv16i1(<vscale x 16 x i1> [[P1:%.*]], <vscale x 16 x i1> [[P2:%.*]], i32 [[ADD]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
svbool_t test_svpsel_lane_b8(svbool_t p1, svbool_t p2, uint32_t idx) {
// Run-time lane index (idx + 15). For the 8-bit form the expected IR passes
// both predicates straight to llvm.aarch64.sve.psel.nxv16i1, with no
// predicate conversion.
return svpsel_lane_b8(p1, p2, idx + 15);
}
// CHECK-LABEL: @test_svpsel_lane_b16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 7
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[P2:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv8i1(<vscale x 16 x i1> [[P1:%.*]], <vscale x 8 x i1> [[TMP0]], i32 [[ADD]])
// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z20test_svpsel_lane_b16u10__SVBool_tu10__SVBool_tj(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 7
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[P2:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv8i1(<vscale x 16 x i1> [[P1:%.*]], <vscale x 8 x i1> [[TMP0]], i32 [[ADD]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
//
svbool_t test_svpsel_lane_b16(svbool_t p1, svbool_t p2, uint32_t idx) {
// Run-time lane index (idx + 7). The expected IR first narrows p2 with
// convert.from.svbool.nxv8i1 and then calls llvm.aarch64.sve.psel.nxv8i1.
return svpsel_lane_b16(p1, p2, idx + 7);
}
// CHECK-LABEL: @test_svpsel_lane_b32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 3
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[P2:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv4i1(<vscale x 16 x i1> [[P1:%.*]], <vscale x 4 x i1> [[TMP0]], i32 [[ADD]])
// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z20test_svpsel_lane_b32u10__SVBool_tu10__SVBool_tj(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 3
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[P2:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv4i1(<vscale x 16 x i1> [[P1:%.*]], <vscale x 4 x i1> [[TMP0]], i32 [[ADD]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
//
svbool_t test_svpsel_lane_b32(svbool_t p1, svbool_t p2, uint32_t idx) {
// Run-time lane index (idx + 3). The expected IR first narrows p2 with
// convert.from.svbool.nxv4i1 and then calls llvm.aarch64.sve.psel.nxv4i1.
return svpsel_lane_b32(p1, p2, idx + 3);
}
// CHECK-LABEL: @test_svpsel_lane_b64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 1
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[P2:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv2i1(<vscale x 16 x i1> [[P1:%.*]], <vscale x 2 x i1> [[TMP0]], i32 [[ADD]])
// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z20test_svpsel_lane_b64u10__SVBool_tu10__SVBool_tj(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 1
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[P2:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv2i1(<vscale x 16 x i1> [[P1:%.*]], <vscale x 2 x i1> [[TMP0]], i32 [[ADD]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
//
svbool_t test_svpsel_lane_b64(svbool_t p1, svbool_t p2, uint32_t idx) {
// Run-time lane index (idx + 1). The expected IR first narrows p2 with
// convert.from.svbool.nxv2i1 and then calls llvm.aarch64.sve.psel.nxv2i1.
return svpsel_lane_b64(p1, p2, idx + 1);
}
// CHECK-LABEL: @test_svpsel_lane_c8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 15
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv16i1(<vscale x 16 x i1> [[TMP0]], <vscale x 16 x i1> [[P2:%.*]], i32 [[ADD]])
// CHECK-NEXT: [[TMP2:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt(<vscale x 16 x i1> [[TMP1]])
// CHECK-NEXT: ret target("aarch64.svcount") [[TMP2]]
//
// CPP-CHECK-LABEL: @_Z19test_svpsel_lane_c8u11__SVCount_tu10__SVBool_tj(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 15
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv16i1(<vscale x 16 x i1> [[TMP0]], <vscale x 16 x i1> [[P2:%.*]], i32 [[ADD]])
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt(<vscale x 16 x i1> [[TMP1]])
// CPP-CHECK-NEXT: ret target("aarch64.svcount") [[TMP2]]
//
svcount_t test_svpsel_lane_c8(svcount_t p1, svbool_t p2, uint32_t idx) {
// svcount_t form with a run-time lane index (idx + 15). The expected IR
// converts p1 to an svbool, calls llvm.aarch64.sve.psel.nxv16i1, and converts
// the result back to target("aarch64.svcount").
return svpsel_lane_c8(p1, p2, idx + 15);
}
// CHECK-LABEL: @test_svpsel_lane_c16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 7
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[P2:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv8i1(<vscale x 16 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], i32 [[ADD]])
// CHECK-NEXT: [[TMP3:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt(<vscale x 16 x i1> [[TMP2]])
// CHECK-NEXT: ret target("aarch64.svcount") [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z20test_svpsel_lane_c16u11__SVCount_tu10__SVBool_tj(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 7
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[P2:%.*]])
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv8i1(<vscale x 16 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], i32 [[ADD]])
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt(<vscale x 16 x i1> [[TMP2]])
// CPP-CHECK-NEXT: ret target("aarch64.svcount") [[TMP3]]
//
svcount_t test_svpsel_lane_c16(svcount_t p1, svbool_t p2, uint32_t idx) {
// svcount_t form with a run-time lane index (idx + 7). The expected IR
// converts p1 to an svbool, narrows p2 to nxv8i1, calls
// llvm.aarch64.sve.psel.nxv8i1, and converts the result back to
// target("aarch64.svcount").
return svpsel_lane_c16(p1, p2, idx + 7);
}
// CHECK-LABEL: @test_svpsel_lane_c32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 3
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[P2:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv4i1(<vscale x 16 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], i32 [[ADD]])
// CHECK-NEXT: [[TMP3:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt(<vscale x 16 x i1> [[TMP2]])
// CHECK-NEXT: ret target("aarch64.svcount") [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z20test_svpsel_lane_c32u11__SVCount_tu10__SVBool_tj(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 3
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[P2:%.*]])
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv4i1(<vscale x 16 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], i32 [[ADD]])
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt(<vscale x 16 x i1> [[TMP2]])
// CPP-CHECK-NEXT: ret target("aarch64.svcount") [[TMP3]]
//
svcount_t test_svpsel_lane_c32(svcount_t p1, svbool_t p2, uint32_t idx) {
// svcount_t form with a run-time lane index (idx + 3). The expected IR
// converts p1 to an svbool, narrows p2 to nxv4i1, calls
// llvm.aarch64.sve.psel.nxv4i1, and converts the result back to
// target("aarch64.svcount").
return svpsel_lane_c32(p1, p2, idx + 3);
}
// CHECK-LABEL: @test_svpsel_lane_c64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 1
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[P2:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv2i1(<vscale x 16 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], i32 [[ADD]])
// CHECK-NEXT: [[TMP3:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt(<vscale x 16 x i1> [[TMP2]])
// CHECK-NEXT: ret target("aarch64.svcount") [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z20test_svpsel_lane_c64u11__SVCount_tu10__SVBool_tj(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 1
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[P2:%.*]])
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv2i1(<vscale x 16 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], i32 [[ADD]])
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt(<vscale x 16 x i1> [[TMP2]])
// CPP-CHECK-NEXT: ret target("aarch64.svcount") [[TMP3]]
//
svcount_t test_svpsel_lane_c64(svcount_t p1, svbool_t p2, uint32_t idx) {
// svcount_t form with a run-time lane index (idx + 1). The expected IR
// converts p1 to an svbool, narrows p2 to nxv2i1, calls
// llvm.aarch64.sve.psel.nxv2i1, and converts the result back to
// target("aarch64.svcount").
return svpsel_lane_c64(p1, p2, idx + 1);
}