mirror of
https://github.com/intel/llvm.git
synced 2026-02-06 06:31:50 +08:00
[RISCV] Scalarize constant stores of fixed vectors if small enough
For stores of small fixed-length vector constants, we can store them with a sequence of lui/addi/sh/sw to avoid the cost of building the vector and the vsetivli toggle, provided the constant materialization cost isn't too high. This subsumes the optimisation for stores of zeroes in 4dc9a2c5b9. (This is a reapply of 0ca13f9d27.) Reviewed By: reames Differential Revision: https://reviews.llvm.org/D151221
This commit is contained in:
@@ -12193,18 +12193,32 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
|
||||
isPowerOf2_64(MemVT.getSizeInBits()) &&
|
||||
MemVT.getSizeInBits() <= Subtarget.getXLen();
|
||||
|
||||
// Using vector to store zeros requires e.g.:
|
||||
// vsetivli zero, 2, e64, m1, ta, ma
|
||||
// vmv.v.i v8, 0
|
||||
// If sufficiently aligned we can scalarize stores of constant vectors of
|
||||
// any power-of-two size up to XLen bits, provided that they aren't too
|
||||
// expensive to materialize.
|
||||
// vsetivli zero, 2, e8, m1, ta, ma
|
||||
// vmv.v.i v8, 4
|
||||
// vse64.v v8, (a0)
|
||||
// If sufficiently aligned, we can use at most one scalar store to zero
|
||||
// initialize any power-of-two size up to XLen bits.
|
||||
// ->
|
||||
// li a1, 1028
|
||||
// sh a1, 0(a0)
|
||||
if (DCI.isBeforeLegalize() && IsScalarizable &&
|
||||
ISD::isBuildVectorAllZeros(Val.getNode())) {
|
||||
auto NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
|
||||
if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
|
||||
ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
|
||||
// Get the constant vector bits
|
||||
APInt NewC(Val.getValueSizeInBits(), 0);
|
||||
for (unsigned i = 0; i < Val.getNumOperands(); i++) {
|
||||
if (Val.getOperand(i).isUndef())
|
||||
continue;
|
||||
NewC.insertBits(Val.getConstantOperandAPInt(i),
|
||||
i * Val.getScalarValueSizeInBits());
|
||||
}
|
||||
MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
|
||||
|
||||
if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(),
|
||||
Subtarget.getFeatureBits(), true) <= 2 &&
|
||||
allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
|
||||
NewVT, *Store->getMemOperand())) {
|
||||
auto NewV = DAG.getConstant(0, DL, NewVT);
|
||||
SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
|
||||
return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
|
||||
Store->getPointerInfo(), Store->getOriginalAlign(),
|
||||
Store->getMemOperand()->getFlags());
|
||||
|
||||
@@ -63,108 +63,103 @@ define void @buildvec_vid_mpy_imm_v16i8(ptr %x) {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @buildvec_vid_step2_add0_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
|
||||
; Some tests return this struct because the stores end up being scalarized.
|
||||
%x4v4i8 = type {<4 x i8>, <4 x i8>, <4 x i8>, <4 x i8>}
|
||||
|
||||
define %x4v4i8 @buildvec_vid_step2_add0_v4i8() {
|
||||
; CHECK-LABEL: buildvec_vid_step2_add0_v4i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
|
||||
; CHECK-NEXT: vid.v v8
|
||||
; CHECK-NEXT: vadd.vv v8, v8, v8
|
||||
; CHECK-NEXT: vse8.v v8, (a0)
|
||||
; CHECK-NEXT: vse8.v v8, (a1)
|
||||
; CHECK-NEXT: vse8.v v8, (a2)
|
||||
; CHECK-NEXT: vse8.v v8, (a3)
|
||||
; CHECK-NEXT: vmv1r.v v9, v8
|
||||
; CHECK-NEXT: vmv1r.v v10, v8
|
||||
; CHECK-NEXT: vmv1r.v v11, v8
|
||||
; CHECK-NEXT: ret
|
||||
store <4 x i8> <i8 0, i8 2, i8 4, i8 6>, ptr %z0
|
||||
store <4 x i8> <i8 undef, i8 2, i8 4, i8 6>, ptr %z1
|
||||
store <4 x i8> <i8 undef, i8 undef, i8 4, i8 6>, ptr %z2
|
||||
store <4 x i8> <i8 0, i8 undef, i8 undef, i8 6>, ptr %z3
|
||||
ret void
|
||||
%1 = insertvalue %x4v4i8 poison, <4 x i8> <i8 0, i8 2, i8 4, i8 6>, 0
|
||||
%2 = insertvalue %x4v4i8 %1, <4 x i8> <i8 undef, i8 2, i8 4, i8 6>, 1
|
||||
%3 = insertvalue %x4v4i8 %2, <4 x i8> <i8 undef, i8 undef, i8 4, i8 6>, 2
|
||||
%4 = insertvalue %x4v4i8 %3, <4 x i8> <i8 0, i8 undef, i8 undef, i8 6>, 3
|
||||
ret %x4v4i8 %4
|
||||
}
|
||||
|
||||
define void @buildvec_vid_step2_add1_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
|
||||
define %x4v4i8 @buildvec_vid_step2_add1_v4i8() {
|
||||
; CHECK-LABEL: buildvec_vid_step2_add1_v4i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
|
||||
; CHECK-NEXT: vid.v v8
|
||||
; CHECK-NEXT: vadd.vv v8, v8, v8
|
||||
; CHECK-NEXT: vadd.vi v8, v8, 1
|
||||
; CHECK-NEXT: vse8.v v8, (a0)
|
||||
; CHECK-NEXT: vse8.v v8, (a1)
|
||||
; CHECK-NEXT: vse8.v v8, (a2)
|
||||
; CHECK-NEXT: vse8.v v8, (a3)
|
||||
; CHECK-NEXT: vmv1r.v v9, v8
|
||||
; CHECK-NEXT: vmv1r.v v10, v8
|
||||
; CHECK-NEXT: vmv1r.v v11, v8
|
||||
; CHECK-NEXT: ret
|
||||
store <4 x i8> <i8 1, i8 3, i8 5, i8 7>, ptr %z0
|
||||
store <4 x i8> <i8 undef, i8 3, i8 5, i8 7>, ptr %z1
|
||||
store <4 x i8> <i8 undef, i8 undef, i8 5, i8 7>, ptr %z2
|
||||
store <4 x i8> <i8 1, i8 undef, i8 undef, i8 7>, ptr %z3
|
||||
ret void
|
||||
%1 = insertvalue %x4v4i8 poison, <4 x i8> <i8 1, i8 3, i8 5, i8 7>, 0
|
||||
%2 = insertvalue %x4v4i8 %1, <4 x i8> <i8 undef, i8 3, i8 5, i8 7>, 1
|
||||
%3 = insertvalue %x4v4i8 %2, <4 x i8> <i8 undef, i8 undef, i8 5, i8 7>, 2
|
||||
%4 = insertvalue %x4v4i8 %3, <4 x i8> <i8 1, i8 undef, i8 undef, i8 7>, 3
|
||||
ret %x4v4i8 %4
|
||||
}
|
||||
|
||||
; FIXME: This could generate vrsub.vi but the (ISD::MUL X, -1) we generate
|
||||
; while lowering ISD::BUILD_VECTOR is custom-lowered to RISCVISD::MUL_VL before
|
||||
; being combined.
|
||||
define void @buildvec_vid_stepn1_add0_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
|
||||
define %x4v4i8 @buildvec_vid_stepn1_add0_v4i8() {
|
||||
; CHECK-LABEL: buildvec_vid_stepn1_add0_v4i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
|
||||
; CHECK-NEXT: vid.v v8
|
||||
; CHECK-NEXT: vrsub.vi v8, v8, 0
|
||||
; CHECK-NEXT: vse8.v v8, (a0)
|
||||
; CHECK-NEXT: vse8.v v8, (a1)
|
||||
; CHECK-NEXT: vse8.v v8, (a2)
|
||||
; CHECK-NEXT: vse8.v v8, (a3)
|
||||
; CHECK-NEXT: vmv1r.v v9, v8
|
||||
; CHECK-NEXT: vmv1r.v v10, v8
|
||||
; CHECK-NEXT: vmv1r.v v11, v8
|
||||
; CHECK-NEXT: ret
|
||||
store <4 x i8> <i8 0, i8 -1, i8 -2, i8 -3>, ptr %z0
|
||||
store <4 x i8> <i8 undef, i8 -1, i8 -2, i8 -3>, ptr %z1
|
||||
store <4 x i8> <i8 undef, i8 undef, i8 -2, i8 -3>, ptr %z2
|
||||
store <4 x i8> <i8 0, i8 undef, i8 undef, i8 -3>, ptr %z3
|
||||
ret void
|
||||
%1 = insertvalue %x4v4i8 poison, <4 x i8> <i8 0, i8 -1, i8 -2, i8 -3>, 0
|
||||
%2 = insertvalue %x4v4i8 %1, <4 x i8> <i8 undef, i8 -1, i8 -2, i8 -3>, 1
|
||||
%3 = insertvalue %x4v4i8 %2, <4 x i8> <i8 undef, i8 undef, i8 -2, i8 -3>, 2
|
||||
%4 = insertvalue %x4v4i8 %3, <4 x i8> <i8 0, i8 undef, i8 undef, i8 -3>, 3
|
||||
ret %x4v4i8 %4
|
||||
}
|
||||
|
||||
define void @buildvec_vid_stepn2_add0_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
|
||||
define %x4v4i8 @buildvec_vid_stepn2_add0_v4i8() {
|
||||
; CHECK-LABEL: buildvec_vid_stepn2_add0_v4i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
|
||||
; CHECK-NEXT: vid.v v8
|
||||
; CHECK-NEXT: vadd.vv v8, v8, v8
|
||||
; CHECK-NEXT: vrsub.vi v8, v8, 0
|
||||
; CHECK-NEXT: vse8.v v8, (a0)
|
||||
; CHECK-NEXT: vse8.v v8, (a1)
|
||||
; CHECK-NEXT: vse8.v v8, (a2)
|
||||
; CHECK-NEXT: vse8.v v8, (a3)
|
||||
; CHECK-NEXT: vmv.v.i v11, -6
|
||||
; CHECK-NEXT: vmv1r.v v9, v8
|
||||
; CHECK-NEXT: vmv1r.v v10, v8
|
||||
; CHECK-NEXT: ret
|
||||
store <4 x i8> <i8 0, i8 -2, i8 -4, i8 -6>, ptr %z0
|
||||
store <4 x i8> <i8 undef, i8 -2, i8 -4, i8 -6>, ptr %z1
|
||||
store <4 x i8> <i8 undef, i8 undef, i8 -4, i8 -6>, ptr %z2
|
||||
store <4 x i8> <i8 0, i8 undef, i8 undef, i8 -6>, ptr %z3
|
||||
ret void
|
||||
%1 = insertvalue %x4v4i8 poison, <4 x i8> <i8 0, i8 -2, i8 -4, i8 -6>, 0
|
||||
%2 = insertvalue %x4v4i8 %1, <4 x i8> <i8 undef, i8 -2, i8 -4, i8 -6>, 1
|
||||
%3 = insertvalue %x4v4i8 %2, <4 x i8> <i8 undef, i8 undef, i8 -4, i8 -6>, 2
|
||||
%4 = insertvalue %x4v4i8 %3, <4 x i8> <i8 undef, i8 undef, i8 undef, i8 -6>, 3
|
||||
ret %x4v4i8 %4
|
||||
}
|
||||
|
||||
define void @buildvec_vid_stepn2_add3_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
|
||||
define <4 x i8> @buildvec_vid_stepn2_add3_v4i8() {
|
||||
; CHECK-LABEL: buildvec_vid_stepn2_add3_v4i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
|
||||
; CHECK-NEXT: vid.v v8
|
||||
; CHECK-NEXT: vadd.vv v8, v8, v8
|
||||
; CHECK-NEXT: vrsub.vi v8, v8, 3
|
||||
; CHECK-NEXT: vse8.v v8, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
store <4 x i8> <i8 3, i8 1, i8 -1, i8 -3>, ptr %z0
|
||||
ret void
|
||||
ret <4 x i8> <i8 3, i8 1, i8 -1, i8 -3>
|
||||
}
|
||||
|
||||
define void @buildvec_vid_stepn3_add3_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
|
||||
define <4 x i8> @buildvec_vid_stepn3_add3_v4i8() {
|
||||
; CHECK-LABEL: buildvec_vid_stepn3_add3_v4i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
|
||||
; CHECK-NEXT: vmv.v.i v8, 3
|
||||
; CHECK-NEXT: vid.v v9
|
||||
; CHECK-NEXT: li a1, -3
|
||||
; CHECK-NEXT: vmadd.vx v9, a1, v8
|
||||
; CHECK-NEXT: vse8.v v9, (a0)
|
||||
; CHECK-NEXT: vmv.v.i v9, 3
|
||||
; CHECK-NEXT: vid.v v8
|
||||
; CHECK-NEXT: li a0, -3
|
||||
; CHECK-NEXT: vmadd.vx v8, a0, v9
|
||||
; CHECK-NEXT: ret
|
||||
store <4 x i8> <i8 3, i8 0, i8 -3, i8 -6>, ptr %z0
|
||||
ret void
|
||||
ret <4 x i8> <i8 3, i8 0, i8 -3, i8 -6>
|
||||
}
|
||||
|
||||
define void @buildvec_vid_stepn3_addn3_v4i32(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
|
||||
@@ -235,43 +230,37 @@ define <4 x i64> @buildvec_vid_step2_add0_v4i64() {
|
||||
ret <4 x i64> <i64 0, i64 2, i64 4, i64 6>
|
||||
}
|
||||
|
||||
define void @buildvec_no_vid_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3, ptr %z4, ptr %z5) {
|
||||
%x6v4i8 = type {<4 x i8>, <4 x i8>, <4 x i8>, <4 x i8>, <4 x i8>, <4 x i8>}
|
||||
|
||||
define %x6v4i8 @buildvec_no_vid_v4i8() {
|
||||
; CHECK-LABEL: buildvec_no_vid_v4i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lui a6, %hi(.LCPI14_0)
|
||||
; CHECK-NEXT: addi a6, a6, %lo(.LCPI14_0)
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI14_0)
|
||||
; CHECK-NEXT: addi a0, a0, %lo(.LCPI14_0)
|
||||
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
|
||||
; CHECK-NEXT: vle8.v v8, (a6)
|
||||
; CHECK-NEXT: lui a6, %hi(.LCPI14_1)
|
||||
; CHECK-NEXT: addi a6, a6, %lo(.LCPI14_1)
|
||||
; CHECK-NEXT: vle8.v v9, (a6)
|
||||
; CHECK-NEXT: vse8.v v8, (a0)
|
||||
; CHECK-NEXT: vse8.v v9, (a1)
|
||||
; CHECK-NEXT: vle8.v v8, (a0)
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI14_1)
|
||||
; CHECK-NEXT: addi a0, a0, %lo(.LCPI14_1)
|
||||
; CHECK-NEXT: vle8.v v9, (a0)
|
||||
; CHECK-NEXT: li a0, 1
|
||||
; CHECK-NEXT: slli a0, a0, 11
|
||||
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
|
||||
; CHECK-NEXT: vmv.v.x v8, a0
|
||||
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
|
||||
; CHECK-NEXT: vse8.v v8, (a2)
|
||||
; CHECK-NEXT: vmv.v.x v10, a0
|
||||
; CHECK-NEXT: li a0, 2047
|
||||
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
|
||||
; CHECK-NEXT: vmv.v.x v8, a0
|
||||
; CHECK-NEXT: vmv.v.x v11, a0
|
||||
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI14_2)
|
||||
; CHECK-NEXT: addi a0, a0, %lo(.LCPI14_2)
|
||||
; CHECK-NEXT: vle8.v v9, (a0)
|
||||
; CHECK-NEXT: vse8.v v8, (a3)
|
||||
; CHECK-NEXT: vmv.v.i v8, -2
|
||||
; CHECK-NEXT: vse8.v v8, (a4)
|
||||
; CHECK-NEXT: vse8.v v9, (a5)
|
||||
; CHECK-NEXT: vle8.v v13, (a0)
|
||||
; CHECK-NEXT: vmv.v.i v12, -2
|
||||
; CHECK-NEXT: ret
|
||||
store <4 x i8> <i8 1, i8 3, i8 6, i8 7>, ptr %z0
|
||||
store <4 x i8> <i8 undef, i8 2, i8 5, i8 7>, ptr %z1
|
||||
store <4 x i8> <i8 0, i8 undef, i8 undef, i8 8>, ptr %z2
|
||||
store <4 x i8> <i8 -1, i8 undef, i8 undef, i8 7>, ptr %z3
|
||||
store <4 x i8> <i8 -2, i8 undef, i8 undef, i8 undef>, ptr %z4
|
||||
store <4 x i8> <i8 -1, i8 -2, i8 -4, i8 -5>, ptr %z5
|
||||
ret void
|
||||
%1 = insertvalue %x6v4i8 poison, <4 x i8> <i8 1, i8 3, i8 6, i8 7>, 0
|
||||
%2 = insertvalue %x6v4i8 %1, <4 x i8> <i8 undef, i8 2, i8 5, i8 7>, 1
|
||||
%3 = insertvalue %x6v4i8 %2, <4 x i8> <i8 0, i8 undef, i8 undef, i8 8>, 2
|
||||
%4 = insertvalue %x6v4i8 %3, <4 x i8> <i8 -1, i8 undef, i8 undef, i8 7>, 3
|
||||
%5 = insertvalue %x6v4i8 %4, <4 x i8> <i8 -2, i8 undef, i8 undef, i8 undef>, 4
|
||||
%6 = insertvalue %x6v4i8 %5, <4 x i8> <i8 -1, i8 -2, i8 -4, i8 -5>, 5
|
||||
ret %x6v4i8 %6
|
||||
}
|
||||
|
||||
define void @buildvec_dominant0_v8i16(ptr %x) {
|
||||
@@ -300,35 +289,30 @@ define void @buildvec_dominant1_v8i16(ptr %x) {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @buildvec_dominant0_v2i8(ptr %x) {
|
||||
define <2 x i8> @buildvec_dominant0_v2i8() {
|
||||
; CHECK-LABEL: buildvec_dominant0_v2i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ret
|
||||
store <2 x i8> <i8 undef, i8 undef>, ptr %x
|
||||
ret void
|
||||
ret <2 x i8> <i8 undef, i8 undef>
|
||||
}
|
||||
|
||||
define void @buildvec_dominant1_v2i8(ptr %x) {
|
||||
define <2 x i8> @buildvec_dominant1_v2i8() {
|
||||
; CHECK-LABEL: buildvec_dominant1_v2i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
|
||||
; CHECK-NEXT: vmv.v.i v8, -1
|
||||
; CHECK-NEXT: vse8.v v8, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
store <2 x i8> <i8 undef, i8 -1>, ptr %x
|
||||
ret void
|
||||
ret <2 x i8> <i8 undef, i8 -1>
|
||||
}
|
||||
|
||||
define void @buildvec_dominant2_v2i8(ptr %x) {
|
||||
define <2 x i8> @buildvec_dominant2_v2i8() {
|
||||
; CHECK-LABEL: buildvec_dominant2_v2i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
|
||||
; CHECK-NEXT: vid.v v8
|
||||
; CHECK-NEXT: vrsub.vi v8, v8, 0
|
||||
; CHECK-NEXT: vse8.v v8, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
store <2 x i8> <i8 0, i8 -1>, ptr %x
|
||||
ret void
|
||||
ret <2 x i8> <i8 0, i8 -1>
|
||||
}
|
||||
|
||||
define void @buildvec_dominant0_v2i32(ptr %x) {
|
||||
|
||||
@@ -7,17 +7,8 @@
|
||||
define void @splat_ones_v1i1(ptr %x) {
|
||||
; CHECK-LABEL: splat_ones_v1i1:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
|
||||
; CHECK-NEXT: vmset.m v0
|
||||
; CHECK-NEXT: vmv.v.i v8, 0
|
||||
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
|
||||
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
|
||||
; CHECK-NEXT: vmv.v.i v9, 0
|
||||
; CHECK-NEXT: vsetivli zero, 1, e8, mf2, tu, ma
|
||||
; CHECK-NEXT: vslideup.vi v9, v8, 0
|
||||
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
|
||||
; CHECK-NEXT: vmsne.vi v8, v9, 0
|
||||
; CHECK-NEXT: vsm.v v8, (a0)
|
||||
; CHECK-NEXT: li a1, 1
|
||||
; CHECK-NEXT: sb a1, 0(a0)
|
||||
; CHECK-NEXT: ret
|
||||
store <1 x i1> <i1 1>, ptr %x
|
||||
ret void
|
||||
@@ -83,17 +74,8 @@ define void @splat_v1i1_icmp(ptr %x, i32 signext %y, i32 signext %z) {
|
||||
define void @splat_ones_v4i1(ptr %x) {
|
||||
; CHECK-LABEL: splat_ones_v4i1:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
|
||||
; CHECK-NEXT: vmset.m v0
|
||||
; CHECK-NEXT: vmv.v.i v8, 0
|
||||
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
|
||||
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
|
||||
; CHECK-NEXT: vmv.v.i v9, 0
|
||||
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
|
||||
; CHECK-NEXT: vslideup.vi v9, v8, 0
|
||||
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
|
||||
; CHECK-NEXT: vmsne.vi v8, v9, 0
|
||||
; CHECK-NEXT: vsm.v v8, (a0)
|
||||
; CHECK-NEXT: li a1, 15
|
||||
; CHECK-NEXT: sb a1, 0(a0)
|
||||
; CHECK-NEXT: ret
|
||||
store <4 x i1> <i1 1, i1 1, i1 1, i1 1>, ptr %x
|
||||
ret void
|
||||
@@ -149,9 +131,8 @@ define void @splat_v8i1(ptr %x, i1 %y) {
|
||||
define void @splat_ones_v16i1(ptr %x) {
|
||||
; CHECK-LABEL: splat_ones_v16i1:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
|
||||
; CHECK-NEXT: vmset.m v8
|
||||
; CHECK-NEXT: vsm.v v8, (a0)
|
||||
; CHECK-NEXT: li a1, -1
|
||||
; CHECK-NEXT: sh a1, 0(a0)
|
||||
; CHECK-NEXT: ret
|
||||
store <16 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, ptr %x
|
||||
ret void
|
||||
@@ -220,16 +201,6 @@ define void @splat_v32i1(ptr %x, i1 %y) {
|
||||
}
|
||||
|
||||
define void @splat_ones_v64i1(ptr %x) {
|
||||
; LMULMAX2-LABEL: splat_ones_v64i1:
|
||||
; LMULMAX2: # %bb.0:
|
||||
; LMULMAX2-NEXT: addi a1, a0, 4
|
||||
; LMULMAX2-NEXT: li a2, 32
|
||||
; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma
|
||||
; LMULMAX2-NEXT: vmset.m v8
|
||||
; LMULMAX2-NEXT: vsm.v v8, (a1)
|
||||
; LMULMAX2-NEXT: vsm.v v8, (a0)
|
||||
; LMULMAX2-NEXT: ret
|
||||
;
|
||||
; LMULMAX1-RV32-LABEL: splat_ones_v64i1:
|
||||
; LMULMAX1-RV32: # %bb.0:
|
||||
; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
|
||||
@@ -245,15 +216,8 @@ define void @splat_ones_v64i1(ptr %x) {
|
||||
;
|
||||
; LMULMAX1-RV64-LABEL: splat_ones_v64i1:
|
||||
; LMULMAX1-RV64: # %bb.0:
|
||||
; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
|
||||
; LMULMAX1-RV64-NEXT: vmset.m v8
|
||||
; LMULMAX1-RV64-NEXT: vsm.v v8, (a0)
|
||||
; LMULMAX1-RV64-NEXT: addi a1, a0, 6
|
||||
; LMULMAX1-RV64-NEXT: vsm.v v8, (a1)
|
||||
; LMULMAX1-RV64-NEXT: addi a1, a0, 4
|
||||
; LMULMAX1-RV64-NEXT: vsm.v v8, (a1)
|
||||
; LMULMAX1-RV64-NEXT: addi a0, a0, 2
|
||||
; LMULMAX1-RV64-NEXT: vsm.v v8, (a0)
|
||||
; LMULMAX1-RV64-NEXT: li a1, -1
|
||||
; LMULMAX1-RV64-NEXT: sd a1, 0(a0)
|
||||
; LMULMAX1-RV64-NEXT: ret
|
||||
store <64 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, ptr %x
|
||||
ret void
|
||||
|
||||
@@ -191,27 +191,27 @@ define void @store_v6i1(ptr %p, <6 x i1> %v) {
|
||||
define void @store_constant_v2i8(ptr %p) {
|
||||
; CHECK-LABEL: store_constant_v2i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
|
||||
; CHECK-NEXT: vmv.v.i v8, 3
|
||||
; CHECK-NEXT: vid.v v9
|
||||
; CHECK-NEXT: li a1, 3
|
||||
; CHECK-NEXT: vmadd.vx v9, a1, v8
|
||||
; CHECK-NEXT: vse8.v v9, (a0)
|
||||
; CHECK-NEXT: li a1, 1539
|
||||
; CHECK-NEXT: sh a1, 0(a0)
|
||||
; CHECK-NEXT: ret
|
||||
store <2 x i8> <i8 3, i8 6>, ptr %p
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @store_constant_v2i16(ptr %p) {
|
||||
; CHECK-LABEL: store_constant_v2i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
|
||||
; CHECK-NEXT: vmv.v.i v8, 3
|
||||
; CHECK-NEXT: vid.v v9
|
||||
; CHECK-NEXT: li a1, 3
|
||||
; CHECK-NEXT: vmadd.vx v9, a1, v8
|
||||
; CHECK-NEXT: vse16.v v9, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
; RV32-LABEL: store_constant_v2i16:
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: lui a1, 96
|
||||
; RV32-NEXT: addi a1, a1, 3
|
||||
; RV32-NEXT: sw a1, 0(a0)
|
||||
; RV32-NEXT: ret
|
||||
;
|
||||
; RV64-LABEL: store_constant_v2i16:
|
||||
; RV64: # %bb.0:
|
||||
; RV64-NEXT: lui a1, 96
|
||||
; RV64-NEXT: addiw a1, a1, 3
|
||||
; RV64-NEXT: sw a1, 0(a0)
|
||||
; RV64-NEXT: ret
|
||||
store <2 x i16> <i16 3, i16 6>, ptr %p
|
||||
ret void
|
||||
}
|
||||
@@ -231,14 +231,19 @@ define void @store_constant_v2i32(ptr %p) {
|
||||
}
|
||||
|
||||
define void @store_constant_v4i8(ptr %p) {
|
||||
; CHECK-LABEL: store_constant_v4i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI12_0)
|
||||
; CHECK-NEXT: addi a1, a1, %lo(.LCPI12_0)
|
||||
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
|
||||
; CHECK-NEXT: vle8.v v8, (a1)
|
||||
; CHECK-NEXT: vse8.v v8, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
; RV32-LABEL: store_constant_v4i8:
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: lui a1, 4176
|
||||
; RV32-NEXT: addi a1, a1, 1539
|
||||
; RV32-NEXT: sw a1, 0(a0)
|
||||
; RV32-NEXT: ret
|
||||
;
|
||||
; RV64-LABEL: store_constant_v4i8:
|
||||
; RV64: # %bb.0:
|
||||
; RV64-NEXT: lui a1, 4176
|
||||
; RV64-NEXT: addiw a1, a1, 1539
|
||||
; RV64-NEXT: sw a1, 0(a0)
|
||||
; RV64-NEXT: ret
|
||||
store <4 x i8> <i8 3, i8 6, i8 5, i8 1>, ptr %p
|
||||
ret void
|
||||
}
|
||||
@@ -270,12 +275,19 @@ define void @store_constant_v4i32(ptr %p) {
|
||||
}
|
||||
|
||||
define void @store_id_v4i8(ptr %p) {
|
||||
; CHECK-LABEL: store_id_v4i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
|
||||
; CHECK-NEXT: vid.v v8
|
||||
; CHECK-NEXT: vse8.v v8, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
; RV32-LABEL: store_id_v4i8:
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: lui a1, 12320
|
||||
; RV32-NEXT: addi a1, a1, 256
|
||||
; RV32-NEXT: sw a1, 0(a0)
|
||||
; RV32-NEXT: ret
|
||||
;
|
||||
; RV64-LABEL: store_id_v4i8:
|
||||
; RV64: # %bb.0:
|
||||
; RV64-NEXT: lui a1, 12320
|
||||
; RV64-NEXT: addiw a1, a1, 256
|
||||
; RV64-NEXT: sw a1, 0(a0)
|
||||
; RV64-NEXT: ret
|
||||
store <4 x i8> <i8 0, i8 1, i8 2, i8 3>, ptr %p
|
||||
ret void
|
||||
}
|
||||
@@ -297,9 +309,8 @@ define void @store_constant_v2i8_align1(ptr %p) {
|
||||
define void @store_constant_splat_v2i8(ptr %p) {
|
||||
; CHECK-LABEL: store_constant_splat_v2i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
|
||||
; CHECK-NEXT: vmv.v.i v8, 3
|
||||
; CHECK-NEXT: vse8.v v8, (a0)
|
||||
; CHECK-NEXT: li a1, 771
|
||||
; CHECK-NEXT: sh a1, 0(a0)
|
||||
; CHECK-NEXT: ret
|
||||
store <2 x i8> <i8 3, i8 3>, ptr %p
|
||||
ret void
|
||||
@@ -308,9 +319,8 @@ define void @store_constant_splat_v2i8(ptr %p) {
|
||||
define void @store_constant_undef_v2i8(ptr %p) {
|
||||
; CHECK-LABEL: store_constant_undef_v2i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
|
||||
; CHECK-NEXT: vmv.v.i v8, 3
|
||||
; CHECK-NEXT: vse8.v v8, (a0)
|
||||
; CHECK-NEXT: li a1, 768
|
||||
; CHECK-NEXT: sh a1, 0(a0)
|
||||
; CHECK-NEXT: ret
|
||||
store <2 x i8> <i8 undef, i8 3>, ptr %p
|
||||
ret void
|
||||
|
||||
Reference in New Issue
Block a user