[RISCV] Scalarize constant stores of fixed vectors if small enough

Stores of small fixed-length vector constants can instead be emitted as a
short scalar sequence (lui/addi to materialize the packed constant, then
sh/sw to store it). This avoids both the cost of building the vector and
the vsetivli toggle, provided the constant isn't too expensive to
materialize.
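
As a worked example (a minimal standalone C++ sketch, not code from this
patch; the constant comes from the store_constant_v4i8 test updated below),
the elements of <4 x i8> <i8 3, i8 6, i8 5, i8 1> pack little-endian into
one 32-bit scalar that lui/addi can materialize in two instructions:

#include <cstdint>
#include <cstdio>
#include <optional>

int main() {
  // Elements of <4 x i8> <i8 3, i8 6, i8 5, i8 1>; nullopt would model undef.
  std::optional<uint8_t> Elts[4] = {3, 6, 5, 1};
  uint32_t Packed = 0;
  for (unsigned I = 0; I < 4; I++)
    if (Elts[I]) // undef lanes contribute nothing and keep their zero bits
      Packed |= uint32_t(*Elts[I]) << (I * 8);
  // Packed == 0x1050603, materialized and stored as:
  //   lui  a1, 4176       ; 4176 << 12 == 0x1050000
  //   addi a1, a1, 1539   ; +  0x603   == 0x1050603
  //   sw   a1, 0(a0)
  printf("%#x\n", (unsigned)Packed);
  return 0;
}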

This subsumes the optimisation for stores of zeroes added in 4dc9a2c5b9.

(This is a reapply of 0ca13f9d27)

Reviewed By: reames

Differential Revision: https://reviews.llvm.org/D151221
commit a8a36ee522
parent 173855f9b0
Author: Luke Lau
Date:   2023-05-22 17:51:32 +01:00

4 changed files with 149 additions and 177 deletions

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

@@ -12193,18 +12193,32 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
         isPowerOf2_64(MemVT.getSizeInBits()) &&
         MemVT.getSizeInBits() <= Subtarget.getXLen();
-    // Using vector to store zeros requires e.g.:
-    //   vsetivli   zero, 2, e64, m1, ta, ma
-    //   vmv.v.i    v8, 0
+    // If sufficiently aligned we can scalarize stores of constant vectors of
+    // any power-of-two size up to XLen bits, provided that they aren't too
+    // expensive to materialize.
+    //   vsetivli   zero, 2, e8, m1, ta, ma
+    //   vmv.v.i    v8, 4
     //   vse64.v    v8, (a0)
-    // If sufficiently aligned, we can use at most one scalar store to zero
-    // initialize any power-of-two size up to XLen bits.
+    // ->
+    //   li     a1, 1028
+    //   sh     a1, 0(a0)
     if (DCI.isBeforeLegalize() && IsScalarizable &&
-        ISD::isBuildVectorAllZeros(Val.getNode())) {
-      auto NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
-      if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+        ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
+      // Get the constant vector bits
+      APInt NewC(Val.getValueSizeInBits(), 0);
+      for (unsigned i = 0; i < Val.getNumOperands(); i++) {
+        if (Val.getOperand(i).isUndef())
+          continue;
+        NewC.insertBits(Val.getConstantOperandAPInt(i),
+                        i * Val.getScalarValueSizeInBits());
+      }
+
+      MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
+      if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(),
+                                     Subtarget.getFeatureBits(), true) <= 2 &&
+          allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
                                          NewVT, *Store->getMemOperand())) {
-        auto NewV = DAG.getConstant(0, DL, NewVT);
+        SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
         return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
                             Store->getPointerInfo(), Store->getOriginalAlign(),
                             Store->getMemOperand()->getFlags());
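
A minimal standalone sketch (illustrative, not part of the patch) of the
insertBits loop above: an undef lane is skipped and so keeps its zero bits,
which is why <2 x i8> <i8 undef, i8 3> in the store_constant_undef_v2i8 test
below packs to 768 (0x0300):

#include "llvm/ADT/APInt.h"
#include <cassert>
#include <optional>

int main() {
  // std::nullopt models an undef element, which the combine skips.
  std::optional<llvm::APInt> Lanes[2] = {std::nullopt, llvm::APInt(8, 3)};
  llvm::APInt NewC(16, 0);
  for (unsigned I = 0; I < 2; I++)
    if (Lanes[I]) // mirrors the isUndef() continue above
      NewC.insertBits(*Lanes[I], I * 8);
  assert(NewC == 768); // 0x0300, stored as: li a1, 768; sh a1, 0(a0)
  return 0;
}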

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll

@@ -63,108 +63,103 @@ define void @buildvec_vid_mpy_imm_v16i8(ptr %x) {
   ret void
 }

-define void @buildvec_vid_step2_add0_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
+; Some tests return this struct because the stores end up being scalarized.
+%x4v4i8 = type {<4 x i8>, <4 x i8>, <4 x i8>, <4 x i8>}
+
+define %x4v4i8 @buildvec_vid_step2_add0_v4i8() {
 ; CHECK-LABEL: buildvec_vid_step2_add0_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vid.v v8
 ; CHECK-NEXT:    vadd.vv v8, v8, v8
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    vse8.v v8, (a1)
-; CHECK-NEXT:    vse8.v v8, (a2)
-; CHECK-NEXT:    vse8.v v8, (a3)
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v11, v8
 ; CHECK-NEXT:    ret
-  store <4 x i8> <i8 0, i8 2, i8 4, i8 6>, ptr %z0
-  store <4 x i8> <i8 undef, i8 2, i8 4, i8 6>, ptr %z1
-  store <4 x i8> <i8 undef, i8 undef, i8 4, i8 6>, ptr %z2
-  store <4 x i8> <i8 0, i8 undef, i8 undef, i8 6>, ptr %z3
-  ret void
+  %1 = insertvalue %x4v4i8 poison, <4 x i8> <i8 0, i8 2, i8 4, i8 6>, 0
+  %2 = insertvalue %x4v4i8 %1, <4 x i8> <i8 undef, i8 2, i8 4, i8 6>, 1
+  %3 = insertvalue %x4v4i8 %2, <4 x i8> <i8 undef, i8 undef, i8 4, i8 6>, 2
+  %4 = insertvalue %x4v4i8 %3, <4 x i8> <i8 0, i8 undef, i8 undef, i8 6>, 3
+  ret %x4v4i8 %4
 }

-define void @buildvec_vid_step2_add1_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
+define %x4v4i8 @buildvec_vid_step2_add1_v4i8() {
 ; CHECK-LABEL: buildvec_vid_step2_add1_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vid.v v8
 ; CHECK-NEXT:    vadd.vv v8, v8, v8
 ; CHECK-NEXT:    vadd.vi v8, v8, 1
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    vse8.v v8, (a1)
-; CHECK-NEXT:    vse8.v v8, (a2)
-; CHECK-NEXT:    vse8.v v8, (a3)
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v11, v8
 ; CHECK-NEXT:    ret
-  store <4 x i8> <i8 1, i8 3, i8 5, i8 7>, ptr %z0
-  store <4 x i8> <i8 undef, i8 3, i8 5, i8 7>, ptr %z1
-  store <4 x i8> <i8 undef, i8 undef, i8 5, i8 7>, ptr %z2
-  store <4 x i8> <i8 1, i8 undef, i8 undef, i8 7>, ptr %z3
-  ret void
+  %1 = insertvalue %x4v4i8 poison, <4 x i8> <i8 1, i8 3, i8 5, i8 7>, 0
+  %2 = insertvalue %x4v4i8 %1, <4 x i8> <i8 undef, i8 3, i8 5, i8 7>, 1
+  %3 = insertvalue %x4v4i8 %2, <4 x i8> <i8 undef, i8 undef, i8 5, i8 7>, 2
+  %4 = insertvalue %x4v4i8 %3, <4 x i8> <i8 1, i8 undef, i8 undef, i8 7>, 3
+  ret %x4v4i8 %4
 }

 ; FIXME: This could generate vrsub.vi but the (ISD::MUL X, -1) we generate
 ; while lowering ISD::BUILD_VECTOR is custom-lowered to RISCVISD::MUL_VL before
 ; being combined.
-define void @buildvec_vid_stepn1_add0_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
+define %x4v4i8 @buildvec_vid_stepn1_add0_v4i8() {
 ; CHECK-LABEL: buildvec_vid_stepn1_add0_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vid.v v8
 ; CHECK-NEXT:    vrsub.vi v8, v8, 0
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    vse8.v v8, (a1)
-; CHECK-NEXT:    vse8.v v8, (a2)
-; CHECK-NEXT:    vse8.v v8, (a3)
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v11, v8
 ; CHECK-NEXT:    ret
-  store <4 x i8> <i8 0, i8 -1, i8 -2, i8 -3>, ptr %z0
-  store <4 x i8> <i8 undef, i8 -1, i8 -2, i8 -3>, ptr %z1
-  store <4 x i8> <i8 undef, i8 undef, i8 -2, i8 -3>, ptr %z2
-  store <4 x i8> <i8 0, i8 undef, i8 undef, i8 -3>, ptr %z3
-  ret void
+  %1 = insertvalue %x4v4i8 poison, <4 x i8> <i8 0, i8 -1, i8 -2, i8 -3>, 0
+  %2 = insertvalue %x4v4i8 %1, <4 x i8> <i8 undef, i8 -1, i8 -2, i8 -3>, 1
+  %3 = insertvalue %x4v4i8 %2, <4 x i8> <i8 undef, i8 undef, i8 -2, i8 -3>, 2
+  %4 = insertvalue %x4v4i8 %3, <4 x i8> <i8 0, i8 undef, i8 undef, i8 -3>, 3
+  ret %x4v4i8 %4
 }

-define void @buildvec_vid_stepn2_add0_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
+define %x4v4i8 @buildvec_vid_stepn2_add0_v4i8() {
 ; CHECK-LABEL: buildvec_vid_stepn2_add0_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vid.v v8
 ; CHECK-NEXT:    vadd.vv v8, v8, v8
 ; CHECK-NEXT:    vrsub.vi v8, v8, 0
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    vse8.v v8, (a1)
-; CHECK-NEXT:    vse8.v v8, (a2)
-; CHECK-NEXT:    vse8.v v8, (a3)
+; CHECK-NEXT:    vmv.v.i v11, -6
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vmv1r.v v10, v8
 ; CHECK-NEXT:    ret
-  store <4 x i8> <i8 0, i8 -2, i8 -4, i8 -6>, ptr %z0
-  store <4 x i8> <i8 undef, i8 -2, i8 -4, i8 -6>, ptr %z1
-  store <4 x i8> <i8 undef, i8 undef, i8 -4, i8 -6>, ptr %z2
-  store <4 x i8> <i8 0, i8 undef, i8 undef, i8 -6>, ptr %z3
-  ret void
+  %1 = insertvalue %x4v4i8 poison, <4 x i8> <i8 0, i8 -2, i8 -4, i8 -6>, 0
+  %2 = insertvalue %x4v4i8 %1, <4 x i8> <i8 undef, i8 -2, i8 -4, i8 -6>, 1
+  %3 = insertvalue %x4v4i8 %2, <4 x i8> <i8 undef, i8 undef, i8 -4, i8 -6>, 2
+  %4 = insertvalue %x4v4i8 %3, <4 x i8> <i8 undef, i8 undef, i8 undef, i8 -6>, 3
+  ret %x4v4i8 %4
 }

-define void @buildvec_vid_stepn2_add3_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
+define <4 x i8> @buildvec_vid_stepn2_add3_v4i8() {
 ; CHECK-LABEL: buildvec_vid_stepn2_add3_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vid.v v8
 ; CHECK-NEXT:    vadd.vv v8, v8, v8
 ; CHECK-NEXT:    vrsub.vi v8, v8, 3
-; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
-  store <4 x i8> <i8 3, i8 1, i8 -1, i8 -3>, ptr %z0
-  ret void
+  ret <4 x i8> <i8 3, i8 1, i8 -1, i8 -3>
 }

-define void @buildvec_vid_stepn3_add3_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
+define <4 x i8> @buildvec_vid_stepn3_add3_v4i8() {
 ; CHECK-LABEL: buildvec_vid_stepn3_add3_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 3
-; CHECK-NEXT:    vid.v v9
-; CHECK-NEXT:    li a1, -3
-; CHECK-NEXT:    vmadd.vx v9, a1, v8
-; CHECK-NEXT:    vse8.v v9, (a0)
+; CHECK-NEXT:    vmv.v.i v9, 3
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    li a0, -3
+; CHECK-NEXT:    vmadd.vx v8, a0, v9
 ; CHECK-NEXT:    ret
-  store <4 x i8> <i8 3, i8 0, i8 -3, i8 -6>, ptr %z0
-  ret void
+  ret <4 x i8> <i8 3, i8 0, i8 -3, i8 -6>
 }

 define void @buildvec_vid_stepn3_addn3_v4i32(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
@@ -235,43 +230,37 @@ define <4 x i64> @buildvec_vid_step2_add0_v4i64() {
   ret <4 x i64> <i64 0, i64 2, i64 4, i64 6>
 }

-define void @buildvec_no_vid_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3, ptr %z4, ptr %z5) {
+%x6v4i8 = type {<4 x i8>, <4 x i8>, <4 x i8>, <4 x i8>, <4 x i8>, <4 x i8>}
+
+define %x6v4i8 @buildvec_no_vid_v4i8() {
 ; CHECK-LABEL: buildvec_no_vid_v4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a6, %hi(.LCPI14_0)
-; CHECK-NEXT:    addi a6, a6, %lo(.LCPI14_0)
+; CHECK-NEXT:    lui a0, %hi(.LCPI14_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI14_0)
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a6)
-; CHECK-NEXT:    lui a6, %hi(.LCPI14_1)
-; CHECK-NEXT:    addi a6, a6, %lo(.LCPI14_1)
-; CHECK-NEXT:    vle8.v v9, (a6)
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    vse8.v v9, (a1)
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    lui a0, %hi(.LCPI14_1)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI14_1)
+; CHECK-NEXT:    vle8.v v9, (a0)
 ; CHECK-NEXT:    li a0, 1
 ; CHECK-NEXT:    slli a0, a0, 11
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vse8.v v8, (a2)
+; CHECK-NEXT:    vmv.v.x v10, a0
 ; CHECK-NEXT:    li a0, 2047
-; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v8, a0
+; CHECK-NEXT:    vmv.v.x v11, a0
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    lui a0, %hi(.LCPI14_2)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI14_2)
-; CHECK-NEXT:    vle8.v v9, (a0)
-; CHECK-NEXT:    vse8.v v8, (a3)
-; CHECK-NEXT:    vmv.v.i v8, -2
-; CHECK-NEXT:    vse8.v v8, (a4)
-; CHECK-NEXT:    vse8.v v9, (a5)
+; CHECK-NEXT:    vle8.v v13, (a0)
+; CHECK-NEXT:    vmv.v.i v12, -2
 ; CHECK-NEXT:    ret
-  store <4 x i8> <i8 1, i8 3, i8 6, i8 7>, ptr %z0
-  store <4 x i8> <i8 undef, i8 2, i8 5, i8 7>, ptr %z1
-  store <4 x i8> <i8 0, i8 undef, i8 undef, i8 8>, ptr %z2
-  store <4 x i8> <i8 -1, i8 undef, i8 undef, i8 7>, ptr %z3
-  store <4 x i8> <i8 -2, i8 undef, i8 undef, i8 undef>, ptr %z4
-  store <4 x i8> <i8 -1, i8 -2, i8 -4, i8 -5>, ptr %z5
-  ret void
+  %1 = insertvalue %x6v4i8 poison, <4 x i8> <i8 1, i8 3, i8 6, i8 7>, 0
+  %2 = insertvalue %x6v4i8 %1, <4 x i8> <i8 undef, i8 2, i8 5, i8 7>, 1
+  %3 = insertvalue %x6v4i8 %2, <4 x i8> <i8 0, i8 undef, i8 undef, i8 8>, 2
+  %4 = insertvalue %x6v4i8 %3, <4 x i8> <i8 -1, i8 undef, i8 undef, i8 7>, 3
+  %5 = insertvalue %x6v4i8 %4, <4 x i8> <i8 -2, i8 undef, i8 undef, i8 undef>, 4
+  %6 = insertvalue %x6v4i8 %5, <4 x i8> <i8 -1, i8 -2, i8 -4, i8 -5>, 5
+  ret %x6v4i8 %6
 }

 define void @buildvec_dominant0_v8i16(ptr %x) {
@@ -300,35 +289,30 @@ define void @buildvec_dominant1_v8i16(ptr %x) {
   ret void
 }

-define void @buildvec_dominant0_v2i8(ptr %x) {
+define <2 x i8> @buildvec_dominant0_v2i8() {
 ; CHECK-LABEL: buildvec_dominant0_v2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    ret
-  store <2 x i8> <i8 undef, i8 undef>, ptr %x
-  ret void
+  ret <2 x i8> <i8 undef, i8 undef>
 }

-define void @buildvec_dominant1_v2i8(ptr %x) {
+define <2 x i8> @buildvec_dominant1_v2i8() {
 ; CHECK-LABEL: buildvec_dominant1_v2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, -1
-; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
-  store <2 x i8> <i8 undef, i8 -1>, ptr %x
-  ret void
+  ret <2 x i8> <i8 undef, i8 -1>
 }

-define void @buildvec_dominant2_v2i8(ptr %x) {
+define <2 x i8> @buildvec_dominant2_v2i8() {
 ; CHECK-LABEL: buildvec_dominant2_v2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
 ; CHECK-NEXT:    vid.v v8
 ; CHECK-NEXT:    vrsub.vi v8, v8, 0
-; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
-  store <2 x i8> <i8 0, i8 -1>, ptr %x
-  ret void
+  ret <2 x i8> <i8 0, i8 -1>
 }

 define void @buildvec_dominant0_v2i32(ptr %x) {

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll

@@ -7,17 +7,8 @@
 define void @splat_ones_v1i1(ptr %x) {
 ; CHECK-LABEL: splat_ones_v1i1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT:    vmset.m v0
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vmv.v.i v9, 0
-; CHECK-NEXT:    vsetivli zero, 1, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vi v9, v8, 0
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vmsne.vi v8, v9, 0
-; CHECK-NEXT:    vsm.v v8, (a0)
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    sb a1, 0(a0)
 ; CHECK-NEXT:    ret
   store <1 x i1> <i1 1>, ptr %x
   ret void
@@ -83,17 +74,8 @@ define void @splat_v1i1_icmp(ptr %x, i32 signext %y, i32 signext %z) {
 define void @splat_ones_v4i1(ptr %x) {
 ; CHECK-LABEL: splat_ones_v4i1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vmset.m v0
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vmv.v.i v9, 0
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vi v9, v8, 0
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vmsne.vi v8, v9, 0
-; CHECK-NEXT:    vsm.v v8, (a0)
+; CHECK-NEXT:    li a1, 15
+; CHECK-NEXT:    sb a1, 0(a0)
 ; CHECK-NEXT:    ret
   store <4 x i1> <i1 1, i1 1, i1 1, i1 1>, ptr %x
   ret void
@@ -149,9 +131,8 @@ define void @splat_v8i1(ptr %x, i1 %y) {
 define void @splat_ones_v16i1(ptr %x) {
 ; CHECK-LABEL: splat_ones_v16i1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vmset.m v8
-; CHECK-NEXT:    vsm.v v8, (a0)
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    sh a1, 0(a0)
 ; CHECK-NEXT:    ret
   store <16 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, ptr %x
   ret void
@@ -220,16 +201,6 @@ define void @splat_v32i1(ptr %x, i1 %y) {
 }

 define void @splat_ones_v64i1(ptr %x) {
-; LMULMAX2-LABEL: splat_ones_v64i1:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    addi a1, a0, 4
-; LMULMAX2-NEXT:    li a2, 32
-; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vmset.m v8
-; LMULMAX2-NEXT:    vsm.v v8, (a1)
-; LMULMAX2-NEXT:    vsm.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
 ; LMULMAX1-RV32-LABEL: splat_ones_v64i1:
 ; LMULMAX1-RV32:       # %bb.0:
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
@@ -245,15 +216,8 @@ define void @splat_ones_v64i1(ptr %x) {
 ;
 ; LMULMAX1-RV64-LABEL: splat_ones_v64i1:
 ; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vmset.m v8
-; LMULMAX1-RV64-NEXT:    vsm.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 6
-; LMULMAX1-RV64-NEXT:    vsm.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 4
-; LMULMAX1-RV64-NEXT:    vsm.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 2
-; LMULMAX1-RV64-NEXT:    vsm.v v8, (a0)
+; LMULMAX1-RV64-NEXT:    li a1, -1
+; LMULMAX1-RV64-NEXT:    sd a1, 0(a0)
 ; LMULMAX1-RV64-NEXT:    ret
   store <64 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, ptr %x
   ret void

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll

@@ -191,27 +191,27 @@ define void @store_v6i1(ptr %p, <6 x i1> %v) {
 define void @store_constant_v2i8(ptr %p) {
 ; CHECK-LABEL: store_constant_v2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 3
-; CHECK-NEXT:    vid.v v9
-; CHECK-NEXT:    li a1, 3
-; CHECK-NEXT:    vmadd.vx v9, a1, v8
-; CHECK-NEXT:    vse8.v v9, (a0)
+; CHECK-NEXT:    li a1, 1539
+; CHECK-NEXT:    sh a1, 0(a0)
 ; CHECK-NEXT:    ret
   store <2 x i8> <i8 3, i8 6>, ptr %p
   ret void
 }

 define void @store_constant_v2i16(ptr %p) {
-; CHECK-LABEL: store_constant_v2i16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 3
-; CHECK-NEXT:    vid.v v9
-; CHECK-NEXT:    li a1, 3
-; CHECK-NEXT:    vmadd.vx v9, a1, v8
-; CHECK-NEXT:    vse16.v v9, (a0)
-; CHECK-NEXT:    ret
+; RV32-LABEL: store_constant_v2i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, 96
+; RV32-NEXT:    addi a1, a1, 3
+; RV32-NEXT:    sw a1, 0(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: store_constant_v2i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 96
+; RV64-NEXT:    addiw a1, a1, 3
+; RV64-NEXT:    sw a1, 0(a0)
+; RV64-NEXT:    ret
   store <2 x i16> <i16 3, i16 6>, ptr %p
   ret void
 }
@@ -231,14 +231,19 @@ define void @store_constant_v2i32(ptr %p) {
 }

 define void @store_constant_v4i8(ptr %p) {
-; CHECK-LABEL: store_constant_v4i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI12_0)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI12_0)
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a1)
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    ret
+; RV32-LABEL: store_constant_v4i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, 4176
+; RV32-NEXT:    addi a1, a1, 1539
+; RV32-NEXT:    sw a1, 0(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: store_constant_v4i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 4176
+; RV64-NEXT:    addiw a1, a1, 1539
+; RV64-NEXT:    sw a1, 0(a0)
+; RV64-NEXT:    ret
   store <4 x i8> <i8 3, i8 6, i8 5, i8 1>, ptr %p
   ret void
 }
@@ -270,12 +275,19 @@ define void @store_constant_v4i32(ptr %p) {
 }

 define void @store_id_v4i8(ptr %p) {
-; CHECK-LABEL: store_id_v4i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    ret
+; RV32-LABEL: store_id_v4i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, 12320
+; RV32-NEXT:    addi a1, a1, 256
+; RV32-NEXT:    sw a1, 0(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: store_id_v4i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 12320
+; RV64-NEXT:    addiw a1, a1, 256
+; RV64-NEXT:    sw a1, 0(a0)
+; RV64-NEXT:    ret
   store <4 x i8> <i8 0, i8 1, i8 2, i8 3>, ptr %p
   ret void
 }
@@ -297,9 +309,8 @@ define void @store_constant_v2i8_align1(ptr %p) {
 define void @store_constant_splat_v2i8(ptr %p) {
 ; CHECK-LABEL: store_constant_splat_v2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 3
-; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    li a1, 771
+; CHECK-NEXT:    sh a1, 0(a0)
 ; CHECK-NEXT:    ret
   store <2 x i8> <i8 3, i8 3>, ptr %p
   ret void
@@ -308,9 +319,8 @@ define void @store_constant_splat_v2i8(ptr %p) {
 define void @store_constant_undef_v2i8(ptr %p) {
 ; CHECK-LABEL: store_constant_undef_v2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 3
-; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    li a1, 768
+; CHECK-NEXT:    sh a1, 0(a0)
 ; CHECK-NEXT:    ret
   store <2 x i8> <i8 undef, i8 3>, ptr %p
   ret void