[DAGCombine] Improve bswap lowering for machines that support bit rotates (#164848)

Source: Hacker's Delight.
This commit is contained in:
AZero13
2025-10-25 13:17:15 -04:00
committed by GitHub
parent fd804f076d
commit 5d0f1591f8
3 changed files with 78 additions and 96 deletions

View File

@@ -9899,6 +9899,18 @@ SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
// Use a rotate by 8. This can be further expanded if necessary.
return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
case MVT::i32:
// This is meant for ARM specifically, which has ROTR but no ROTL.
if (isOperationLegalOrCustom(ISD::ROTR, VT)) {
SDValue Mask = DAG.getConstant(0x00FF00FF, dl, VT);
// ((x & 0x00FF00FF) rotr 8) | ((x rotl 8) & 0x00FF00FF)
// The rotl by 8 is emitted below as the equivalent rotr by 24.
SDValue And = DAG.getNode(ISD::AND, dl, VT, Op, Mask);
SDValue Rotr =
DAG.getNode(ISD::ROTR, dl, VT, And, DAG.getConstant(8, dl, SHVT));
SDValue Rotl =
DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
SDValue And2 = DAG.getNode(ISD::AND, dl, VT, Rotl, Mask);
return DAG.getNode(ISD::OR, dl, VT, Rotr, And2);
}
Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
DAG.getConstant(0xFF00, dl, VT));

View File

@@ -53,14 +53,12 @@ define i32 @load_i32_by_i8_bswap(ptr %arg) {
; BSWAP is not supported by 32 bit target
; CHECK-LABEL: load_i32_by_i8_bswap:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r1, #255
; CHECK-NEXT: ldr r0, [r0]
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r1, r1, r0, lsr #8
; CHECK-NEXT: orr r1, r1, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: orr r1, r1, #16711680
; CHECK-NEXT: and r2, r0, r1
; CHECK-NEXT: and r0, r1, r0, ror #24
; CHECK-NEXT: orr r0, r0, r2, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap:
@@ -223,21 +221,16 @@ define i32 @load_i32_by_i16_i8(ptr %arg) {
define i64 @load_i64_by_i8_bswap(ptr %arg) {
; CHECK-LABEL: load_i64_by_i8_bswap:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r2, #255
; CHECK-NEXT: ldr r1, [r0]
; CHECK-NEXT: mov r12, #65280
; CHECK-NEXT: ldr r0, [r0, #4]
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r3, r12, r0, lsr #8
; CHECK-NEXT: orr r3, r3, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: and r2, r12, r1, lsr #8
; CHECK-NEXT: orr r0, r0, r3
; CHECK-NEXT: and r3, r1, #65280
; CHECK-NEXT: orr r2, r2, r1, lsr #24
; CHECK-NEXT: lsl r1, r1, #24
; CHECK-NEXT: orr r1, r1, r3, lsl #8
; CHECK-NEXT: orr r1, r1, r2
; CHECK-NEXT: orr r2, r2, #16711680
; CHECK-NEXT: and r3, r0, r2
; CHECK-NEXT: and r0, r2, r0, ror #24
; CHECK-NEXT: orr r0, r0, r3, ror #8
; CHECK-NEXT: and r3, r1, r2
; CHECK-NEXT: and r1, r2, r1, ror #24
; CHECK-NEXT: orr r1, r1, r3, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap:
@@ -377,14 +370,12 @@ define i64 @load_i64_by_i8(ptr %arg) {
define i32 @load_i32_by_i8_nonzero_offset(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r1, #255
; CHECK-NEXT: ldr r0, [r0, #1]
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r1, r1, r0, lsr #8
; CHECK-NEXT: orr r1, r1, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: orr r1, r1, #16711680
; CHECK-NEXT: and r2, r0, r1
; CHECK-NEXT: and r0, r1, r0, ror #24
; CHECK-NEXT: orr r0, r0, r2, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset:
@@ -434,14 +425,12 @@ define i32 @load_i32_by_i8_nonzero_offset(ptr %arg) {
define i32 @load_i32_by_i8_neg_offset(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r1, #255
; CHECK-NEXT: ldr r0, [r0, #-4]
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r1, r1, r0, lsr #8
; CHECK-NEXT: orr r1, r1, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: orr r1, r1, #16711680
; CHECK-NEXT: and r2, r0, r1
; CHECK-NEXT: and r0, r1, r0, ror #24
; CHECK-NEXT: orr r0, r0, r2, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset:
@@ -587,14 +576,12 @@ declare i16 @llvm.bswap.i16(i16)
define i32 @load_i32_by_bswap_i16(ptr %arg) {
; CHECK-LABEL: load_i32_by_bswap_i16:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r1, #255
; CHECK-NEXT: ldr r0, [r0]
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r1, r1, r0, lsr #8
; CHECK-NEXT: orr r1, r1, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: orr r1, r1, #16711680
; CHECK-NEXT: and r2, r0, r1
; CHECK-NEXT: and r0, r1, r0, ror #24
; CHECK-NEXT: orr r0, r0, r2, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_bswap_i16:
@@ -667,14 +654,12 @@ define i32 @load_i32_by_i8_base_offset_index(ptr %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index:
; CHECK: @ %bb.0:
; CHECK-NEXT: add r0, r0, r1
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: mov r1, #255
; CHECK-NEXT: orr r1, r1, #16711680
; CHECK-NEXT: ldr r0, [r0, #12]
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r1, r1, r0, lsr #8
; CHECK-NEXT: orr r1, r1, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: and r2, r0, r1
; CHECK-NEXT: and r0, r1, r0, ror #24
; CHECK-NEXT: orr r0, r0, r2, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index:
@@ -733,14 +718,12 @@ define i32 @load_i32_by_i8_base_offset_index_2(ptr %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK: @ %bb.0:
; CHECK-NEXT: add r0, r1, r0
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: mov r1, #255
; CHECK-NEXT: orr r1, r1, #16711680
; CHECK-NEXT: ldr r0, [r0, #13]
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r1, r1, r0, lsr #8
; CHECK-NEXT: orr r1, r1, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: and r2, r0, r1
; CHECK-NEXT: and r0, r1, r0, ror #24
; CHECK-NEXT: orr r0, r0, r2, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index_2:

View File

@@ -117,14 +117,12 @@ define i32 @load_i32_by_i8_bswap(ptr %arg) {
; BSWAP is not supported by 32 bit target
; CHECK-LABEL: load_i32_by_i8_bswap:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r1, #255
; CHECK-NEXT: ldr r0, [r0]
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r1, r1, r0, lsr #8
; CHECK-NEXT: orr r1, r1, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: orr r1, r1, #16711680
; CHECK-NEXT: and r2, r0, r1
; CHECK-NEXT: and r0, r1, r0, ror #24
; CHECK-NEXT: orr r0, r0, r2, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap:
@@ -237,21 +235,16 @@ define i64 @load_i64_by_i8(ptr %arg) {
define i64 @load_i64_by_i8_bswap(ptr %arg) {
; CHECK-LABEL: load_i64_by_i8_bswap:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r2, #255
; CHECK-NEXT: ldr r1, [r0]
; CHECK-NEXT: mov r12, #65280
; CHECK-NEXT: ldr r0, [r0, #4]
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r3, r12, r0, lsr #8
; CHECK-NEXT: orr r3, r3, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: and r2, r12, r1, lsr #8
; CHECK-NEXT: orr r0, r0, r3
; CHECK-NEXT: and r3, r1, #65280
; CHECK-NEXT: orr r2, r2, r1, lsr #24
; CHECK-NEXT: lsl r1, r1, #24
; CHECK-NEXT: orr r1, r1, r3, lsl #8
; CHECK-NEXT: orr r1, r1, r2
; CHECK-NEXT: orr r2, r2, #16711680
; CHECK-NEXT: and r3, r0, r2
; CHECK-NEXT: and r0, r2, r0, ror #24
; CHECK-NEXT: orr r0, r0, r3, ror #8
; CHECK-NEXT: and r3, r1, r2
; CHECK-NEXT: and r1, r2, r1, ror #24
; CHECK-NEXT: orr r1, r1, r3, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap:
@@ -413,14 +406,12 @@ define i32 @load_i32_by_i8_neg_offset(ptr %arg) {
define i32 @load_i32_by_i8_nonzero_offset_bswap(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r1, #255
; CHECK-NEXT: ldr r0, [r0, #1]
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r1, r1, r0, lsr #8
; CHECK-NEXT: orr r1, r1, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: orr r1, r1, #16711680
; CHECK-NEXT: and r2, r0, r1
; CHECK-NEXT: and r0, r1, r0, ror #24
; CHECK-NEXT: orr r0, r0, r2, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset_bswap:
@@ -469,14 +460,12 @@ define i32 @load_i32_by_i8_nonzero_offset_bswap(ptr %arg) {
define i32 @load_i32_by_i8_neg_offset_bswap(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r1, #255
; CHECK-NEXT: ldr r0, [r0, #-4]
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r1, r1, r0, lsr #8
; CHECK-NEXT: orr r1, r1, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: orr r1, r1, #16711680
; CHECK-NEXT: and r2, r0, r1
; CHECK-NEXT: and r0, r1, r0, ror #24
; CHECK-NEXT: orr r0, r0, r2, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset_bswap:
@@ -527,14 +516,12 @@ declare i16 @llvm.bswap.i16(i16)
define i32 @load_i32_by_bswap_i16(ptr %arg) {
; CHECK-LABEL: load_i32_by_bswap_i16:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r1, #255
; CHECK-NEXT: ldr r0, [r0]
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r1, r1, r0, lsr #8
; CHECK-NEXT: orr r1, r1, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: orr r1, r1, #16711680
; CHECK-NEXT: and r2, r0, r1
; CHECK-NEXT: and r0, r1, r0, ror #24
; CHECK-NEXT: orr r0, r0, r2, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_bswap_i16: