mirror of
https://github.com/intel/llvm.git
synced 2026-01-20 19:07:53 +08:00
[DAGCombine] Improve bswap lowering for machines that support bit rotates (#164848)
Source: Hacker's Delight.
This commit is contained in:
@@ -9899,6 +9899,18 @@ SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
|
||||
// Use a rotate by 8. This can be further expanded if necessary.
|
||||
return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
|
||||
case MVT::i32:
|
||||
// This is meant for ARM specifically, which has ROTR but no ROTL.
|
||||
if (isOperationLegalOrCustom(ISD::ROTR, VT)) {
|
||||
SDValue Mask = DAG.getConstant(0x00FF00FF, dl, VT);
|
||||
// ((x & 0x00FF00FF) rotr 8) | ((x rotl 8) & 0x00FF00FF), with rotl 8 emitted as rotr 24
|
||||
SDValue And = DAG.getNode(ISD::AND, dl, VT, Op, Mask);
|
||||
SDValue Rotr =
|
||||
DAG.getNode(ISD::ROTR, dl, VT, And, DAG.getConstant(8, dl, SHVT));
|
||||
SDValue Rotl =
|
||||
DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
|
||||
SDValue And2 = DAG.getNode(ISD::AND, dl, VT, Rotl, Mask);
|
||||
return DAG.getNode(ISD::OR, dl, VT, Rotr, And2);
|
||||
}
|
||||
Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
|
||||
Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
|
||||
DAG.getConstant(0xFF00, dl, VT));
|
||||
|
||||
@@ -53,14 +53,12 @@ define i32 @load_i32_by_i8_bswap(ptr %arg) {
|
||||
; BSWAP is not supported by 32 bit target
|
||||
; CHECK-LABEL: load_i32_by_i8_bswap:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: mov r1, #255
|
||||
; CHECK-NEXT: ldr r0, [r0]
|
||||
; CHECK-NEXT: mov r1, #65280
|
||||
; CHECK-NEXT: and r2, r0, #65280
|
||||
; CHECK-NEXT: and r1, r1, r0, lsr #8
|
||||
; CHECK-NEXT: orr r1, r1, r0, lsr #24
|
||||
; CHECK-NEXT: lsl r0, r0, #24
|
||||
; CHECK-NEXT: orr r0, r0, r2, lsl #8
|
||||
; CHECK-NEXT: orr r0, r0, r1
|
||||
; CHECK-NEXT: orr r1, r1, #16711680
|
||||
; CHECK-NEXT: and r2, r0, r1
|
||||
; CHECK-NEXT: and r0, r1, r0, ror #24
|
||||
; CHECK-NEXT: orr r0, r0, r2, ror #8
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
;
|
||||
; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap:
|
||||
@@ -223,21 +221,16 @@ define i32 @load_i32_by_i16_i8(ptr %arg) {
|
||||
define i64 @load_i64_by_i8_bswap(ptr %arg) {
|
||||
; CHECK-LABEL: load_i64_by_i8_bswap:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: mov r2, #255
|
||||
; CHECK-NEXT: ldr r1, [r0]
|
||||
; CHECK-NEXT: mov r12, #65280
|
||||
; CHECK-NEXT: ldr r0, [r0, #4]
|
||||
; CHECK-NEXT: and r2, r0, #65280
|
||||
; CHECK-NEXT: and r3, r12, r0, lsr #8
|
||||
; CHECK-NEXT: orr r3, r3, r0, lsr #24
|
||||
; CHECK-NEXT: lsl r0, r0, #24
|
||||
; CHECK-NEXT: orr r0, r0, r2, lsl #8
|
||||
; CHECK-NEXT: and r2, r12, r1, lsr #8
|
||||
; CHECK-NEXT: orr r0, r0, r3
|
||||
; CHECK-NEXT: and r3, r1, #65280
|
||||
; CHECK-NEXT: orr r2, r2, r1, lsr #24
|
||||
; CHECK-NEXT: lsl r1, r1, #24
|
||||
; CHECK-NEXT: orr r1, r1, r3, lsl #8
|
||||
; CHECK-NEXT: orr r1, r1, r2
|
||||
; CHECK-NEXT: orr r2, r2, #16711680
|
||||
; CHECK-NEXT: and r3, r0, r2
|
||||
; CHECK-NEXT: and r0, r2, r0, ror #24
|
||||
; CHECK-NEXT: orr r0, r0, r3, ror #8
|
||||
; CHECK-NEXT: and r3, r1, r2
|
||||
; CHECK-NEXT: and r1, r2, r1, ror #24
|
||||
; CHECK-NEXT: orr r1, r1, r3, ror #8
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
;
|
||||
; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap:
|
||||
@@ -377,14 +370,12 @@ define i64 @load_i64_by_i8(ptr %arg) {
|
||||
define i32 @load_i32_by_i8_nonzero_offset(ptr %arg) {
|
||||
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: mov r1, #255
|
||||
; CHECK-NEXT: ldr r0, [r0, #1]
|
||||
; CHECK-NEXT: mov r1, #65280
|
||||
; CHECK-NEXT: and r2, r0, #65280
|
||||
; CHECK-NEXT: and r1, r1, r0, lsr #8
|
||||
; CHECK-NEXT: orr r1, r1, r0, lsr #24
|
||||
; CHECK-NEXT: lsl r0, r0, #24
|
||||
; CHECK-NEXT: orr r0, r0, r2, lsl #8
|
||||
; CHECK-NEXT: orr r0, r0, r1
|
||||
; CHECK-NEXT: orr r1, r1, #16711680
|
||||
; CHECK-NEXT: and r2, r0, r1
|
||||
; CHECK-NEXT: and r0, r1, r0, ror #24
|
||||
; CHECK-NEXT: orr r0, r0, r2, ror #8
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
;
|
||||
; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset:
|
||||
@@ -434,14 +425,12 @@ define i32 @load_i32_by_i8_nonzero_offset(ptr %arg) {
|
||||
define i32 @load_i32_by_i8_neg_offset(ptr %arg) {
|
||||
; CHECK-LABEL: load_i32_by_i8_neg_offset:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: mov r1, #255
|
||||
; CHECK-NEXT: ldr r0, [r0, #-4]
|
||||
; CHECK-NEXT: mov r1, #65280
|
||||
; CHECK-NEXT: and r2, r0, #65280
|
||||
; CHECK-NEXT: and r1, r1, r0, lsr #8
|
||||
; CHECK-NEXT: orr r1, r1, r0, lsr #24
|
||||
; CHECK-NEXT: lsl r0, r0, #24
|
||||
; CHECK-NEXT: orr r0, r0, r2, lsl #8
|
||||
; CHECK-NEXT: orr r0, r0, r1
|
||||
; CHECK-NEXT: orr r1, r1, #16711680
|
||||
; CHECK-NEXT: and r2, r0, r1
|
||||
; CHECK-NEXT: and r0, r1, r0, ror #24
|
||||
; CHECK-NEXT: orr r0, r0, r2, ror #8
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
;
|
||||
; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset:
|
||||
@@ -587,14 +576,12 @@ declare i16 @llvm.bswap.i16(i16)
|
||||
define i32 @load_i32_by_bswap_i16(ptr %arg) {
|
||||
; CHECK-LABEL: load_i32_by_bswap_i16:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: mov r1, #255
|
||||
; CHECK-NEXT: ldr r0, [r0]
|
||||
; CHECK-NEXT: mov r1, #65280
|
||||
; CHECK-NEXT: and r2, r0, #65280
|
||||
; CHECK-NEXT: and r1, r1, r0, lsr #8
|
||||
; CHECK-NEXT: orr r1, r1, r0, lsr #24
|
||||
; CHECK-NEXT: lsl r0, r0, #24
|
||||
; CHECK-NEXT: orr r0, r0, r2, lsl #8
|
||||
; CHECK-NEXT: orr r0, r0, r1
|
||||
; CHECK-NEXT: orr r1, r1, #16711680
|
||||
; CHECK-NEXT: and r2, r0, r1
|
||||
; CHECK-NEXT: and r0, r1, r0, ror #24
|
||||
; CHECK-NEXT: orr r0, r0, r2, ror #8
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
;
|
||||
; CHECK-ARMv6-LABEL: load_i32_by_bswap_i16:
|
||||
@@ -667,14 +654,12 @@ define i32 @load_i32_by_i8_base_offset_index(ptr %arg, i32 %i) {
|
||||
; CHECK-LABEL: load_i32_by_i8_base_offset_index:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: add r0, r0, r1
|
||||
; CHECK-NEXT: mov r1, #65280
|
||||
; CHECK-NEXT: mov r1, #255
|
||||
; CHECK-NEXT: orr r1, r1, #16711680
|
||||
; CHECK-NEXT: ldr r0, [r0, #12]
|
||||
; CHECK-NEXT: and r2, r0, #65280
|
||||
; CHECK-NEXT: and r1, r1, r0, lsr #8
|
||||
; CHECK-NEXT: orr r1, r1, r0, lsr #24
|
||||
; CHECK-NEXT: lsl r0, r0, #24
|
||||
; CHECK-NEXT: orr r0, r0, r2, lsl #8
|
||||
; CHECK-NEXT: orr r0, r0, r1
|
||||
; CHECK-NEXT: and r2, r0, r1
|
||||
; CHECK-NEXT: and r0, r1, r0, ror #24
|
||||
; CHECK-NEXT: orr r0, r0, r2, ror #8
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
;
|
||||
; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index:
|
||||
@@ -733,14 +718,12 @@ define i32 @load_i32_by_i8_base_offset_index_2(ptr %arg, i32 %i) {
|
||||
; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: add r0, r1, r0
|
||||
; CHECK-NEXT: mov r1, #65280
|
||||
; CHECK-NEXT: mov r1, #255
|
||||
; CHECK-NEXT: orr r1, r1, #16711680
|
||||
; CHECK-NEXT: ldr r0, [r0, #13]
|
||||
; CHECK-NEXT: and r2, r0, #65280
|
||||
; CHECK-NEXT: and r1, r1, r0, lsr #8
|
||||
; CHECK-NEXT: orr r1, r1, r0, lsr #24
|
||||
; CHECK-NEXT: lsl r0, r0, #24
|
||||
; CHECK-NEXT: orr r0, r0, r2, lsl #8
|
||||
; CHECK-NEXT: orr r0, r0, r1
|
||||
; CHECK-NEXT: and r2, r0, r1
|
||||
; CHECK-NEXT: and r0, r1, r0, ror #24
|
||||
; CHECK-NEXT: orr r0, r0, r2, ror #8
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
;
|
||||
; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index_2:
|
||||
|
||||
@@ -117,14 +117,12 @@ define i32 @load_i32_by_i8_bswap(ptr %arg) {
|
||||
; BSWAP is not supported by 32 bit target
|
||||
; CHECK-LABEL: load_i32_by_i8_bswap:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: mov r1, #255
|
||||
; CHECK-NEXT: ldr r0, [r0]
|
||||
; CHECK-NEXT: mov r1, #65280
|
||||
; CHECK-NEXT: and r2, r0, #65280
|
||||
; CHECK-NEXT: and r1, r1, r0, lsr #8
|
||||
; CHECK-NEXT: orr r1, r1, r0, lsr #24
|
||||
; CHECK-NEXT: lsl r0, r0, #24
|
||||
; CHECK-NEXT: orr r0, r0, r2, lsl #8
|
||||
; CHECK-NEXT: orr r0, r0, r1
|
||||
; CHECK-NEXT: orr r1, r1, #16711680
|
||||
; CHECK-NEXT: and r2, r0, r1
|
||||
; CHECK-NEXT: and r0, r1, r0, ror #24
|
||||
; CHECK-NEXT: orr r0, r0, r2, ror #8
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
;
|
||||
; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap:
|
||||
@@ -237,21 +235,16 @@ define i64 @load_i64_by_i8(ptr %arg) {
|
||||
define i64 @load_i64_by_i8_bswap(ptr %arg) {
|
||||
; CHECK-LABEL: load_i64_by_i8_bswap:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: mov r2, #255
|
||||
; CHECK-NEXT: ldr r1, [r0]
|
||||
; CHECK-NEXT: mov r12, #65280
|
||||
; CHECK-NEXT: ldr r0, [r0, #4]
|
||||
; CHECK-NEXT: and r2, r0, #65280
|
||||
; CHECK-NEXT: and r3, r12, r0, lsr #8
|
||||
; CHECK-NEXT: orr r3, r3, r0, lsr #24
|
||||
; CHECK-NEXT: lsl r0, r0, #24
|
||||
; CHECK-NEXT: orr r0, r0, r2, lsl #8
|
||||
; CHECK-NEXT: and r2, r12, r1, lsr #8
|
||||
; CHECK-NEXT: orr r0, r0, r3
|
||||
; CHECK-NEXT: and r3, r1, #65280
|
||||
; CHECK-NEXT: orr r2, r2, r1, lsr #24
|
||||
; CHECK-NEXT: lsl r1, r1, #24
|
||||
; CHECK-NEXT: orr r1, r1, r3, lsl #8
|
||||
; CHECK-NEXT: orr r1, r1, r2
|
||||
; CHECK-NEXT: orr r2, r2, #16711680
|
||||
; CHECK-NEXT: and r3, r0, r2
|
||||
; CHECK-NEXT: and r0, r2, r0, ror #24
|
||||
; CHECK-NEXT: orr r0, r0, r3, ror #8
|
||||
; CHECK-NEXT: and r3, r1, r2
|
||||
; CHECK-NEXT: and r1, r2, r1, ror #24
|
||||
; CHECK-NEXT: orr r1, r1, r3, ror #8
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
;
|
||||
; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap:
|
||||
@@ -413,14 +406,12 @@ define i32 @load_i32_by_i8_neg_offset(ptr %arg) {
|
||||
define i32 @load_i32_by_i8_nonzero_offset_bswap(ptr %arg) {
|
||||
; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: mov r1, #255
|
||||
; CHECK-NEXT: ldr r0, [r0, #1]
|
||||
; CHECK-NEXT: mov r1, #65280
|
||||
; CHECK-NEXT: and r2, r0, #65280
|
||||
; CHECK-NEXT: and r1, r1, r0, lsr #8
|
||||
; CHECK-NEXT: orr r1, r1, r0, lsr #24
|
||||
; CHECK-NEXT: lsl r0, r0, #24
|
||||
; CHECK-NEXT: orr r0, r0, r2, lsl #8
|
||||
; CHECK-NEXT: orr r0, r0, r1
|
||||
; CHECK-NEXT: orr r1, r1, #16711680
|
||||
; CHECK-NEXT: and r2, r0, r1
|
||||
; CHECK-NEXT: and r0, r1, r0, ror #24
|
||||
; CHECK-NEXT: orr r0, r0, r2, ror #8
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
;
|
||||
; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset_bswap:
|
||||
@@ -469,14 +460,12 @@ define i32 @load_i32_by_i8_nonzero_offset_bswap(ptr %arg) {
|
||||
define i32 @load_i32_by_i8_neg_offset_bswap(ptr %arg) {
|
||||
; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: mov r1, #255
|
||||
; CHECK-NEXT: ldr r0, [r0, #-4]
|
||||
; CHECK-NEXT: mov r1, #65280
|
||||
; CHECK-NEXT: and r2, r0, #65280
|
||||
; CHECK-NEXT: and r1, r1, r0, lsr #8
|
||||
; CHECK-NEXT: orr r1, r1, r0, lsr #24
|
||||
; CHECK-NEXT: lsl r0, r0, #24
|
||||
; CHECK-NEXT: orr r0, r0, r2, lsl #8
|
||||
; CHECK-NEXT: orr r0, r0, r1
|
||||
; CHECK-NEXT: orr r1, r1, #16711680
|
||||
; CHECK-NEXT: and r2, r0, r1
|
||||
; CHECK-NEXT: and r0, r1, r0, ror #24
|
||||
; CHECK-NEXT: orr r0, r0, r2, ror #8
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
;
|
||||
; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset_bswap:
|
||||
@@ -527,14 +516,12 @@ declare i16 @llvm.bswap.i16(i16)
|
||||
define i32 @load_i32_by_bswap_i16(ptr %arg) {
|
||||
; CHECK-LABEL: load_i32_by_bswap_i16:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: mov r1, #255
|
||||
; CHECK-NEXT: ldr r0, [r0]
|
||||
; CHECK-NEXT: mov r1, #65280
|
||||
; CHECK-NEXT: and r2, r0, #65280
|
||||
; CHECK-NEXT: and r1, r1, r0, lsr #8
|
||||
; CHECK-NEXT: orr r1, r1, r0, lsr #24
|
||||
; CHECK-NEXT: lsl r0, r0, #24
|
||||
; CHECK-NEXT: orr r0, r0, r2, lsl #8
|
||||
; CHECK-NEXT: orr r0, r0, r1
|
||||
; CHECK-NEXT: orr r1, r1, #16711680
|
||||
; CHECK-NEXT: and r2, r0, r1
|
||||
; CHECK-NEXT: and r0, r1, r0, ror #24
|
||||
; CHECK-NEXT: orr r0, r0, r2, ror #8
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
;
|
||||
; CHECK-ARMv6-LABEL: load_i32_by_bswap_i16:
|
||||
|
||||
Reference in New Issue
Block a user