AMDGPU: Skip fneg/select combine if the fneg can fold into its source

llvm-svn: 291792
2026-02-05 04:46:27 +08:00 · 2017-01-12 18:58:15 +00:00
parent c4427a3976
commit 45337df08f
3 changed files with 199 additions and 29 deletions
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -484,6 +484,24 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
 // Target Information
 //===----------------------------------------------------------------------===//

+static bool fnegFoldsIntoOp(unsigned Opc) {
+  switch (Opc) {
+  case ISD::FADD:
+  case ISD::FSUB:
+  case ISD::FMUL:
+  case ISD::FMA:
+  case ISD::FMAD:
+  case ISD::FSIN:
+  case AMDGPUISD::RCP:
+  case AMDGPUISD::RCP_LEGACY:
+  case AMDGPUISD::SIN_HW:
+  case AMDGPUISD::FMUL_LEGACY:
+    return true;
+  default:
+    return false;
+  }
+}
+
 MVT AMDGPUTargetLowering::getVectorIdxTy(const DataLayout &) const {
  return MVT::i32;
 }
@@ -2738,20 +2756,31 @@ static SDValue foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
    SDValue NewLHS = LHS.getOperand(0);
    SDValue NewRHS = RHS;

-    // TODO: Skip for operations where other combines can absord the fneg.
+    // Careful: if the neg can be folded up, don't try to pull it back down.
+    bool ShouldFoldNeg = true;

-    if (LHS.getOpcode() == ISD::FNEG)
-      NewRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
-    else if (CRHS->isNegative())
-      return SDValue();
+    if (NewLHS.hasOneUse()) {
+      unsigned Opc = NewLHS.getOpcode();
+      if (LHS.getOpcode() == ISD::FNEG && fnegFoldsIntoOp(Opc))
+        ShouldFoldNeg = false;
+      if (LHS.getOpcode() == ISD::FABS && Opc == ISD::FMUL)
+        ShouldFoldNeg = false;
+    }

-    if (Inv)
-      std::swap(NewLHS, NewRHS);
+    if (ShouldFoldNeg) {
+      if (LHS.getOpcode() == ISD::FNEG)
+        NewRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
+      else if (CRHS->isNegative())
+        return SDValue();

-    SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT,
-                                    Cond, NewLHS, NewRHS);
-    DCI.AddToWorklist(NewSelect.getNode());
-    return DAG.getNode(LHS.getOpcode(), SL, VT, NewSelect);
+      if (Inv)
+        std::swap(NewLHS, NewRHS);
+
+      SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT,
+                                      Cond, NewLHS, NewRHS);
+      DCI.AddToWorklist(NewSelect.getNode());
+      return DAG.getNode(LHS.getOpcode(), SL, VT, NewSelect);
+    }
  }

  return SDValue();
@@ -2806,24 +2835,6 @@ SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
  return performCtlzCombine(SDLoc(N), Cond, True, False, DCI);
 }

-static bool fnegFoldsIntoOp(unsigned Opc) {
-  switch (Opc) {
-  case ISD::FADD:
-  case ISD::FSUB:
-  case ISD::FMUL:
-  case ISD::FMA:
-  case ISD::FMAD:
-  case ISD::FSIN:
-  case AMDGPUISD::RCP:
-  case AMDGPUISD::RCP_LEGACY:
-  case AMDGPUISD::SIN_HW:
-  case AMDGPUISD::FMUL_LEGACY:
-    return true;
-  default:
-    return false;
-  }
-}
-
 SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
--- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll
@@ -0,0 +1,46 @@
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+
+; --------------------------------------------------------------------------------
+; Don't fold if fneg can fold into the source (FIXME: checks below still show the fneg pulled out of the select)
+; --------------------------------------------------------------------------------
+
+; GCN-LABEL: {{^}}select_fneg_posk_src_rcp_legacy_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+
+; GCN: v_rcp_legacy_f32_e32 [[RCP:v[0-9]+]], [[X]]
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[RCP]], vcc
+; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]]
+; GCN-NEXT: buffer_store_dword [[NEG_SELECT]]
+define void @select_fneg_posk_src_rcp_legacy_f32(i32 %c) #0 {
+  %x = load volatile float, float addrspace(1)* undef
+  %y = load volatile float, float addrspace(1)* undef
+  %cmp = icmp eq i32 %c, 0
+  %rcp = call float @llvm.amdgcn.rcp.legacy(float %x)
+  %fneg = fsub float -0.0, %rcp
+  %select = select i1 %cmp, float %fneg, float 2.0
+  store volatile float %select, float addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}select_fneg_posk_src_mul_legacy_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+
+; GCN: v_mul_legacy_f32_e32 [[MUL:v[0-9]+]], 4.0, [[X]]
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[MUL]], vcc
+; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]]
+; GCN-NEXT: buffer_store_dword [[NEG_SELECT]]
+define void @select_fneg_posk_src_mul_legacy_f32(i32 %c) #0 {
+  %x = load volatile float, float addrspace(1)* undef
+  %cmp = icmp eq i32 %c, 0
+  %mul = call float @llvm.amdgcn.fmul.legacy(float %x, float 4.0)
+  %fneg = fsub float -0.0, %mul
+  %select = select i1 %cmp, float %fneg, float 2.0
+  store volatile float %select, float addrspace(1)* undef
+  ret void
+}
+
+declare float @llvm.amdgcn.rcp.legacy(float) #1
+declare float @llvm.amdgcn.fmul.legacy(float, float) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
--- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
@@ -721,7 +721,120 @@ define void @mul_select_negk_negfabs_f32(i32 %c) #0 {
  ret void
 }

+; --------------------------------------------------------------------------------
+; Don't fold if fneg can fold into the source
+; --------------------------------------------------------------------------------
+
+; GCN-LABEL: {{^}}select_fneg_posk_src_add_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+; GCN: buffer_load_dword [[Y:v[0-9]+]]
+
+; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], -4.0, [[X]]
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc
+; GCN-NEXT: buffer_store_dword [[SELECT]]
+define void @select_fneg_posk_src_add_f32(i32 %c) #0 {
+  %x = load volatile float, float addrspace(1)* undef
+  %y = load volatile float, float addrspace(1)* undef
+  %cmp = icmp eq i32 %c, 0
+  %add = fadd float %x, 4.0
+  %fneg = fsub float -0.0, %add
+  %select = select i1 %cmp, float %fneg, float 2.0
+  store volatile float %select, float addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}select_fneg_posk_src_sub_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+
+; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], 4.0, [[X]]
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc
+; GCN-NEXT: buffer_store_dword [[SELECT]]
+define void @select_fneg_posk_src_sub_f32(i32 %c) #0 {
+  %x = load volatile float, float addrspace(1)* undef
+  %cmp = icmp eq i32 %c, 0
+  %add = fsub float %x, 4.0
+  %fneg = fsub float -0.0, %add
+  %select = select i1 %cmp, float %fneg, float 2.0
+  store volatile float %select, float addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}select_fneg_posk_src_mul_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+
+; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[X]]
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[MUL]], vcc
+; GCN-NEXT: buffer_store_dword [[SELECT]]
+define void @select_fneg_posk_src_mul_f32(i32 %c) #0 {
+  %x = load volatile float, float addrspace(1)* undef
+  %cmp = icmp eq i32 %c, 0
+  %mul = fmul float %x, 4.0
+  %fneg = fsub float -0.0, %mul
+  %select = select i1 %cmp, float %fneg, float 2.0
+  store volatile float %select, float addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}select_fneg_posk_src_fma_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+; GCN: buffer_load_dword [[Z:v[0-9]+]]
+
+; GCN: v_fma_f32 [[FMA:v[0-9]+]], [[X]], -4.0, -[[Z]]
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[FMA]], vcc
+; GCN-NEXT: buffer_store_dword [[SELECT]]
+define void @select_fneg_posk_src_fma_f32(i32 %c) #0 {
+  %x = load volatile float, float addrspace(1)* undef
+  %z = load volatile float, float addrspace(1)* undef
+  %cmp = icmp eq i32 %c, 0
+  %fma = call float @llvm.fma.f32(float %x, float 4.0, float %z)
+  %fneg = fsub float -0.0, %fma
+  %select = select i1 %cmp, float %fneg, float 2.0
+  store volatile float %select, float addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}select_fneg_posk_src_fmad_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+; GCN: buffer_load_dword [[Z:v[0-9]+]]
+
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[X]], vcc
+; GCN-NEXT: buffer_store_dword [[SELECT]]
+define void @select_fneg_posk_src_fmad_f32(i32 %c) #0 {
+  %x = load volatile float, float addrspace(1)* undef
+  %z = load volatile float, float addrspace(1)* undef
+  %cmp = icmp eq i32 %c, 0
+  %fmad = call float @llvm.fmuladd.f32(float %x, float 4.0, float %z)
+  %fneg = fsub float -0.0, %fmad
+  %select = select i1 %cmp, float %fneg, float 2.0
+  store volatile float %select, float addrspace(1)* undef
+  ret void
+}
+
+; FIXME: This one should fold to rcp
+; GCN-LABEL: {{^}}select_fneg_posk_src_rcp_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+
+; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[X]]
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[RCP]], vcc
+; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]]
+; GCN-NEXT: buffer_store_dword [[NEG_SELECT]]
+define void @select_fneg_posk_src_rcp_f32(i32 %c) #0 {
+  %x = load volatile float, float addrspace(1)* undef
+  %y = load volatile float, float addrspace(1)* undef
+  %cmp = icmp eq i32 %c, 0
+  %rcp = call float @llvm.amdgcn.rcp.f32(float %x)
+  %fneg = fsub float -0.0, %rcp
+  %select = select i1 %cmp, float %fneg, float 2.0
+  store volatile float %select, float addrspace(1)* undef
+  ret void
+}
+
 declare float @llvm.fabs.f32(float) #1
+declare float @llvm.fma.f32(float, float, float) #1
+declare float @llvm.fmuladd.f32(float, float, float) #1
+declare float @llvm.amdgcn.rcp.f32(float) #1
+declare float @llvm.amdgcn.rcp.legacy(float) #1
+declare float @llvm.amdgcn.fmul.legacy(float, float) #1

 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }