mirror of
https://github.com/intel/llvm.git
synced 2026-02-05 04:46:27 +08:00
AMDGPU: Skip fneg/select combine if it can fold into other
llvm-svn: 291792
This commit is contained in:
@@ -484,6 +484,24 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
|
||||
// Target Information
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// Opcode predicate: returns true when \p Opc is FADD, FSUB, FMUL, FMA, FMAD,
/// FSIN, or one of the AMDGPU nodes RCP, RCP_LEGACY, SIN_HW, FMUL_LEGACY, and
/// false for every other opcode.
///
/// The select combine consults this for a one-use source of an fneg: when the
/// source opcode is in this list it leaves the fneg where it is instead of
/// pulling it through the select.
static bool fnegFoldsIntoOp(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
case ISD::FADD:
|
||||
case ISD::FSUB:
|
||||
case ISD::FMUL:
|
||||
case ISD::FMA:
|
||||
case ISD::FMAD:
|
||||
case ISD::FSIN:
|
||||
case AMDGPUISD::RCP:
|
||||
case AMDGPUISD::RCP_LEGACY:
|
||||
case AMDGPUISD::SIN_HW:
|
||||
case AMDGPUISD::FMUL_LEGACY:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// Always returns MVT::i32; the DataLayout argument is unnamed and ignored.
MVT AMDGPUTargetLowering::getVectorIdxTy(const DataLayout &) const {
|
||||
return MVT::i32;
|
||||
}
|
||||
@@ -2738,20 +2756,31 @@ static SDValue foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
|
||||
SDValue NewLHS = LHS.getOperand(0);
|
||||
SDValue NewRHS = RHS;
|
||||
|
||||
// TODO: Skip for operations where other combines can absorb the fneg.
|
||||
// Careful: if the neg can be folded up, don't try to pull it back down.
|
||||
bool ShouldFoldNeg = true;
|
||||
|
||||
if (LHS.getOpcode() == ISD::FNEG)
|
||||
NewRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
|
||||
else if (CRHS->isNegative())
|
||||
return SDValue();
|
||||
if (NewLHS.hasOneUse()) {
|
||||
unsigned Opc = NewLHS.getOpcode();
|
||||
if (LHS.getOpcode() == ISD::FNEG && fnegFoldsIntoOp(Opc))
|
||||
ShouldFoldNeg = false;
|
||||
if (LHS.getOpcode() == ISD::FABS && Opc == ISD::FMUL)
|
||||
ShouldFoldNeg = false;
|
||||
}
|
||||
|
||||
if (Inv)
|
||||
std::swap(NewLHS, NewRHS);
|
||||
if (ShouldFoldNeg) {
|
||||
if (LHS.getOpcode() == ISD::FNEG)
|
||||
NewRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
|
||||
else if (CRHS->isNegative())
|
||||
return SDValue();
|
||||
|
||||
SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT,
|
||||
Cond, NewLHS, NewRHS);
|
||||
DCI.AddToWorklist(NewSelect.getNode());
|
||||
return DAG.getNode(LHS.getOpcode(), SL, VT, NewSelect);
|
||||
if (Inv)
|
||||
std::swap(NewLHS, NewRHS);
|
||||
|
||||
SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT,
|
||||
Cond, NewLHS, NewRHS);
|
||||
DCI.AddToWorklist(NewSelect.getNode());
|
||||
return DAG.getNode(LHS.getOpcode(), SL, VT, NewSelect);
|
||||
}
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
@@ -2806,24 +2835,6 @@ SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
|
||||
return performCtlzCombine(SDLoc(N), Cond, True, False, DCI);
|
||||
}
|
||||
|
||||
/// Opcode predicate: returns true when \p Opc is FADD, FSUB, FMUL, FMA, FMAD,
/// FSIN, or one of the AMDGPU nodes RCP, RCP_LEGACY, SIN_HW, FMUL_LEGACY, and
/// false for every other opcode. Going by the name, these are presumably the
/// operations an fneg can be folded into.
static bool fnegFoldsIntoOp(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
case ISD::FADD:
|
||||
case ISD::FSUB:
|
||||
case ISD::FMUL:
|
||||
case ISD::FMA:
|
||||
case ISD::FMAD:
|
||||
case ISD::FSIN:
|
||||
case AMDGPUISD::RCP:
|
||||
case AMDGPUISD::RCP_LEGACY:
|
||||
case AMDGPUISD::SIN_HW:
|
||||
case AMDGPUISD::FMUL_LEGACY:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
|
||||
DAGCombinerInfo &DCI) const {
|
||||
SelectionDAG &DAG = DCI.DAG;
|
||||
|
||||
46
llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll
Normal file
46
llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll
Normal file
@@ -0,0 +1,46 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
|
||||
; --------------------------------------------------------------------------------
|
||||
; Don't fold if fneg can fold into the source
|
||||
; --------------------------------------------------------------------------------
|
||||
|
||||
; GCN-LABEL: {{^}}select_fneg_posk_src_rcp_legacy_f32:
|
||||
; GCN: buffer_load_dword [[X:v[0-9]+]]
|
||||
|
||||
; GCN: v_rcp_legacy_f32_e32 [[RCP:v[0-9]+]], [[X]]
|
||||
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[RCP]], vcc
|
||||
; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]]
|
||||
; GCN-NEXT: buffer_store_dword [[NEG_SELECT]]
|
||||
; Stores select(%c == 0, -(rcp.legacy(%x)), 2.0). The negation is written as
; (-0.0 - %rcp). %y is loaded (volatile) but not otherwise used.
; NOTE(review): this function references attribute group #2, but only #0 and #1
; are defined in the visible part of this file -- confirm which was intended.
define void @select_fneg_posk_src_rcp_legacy_f32(i32 %c) #2 {
|
||||
%x = load volatile float, float addrspace(1)* undef
|
||||
%y = load volatile float, float addrspace(1)* undef
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
%rcp = call float @llvm.amdgcn.rcp.legacy(float %x)
|
||||
%fneg = fsub float -0.0, %rcp
|
||||
%select = select i1 %cmp, float %fneg, float 2.0
|
||||
store volatile float %select, float addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}select_fneg_posk_src_mul_legacy_f32:
|
||||
; GCN: buffer_load_dword [[X:v[0-9]+]]
|
||||
|
||||
; GCN: v_mul_legacy_f32_e32 [[MUL:v[0-9]+]], 4.0, [[X]]
|
||||
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[MUL]], vcc
|
||||
; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]]
|
||||
; GCN-NEXT: buffer_store_dword [[NEG_SELECT]]
|
||||
; Stores select(%c == 0, -(fmul.legacy(%x, 4.0)), 2.0). The negation is written
; as (-0.0 - %mul).
; NOTE(review): this function references attribute group #2, but only #0 and #1
; are defined in the visible part of this file -- confirm which was intended.
define void @select_fneg_posk_src_mul_legacy_f32(i32 %c) #2 {
|
||||
%x = load volatile float, float addrspace(1)* undef
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
%mul = call float @llvm.amdgcn.fmul.legacy(float %x, float 4.0)
|
||||
%fneg = fsub float -0.0, %mul
|
||||
%select = select i1 %cmp, float %fneg, float 2.0
|
||||
store volatile float %select, float addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @llvm.amdgcn.rcp.legacy(float) #1
|
||||
declare float @llvm.amdgcn.fmul.legacy(float, float) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
@@ -721,7 +721,120 @@ define void @mul_select_negk_negfabs_f32(i32 %c) #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; --------------------------------------------------------------------------------
|
||||
; Don't fold if fneg can fold into the source
|
||||
; --------------------------------------------------------------------------------
|
||||
|
||||
; GCN-LABEL: {{^}}select_fneg_posk_src_add_f32:
|
||||
; GCN: buffer_load_dword [[X:v[0-9]+]]
|
||||
; GCN: buffer_load_dword [[Y:v[0-9]+]]
|
||||
|
||||
; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], -4.0, [[X]]
|
||||
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc
|
||||
; GCN-NEXT: buffer_store_dword [[SELECT]]
|
||||
; Stores select(%c == 0, -(%x + 4.0), 2.0). The negation is written as
; (-0.0 - %add). %y is loaded (volatile) but not otherwise used.
define void @select_fneg_posk_src_add_f32(i32 %c) #0 {
|
||||
%x = load volatile float, float addrspace(1)* undef
|
||||
%y = load volatile float, float addrspace(1)* undef
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
%add = fadd float %x, 4.0
|
||||
%fneg = fsub float -0.0, %add
|
||||
%select = select i1 %cmp, float %fneg, float 2.0
|
||||
store volatile float %select, float addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}select_fneg_posk_src_sub_f32:
|
||||
; GCN: buffer_load_dword [[X:v[0-9]+]]
|
||||
|
||||
; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], 4.0, [[X]]
|
||||
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc
|
||||
; GCN-NEXT: buffer_store_dword [[SELECT]]
|
||||
; Stores select(%c == 0, -(%x - 4.0), 2.0). The negation is written as
; (-0.0 - %add); note that %add here holds the result of an fsub.
define void @select_fneg_posk_src_sub_f32(i32 %c) #0 {
|
||||
%x = load volatile float, float addrspace(1)* undef
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
%add = fsub float %x, 4.0
|
||||
%fneg = fsub float -0.0, %add
|
||||
%select = select i1 %cmp, float %fneg, float 2.0
|
||||
store volatile float %select, float addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}select_fneg_posk_src_mul_f32:
|
||||
; GCN: buffer_load_dword [[X:v[0-9]+]]
|
||||
|
||||
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[X]]
|
||||
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[MUL]], vcc
|
||||
; GCN-NEXT: buffer_store_dword [[SELECT]]
|
||||
; Stores select(%c == 0, -(%x * 4.0), 2.0). The negation is written as
; (-0.0 - %mul).
define void @select_fneg_posk_src_mul_f32(i32 %c) #0 {
|
||||
%x = load volatile float, float addrspace(1)* undef
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
%mul = fmul float %x, 4.0
|
||||
%fneg = fsub float -0.0, %mul
|
||||
%select = select i1 %cmp, float %fneg, float 2.0
|
||||
store volatile float %select, float addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}select_fneg_posk_src_fma_f32:
|
||||
; GCN: buffer_load_dword [[X:v[0-9]+]]
|
||||
; GCN: buffer_load_dword [[Z:v[0-9]+]]
|
||||
|
||||
; GCN: v_fma_f32 [[FMA:v[0-9]+]], [[X]], -4.0, -[[Z]]
|
||||
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[FMA]], vcc
|
||||
; GCN-NEXT: buffer_store_dword [[SELECT]]
|
||||
; Stores select(%c == 0, -(fma(%x, 4.0, %z)), 2.0). The negation is written as
; (-0.0 - %fma).
define void @select_fneg_posk_src_fma_f32(i32 %c) #0 {
|
||||
%x = load volatile float, float addrspace(1)* undef
|
||||
%z = load volatile float, float addrspace(1)* undef
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
%fma = call float @llvm.fma.f32(float %x, float 4.0, float %z)
|
||||
%fneg = fsub float -0.0, %fma
|
||||
%select = select i1 %cmp, float %fneg, float 2.0
|
||||
store volatile float %select, float addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}select_fneg_posk_src_fmad_f32:
|
||||
; GCN: buffer_load_dword [[X:v[0-9]+]]
|
||||
; GCN: buffer_load_dword [[Z:v[0-9]+]]
|
||||
|
||||
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[X]], vcc
|
||||
; GCN-NEXT: buffer_store_dword [[SELECT]]
|
||||
; Stores select(%c == 0, -(fmuladd(%x, 4.0, %z)), 2.0). The negation is written
; as (-0.0 - %fmad).
; NOTE(review): the check lines above this function never match the
; multiply-add instruction itself, and the select check consumes [[X]]
; directly; presumably this relies on the result landing in the same register
; as the first load -- confirm, or add an explicit check for the instruction.
define void @select_fneg_posk_src_fmad_f32(i32 %c) #0 {
|
||||
%x = load volatile float, float addrspace(1)* undef
|
||||
%z = load volatile float, float addrspace(1)* undef
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
%fmad = call float @llvm.fmuladd.f32(float %x, float 4.0, float %z)
|
||||
%fneg = fsub float -0.0, %fmad
|
||||
%select = select i1 %cmp, float %fneg, float 2.0
|
||||
store volatile float %select, float addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; FIXME: This one should fold to rcp
|
||||
; GCN-LABEL: {{^}}select_fneg_posk_src_rcp_f32:
|
||||
; GCN: buffer_load_dword [[X:v[0-9]+]]
|
||||
|
||||
; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[X]]
|
||||
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[RCP]], vcc
|
||||
; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]]
|
||||
; GCN-NEXT: buffer_store_dword [[NEG_SELECT]]
|
||||
; Stores select(%c == 0, -(rcp(%x)), 2.0). The negation is written as
; (-0.0 - %rcp). %y is loaded (volatile) but not otherwise used.
define void @select_fneg_posk_src_rcp_f32(i32 %c) #0 {
|
||||
%x = load volatile float, float addrspace(1)* undef
|
||||
%y = load volatile float, float addrspace(1)* undef
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
%rcp = call float @llvm.amdgcn.rcp.f32(float %x)
|
||||
%fneg = fsub float -0.0, %rcp
|
||||
%select = select i1 %cmp, float %fneg, float 2.0
|
||||
store volatile float %select, float addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @llvm.fabs.f32(float) #1
|
||||
declare float @llvm.fma.f32(float, float, float) #1
|
||||
declare float @llvm.fmuladd.f32(float, float, float) #1
|
||||
declare float @llvm.amdgcn.rcp.f32(float) #1
|
||||
declare float @llvm.amdgcn.rcp.legacy(float) #1
|
||||
declare float @llvm.amdgcn.fmul.legacy(float, float) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
||||
Reference in New Issue
Block a user