mirror of
https://github.com/intel/llvm.git
synced 2026-01-20 10:58:11 +08:00
Revert "[VPlan] Remove legacy costing inside VPBlendRecipe::computeCost (#171846)"
This reverts commit fd5f53aa9b.
The reverted change triggers legacy cost model assertions, as reported in
https://github.com/llvm/llvm-project/pull/171846#issuecomment-3647640019
This commit is contained in:
@@ -7081,11 +7081,6 @@ static bool planContainsAdditionalSimplifications(VPlan &Plan,
|
||||
return true;
|
||||
}
|
||||
|
||||
// The legacy cost model costs non-header phis with a scalar VF as a phi,
|
||||
// but scalar unrolled VPlans will have VPBlendRecipes which emit selects.
|
||||
if (VF.isScalar() && isa<VPBlendRecipe>(&R))
|
||||
return true;
|
||||
|
||||
/// If a VPlan transform folded a recipe to one producing a single-scalar,
|
||||
/// but the original instruction wasn't uniform-after-vectorization in the
|
||||
/// legacy cost model, the legacy cost overestimates the actual cost.
|
||||
|
||||
@@ -2551,6 +2551,11 @@ void VPVectorPointerRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
|
||||
|
||||
InstructionCost VPBlendRecipe::computeCost(ElementCount VF,
|
||||
VPCostContext &Ctx) const {
|
||||
// Handle cases where only the first lane is used the same way as the legacy
|
||||
// cost model.
|
||||
if (vputils::onlyFirstLaneUsed(this))
|
||||
return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
|
||||
|
||||
Type *ResultTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
|
||||
Type *CmpTy = toVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF);
|
||||
return (getNumIncomingValues() - 1) *
|
||||
|
||||
@@ -8,25 +8,48 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @smax_call_uniform(ptr %dst, i64 %x) {
|
||||
; CHECK-LABEL: define void @smax_call_uniform(
|
||||
; CHECK-SAME: ptr [[DST:%.*]], i64 [[X:%.*]]) {
|
||||
; CHECK-NEXT: [[ENTRY:.*]]:
|
||||
; CHECK-NEXT: [[ENTRY:.*:]]
|
||||
; CHECK-NEXT: [[C:%.*]] = icmp ult i8 -68, -69
|
||||
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[X]], 0
|
||||
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
|
||||
; CHECK: [[LOOP_HEADER]]:
|
||||
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
|
||||
; CHECK-NEXT: br i1 [[C]], label %[[LOOP_LATCH]], label %[[ELSE:.*]]
|
||||
; CHECK: [[ELSE]]:
|
||||
; CHECK-NEXT: [[REM:%.*]] = urem i64 [[MUL]], [[X]]
|
||||
; CHECK-NEXT: [[SMAX:%.*]] = tail call i64 @llvm.smax.i64(i64 [[REM]], i64 0)
|
||||
; CHECK-NEXT: br label %[[LOOP_LATCH]]
|
||||
; CHECK: [[LOOP_LATCH]]:
|
||||
; CHECK-NEXT: [[PREDPHI7:%.*]] = phi i64 [ 1, %[[LOOP_HEADER]] ], [ [[SMAX]], %[[ELSE]] ]
|
||||
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
|
||||
; CHECK: [[VECTOR_PH]]:
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C]], i64 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_UREM_CONTINUE6:.*]] ]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_UREM_IF:.*]], label %[[PRED_UREM_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_UREM_IF]]:
|
||||
; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE]]
|
||||
; CHECK: [[PRED_UREM_CONTINUE]]:
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
|
||||
; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_UREM_IF1:.*]], label %[[PRED_UREM_CONTINUE2:.*]]
|
||||
; CHECK: [[PRED_UREM_IF1]]:
|
||||
; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE2]]
|
||||
; CHECK: [[PRED_UREM_CONTINUE2]]:
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_UREM_IF3:.*]], label %[[PRED_UREM_CONTINUE4:.*]]
|
||||
; CHECK: [[PRED_UREM_IF3]]:
|
||||
; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE4]]
|
||||
; CHECK: [[PRED_UREM_CONTINUE4]]:
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
|
||||
; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_UREM_IF5:.*]], label %[[PRED_UREM_CONTINUE6]]
|
||||
; CHECK: [[PRED_UREM_IF5]]:
|
||||
; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE6]]
|
||||
; CHECK: [[PRED_UREM_CONTINUE6]]:
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = tail call i64 @llvm.smax.i64(i64 0, i64 0)
|
||||
; CHECK-NEXT: [[PREDPHI7:%.*]] = select i1 [[C]], i64 1, i64 [[TMP13]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[PREDPHI7]], 1
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP17]]
|
||||
; CHECK-NEXT: store i64 0, ptr [[TMP19]], align 8
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[IV]], 1
|
||||
; CHECK-NEXT: store i64 0, ptr [[TMP19]], align 8
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
|
||||
; CHECK-NEXT: br i1 [[TMP20]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
|
||||
; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
||||
; CHECK: [[MIDDLE_BLOCK]]:
|
||||
; CHECK-NEXT: br label %[[EXIT:.*]]
|
||||
; CHECK: [[EXIT]]:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
@@ -58,3 +81,8 @@ exit:
|
||||
}
|
||||
|
||||
declare i64 @llvm.smax.i64(i64, i64)
|
||||
;.
|
||||
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
|
||||
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
|
||||
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
|
||||
;.
|
||||
|
||||
Reference in New Issue
Block a user