mirror of
https://github.com/intel/llvm.git
synced 2026-02-06 06:31:50 +08:00
[X86] Rewrite getScalarizationOverhead()
All of our insert/extract ops work on 128-bit lanes. For `Insert`, we need to extract affected 128-bit lane, unless it's being fully overwritten (FIXME: do we need to be careful about legalization-induced padding that we obviously don't demand?), perform insertions, and then insert the 128-bit lane back. But hold on. If we are operating on an 256-bit legal vector, and thus have two 128-bit subvectors, and are fully overwriting them both, we don't actually need to insert *both* subvectors, only the second one, into the implicitly-widened first one. Also, `Insert` wasn't actually querying the costs, but just assuming them to be `1`. `getShuffleCost(TTI::SK_ExtractSubvector)` notes: ``` // Note that in general, the insertion starting at the beginning of a vector // isn't free, because we need to preserve the rest of the wide vector. ``` ... so as far as i can tell, we didn't account for that. I was hoping this would allow vectorization at a higher VF at one case i looked at, but the subvector insertion cost is still dis-advising that. The change for `Extract` is NFC, and is for consistency only, i wanted to get rid of of that weird explicit discounting of insertion of 0'th element, since the general code should already deal with that. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D137913
This commit is contained in:
@@ -4392,10 +4392,18 @@ InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
|
||||
|
||||
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
|
||||
MVT MScalarTy = LT.second.getScalarType();
|
||||
unsigned SizeInBits = LT.second.getSizeInBits();
|
||||
unsigned LegalVectorBitWidth = LT.second.getSizeInBits();
|
||||
TTI::TargetCostKind CostKind = TTI::TargetCostKind::TCK_RecipThroughput;
|
||||
InstructionCost Cost = 0;
|
||||
|
||||
constexpr unsigned LaneBitWidth = 128;
|
||||
assert((LegalVectorBitWidth < LaneBitWidth ||
|
||||
(LegalVectorBitWidth % LaneBitWidth) == 0) &&
|
||||
"Illegal vector");
|
||||
|
||||
const int NumLegalVectors = *LT.first.getValue();
|
||||
assert(NumLegalVectors >= 0 && "Negative cost!");
|
||||
|
||||
// For insertions, a ISD::BUILD_VECTOR style vector initialization can be much
|
||||
// cheaper than an accumulation of ISD::INSERT_VECTOR_ELT.
|
||||
if (Insert) {
|
||||
@@ -4404,7 +4412,7 @@ InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
|
||||
(MScalarTy == MVT::f32 && ST->hasSSE41())) {
|
||||
// For types we can insert directly, insertion into 128-bit sub vectors is
|
||||
// cheap, followed by a cheap chain of concatenations.
|
||||
if (SizeInBits <= 128) {
|
||||
if (LegalVectorBitWidth <= LaneBitWidth) {
|
||||
Cost +=
|
||||
BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert, false);
|
||||
} else {
|
||||
@@ -4420,32 +4428,51 @@ InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
|
||||
// Case#2: inserting into 5th index needs extracti128 + vpinsrd +
|
||||
// inserti128.
|
||||
// Case#3: inserting into 4,5,6,7 index needs 4*vpinsrd + inserti128.
|
||||
const int CostValue = *LT.first.getValue();
|
||||
assert(CostValue >= 0 && "Negative cost!");
|
||||
unsigned Num128Lanes = SizeInBits / 128 * CostValue;
|
||||
unsigned NumElts = LT.second.getVectorNumElements() * CostValue;
|
||||
APInt WidenedDemandedElts = DemandedElts.zext(NumElts);
|
||||
unsigned Scale = NumElts / Num128Lanes;
|
||||
// We iterate each 128-lane, and check if we need a
|
||||
// extracti128/inserti128 for this 128-lane.
|
||||
for (unsigned I = 0; I < NumElts; I += Scale) {
|
||||
APInt Mask = WidenedDemandedElts.getBitsSet(NumElts, I, I + Scale);
|
||||
APInt MaskedDE = Mask & WidenedDemandedElts;
|
||||
unsigned Population = MaskedDE.countPopulation();
|
||||
Cost += (Population > 0 && Population != Scale &&
|
||||
I % LT.second.getVectorNumElements() != 0);
|
||||
Cost += Population > 0;
|
||||
}
|
||||
Cost += DemandedElts.countPopulation();
|
||||
assert((LegalVectorBitWidth % LaneBitWidth) == 0 && "Illegal vector");
|
||||
unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth;
|
||||
unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors;
|
||||
unsigned NumLegalElts =
|
||||
LT.second.getVectorNumElements() * NumLegalVectors;
|
||||
assert(NumLegalElts >= DemandedElts.getBitWidth() &&
|
||||
"Vector has been legalized to smaller element count");
|
||||
assert((NumLegalElts % NumLanesTotal) == 0 &&
|
||||
"Unexpected elts per lane");
|
||||
unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal;
|
||||
|
||||
// For vXf32 cases, insertion into the 0'th index in each v4f32
|
||||
// 128-bit vector is free.
|
||||
// NOTE: This assumes legalization widens vXf32 vectors.
|
||||
if (MScalarTy == MVT::f32)
|
||||
for (unsigned i = 0, e = cast<FixedVectorType>(Ty)->getNumElements();
|
||||
i < e; i += 4)
|
||||
if (DemandedElts[i])
|
||||
Cost--;
|
||||
APInt WidenedDemandedElts = DemandedElts.zext(NumLegalElts);
|
||||
auto *LaneTy =
|
||||
FixedVectorType::get(Ty->getElementType(), NumEltsPerLane);
|
||||
|
||||
for (unsigned I = 0; I != NumLanesTotal; ++I) {
|
||||
APInt LaneEltMask = WidenedDemandedElts.extractBits(
|
||||
NumEltsPerLane, NumEltsPerLane * I);
|
||||
if (LaneEltMask.isNullValue())
|
||||
continue;
|
||||
// FIXME: we don't need to extract if all non-demanded elements
|
||||
// are legalization-inserted padding.
|
||||
if (!LaneEltMask.isAllOnes())
|
||||
Cost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, None, CostKind,
|
||||
I * NumEltsPerLane, LaneTy);
|
||||
Cost += BaseT::getScalarizationOverhead(LaneTy, LaneEltMask, Insert,
|
||||
false);
|
||||
}
|
||||
|
||||
APInt AffectedLanes =
|
||||
APIntOps::ScaleBitMask(WidenedDemandedElts, NumLanesTotal);
|
||||
APInt FullyAffectedLegalVectors = APIntOps::ScaleBitMask(
|
||||
AffectedLanes, NumLegalVectors, /*MatchAllBits=*/true);
|
||||
for (int LegalVec = 0; LegalVec != NumLegalVectors; ++LegalVec) {
|
||||
for (unsigned Lane = 0; Lane != NumLegalLanes; ++Lane) {
|
||||
unsigned I = NumLegalLanes * LegalVec + Lane;
|
||||
// No need to insert unaffected lane; or lane 0 of each legal vector
|
||||
// iff ALL lanes of that vector were affected and will be inserted.
|
||||
if (!AffectedLanes[I] ||
|
||||
(Lane == 0 && FullyAffectedLegalVectors[LegalVec]))
|
||||
continue;
|
||||
Cost += getShuffleCost(TTI::SK_InsertSubvector, Ty, None, CostKind,
|
||||
I * NumEltsPerLane, LaneTy);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (LT.second.isVector()) {
|
||||
// Without fast insertion, we need to use MOVD/MOVQ to pass each demanded
|
||||
@@ -4477,39 +4504,36 @@ InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
|
||||
}
|
||||
|
||||
if (LT.second.isVector()) {
|
||||
int CostValue = *LT.first.getValue();
|
||||
assert(CostValue >= 0 && "Negative cost!");
|
||||
|
||||
unsigned NumElts = LT.second.getVectorNumElements() * CostValue;
|
||||
assert(NumElts >= DemandedElts.getBitWidth() &&
|
||||
unsigned NumLegalElts =
|
||||
LT.second.getVectorNumElements() * NumLegalVectors;
|
||||
assert(NumLegalElts >= DemandedElts.getBitWidth() &&
|
||||
"Vector has been legalized to smaller element count");
|
||||
|
||||
// If we're extracting elements from a 128-bit subvector lane, we only need
|
||||
// to extract each lane once, not for every element.
|
||||
if (SizeInBits > 128) {
|
||||
assert((SizeInBits % 128) == 0 && "Illegal vector");
|
||||
unsigned NumLegal128Lanes = SizeInBits / 128;
|
||||
unsigned Num128Lanes = NumLegal128Lanes * CostValue;
|
||||
APInt WidenedDemandedElts = DemandedElts.zext(NumElts);
|
||||
unsigned Scale = NumElts / Num128Lanes;
|
||||
// If we're extracting elements from a 128-bit subvector lane,
|
||||
// we only need to extract each lane once, not for every element.
|
||||
if (LegalVectorBitWidth > LaneBitWidth) {
|
||||
unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth;
|
||||
unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors;
|
||||
assert((NumLegalElts % NumLanesTotal) == 0 &&
|
||||
"Unexpected elts per lane");
|
||||
unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal;
|
||||
|
||||
// Add cost for each demanded 128-bit subvector extraction.
|
||||
// Luckily this is a lot easier than for insertion.
|
||||
APInt DemandedUpper128Lanes =
|
||||
APIntOps::ScaleBitMask(WidenedDemandedElts, Num128Lanes);
|
||||
auto *Ty128 = FixedVectorType::get(Ty->getElementType(), Scale);
|
||||
for (unsigned I = 0; I != Num128Lanes; ++I)
|
||||
if (DemandedUpper128Lanes[I])
|
||||
Cost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, None, CostKind,
|
||||
I * Scale, Ty128);
|
||||
APInt WidenedDemandedElts = DemandedElts.zext(NumLegalElts);
|
||||
auto *LaneTy =
|
||||
FixedVectorType::get(Ty->getElementType(), NumEltsPerLane);
|
||||
|
||||
// Add all the demanded element extractions together, but adjust the
|
||||
// index to use the equivalent of the bottom 128 bit lane.
|
||||
for (unsigned I = 0; I != NumElts; ++I)
|
||||
if (WidenedDemandedElts[I]) {
|
||||
unsigned Idx = I % Scale;
|
||||
Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, Idx);
|
||||
}
|
||||
for (unsigned I = 0; I != NumLanesTotal; ++I) {
|
||||
APInt LaneEltMask = WidenedDemandedElts.extractBits(
|
||||
NumEltsPerLane, I * NumEltsPerLane);
|
||||
if (LaneEltMask.isNullValue())
|
||||
continue;
|
||||
Cost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, None, CostKind,
|
||||
I * NumEltsPerLane, LaneTy);
|
||||
Cost += BaseT::getScalarizationOverhead(LaneTy, LaneEltMask, false,
|
||||
Extract);
|
||||
}
|
||||
|
||||
return Cost;
|
||||
}
|
||||
|
||||
@@ -584,8 +584,8 @@ define i32 @fma(i32 %arg) {
|
||||
; AVX-LABEL: 'fma'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
|
||||
|
||||
@@ -958,8 +958,8 @@ define i32 @fma(i32 %arg) {
|
||||
; AVX-LABEL: 'fma'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
|
||||
|
||||
@@ -892,8 +892,8 @@ define i32 @fma(i32 %arg) {
|
||||
; AVX-LABEL: 'fma'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
|
||||
|
||||
@@ -660,8 +660,8 @@ define i32 @frem(i32 %arg) {
|
||||
; AVX-LABEL: 'frem'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = frem <4 x float> undef, undef
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F32 = frem <8 x float> undef, undef
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16F32 = frem <16 x float> undef, undef
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8F32 = frem <8 x float> undef, undef
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16F32 = frem <16 x float> undef, undef
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = frem <2 x double> undef, undef
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F64 = frem <4 x double> undef, undef
|
||||
@@ -671,8 +671,8 @@ define i32 @frem(i32 %arg) {
|
||||
; AVX512-LABEL: 'frem'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = frem <4 x float> undef, undef
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F32 = frem <8 x float> undef, undef
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16F32 = frem <16 x float> undef, undef
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8F32 = frem <8 x float> undef, undef
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16F32 = frem <16 x float> undef, undef
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = frem <2 x double> undef, undef
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F64 = frem <4 x double> undef, undef
|
||||
@@ -1035,8 +1035,8 @@ define i32 @fma(i32 %arg) {
|
||||
; AVX-LABEL: 'fma'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
|
||||
|
||||
@@ -250,15 +250,15 @@ define <4 x i64> @var_bitreverse_v4i64(<4 x i64> %a) {
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %bitreverse
|
||||
;
|
||||
; AVX1-LABEL: 'var_bitreverse_v4i64'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %bitreverse
|
||||
;
|
||||
; AVX2-LABEL: 'var_bitreverse_v4i64'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %bitreverse
|
||||
;
|
||||
; AVX512-LABEL: 'var_bitreverse_v4i64'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %bitreverse
|
||||
;
|
||||
; XOP-LABEL: 'var_bitreverse_v4i64'
|
||||
@@ -299,15 +299,15 @@ define <8 x i64> @var_bitreverse_v8i64(<8 x i64> %a) {
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %bitreverse
|
||||
;
|
||||
; AVX1-LABEL: 'var_bitreverse_v8i64'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %bitreverse
|
||||
;
|
||||
; AVX2-LABEL: 'var_bitreverse_v8i64'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %bitreverse
|
||||
;
|
||||
; AVX512-LABEL: 'var_bitreverse_v8i64'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %bitreverse
|
||||
;
|
||||
; XOP-LABEL: 'var_bitreverse_v8i64'
|
||||
@@ -397,15 +397,15 @@ define <8 x i32> @var_bitreverse_v8i32(<8 x i32> %a) {
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %bitreverse
|
||||
;
|
||||
; AVX1-LABEL: 'var_bitreverse_v8i32'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %bitreverse
|
||||
;
|
||||
; AVX2-LABEL: 'var_bitreverse_v8i32'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %bitreverse
|
||||
;
|
||||
; AVX512-LABEL: 'var_bitreverse_v8i32'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %bitreverse
|
||||
;
|
||||
; XOP-LABEL: 'var_bitreverse_v8i32'
|
||||
@@ -446,15 +446,15 @@ define <16 x i32> @var_bitreverse_v16i32(<16 x i32> %a) {
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %bitreverse
|
||||
;
|
||||
; AVX1-LABEL: 'var_bitreverse_v16i32'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %bitreverse
|
||||
;
|
||||
; AVX2-LABEL: 'var_bitreverse_v16i32'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %bitreverse
|
||||
;
|
||||
; AVX512-LABEL: 'var_bitreverse_v16i32'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %bitreverse
|
||||
;
|
||||
; XOP-LABEL: 'var_bitreverse_v16i32'
|
||||
@@ -532,15 +532,15 @@ define <16 x i16> @var_bitreverse_v16i16(<16 x i16> %a) {
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %bitreverse
|
||||
;
|
||||
; AVX1-LABEL: 'var_bitreverse_v16i16'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %bitreverse
|
||||
;
|
||||
; AVX2-LABEL: 'var_bitreverse_v16i16'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %bitreverse
|
||||
;
|
||||
; AVX512-LABEL: 'var_bitreverse_v16i16'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %bitreverse
|
||||
;
|
||||
; XOP-LABEL: 'var_bitreverse_v16i16'
|
||||
@@ -581,15 +581,15 @@ define <32 x i16> @var_bitreverse_v32i16(<32 x i16> %a) {
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %bitreverse
|
||||
;
|
||||
; AVX1-LABEL: 'var_bitreverse_v32i16'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %bitreverse
|
||||
;
|
||||
; AVX2-LABEL: 'var_bitreverse_v32i16'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %bitreverse
|
||||
;
|
||||
; AVX512-LABEL: 'var_bitreverse_v32i16'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %bitreverse
|
||||
;
|
||||
; XOP-LABEL: 'var_bitreverse_v32i16'
|
||||
|
||||
@@ -250,15 +250,15 @@ define <4 x i64> @var_bitreverse_v4i64(<4 x i64> %a) {
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %bitreverse
|
||||
;
|
||||
; AVX1-LABEL: 'var_bitreverse_v4i64'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %bitreverse
|
||||
;
|
||||
; AVX2-LABEL: 'var_bitreverse_v4i64'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %bitreverse
|
||||
;
|
||||
; AVX512-LABEL: 'var_bitreverse_v4i64'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %bitreverse
|
||||
;
|
||||
; XOP-LABEL: 'var_bitreverse_v4i64'
|
||||
@@ -299,15 +299,15 @@ define <8 x i64> @var_bitreverse_v8i64(<8 x i64> %a) {
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %bitreverse
|
||||
;
|
||||
; AVX1-LABEL: 'var_bitreverse_v8i64'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %bitreverse
|
||||
;
|
||||
; AVX2-LABEL: 'var_bitreverse_v8i64'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %bitreverse
|
||||
;
|
||||
; AVX512-LABEL: 'var_bitreverse_v8i64'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %bitreverse
|
||||
;
|
||||
; XOP-LABEL: 'var_bitreverse_v8i64'
|
||||
@@ -397,15 +397,15 @@ define <8 x i32> @var_bitreverse_v8i32(<8 x i32> %a) {
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %bitreverse
|
||||
;
|
||||
; AVX1-LABEL: 'var_bitreverse_v8i32'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %bitreverse
|
||||
;
|
||||
; AVX2-LABEL: 'var_bitreverse_v8i32'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %bitreverse
|
||||
;
|
||||
; AVX512-LABEL: 'var_bitreverse_v8i32'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %bitreverse
|
||||
;
|
||||
; XOP-LABEL: 'var_bitreverse_v8i32'
|
||||
@@ -446,15 +446,15 @@ define <16 x i32> @var_bitreverse_v16i32(<16 x i32> %a) {
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %bitreverse
|
||||
;
|
||||
; AVX1-LABEL: 'var_bitreverse_v16i32'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %bitreverse
|
||||
;
|
||||
; AVX2-LABEL: 'var_bitreverse_v16i32'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %bitreverse
|
||||
;
|
||||
; AVX512-LABEL: 'var_bitreverse_v16i32'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %bitreverse
|
||||
;
|
||||
; XOP-LABEL: 'var_bitreverse_v16i32'
|
||||
@@ -532,15 +532,15 @@ define <16 x i16> @var_bitreverse_v16i16(<16 x i16> %a) {
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %bitreverse
|
||||
;
|
||||
; AVX1-LABEL: 'var_bitreverse_v16i16'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %bitreverse
|
||||
;
|
||||
; AVX2-LABEL: 'var_bitreverse_v16i16'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %bitreverse
|
||||
;
|
||||
; AVX512-LABEL: 'var_bitreverse_v16i16'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %bitreverse
|
||||
;
|
||||
; XOP-LABEL: 'var_bitreverse_v16i16'
|
||||
@@ -581,15 +581,15 @@ define <32 x i16> @var_bitreverse_v32i16(<32 x i16> %a) {
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %bitreverse
|
||||
;
|
||||
; AVX1-LABEL: 'var_bitreverse_v32i16'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 388 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %bitreverse
|
||||
;
|
||||
; AVX2-LABEL: 'var_bitreverse_v32i16'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 388 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %bitreverse
|
||||
;
|
||||
; AVX512-LABEL: 'var_bitreverse_v32i16'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 391 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %bitreverse
|
||||
;
|
||||
; XOP-LABEL: 'var_bitreverse_v32i16'
|
||||
|
||||
@@ -250,15 +250,15 @@ define <4 x i64> @var_bitreverse_v4i64(<4 x i64> %a) {
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %bitreverse
|
||||
;
|
||||
; AVX1-LABEL: 'var_bitreverse_v4i64'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %bitreverse
|
||||
;
|
||||
; AVX2-LABEL: 'var_bitreverse_v4i64'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %bitreverse
|
||||
;
|
||||
; AVX512-LABEL: 'var_bitreverse_v4i64'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %bitreverse
|
||||
;
|
||||
; XOP-LABEL: 'var_bitreverse_v4i64'
|
||||
@@ -299,15 +299,15 @@ define <8 x i64> @var_bitreverse_v8i64(<8 x i64> %a) {
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %bitreverse
|
||||
;
|
||||
; AVX1-LABEL: 'var_bitreverse_v8i64'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %bitreverse
|
||||
;
|
||||
; AVX2-LABEL: 'var_bitreverse_v8i64'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %bitreverse
|
||||
;
|
||||
; AVX512-LABEL: 'var_bitreverse_v8i64'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %bitreverse
|
||||
;
|
||||
; XOP-LABEL: 'var_bitreverse_v8i64'
|
||||
@@ -397,15 +397,15 @@ define <8 x i32> @var_bitreverse_v8i32(<8 x i32> %a) {
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %bitreverse
|
||||
;
|
||||
; AVX1-LABEL: 'var_bitreverse_v8i32'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %bitreverse
|
||||
;
|
||||
; AVX2-LABEL: 'var_bitreverse_v8i32'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %bitreverse
|
||||
;
|
||||
; AVX512-LABEL: 'var_bitreverse_v8i32'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %bitreverse
|
||||
;
|
||||
; XOP-LABEL: 'var_bitreverse_v8i32'
|
||||
@@ -446,15 +446,15 @@ define <16 x i32> @var_bitreverse_v16i32(<16 x i32> %a) {
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %bitreverse
|
||||
;
|
||||
; AVX1-LABEL: 'var_bitreverse_v16i32'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %bitreverse
|
||||
;
|
||||
; AVX2-LABEL: 'var_bitreverse_v16i32'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %bitreverse
|
||||
;
|
||||
; AVX512-LABEL: 'var_bitreverse_v16i32'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %bitreverse
|
||||
;
|
||||
; XOP-LABEL: 'var_bitreverse_v16i32'
|
||||
@@ -532,15 +532,15 @@ define <16 x i16> @var_bitreverse_v16i16(<16 x i16> %a) {
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %bitreverse
|
||||
;
|
||||
; AVX1-LABEL: 'var_bitreverse_v16i16'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %bitreverse
|
||||
;
|
||||
; AVX2-LABEL: 'var_bitreverse_v16i16'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %bitreverse
|
||||
;
|
||||
; AVX512-LABEL: 'var_bitreverse_v16i16'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %bitreverse
|
||||
;
|
||||
; XOP-LABEL: 'var_bitreverse_v16i16'
|
||||
@@ -581,15 +581,15 @@ define <32 x i16> @var_bitreverse_v32i16(<32 x i16> %a) {
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %bitreverse
|
||||
;
|
||||
; AVX1-LABEL: 'var_bitreverse_v32i16'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 388 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %bitreverse
|
||||
;
|
||||
; AVX2-LABEL: 'var_bitreverse_v32i16'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 388 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %bitreverse
|
||||
;
|
||||
; AVX512-LABEL: 'var_bitreverse_v32i16'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 391 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %bitreverse
|
||||
;
|
||||
; XOP-LABEL: 'var_bitreverse_v32i16'
|
||||
|
||||
@@ -15,8 +15,8 @@ define i32 @f32(i32 %arg) {
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.maxnum.f32(float undef, float undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
|
||||
;
|
||||
%F32 = call float @llvm.maxnum.f32(float undef, float undef)
|
||||
|
||||
@@ -15,8 +15,8 @@ define i32 @f32(i32 %arg) {
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.minnum.f32(float undef, float undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V8F32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V16F32 = call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
|
||||
;
|
||||
%F32 = call float @llvm.minnum.f32(float undef, float undef)
|
||||
|
||||
@@ -266,8 +266,8 @@ define void @casts() {
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef)
|
||||
@@ -276,8 +276,8 @@ define void @casts() {
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef)
|
||||
@@ -286,8 +286,8 @@ define void @casts() {
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef)
|
||||
@@ -296,8 +296,8 @@ define void @casts() {
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef)
|
||||
@@ -306,8 +306,8 @@ define void @casts() {
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef)
|
||||
@@ -316,8 +316,8 @@ define void @casts() {
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
;
|
||||
; AVX2-LABEL: 'casts'
|
||||
@@ -369,8 +369,8 @@ define void @casts() {
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef)
|
||||
@@ -379,8 +379,8 @@ define void @casts() {
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef)
|
||||
@@ -389,8 +389,8 @@ define void @casts() {
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef)
|
||||
@@ -399,8 +399,8 @@ define void @casts() {
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef)
|
||||
@@ -409,8 +409,8 @@ define void @casts() {
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef)
|
||||
@@ -419,8 +419,8 @@ define void @casts() {
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
;
|
||||
; AVX512F-LABEL: 'casts'
|
||||
@@ -472,8 +472,8 @@ define void @casts() {
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef)
|
||||
@@ -482,8 +482,8 @@ define void @casts() {
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef)
|
||||
@@ -492,8 +492,8 @@ define void @casts() {
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef)
|
||||
@@ -502,8 +502,8 @@ define void @casts() {
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef)
|
||||
@@ -512,8 +512,8 @@ define void @casts() {
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef)
|
||||
@@ -522,8 +522,8 @@ define void @casts() {
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
;
|
||||
; AVX512DQ-LABEL: 'casts'
|
||||
@@ -985,22 +985,22 @@ define void @fp16() {
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 214 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 181 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 214 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 181 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 177 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
@@ -1034,28 +1034,28 @@ define void @fp16() {
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 209 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 177 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 185 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 183 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
;
|
||||
; AVX512DQ-LABEL: 'fp16'
|
||||
@@ -1087,28 +1087,28 @@ define void @fp16() {
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 209 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 177 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 185 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 183 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
;
|
||||
; SLM-LABEL: 'fp16'
|
||||
|
||||
@@ -28,15 +28,15 @@ define i32 @fptosi_double_i64(i32 %arg) {
|
||||
; AVX-LABEL: 'fptosi_double_i64'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512F-LABEL: 'fptosi_double_i64'
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64>
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512DQ-LABEL: 'fptosi_double_i64'
|
||||
@@ -216,17 +216,17 @@ define i32 @fptosi_float_i64(i32 %arg) {
|
||||
; AVX-LABEL: 'fptosi_float_i64'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512F-LABEL: 'fptosi_float_i64'
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64>
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512DQ-LABEL: 'fptosi_float_i64'
|
||||
|
||||
@@ -28,22 +28,22 @@ define i32 @fptoui_double_i64(i32 %arg) {
|
||||
; AVX1-LABEL: 'fptoui_double_i64'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX2-LABEL: 'fptoui_double_i64'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = fptoui double undef to i64
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512F-LABEL: 'fptoui_double_i64'
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptoui double undef to i64
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512DQ-LABEL: 'fptoui_double_i64'
|
||||
@@ -223,25 +223,25 @@ define i32 @fptoui_float_i64(i32 %arg) {
|
||||
; AVX1-LABEL: 'fptoui_float_i64'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX2-LABEL: 'fptoui_float_i64'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = fptoui float undef to i64
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512F-LABEL: 'fptoui_float_i64'
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptoui float undef to i64
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512DQ-LABEL: 'fptoui_float_i64'
|
||||
|
||||
@@ -28,33 +28,33 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
; AVX1: LV: Found an estimated cost of 194 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
; AVX1: LV: Found an estimated cost of 388 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
; AVX1: LV: Found an estimated cost of 193 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
; AVX1: LV: Found an estimated cost of 386 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
;
|
||||
; AVX2-SLOWGATHER-LABEL: 'test'
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 4 for VF 2 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 8 for VF 4 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 16 for VF 8 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 34 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 68 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 33 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 66 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
;
|
||||
; AVX2-FASTGATHER-LABEL: 'test'
|
||||
; AVX2-FASTGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
; AVX2-FASTGATHER: LV: Found an estimated cost of 6 for VF 2 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
; AVX2-FASTGATHER: LV: Found an estimated cost of 13 for VF 4 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
; AVX2-FASTGATHER: LV: Found an estimated cost of 26 for VF 8 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
; AVX2-FASTGATHER: LV: Found an estimated cost of 54 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
; AVX2-FASTGATHER: LV: Found an estimated cost of 108 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
; AVX2-FASTGATHER: LV: Found an estimated cost of 53 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
; AVX2-FASTGATHER: LV: Found an estimated cost of 106 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
; AVX512: LV: Found an estimated cost of 27 for VF 8 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
; AVX512: LV: Found an estimated cost of 56 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
; AVX512: LV: Found an estimated cost of 112 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
; AVX512: LV: Found an estimated cost of 224 for VF 64 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
; AVX512: LV: Found an estimated cost of 55 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
; AVX512: LV: Found an estimated cost of 111 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
; AVX512: LV: Found an estimated cost of 222 for VF 64 For instruction: %valB = load i16, i16* %inB, align 2
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -34,17 +34,17 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i32, i32* %inB, align 4
|
||||
; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: %valB = load i32, i32* %inB, align 4
|
||||
; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: %valB = load i32, i32* %inB, align 4
|
||||
; AVX1: LV: Found an estimated cost of 98 for VF 8 For instruction: %valB = load i32, i32* %inB, align 4
|
||||
; AVX1: LV: Found an estimated cost of 196 for VF 16 For instruction: %valB = load i32, i32* %inB, align 4
|
||||
; AVX1: LV: Found an estimated cost of 392 for VF 32 For instruction: %valB = load i32, i32* %inB, align 4
|
||||
; AVX1: LV: Found an estimated cost of 97 for VF 8 For instruction: %valB = load i32, i32* %inB, align 4
|
||||
; AVX1: LV: Found an estimated cost of 194 for VF 16 For instruction: %valB = load i32, i32* %inB, align 4
|
||||
; AVX1: LV: Found an estimated cost of 388 for VF 32 For instruction: %valB = load i32, i32* %inB, align 4
|
||||
;
|
||||
; AVX2-SLOWGATHER-LABEL: 'test'
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i32, i32* %inB, align 4
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 4 for VF 2 For instruction: %valB = load i32, i32* %inB, align 4
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 8 for VF 4 For instruction: %valB = load i32, i32* %inB, align 4
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 18 for VF 8 For instruction: %valB = load i32, i32* %inB, align 4
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 36 for VF 16 For instruction: %valB = load i32, i32* %inB, align 4
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 72 for VF 32 For instruction: %valB = load i32, i32* %inB, align 4
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 17 for VF 8 For instruction: %valB = load i32, i32* %inB, align 4
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 34 for VF 16 For instruction: %valB = load i32, i32* %inB, align 4
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 68 for VF 32 For instruction: %valB = load i32, i32* %inB, align 4
|
||||
;
|
||||
; AVX2-FASTGATHER-LABEL: 'test'
|
||||
; AVX2-FASTGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i32, i32* %inB, align 4
|
||||
|
||||
@@ -33,18 +33,18 @@ define void @test() {
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i64, i64* %inB, align 8
|
||||
; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: %valB = load i64, i64* %inB, align 8
|
||||
; AVX1: LV: Found an estimated cost of 50 for VF 4 For instruction: %valB = load i64, i64* %inB, align 8
|
||||
; AVX1: LV: Found an estimated cost of 100 for VF 8 For instruction: %valB = load i64, i64* %inB, align 8
|
||||
; AVX1: LV: Found an estimated cost of 200 for VF 16 For instruction: %valB = load i64, i64* %inB, align 8
|
||||
; AVX1: LV: Found an estimated cost of 400 for VF 32 For instruction: %valB = load i64, i64* %inB, align 8
|
||||
; AVX1: LV: Found an estimated cost of 49 for VF 4 For instruction: %valB = load i64, i64* %inB, align 8
|
||||
; AVX1: LV: Found an estimated cost of 98 for VF 8 For instruction: %valB = load i64, i64* %inB, align 8
|
||||
; AVX1: LV: Found an estimated cost of 196 for VF 16 For instruction: %valB = load i64, i64* %inB, align 8
|
||||
; AVX1: LV: Found an estimated cost of 392 for VF 32 For instruction: %valB = load i64, i64* %inB, align 8
|
||||
;
|
||||
; AVX2-SLOWGATHER-LABEL: 'test'
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i64, i64* %inB, align 8
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 4 for VF 2 For instruction: %valB = load i64, i64* %inB, align 8
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 10 for VF 4 For instruction: %valB = load i64, i64* %inB, align 8
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 20 for VF 8 For instruction: %valB = load i64, i64* %inB, align 8
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 40 for VF 16 For instruction: %valB = load i64, i64* %inB, align 8
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 80 for VF 32 For instruction: %valB = load i64, i64* %inB, align 8
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 9 for VF 4 For instruction: %valB = load i64, i64* %inB, align 8
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 18 for VF 8 For instruction: %valB = load i64, i64* %inB, align 8
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 36 for VF 16 For instruction: %valB = load i64, i64* %inB, align 8
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 72 for VF 32 For instruction: %valB = load i64, i64* %inB, align 8
|
||||
;
|
||||
; AVX2-FASTGATHER-LABEL: 'test'
|
||||
; AVX2-FASTGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i64, i64* %inB, align 8
|
||||
@@ -57,7 +57,7 @@ define void @test() {
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i64, i64* %inB, align 8
|
||||
; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: %valB = load i64, i64* %inB, align 8
|
||||
; AVX512: LV: Found an estimated cost of 15 for VF 4 For instruction: %valB = load i64, i64* %inB, align 8
|
||||
; AVX512: LV: Found an estimated cost of 14 for VF 4 For instruction: %valB = load i64, i64* %inB, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %valB = load i64, i64* %inB, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %valB = load i64, i64* %inB, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %valB = load i64, i64* %inB, align 8
|
||||
|
||||
@@ -36,7 +36,7 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: %valB = load i8, i8* %inB, align 1
|
||||
; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: %valB = load i8, i8* %inB, align 1
|
||||
; AVX1: LV: Found an estimated cost of 192 for VF 16 For instruction: %valB = load i8, i8* %inB, align 1
|
||||
; AVX1: LV: Found an estimated cost of 386 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1
|
||||
; AVX1: LV: Found an estimated cost of 385 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1
|
||||
;
|
||||
; AVX2-SLOWGATHER-LABEL: 'test'
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i8, i8* %inB, align 1
|
||||
@@ -44,7 +44,7 @@ define void @test() {
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 8 for VF 4 For instruction: %valB = load i8, i8* %inB, align 1
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 16 for VF 8 For instruction: %valB = load i8, i8* %inB, align 1
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 32 for VF 16 For instruction: %valB = load i8, i8* %inB, align 1
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 66 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1
|
||||
; AVX2-SLOWGATHER: LV: Found an estimated cost of 65 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1
|
||||
;
|
||||
; AVX2-FASTGATHER-LABEL: 'test'
|
||||
; AVX2-FASTGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i8, i8* %inB, align 1
|
||||
@@ -52,7 +52,7 @@ define void @test() {
|
||||
; AVX2-FASTGATHER: LV: Found an estimated cost of 13 for VF 4 For instruction: %valB = load i8, i8* %inB, align 1
|
||||
; AVX2-FASTGATHER: LV: Found an estimated cost of 26 for VF 8 For instruction: %valB = load i8, i8* %inB, align 1
|
||||
; AVX2-FASTGATHER: LV: Found an estimated cost of 52 for VF 16 For instruction: %valB = load i8, i8* %inB, align 1
|
||||
; AVX2-FASTGATHER: LV: Found an estimated cost of 106 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1
|
||||
; AVX2-FASTGATHER: LV: Found an estimated cost of 105 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i8, i8* %inB, align 1
|
||||
@@ -60,8 +60,8 @@ define void @test() {
|
||||
; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: %valB = load i8, i8* %inB, align 1
|
||||
; AVX512: LV: Found an estimated cost of 27 for VF 8 For instruction: %valB = load i8, i8* %inB, align 1
|
||||
; AVX512: LV: Found an estimated cost of 54 for VF 16 For instruction: %valB = load i8, i8* %inB, align 1
|
||||
; AVX512: LV: Found an estimated cost of 110 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1
|
||||
; AVX512: LV: Found an estimated cost of 220 for VF 64 For instruction: %valB = load i8, i8* %inB, align 1
|
||||
; AVX512: LV: Found an estimated cost of 109 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1
|
||||
; AVX512: LV: Found an estimated cost of 219 for VF 64 For instruction: %valB = load i8, i8* %inB, align 1
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -23,9 +23,9 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 64 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 128 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 30 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 60 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 120 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
|
||||
@@ -23,9 +23,9 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 11 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 21 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 96 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 192 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 45 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 90 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 180 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
|
||||
@@ -23,9 +23,9 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 12 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 28 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 64 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 128 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 256 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 60 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 120 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 240 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
|
||||
@@ -22,15 +22,15 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 35 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 80 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 160 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 75 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 150 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 15 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 35 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 80 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 160 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 75 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 150 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
|
||||
@@ -22,8 +22,8 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 42 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 192 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 90 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 180 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
|
||||
@@ -22,15 +22,15 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 23 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 49 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 112 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 224 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 105 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 210 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 21 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 49 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 112 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 224 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 105 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 210 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
|
||||
@@ -22,15 +22,15 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 56 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 128 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 256 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 240 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 24 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 256 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 240 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
|
||||
@@ -25,8 +25,8 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 72 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 144 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 70 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 140 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
@@ -43,7 +43,7 @@ define void @test() {
|
||||
; AVX512DQ: LV: Found an estimated cost of 7 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 10 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 20 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 288 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 284 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
|
||||
@@ -25,8 +25,8 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 28 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 51 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 108 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 216 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 105 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 210 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
@@ -43,7 +43,7 @@ define void @test() {
|
||||
; AVX512DQ: LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 59 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 432 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 426 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
|
||||
@@ -25,8 +25,8 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 68 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 144 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 288 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 140 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 280 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
@@ -43,7 +43,7 @@ define void @test() {
|
||||
; AVX512DQ: LV: Found an estimated cost of 34 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 77 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 154 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 576 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 568 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
|
||||
@@ -25,25 +25,25 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 25 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 43 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 85 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 180 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 360 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 175 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 350 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 20 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 40 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 80 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 170 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 340 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 165 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 330 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 25 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 85 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 180 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 360 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 720 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 175 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 355 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 710 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
|
||||
@@ -25,8 +25,8 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 28 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 51 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 102 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 216 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 432 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 210 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 420 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
@@ -43,7 +43,7 @@ define void @test() {
|
||||
; AVX512DQ: LV: Found an estimated cost of 41 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 109 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 218 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 864 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 852 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
|
||||
@@ -25,25 +25,25 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 62 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 119 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 252 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 504 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 245 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 490 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 112 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 238 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 476 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 231 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 462 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 64 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 121 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 252 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 504 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1008 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 245 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 497 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 994 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
|
||||
@@ -25,25 +25,25 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 68 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 136 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 288 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 576 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 280 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 560 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 32 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 64 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 128 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 272 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 544 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 264 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 528 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 68 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 136 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 288 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 576 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1152 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 280 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 568 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1136 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
|
||||
@@ -23,9 +23,9 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 2 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 44 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 88 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 21 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 42 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 84 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
|
||||
@@ -23,9 +23,9 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 40 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 80 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 160 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 38 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 76 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 152 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
|
||||
@@ -23,9 +23,9 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 12 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 19 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 42 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 84 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 168 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 40 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 80 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 160 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
|
||||
@@ -23,9 +23,9 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 24 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 48 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 96 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 23 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 46 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 92 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
|
||||
@@ -23,9 +23,9 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 27 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 60 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 120 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 240 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 57 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 114 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 228 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
|
||||
@@ -23,9 +23,9 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 28 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 62 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 124 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 248 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 59 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 118 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 236 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
|
||||
@@ -23,9 +23,9 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 10 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 20 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 44 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 88 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 176 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 42 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 84 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 168 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
|
||||
@@ -23,9 +23,9 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 6 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 12 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 26 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 52 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 104 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 25 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 50 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 100 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
|
||||
|
||||
@@ -23,9 +23,9 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 36 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 80 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 160 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 320 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 76 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 152 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 304 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
|
||||
@@ -22,15 +22,15 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 23 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 100 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 200 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 95 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 190 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 20 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 40 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 90 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 180 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 85 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 170 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
|
||||
@@ -22,8 +22,8 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 27 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 240 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 114 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 228 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
|
||||
@@ -22,15 +22,15 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 63 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 140 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 280 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 133 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 266 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 126 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 252 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 119 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 238 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
|
||||
@@ -22,15 +22,15 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 36 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 72 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 160 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 320 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 152 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 304 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 32 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 64 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 288 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 272 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
|
||||
@@ -22,10 +22,10 @@ define void @test() {
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 24 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 96 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 192 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 22 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 44 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 88 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 176 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
|
||||
@@ -21,9 +21,9 @@ define void @test() {
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 36 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 72 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 144 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 132 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
|
||||
@@ -21,9 +21,9 @@ define void @test() {
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 20 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 192 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 44 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 88 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 176 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
|
||||
@@ -20,14 +20,14 @@ define void @test() {
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 25 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 60 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 55 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 110 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 20 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 50 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 100 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 90 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
|
||||
@@ -20,8 +20,8 @@ define void @test() {
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 30 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 72 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 144 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 132 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
|
||||
@@ -20,14 +20,14 @@ define void @test() {
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 35 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 84 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 168 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 77 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 154 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 70 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 140 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 63 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 126 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
|
||||
@@ -20,14 +20,14 @@ define void @test() {
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 40 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 96 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 192 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 88 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 176 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 32 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 80 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 160 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 72 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 144 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
|
||||
@@ -26,7 +26,7 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 136 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 134 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
@@ -43,7 +43,7 @@ define void @test() {
|
||||
; AVX512DQ: LV: Found an estimated cost of 3 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 7 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 272 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 270 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
|
||||
@@ -26,7 +26,7 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 27 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 52 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 99 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 204 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 201 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
@@ -43,7 +43,7 @@ define void @test() {
|
||||
; AVX512DQ: LV: Found an estimated cost of 9 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 14 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 16 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 408 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 405 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
|
||||
@@ -26,7 +26,7 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 132 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 272 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 268 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
@@ -43,7 +43,7 @@ define void @test() {
|
||||
; AVX512DQ: LV: Found an estimated cost of 13 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 25 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 58 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 544 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 540 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
|
||||
@@ -26,7 +26,7 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 83 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 165 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 340 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 335 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
@@ -34,7 +34,7 @@ define void @test() {
|
||||
; AVX2: LV: Found an estimated cost of 40 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 80 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 160 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 330 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 325 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
@@ -42,8 +42,8 @@ define void @test() {
|
||||
; AVX512DQ: LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 85 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 165 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 340 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 680 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 335 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 675 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
|
||||
@@ -26,7 +26,7 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 52 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 99 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 198 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 408 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 402 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
@@ -43,7 +43,7 @@ define void @test() {
|
||||
; AVX512DQ: LV: Found an estimated cost of 21 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 45 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 85 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 816 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 810 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
|
||||
@@ -26,7 +26,7 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 62 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 118 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 231 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 476 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 469 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
@@ -34,7 +34,7 @@ define void @test() {
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 112 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 224 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 462 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 455 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
@@ -42,8 +42,8 @@ define void @test() {
|
||||
; AVX512DQ: LV: Found an estimated cost of 62 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 120 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 233 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 476 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 952 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 469 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 945 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
|
||||
@@ -26,7 +26,7 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 132 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 264 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 544 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 536 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
@@ -34,7 +34,7 @@ define void @test() {
|
||||
; AVX2: LV: Found an estimated cost of 64 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 128 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 256 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 528 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 520 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
@@ -42,8 +42,8 @@ define void @test() {
|
||||
; AVX512DQ: LV: Found an estimated cost of 66 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 132 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 264 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 544 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1088 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 536 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1080 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
|
||||
@@ -22,10 +22,10 @@ define void @test() {
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 4 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 8 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 64 for VF 16 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 128 for VF 32 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 4 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 30 for VF 8 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 60 for VF 16 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 120 for VF 32 For instruction: store float %v1, ptr %out1, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4
|
||||
|
||||
@@ -21,11 +21,11 @@ define void @test() {
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 13 for VF 2 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 23 for VF 4 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 48 for VF 8 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 96 for VF 16 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 192 for VF 32 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 12 for VF 2 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 22 for VF 4 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 45 for VF 8 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 90 for VF 16 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 180 for VF 32 For instruction: store float %v2, ptr %out2, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4
|
||||
|
||||
@@ -21,11 +21,11 @@ define void @test() {
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 13 for VF 2 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 30 for VF 4 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 64 for VF 8 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 128 for VF 16 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 256 for VF 32 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 12 for VF 2 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 28 for VF 4 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 60 for VF 8 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 120 for VF 16 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 240 for VF 32 For instruction: store float %v3, ptr %out3, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4
|
||||
|
||||
@@ -20,17 +20,17 @@ define void @test() {
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 38 for VF 4 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 80 for VF 8 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 160 for VF 16 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 2 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 36 for VF 4 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 75 for VF 8 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 150 for VF 16 For instruction: store float %v4, ptr %out4, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 17 for VF 2 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 38 for VF 4 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 80 for VF 8 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 160 for VF 16 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 2 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 36 for VF 4 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 75 for VF 8 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 150 for VF 16 For instruction: store float %v4, ptr %out4, align 4
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4
|
||||
|
||||
@@ -20,10 +20,10 @@ define void @test() {
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 20 for VF 2 For instruction: store float %v5, ptr %out5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 45 for VF 4 For instruction: store float %v5, ptr %out5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: store float %v5, ptr %out5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 192 for VF 16 For instruction: store float %v5, ptr %out5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 19 for VF 2 For instruction: store float %v5, ptr %out5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 42 for VF 4 For instruction: store float %v5, ptr %out5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 90 for VF 8 For instruction: store float %v5, ptr %out5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 180 for VF 16 For instruction: store float %v5, ptr %out5, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4
|
||||
|
||||
@@ -20,17 +20,17 @@ define void @test() {
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 53 for VF 4 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 112 for VF 8 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 224 for VF 16 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 50 for VF 4 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 105 for VF 8 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 210 for VF 16 For instruction: store float %v6, ptr %out6, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 26 for VF 2 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 53 for VF 4 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 112 for VF 8 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 224 for VF 16 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 24 for VF 2 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 50 for VF 4 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 105 for VF 8 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 210 for VF 16 For instruction: store float %v6, ptr %out6, align 4
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4
|
||||
|
||||
@@ -20,17 +20,17 @@ define void @test() {
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 60 for VF 4 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 128 for VF 8 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 256 for VF 16 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 56 for VF 4 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 240 for VF 16 For instruction: store float %v7, ptr %out7, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 26 for VF 2 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 60 for VF 4 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 128 for VF 8 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 256 for VF 16 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 24 for VF 2 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 4 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 120 for VF 8 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 240 for VF 16 For instruction: store float %v7, ptr %out7, align 4
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4
|
||||
|
||||
@@ -24,9 +24,9 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 2 for VF 4 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 35 for VF 8 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 72 for VF 16 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 144 for VF 32 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 8 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 70 for VF 16 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 140 for VF 32 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
@@ -43,7 +43,7 @@ define void @test() {
|
||||
; AVX512DQ: LV: Found an estimated cost of 4 for VF 8 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 5 for VF 16 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 10 for VF 32 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 288 for VF 64 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 284 for VF 64 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
|
||||
@@ -23,10 +23,10 @@ define void @test() {
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 2 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 30 for VF 4 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 53 for VF 8 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 108 for VF 16 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 216 for VF 32 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 29 for VF 4 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 52 for VF 8 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 105 for VF 16 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 210 for VF 32 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
@@ -43,7 +43,7 @@ define void @test() {
|
||||
; AVX512DQ: LV: Found an estimated cost of 15 for VF 8 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 29 for VF 16 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 57 for VF 32 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 432 for VF 64 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 426 for VF 64 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
|
||||
@@ -23,10 +23,10 @@ define void @test() {
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 35 for VF 4 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 70 for VF 8 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 144 for VF 16 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 288 for VF 32 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 4 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 68 for VF 8 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 140 for VF 16 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 280 for VF 32 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
@@ -43,7 +43,7 @@ define void @test() {
|
||||
; AVX512DQ: LV: Found an estimated cost of 11 for VF 8 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 34 for VF 16 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 68 for VF 32 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 576 for VF 64 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 568 for VF 64 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
|
||||
@@ -22,28 +22,28 @@ define void @test() {
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 27 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 45 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 88 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 180 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 360 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 44 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 86 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 175 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 350 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 27 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 45 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 88 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 180 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 360 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 44 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 86 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 175 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 350 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 27 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 47 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 87 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 178 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 360 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 720 for VF 64 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 86 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 176 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 355 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 710 for VF 64 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
|
||||
@@ -22,11 +22,11 @@ define void @test() {
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX1: LV: Found an estimated cost of 30 for VF 2 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX1: LV: Found an estimated cost of 53 for VF 4 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX1: LV: Found an estimated cost of 105 for VF 8 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX1: LV: Found an estimated cost of 216 for VF 16 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX1: LV: Found an estimated cost of 432 for VF 32 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX1: LV: Found an estimated cost of 29 for VF 2 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX1: LV: Found an estimated cost of 52 for VF 4 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX1: LV: Found an estimated cost of 102 for VF 8 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX1: LV: Found an estimated cost of 210 for VF 16 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX1: LV: Found an estimated cost of 420 for VF 32 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
@@ -43,7 +43,7 @@ define void @test() {
|
||||
; AVX512DQ: LV: Found an estimated cost of 23 for VF 8 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 61 for VF 16 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 96 for VF 32 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 864 for VF 64 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 852 for VF 64 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
|
||||
@@ -22,28 +22,28 @@ define void @test() {
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX1: LV: Found an estimated cost of 36 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX1: LV: Found an estimated cost of 65 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX1: LV: Found an estimated cost of 123 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX1: LV: Found an estimated cost of 252 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX1: LV: Found an estimated cost of 504 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX1: LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX1: LV: Found an estimated cost of 63 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX1: LV: Found an estimated cost of 245 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX1: LV: Found an estimated cost of 490 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX2: LV: Found an estimated cost of 36 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX2: LV: Found an estimated cost of 65 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX2: LV: Found an estimated cost of 123 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX2: LV: Found an estimated cost of 252 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX2: LV: Found an estimated cost of 504 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX2: LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX2: LV: Found an estimated cost of 63 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX2: LV: Found an estimated cost of 120 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX2: LV: Found an estimated cost of 245 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX2: LV: Found an estimated cost of 490 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 36 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 66 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 123 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 249 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 504 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1008 for VF 64 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 65 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 122 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 246 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 497 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 994 for VF 64 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
|
||||
@@ -22,28 +22,28 @@ define void @test() {
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX1: LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX1: LV: Found an estimated cost of 70 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX1: LV: Found an estimated cost of 140 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX1: LV: Found an estimated cost of 288 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX1: LV: Found an estimated cost of 576 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX1: LV: Found an estimated cost of 68 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX1: LV: Found an estimated cost of 136 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX1: LV: Found an estimated cost of 280 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX1: LV: Found an estimated cost of 560 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX2: LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX2: LV: Found an estimated cost of 70 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX2: LV: Found an estimated cost of 140 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX2: LV: Found an estimated cost of 288 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX2: LV: Found an estimated cost of 576 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX2: LV: Found an estimated cost of 34 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX2: LV: Found an estimated cost of 68 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX2: LV: Found an estimated cost of 136 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX2: LV: Found an estimated cost of 280 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX2: LV: Found an estimated cost of 560 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 69 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 138 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 284 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 576 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1152 for VF 64 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 34 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 68 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 136 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 280 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 568 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1136 for VF 64 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
|
||||
@@ -22,10 +22,10 @@ define void @test() {
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 19 for VF 4 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 40 for VF 8 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 80 for VF 16 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 160 for VF 32 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 4 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 38 for VF 8 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 76 for VF 16 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 152 for VF 32 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
|
||||
@@ -21,11 +21,11 @@ define void @test() {
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 2 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 29 for VF 4 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 60 for VF 8 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 120 for VF 16 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 240 for VF 32 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 28 for VF 4 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 57 for VF 8 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 114 for VF 16 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 228 for VF 32 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
|
||||
@@ -21,11 +21,11 @@ define void @test() {
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 19 for VF 2 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 38 for VF 4 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 80 for VF 8 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 160 for VF 16 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 320 for VF 32 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 2 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 36 for VF 4 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 76 for VF 8 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 152 for VF 16 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 304 for VF 32 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
|
||||
@@ -20,17 +20,17 @@ define void @test() {
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 25 for VF 2 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 100 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 200 for VF 16 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 46 for VF 4 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 95 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 190 for VF 16 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 25 for VF 2 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 48 for VF 4 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 100 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 200 for VF 16 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 24 for VF 2 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 46 for VF 4 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 95 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 190 for VF 16 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
|
||||
@@ -20,10 +20,10 @@ define void @test() {
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 29 for VF 2 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 57 for VF 4 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 240 for VF 16 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 28 for VF 2 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 114 for VF 8 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 228 for VF 16 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
|
||||
@@ -20,17 +20,17 @@ define void @test() {
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 37 for VF 2 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 67 for VF 4 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 140 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 280 for VF 16 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 35 for VF 2 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 64 for VF 4 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 133 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 266 for VF 16 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 37 for VF 2 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 67 for VF 4 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 140 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 280 for VF 16 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 35 for VF 2 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 64 for VF 4 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 133 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 266 for VF 16 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
|
||||
@@ -20,17 +20,17 @@ define void @test() {
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 38 for VF 2 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 76 for VF 4 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 160 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 320 for VF 16 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 36 for VF 2 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 72 for VF 4 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 152 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 304 for VF 16 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 38 for VF 2 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 76 for VF 4 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 160 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 320 for VF 16 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 36 for VF 2 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 72 for VF 4 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 152 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 304 for VF 16 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
|
||||
@@ -21,11 +21,11 @@ define void @test() {
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 11 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 24 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 48 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 96 for VF 16 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 192 for VF 32 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 10 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 22 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 44 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 88 for VF 16 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 176 for VF 32 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
|
||||
@@ -20,10 +20,10 @@ define void @test() {
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 36 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 72 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 144 for VF 16 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 132 for VF 16 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
|
||||
@@ -20,10 +20,10 @@ define void @test() {
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 22 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 192 for VF 16 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 20 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 44 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 88 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 176 for VF 16 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
|
||||
@@ -19,15 +19,15 @@ define void @test() {
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 28 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 60 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 55 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 110 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 60 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 120 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 26 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 55 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 110 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
|
||||
@@ -19,9 +19,9 @@ define void @test() {
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 72 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 144 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 30 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 132 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
|
||||
@@ -19,15 +19,15 @@ define void @test() {
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 39 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 84 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 168 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 36 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 77 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 154 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 39 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 84 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 168 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 36 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 77 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 154 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
|
||||
@@ -19,15 +19,15 @@ define void @test() {
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; AVX1: LV: Found an estimated cost of 44 for VF 2 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; AVX1: LV: Found an estimated cost of 96 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; AVX1: LV: Found an estimated cost of 192 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; AVX1: LV: Found an estimated cost of 40 for VF 2 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; AVX1: LV: Found an estimated cost of 88 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; AVX1: LV: Found an estimated cost of 176 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; AVX2: LV: Found an estimated cost of 44 for VF 2 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; AVX2: LV: Found an estimated cost of 96 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; AVX2: LV: Found an estimated cost of 192 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; AVX2: LV: Found an estimated cost of 40 for VF 2 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; AVX2: LV: Found an estimated cost of 88 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; AVX2: LV: Found an estimated cost of 176 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
|
||||
@@ -25,8 +25,8 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 2 for VF 4 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 2 for VF 8 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 67 for VF 16 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 136 for VF 32 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 16 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 134 for VF 32 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
@@ -43,7 +43,7 @@ define void @test() {
|
||||
; AVX512DQ: LV: Found an estimated cost of 2 for VF 8 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 4 for VF 16 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 5 for VF 32 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 272 for VF 64 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 270 for VF 64 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
|
||||
@@ -24,9 +24,9 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 2 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 27 for VF 4 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 54 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 101 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 204 for VF 32 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 53 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 100 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 201 for VF 32 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
@@ -43,7 +43,7 @@ define void @test() {
|
||||
; AVX512DQ: LV: Found an estimated cost of 9 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 14 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 15 for VF 32 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 408 for VF 64 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 405 for VF 64 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
|
||||
@@ -24,9 +24,9 @@ define void @test() {
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 4 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 67 for VF 8 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 134 for VF 16 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 272 for VF 32 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 8 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 132 for VF 16 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 268 for VF 32 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
@@ -43,7 +43,7 @@ define void @test() {
|
||||
; AVX512DQ: LV: Found an estimated cost of 5 for VF 8 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 9 for VF 16 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 14 for VF 32 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 544 for VF 64 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 540 for VF 64 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
|
||||
@@ -23,27 +23,27 @@ define void @test() {
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 23 for VF 2 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 47 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 85 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 168 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 340 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 46 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 84 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 166 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 335 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 23 for VF 2 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 47 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 85 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 168 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 340 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 46 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 84 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 166 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 335 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 23 for VF 2 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 47 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 46 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 87 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 167 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 338 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 680 for VF 64 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 166 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 336 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 675 for VF 64 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
|
||||
@@ -23,10 +23,10 @@ define void @test() {
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 27 for VF 2 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 101 for VF 8 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 201 for VF 16 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 408 for VF 32 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 53 for VF 4 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 100 for VF 8 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 198 for VF 16 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 402 for VF 32 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
@@ -43,7 +43,7 @@ define void @test() {
|
||||
; AVX512DQ: LV: Found an estimated cost of 19 for VF 8 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 29 for VF 16 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 93 for VF 32 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 816 for VF 64 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 810 for VF 64 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
|
||||
@@ -23,27 +23,27 @@ define void @test() {
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 2 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX1: LV: Found an estimated cost of 64 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX1: LV: Found an estimated cost of 121 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX1: LV: Found an estimated cost of 235 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX1: LV: Found an estimated cost of 476 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX1: LV: Found an estimated cost of 63 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX1: LV: Found an estimated cost of 119 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX1: LV: Found an estimated cost of 232 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX1: LV: Found an estimated cost of 469 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX2: LV: Found an estimated cost of 33 for VF 2 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX2: LV: Found an estimated cost of 64 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX2: LV: Found an estimated cost of 121 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX2: LV: Found an estimated cost of 235 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX2: LV: Found an estimated cost of 476 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX2: LV: Found an estimated cost of 63 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX2: LV: Found an estimated cost of 119 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX2: LV: Found an estimated cost of 232 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX2: LV: Found an estimated cost of 469 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 33 for VF 2 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 64 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 122 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 235 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 473 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 952 for VF 64 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 63 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 121 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 234 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 470 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 945 for VF 64 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
|
||||
@@ -23,27 +23,27 @@ define void @test() {
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 2 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX1: LV: Found an estimated cost of 67 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX1: LV: Found an estimated cost of 134 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX1: LV: Found an estimated cost of 268 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX1: LV: Found an estimated cost of 544 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX1: LV: Found an estimated cost of 132 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX1: LV: Found an estimated cost of 264 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX1: LV: Found an estimated cost of 536 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX2: LV: Found an estimated cost of 33 for VF 2 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX2: LV: Found an estimated cost of 67 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX2: LV: Found an estimated cost of 134 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX2: LV: Found an estimated cost of 268 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX2: LV: Found an estimated cost of 544 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX2: LV: Found an estimated cost of 66 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX2: LV: Found an estimated cost of 132 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX2: LV: Found an estimated cost of 264 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX2: LV: Found an estimated cost of 536 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 33 for VF 2 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 67 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 133 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 266 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 540 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1088 for VF 64 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 66 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 132 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 264 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 536 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1080 for VF 64 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
|
||||
@@ -50,7 +50,7 @@ define void @test() {
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i64, i64* %inB, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 2 For instruction: %valB.loaded = load i64, i64* %inB, align 8
|
||||
; AVX512: LV: Found an estimated cost of 23 for VF 4 For instruction: %valB.loaded = load i64, i64* %inB, align 8
|
||||
; AVX512: LV: Found an estimated cost of 22 for VF 4 For instruction: %valB.loaded = load i64, i64* %inB, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %valB.loaded = load i64, i64* %inB, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %valB.loaded = load i64, i64* %inB, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %valB.loaded = load i64, i64* %inB, align 8
|
||||
|
||||
@@ -28,8 +28,8 @@ define void @test1(i16* noalias nocapture %points, i16* noalias nocapture readon
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: %i4 = load i16, i16* %arrayidx7, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 8 For instruction: %i2 = load i16, i16* %arrayidx2, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 8 For instruction: %i4 = load i16, i16* %arrayidx7, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 56 for VF 16 For instruction: %i2 = load i16, i16* %arrayidx2, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 56 for VF 16 For instruction: %i4 = load i16, i16* %arrayidx7, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 55 for VF 16 For instruction: %i2 = load i16, i16* %arrayidx2, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 55 for VF 16 For instruction: %i4 = load i16, i16* %arrayidx7, align 2
|
||||
;
|
||||
; ENABLED_MASKED_STRIDED-LABEL: 'test1'
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, i16* %arrayidx2, align 2
|
||||
|
||||
@@ -176,12 +176,12 @@ define i32 @masked_load() {
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0v3i32(<3 x i32>* undef, i32 1, <3 x i1> undef, <3 x i32> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0v1i32(<1 x i32>* undef, i32 1, <1 x i1> undef, <1 x i32> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
|
||||
@@ -235,12 +235,12 @@ define i32 @masked_load() {
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0v3i32(<3 x i32>* undef, i32 1, <3 x i1> undef, <3 x i32> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0v1i32(<1 x i32>* undef, i32 1, <1 x i1> undef, <1 x i32> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
|
||||
@@ -790,24 +790,24 @@ define i32 @masked_gather() {
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
|
||||
@@ -817,24 +817,24 @@ define i32 @masked_gather() {
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
|
||||
@@ -856,12 +856,12 @@ define i32 @masked_gather() {
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
|
||||
@@ -876,19 +876,19 @@ define i32 @masked_gather() {
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 219 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
|
||||
@@ -910,12 +910,12 @@ define i32 @masked_gather() {
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 219 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
|
||||
@@ -1182,24 +1182,24 @@ define i32 @masked_expandload() {
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
|
||||
@@ -1209,24 +1209,24 @@ define i32 @masked_expandload() {
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
|
||||
@@ -1789,13 +1789,13 @@ define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind)
|
||||
; AVX1-LABEL: 'test_gather_16f32_const_mask'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
|
||||
;
|
||||
; AVX2-LABEL: 'test_gather_16f32_const_mask'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
|
||||
;
|
||||
; SKL-LABEL: 'test_gather_16f32_const_mask'
|
||||
@@ -1833,13 +1833,13 @@ define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, <
|
||||
; AVX1-LABEL: 'test_gather_16f32_var_mask'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
|
||||
;
|
||||
; AVX2-LABEL: 'test_gather_16f32_var_mask'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
|
||||
;
|
||||
; SKL-LABEL: 'test_gather_16f32_var_mask'
|
||||
@@ -1877,13 +1877,13 @@ define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i3
|
||||
; AVX1-LABEL: 'test_gather_16f32_ra_var_mask'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
|
||||
;
|
||||
; AVX2-LABEL: 'test_gather_16f32_ra_var_mask'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
|
||||
;
|
||||
; SKL-LABEL: 'test_gather_16f32_ra_var_mask'
|
||||
@@ -1927,7 +1927,7 @@ define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> poison, <16 x i32> zeroinitializer
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
|
||||
;
|
||||
; AVX2-LABEL: 'test_gather_16f32_const_mask2'
|
||||
@@ -1935,7 +1935,7 @@ define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> poison, <16 x i32> zeroinitializer
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
|
||||
;
|
||||
; SKL-LABEL: 'test_gather_16f32_const_mask2'
|
||||
|
||||
@@ -176,12 +176,12 @@ define i32 @masked_load() {
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0v3i32(<3 x i32>* undef, i32 1, <3 x i1> undef, <3 x i32> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0v1i32(<1 x i32>* undef, i32 1, <1 x i1> undef, <1 x i32> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
|
||||
@@ -235,12 +235,12 @@ define i32 @masked_load() {
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0v3i32(<3 x i32>* undef, i32 1, <3 x i1> undef, <3 x i32> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0v1i32(<1 x i32>* undef, i32 1, <1 x i1> undef, <1 x i32> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
|
||||
@@ -790,24 +790,24 @@ define i32 @masked_gather() {
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
|
||||
@@ -817,24 +817,24 @@ define i32 @masked_gather() {
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
|
||||
@@ -856,12 +856,12 @@ define i32 @masked_gather() {
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
|
||||
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
|
||||
@@ -876,19 +876,19 @@ define i32 @masked_gather() {
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 219 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
|
||||
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
|
||||
@@ -910,12 +910,12 @@ define i32 @masked_gather() {
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 219 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
|
||||
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
|
||||
@@ -1182,24 +1182,24 @@ define i32 @masked_expandload() {
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
|
||||
@@ -1209,24 +1209,24 @@ define i32 @masked_expandload() {
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
|
||||
@@ -1789,13 +1789,13 @@ define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind)
|
||||
; AVX1-LABEL: 'test_gather_16f32_const_mask'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
|
||||
;
|
||||
; AVX2-LABEL: 'test_gather_16f32_const_mask'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
|
||||
;
|
||||
; SKL-LABEL: 'test_gather_16f32_const_mask'
|
||||
@@ -1833,13 +1833,13 @@ define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, <
|
||||
; AVX1-LABEL: 'test_gather_16f32_var_mask'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
|
||||
;
|
||||
; AVX2-LABEL: 'test_gather_16f32_var_mask'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
|
||||
;
|
||||
; SKL-LABEL: 'test_gather_16f32_var_mask'
|
||||
@@ -1877,13 +1877,13 @@ define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i3
|
||||
; AVX1-LABEL: 'test_gather_16f32_ra_var_mask'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
|
||||
;
|
||||
; AVX2-LABEL: 'test_gather_16f32_ra_var_mask'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
|
||||
;
|
||||
; SKL-LABEL: 'test_gather_16f32_ra_var_mask'
|
||||
@@ -1927,7 +1927,7 @@ define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
|
||||
;
|
||||
; AVX2-LABEL: 'test_gather_16f32_const_mask2'
|
||||
@@ -1935,7 +1935,7 @@ define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
|
||||
;
|
||||
; SKL-LABEL: 'test_gather_16f32_const_mask2'
|
||||
|
||||
@@ -35,8 +35,8 @@ define i32 @powi_var(i32 %arg) {
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 %arg)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 %arg)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 %arg)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 %arg)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 %arg)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 %arg)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 %arg)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 %arg)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 %arg)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 %arg)
|
||||
@@ -48,8 +48,8 @@ define i32 @powi_var(i32 %arg) {
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 %arg)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 %arg)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 %arg)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 %arg)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 %arg)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 %arg)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 %arg)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 %arg)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 %arg)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 %arg)
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -66,10 +66,10 @@ define void @replication_i16_stride2() nounwind "min-legal-vector-width"="256" {
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
;
|
||||
; AVX512FVEC512-LABEL: 'replication_i16_stride2'
|
||||
@@ -176,11 +176,11 @@ define void @replication_i16_stride3() nounwind "min-legal-vector-width"="256" {
|
||||
; AVX-LABEL: 'replication_i16_stride3'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 284 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
;
|
||||
; AVX512FVEC512-LABEL: 'replication_i16_stride3'
|
||||
@@ -196,11 +196,11 @@ define void @replication_i16_stride3() nounwind "min-legal-vector-width"="256" {
|
||||
; AVX512FVEC256-LABEL: 'replication_i16_stride3'
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 456 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 444 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
;
|
||||
; AVX512BWVEC512-LABEL: 'replication_i16_stride3'
|
||||
@@ -287,11 +287,11 @@ define void @replication_i16_stride4() nounwind "min-legal-vector-width"="256" {
|
||||
; AVX-LABEL: 'replication_i16_stride4'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 356 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 340 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
;
|
||||
; AVX512FVEC512-LABEL: 'replication_i16_stride4'
|
||||
@@ -397,12 +397,12 @@ define void @replication_i16_stride5() nounwind "min-legal-vector-width"="256" {
|
||||
;
|
||||
; AVX-LABEL: 'replication_i16_stride5'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 214 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 428 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
;
|
||||
; AVX512FVEC512-LABEL: 'replication_i16_stride5'
|
||||
@@ -417,12 +417,12 @@ define void @replication_i16_stride5() nounwind "min-legal-vector-width"="256" {
|
||||
;
|
||||
; AVX512FVEC256-LABEL: 'replication_i16_stride5'
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 752 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 183 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 366 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 732 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
;
|
||||
; AVX512BWVEC512-LABEL: 'replication_i16_stride5'
|
||||
@@ -508,12 +508,12 @@ define void @replication_i16_stride6() nounwind "min-legal-vector-width"="256" {
|
||||
;
|
||||
; AVX-LABEL: 'replication_i16_stride6'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 500 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
;
|
||||
; AVX512FVEC512-LABEL: 'replication_i16_stride6'
|
||||
@@ -528,12 +528,12 @@ define void @replication_i16_stride6() nounwind "min-legal-vector-width"="256" {
|
||||
;
|
||||
; AVX512FVEC256-LABEL: 'replication_i16_stride6'
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 225 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 450 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 900 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 219 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 438 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 876 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
;
|
||||
; AVX512BWVEC512-LABEL: 'replication_i16_stride6'
|
||||
@@ -619,12 +619,12 @@ define void @replication_i16_stride7() nounwind "min-legal-vector-width"="256" {
|
||||
;
|
||||
; AVX-LABEL: 'replication_i16_stride7'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 572 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
;
|
||||
; AVX512FVEC512-LABEL: 'replication_i16_stride7'
|
||||
@@ -639,12 +639,12 @@ define void @replication_i16_stride7() nounwind "min-legal-vector-width"="256" {
|
||||
;
|
||||
; AVX512FVEC256-LABEL: 'replication_i16_stride7'
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 524 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1048 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 255 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 510 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1020 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
|
||||
; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
;
|
||||
; AVX512BWVEC512-LABEL: 'replication_i16_stride7'
|
||||
@@ -730,12 +730,12 @@ define void @replication_i16_stride8() nounwind "min-legal-vector-width"="256" {
|
||||
;
|
||||
; AVX-LABEL: 'replication_i16_stride8'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 161 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 322 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 644 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 306 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 612 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
;
|
||||
; AVX512FVEC512-LABEL: 'replication_i16_stride8'
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user