From 81eb63ad7f328a8c2f0293d85aecde18b6edf16a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 25 Jul 2025 15:49:47 +0100 Subject: [PATCH] [CostModel][X86] Complicate the cross lane single/two source shuffle masks Try to ensure shuffle masks don't simplify too much to easier shuffle kinds when splitting --- .../CostModel/X86/shuffle-single-src-fp16.ll | 16 +- .../CostModel/X86/shuffle-single-src.ll | 228 +++++++++-------- .../Analysis/CostModel/X86/shuffle-two-src.ll | 232 ++++++++---------- 3 files changed, 237 insertions(+), 239 deletions(-) diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-single-src-fp16.ll b/llvm/test/Analysis/CostModel/X86/shuffle-single-src-fp16.ll index 007aa59bc497..21d576677de0 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-single-src-fp16.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-single-src-fp16.ll @@ -3,15 +3,15 @@ define void @test_vXf16(<8 x half> %src128, <16 x half> %src256, <32 x half> %src512, <64 x half> %src1024) { ; CHECK-LABEL: 'test_vXf16' -; CHECK-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V1024 = shufflevector <64 x half> %src1024, <64 x half> undef, <64 x i32> +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V1024 = shufflevector <64 x half> %src1024, <64 x half> undef, <64 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> - %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> - %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> - %V1024 = shufflevector <64 x half> %src1024, <64 x half> undef, <64 x i32> + %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> + %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> + %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> + %V1024 = shufflevector <64 x half> %src1024, <64 x half> undef, <64 x i32> ret void } diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll b/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll index 3727d7b64d7a..a7732920dd24 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll @@ -21,42 +21,42 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> ; SSE-LABEL: 'test_vXf64' ; SSE-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> ; SSE-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> +; SSE-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'test_vXf64' ; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> ; XOP-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> -; XOP-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> -; XOP-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> +; XOP-NEXT: Cost Model: Found costs of 5 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; XOP-NEXT: Cost Model: Found costs of 11 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'test_vXf64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> ; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found costs of 5 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found costs of 11 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXf64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found costs of 4 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found costs of 10 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXf64' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> - %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> - %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> + %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> + %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> ret void } @@ -64,79 +64,93 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) ; SSE-LABEL: 'test_vXi64' ; SSE-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> ; SSE-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'test_vXi64' ; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> ; XOP-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> -; XOP-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; XOP-NEXT: Cost Model: Found costs of 5 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'test_vXi64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> ; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found costs of 5 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXi64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXi64' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> - %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> + %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> ret void } define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512) { -; SSE-LABEL: 'test_vXf32' -; SSE-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> -; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; SSE2-LABEL: 'test_vXf32' +; SSE2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found costs of 11 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; +; SSSE3-LABEL: 'test_vXf32' +; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found costs of 11 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; +; SSE42-LABEL: 'test_vXf32' +; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found costs of 10 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'test_vXf32' ; XOP-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> ; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> ; XOP-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> -; XOP-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; XOP-NEXT: Cost Model: Found costs of 12 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'test_vXf32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> ; AVX1-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found costs of 12 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXf32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found costs of 9 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXf32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> - %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> + %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> ret void } @@ -144,48 +158,48 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; SSE-LABEL: 'test_vXi32' ; SSE-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> ; SSE-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> -; SSE-NEXT: Cost Model: Found costs of 10 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> +; SSE-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found costs of 11 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; SSE-NEXT: Cost Model: Found costs of 18 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'test_vXi32' ; XOP-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> ; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> -; XOP-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> -; XOP-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> -; XOP-NEXT: Cost Model: Found costs of 12 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> +; XOP-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; XOP-NEXT: Cost Model: Found costs of 12 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; XOP-NEXT: Cost Model: Found costs of 26 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'test_vXi32' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found costs of 14 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found costs of 12 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found costs of 26 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXi32' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found costs of 9 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found costs of 20 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXi32' ; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> - %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> - %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> - %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> + %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> + %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> + %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> ret void } @@ -193,90 +207,90 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE2-LABEL: 'test_vXi16' ; SSE2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:5 SizeLat:5 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:24 Lat:24 SizeLat:24 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found costs of RThru:24 CodeSize:26 Lat:26 SizeLat:26 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:5 SizeLat:5 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:24 Lat:24 SizeLat:24 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found costs of RThru:42 CodeSize:56 Lat:56 SizeLat:56 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found costs of RThru:72 CodeSize:94 Lat:94 SizeLat:94 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSSE3-LABEL: 'test_vXi16' ; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found costs of 9 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found costs of 21 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found costs of 39 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'test_vXi16' ; SSE42-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found costs of 9 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found costs of 21 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found costs of 35 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'test_vXi16' ; XOP-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; XOP-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> -; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; XOP-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; XOP-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> -; XOP-NEXT: Cost Model: Found costs of 16 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; XOP-NEXT: Cost Model: Found costs of 9 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; XOP-NEXT: Cost Model: Found costs of 18 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; XOP-NEXT: Cost Model: Found costs of 63 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'test_vXi16' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found costs of 8 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found costs of 16 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> -; AVX1-NEXT: Cost Model: Found costs of 20 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found costs of 15 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found costs of 30 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found costs of 99 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXi16' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> -; AVX2-NEXT: Cost Model: Found costs of 10 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found costs of 14 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found costs of 44 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'test_vXi16' ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> -; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found costs of 14 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found costs of 21 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found costs of 14 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found costs of 84 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'test_vXi16' ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found costs of 4 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found costs of 4 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI-LABEL: 'test_vXi16' ; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> -; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> -; AVX512VBMI-NEXT: Cost Model: Found costs of 4 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found costs of 4 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> ; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> - %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> - %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> - %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> - %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> + %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> + %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> + %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> + %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> ret void } @@ -286,8 +300,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSE2-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found costs of 5 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found costs of RThru:17 CodeSize:19 Lat:19 SizeLat:19 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found costs of RThru:35 CodeSize:37 Lat:37 SizeLat:37 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found costs of RThru:22 CodeSize:26 Lat:26 SizeLat:26 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found costs of RThru:77 CodeSize:91 Lat:91 SizeLat:91 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSSE3-LABEL: 'test_vXi8' @@ -295,8 +309,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found costs of 6 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found costs of 21 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'test_vXi8' @@ -304,8 +318,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSE42-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found costs of 6 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found costs of 21 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'test_vXi8' @@ -313,8 +327,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; XOP-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; XOP-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; XOP-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; XOP-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; XOP-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; XOP-NEXT: Cost Model: Found costs of 18 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'test_vXi8' @@ -322,8 +336,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found costs of 8 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; AVX1-NEXT: Cost Model: Found costs of 12 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX1-NEXT: Cost Model: Found costs of 8 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found costs of 30 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXi8' @@ -331,8 +345,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; AVX2-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX2-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found costs of 14 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'test_vXi8' @@ -340,8 +354,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found costs of 14 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found costs of 14 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'test_vXi8' @@ -349,8 +363,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI-LABEL: 'test_vXi8' @@ -358,16 +372,16 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> - %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> - %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> + %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> + %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ret void } diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll b/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll index 56afc013e6e0..30163163b0db 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll @@ -22,41 +22,41 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> ; SSE-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> ; SSE-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> ; SSE-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> -; SSE-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; SSE-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'test_vXf64' ; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> ; XOP-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> ; XOP-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> -; XOP-NEXT: Cost Model: Found costs of 12 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; XOP-NEXT: Cost Model: Found costs of 16 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'test_vXf64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> ; AVX1-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> ; AVX1-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> -; AVX1-NEXT: Cost Model: Found costs of 12 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found costs of 16 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXf64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> ; AVX2-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> ; AVX2-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> -; AVX2-NEXT: Cost Model: Found costs of 11 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found costs of 16 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXf64' ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> -; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:9 SizeLat:3 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:12 SizeLat:4 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> - %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> + %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> ret void } @@ -65,41 +65,41 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512, ; SSE-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> ; SSE-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> ; SSE-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> -; SSE-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; SSE-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> ; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'test_vXi64' ; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> ; XOP-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> ; XOP-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> -; XOP-NEXT: Cost Model: Found costs of 12 for: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; XOP-NEXT: Cost Model: Found costs of 16 for: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'test_vXi64' ; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> ; AVX1-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> ; AVX1-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> -; AVX1-NEXT: Cost Model: Found costs of 12 for: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found costs of 16 for: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXi64' ; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> ; AVX2-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> ; AVX2-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> -; AVX2-NEXT: Cost Model: Found costs of 11 for: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found costs of 16 for: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXi64' ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> -; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:9 SizeLat:3 for: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:12 SizeLat:4 for: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> - %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> + %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> ret void } @@ -107,129 +107,113 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; SSE2-LABEL: 'test_vXf32' ; SSE2-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> ; SSE2-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> -; SSE2-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> -; SSE2-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> -; SSE2-NEXT: Cost Model: Found costs of 14 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; SSE2-NEXT: Cost Model: Found costs of 10 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found costs of 22 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSSE3-LABEL: 'test_vXf32' ; SSSE3-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> ; SSSE3-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> -; SSSE3-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> -; SSSE3-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> -; SSSE3-NEXT: Cost Model: Found costs of 14 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; SSSE3-NEXT: Cost Model: Found costs of 10 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found costs of 22 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'test_vXf32' ; SSE42-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> ; SSE42-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> -; SSE42-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> -; SSE42-NEXT: Cost Model: Found costs of 7 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> -; SSE42-NEXT: Cost Model: Found costs of 13 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; SSE42-NEXT: Cost Model: Found costs of 9 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found costs of 20 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'test_vXf32' ; XOP-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> ; XOP-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> -; XOP-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> -; XOP-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> -; XOP-NEXT: Cost Model: Found costs of 17 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; XOP-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; XOP-NEXT: Cost Model: Found costs of 10 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; XOP-NEXT: Cost Model: Found costs of 27 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'test_vXf32' ; AVX1-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> ; AVX1-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> -; AVX1-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> -; AVX1-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> -; AVX1-NEXT: Cost Model: Found costs of 17 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; AVX1-NEXT: Cost Model: Found costs of 10 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found costs of 27 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXf32' ; AVX2-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> ; AVX2-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> -; AVX2-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> -; AVX2-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> -; AVX2-NEXT: Cost Model: Found costs of 13 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; AVX2-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found costs of 21 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXf32' ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> -; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> -; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> -; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:9 SizeLat:3 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:12 SizeLat:4 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> - %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> - %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> - %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> + %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> + %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> + %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> ret void } define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512, <32 x i32> %src1024, <2 x i32> %src64_1, <4 x i32> %src128_1, <8 x i32> %src256_1, <16 x i32> %src512_1, <32 x i32> %src1024_1) { -; SSE2-LABEL: 'test_vXi32' -; SSE2-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> -; SSE2-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> -; SSE2-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> -; SSE2-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> -; SSE2-NEXT: Cost Model: Found costs of 14 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> -; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void -; -; SSSE3-LABEL: 'test_vXi32' -; SSSE3-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> -; SSSE3-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> -; SSSE3-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> -; SSSE3-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> -; SSSE3-NEXT: Cost Model: Found costs of 14 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> -; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void -; -; SSE42-LABEL: 'test_vXi32' -; SSE42-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> -; SSE42-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> -; SSE42-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> -; SSE42-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> -; SSE42-NEXT: Cost Model: Found costs of 13 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> -; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; SSE-LABEL: 'test_vXi32' +; SSE-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; SSE-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; SSE-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; SSE-NEXT: Cost Model: Found costs of 10 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; SSE-NEXT: Cost Model: Found costs of 23 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'test_vXi32' ; XOP-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> ; XOP-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> -; XOP-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> -; XOP-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> -; XOP-NEXT: Cost Model: Found costs of 17 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; XOP-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; XOP-NEXT: Cost Model: Found costs of 10 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; XOP-NEXT: Cost Model: Found costs of 27 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'test_vXi32' ; AVX1-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> ; AVX1-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> -; AVX1-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> -; AVX1-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> -; AVX1-NEXT: Cost Model: Found costs of 17 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; AVX1-NEXT: Cost Model: Found costs of 10 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found costs of 27 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXi32' ; AVX2-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> ; AVX2-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> -; AVX2-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> -; AVX2-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> -; AVX2-NEXT: Cost Model: Found costs of 13 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; AVX2-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found costs of 21 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXi32' ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> -; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> -; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> -; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:9 SizeLat:3 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:12 SizeLat:4 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> - %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> - %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> - %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> + %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> + %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> + %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> ret void } @@ -238,89 +222,89 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE2-NEXT: Cost Model: Found costs of 2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> ; SSE2-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; SSE2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> -; SSE2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:11 Lat:11 SizeLat:11 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; SSE2-NEXT: Cost Model: Found costs of RThru:21 CodeSize:27 Lat:27 SizeLat:27 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; SSE2-NEXT: Cost Model: Found costs of RThru:30 CodeSize:36 Lat:36 SizeLat:36 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSE2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:24 Lat:24 SizeLat:24 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found costs of RThru:39 CodeSize:51 Lat:51 SizeLat:51 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found costs of RThru:69 CodeSize:91 Lat:91 SizeLat:91 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSSE3-LABEL: 'test_vXi16' ; SSSE3-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> ; SSSE3-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; SSSE3-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> -; SSSE3-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; SSSE3-NEXT: Cost Model: Found costs of 10 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; SSSE3-NEXT: Cost Model: Found costs of 12 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSSE3-NEXT: Cost Model: Found costs of 9 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found costs of 21 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found costs of 37 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'test_vXi16' ; SSE42-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> ; SSE42-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; SSE42-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> -; SSE42-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; SSE42-NEXT: Cost Model: Found costs of 10 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; SSE42-NEXT: Cost Model: Found costs of 12 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSE42-NEXT: Cost Model: Found costs of 9 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found costs of 19 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found costs of 33 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'test_vXi16' ; XOP-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> ; XOP-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> -; XOP-NEXT: Cost Model: Found costs of 9 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; XOP-NEXT: Cost Model: Found costs of 18 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; XOP-NEXT: Cost Model: Found costs of 26 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; XOP-NEXT: Cost Model: Found costs of 9 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; XOP-NEXT: Cost Model: Found costs of 30 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; XOP-NEXT: Cost Model: Found costs of 54 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'test_vXi16' ; AVX1-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> ; AVX1-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; AVX1-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> -; AVX1-NEXT: Cost Model: Found costs of 15 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; AVX1-NEXT: Cost Model: Found costs of 30 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; AVX1-NEXT: Cost Model: Found costs of 42 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX1-NEXT: Cost Model: Found costs of 15 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found costs of 42 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found costs of 90 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXi16' ; AVX2-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> ; AVX2-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; AVX2-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> -; AVX2-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; AVX2-NEXT: Cost Model: Found costs of 14 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; AVX2-NEXT: Cost Model: Found costs of 20 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX2-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found costs of 17 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found costs of 42 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'test_vXi16' ; AVX512F-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> ; AVX512F-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; AVX512F-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> -; AVX512F-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; AVX512F-NEXT: Cost Model: Found costs of 42 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; AVX512F-NEXT: Cost Model: Found costs of 84 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX512F-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX512F-NEXT: Cost Model: Found costs of 42 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX512F-NEXT: Cost Model: Found costs of 168 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'test_vXi16' ; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> ; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found costs of 4 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI-LABEL: 'test_vXi16' ; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> ; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> ; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> -; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; AVX512VBMI-NEXT: Cost Model: Found costs of 4 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> - %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> - %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> - %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> + %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> + %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> + %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ret void } @@ -330,8 +314,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSE2-NEXT: Cost Model: Found costs of 4 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> ; SSE2-NEXT: Cost Model: Found costs of 7 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; SSE2-NEXT: Cost Model: Found costs of RThru:11 CodeSize:13 Lat:13 SizeLat:13 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> -; SSE2-NEXT: Cost Model: Found costs of RThru:19 CodeSize:23 Lat:23 SizeLat:23 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; SSE2-NEXT: Cost Model: Found costs of RThru:37 CodeSize:41 Lat:41 SizeLat:41 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSE2-NEXT: Cost Model: Found costs of RThru:36 CodeSize:42 Lat:42 SizeLat:42 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found costs of RThru:66 CodeSize:78 Lat:78 SizeLat:78 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSSE3-LABEL: 'test_vXi8' @@ -339,8 +323,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSSE3-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> ; SSSE3-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; SSSE3-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> -; SSSE3-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; SSSE3-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSSE3-NEXT: Cost Model: Found costs of 12 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found costs of 18 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SSE42-LABEL: 'test_vXi8' @@ -348,8 +332,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; SSE42-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> ; SSE42-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; SSE42-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> -; SSE42-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; SSE42-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSE42-NEXT: Cost Model: Found costs of 10 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found costs of 18 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; XOP-LABEL: 'test_vXi8' @@ -357,8 +341,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; XOP-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> ; XOP-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> -; XOP-NEXT: Cost Model: Found costs of 9 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; XOP-NEXT: Cost Model: Found costs of 13 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; XOP-NEXT: Cost Model: Found costs of 9 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; XOP-NEXT: Cost Model: Found costs of 36 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX1-LABEL: 'test_vXi8' @@ -366,8 +350,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX1-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> ; AVX1-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; AVX1-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> -; AVX1-NEXT: Cost Model: Found costs of 15 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; AVX1-NEXT: Cost Model: Found costs of 23 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX1-NEXT: Cost Model: Found costs of 15 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found costs of 54 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX2-LABEL: 'test_vXi8' @@ -375,8 +359,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX2-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> ; AVX2-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; AVX2-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> -; AVX2-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; AVX2-NEXT: Cost Model: Found costs of 11 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX2-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found costs of 23 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512F-LABEL: 'test_vXi8' @@ -384,8 +368,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX512F-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> ; AVX512F-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; AVX512F-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> -; AVX512F-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; AVX512F-NEXT: Cost Model: Found costs of 42 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX512F-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX512F-NEXT: Cost Model: Found costs of 42 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512BW-LABEL: 'test_vXi8' @@ -393,8 +377,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX512BW-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; AVX512BW-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found costs of 19 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found costs of 19 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512VBMI-LABEL: 'test_vXi8' @@ -402,15 +386,15 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> ; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> -; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> - %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> - %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> + %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> + %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ret void }