From 7bced745766930e795e4e588366d84fe456311b3 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 2 Dec 2025 12:05:59 +0000 Subject: [PATCH] [X86] combine-icmp.ll - fix copy+paste typo in concat_icmp_v64i8_v16i8 test (#170281) I changed the condcode for variety but failed to update the constant to prevent constant folding --- llvm/test/CodeGen/X86/combine-icmp.ll | 96 +++++++++++++++++++++++---- 1 file changed, 84 insertions(+), 12 deletions(-) diff --git a/llvm/test/CodeGen/X86/combine-icmp.ll b/llvm/test/CodeGen/X86/combine-icmp.ll index ea1ab15f6e9b..603917b52cd5 100644 --- a/llvm/test/CodeGen/X86/combine-icmp.ll +++ b/llvm/test/CodeGen/X86/combine-icmp.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefixes=SSE,SSE2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE,SSE42 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX,AVX512 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefixes=AVX1OR2,AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX1OR2,AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX512 define i4 @concat_icmp_v4i64_v2i64(<2 x i64> %a0, <2 x i64> %a1) { ; SSE2-LABEL: concat_icmp_v4i64_v2i64: @@ -491,17 +491,89 @@ define i32 @concat_icmp_v32i16_v8i16(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2 define i64 @concat_icmp_v64i8_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2, <16 x i8> %a3) { ; SSE-LABEL: concat_icmp_v64i8_v16i8: ; SSE: # %bb.0: -; SSE-NEXT: movq $-1, %rax +; SSE-NEXT: movdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; SSE-NEXT: movdqa %xmm0, %xmm5 +; SSE-NEXT: pmaxub %xmm4, %xmm5 +; SSE-NEXT: pcmpeqb %xmm0, %xmm5 +; SSE-NEXT: pmovmskb %xmm5, %eax +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: pmaxub %xmm4, %xmm0 +; SSE-NEXT: pcmpeqb %xmm1, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %ecx +; SSE-NEXT: shll $16, %ecx +; SSE-NEXT: orl %eax, %ecx +; SSE-NEXT: movdqa %xmm2, %xmm0 +; SSE-NEXT: pmaxub %xmm4, %xmm0 +; SSE-NEXT: pcmpeqb %xmm2, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %edx +; SSE-NEXT: pmaxub %xmm3, %xmm4 +; SSE-NEXT: pcmpeqb %xmm3, %xmm4 +; SSE-NEXT: pmovmskb %xmm4, %eax +; SSE-NEXT: shll $16, %eax +; SSE-NEXT: orl %edx, %eax +; SSE-NEXT: shlq $32, %rax +; SSE-NEXT: orq %rcx, %rax ; SSE-NEXT: retq ; -; AVX-LABEL: concat_icmp_v64i8_v16i8: -; AVX: # %bb.0: -; AVX-NEXT: movq $-1, %rax -; AVX-NEXT: retq - %v0 = icmp uge <16 x i8> %a0, zeroinitializer - %v1 = icmp uge <16 x i8> %a1, zeroinitializer - %v2 = icmp uge <16 x i8> %a2, zeroinitializer - %v3 = icmp uge <16 x i8> %a3, zeroinitializer +; AVX1-LABEL: concat_icmp_v64i8_v16i8: +; AVX1: # %bb.0: +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX1-NEXT: vpmaxub %xmm4, %xmm0, %xmm5 +; AVX1-NEXT: vpcmpeqb %xmm5, %xmm0, %xmm0 +; AVX1-NEXT: vpmaxub %xmm4, %xmm1, %xmm5 +; AVX1-NEXT: vpcmpeqb %xmm5, %xmm1, %xmm1 +; AVX1-NEXT: vpmaxub %xmm4, %xmm2, %xmm5 +; AVX1-NEXT: vpcmpeqb %xmm5, %xmm2, %xmm2 +; AVX1-NEXT: vpmaxub %xmm4, %xmm3, %xmm4 +; AVX1-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm3 +; AVX1-NEXT: vpmovmskb %xmm0, %eax +; AVX1-NEXT: vpmovmskb %xmm1, %ecx +; AVX1-NEXT: shll $16, %ecx +; AVX1-NEXT: orl %eax, %ecx +; AVX1-NEXT: vpmovmskb %xmm2, %edx +; AVX1-NEXT: vpmovmskb %xmm3, %eax +; AVX1-NEXT: shll $16, %eax +; AVX1-NEXT: orl %edx, %eax +; AVX1-NEXT: shlq $32, %rax +; AVX1-NEXT: orq %rcx, %rax +; AVX1-NEXT: retq +; +; AVX2-LABEL: concat_icmp_v64i8_v16i8: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX2-NEXT: vpmaxub %xmm4, %xmm0, %xmm5 +; AVX2-NEXT: vpcmpeqb %xmm5, %xmm0, %xmm0 +; AVX2-NEXT: vpmaxub %xmm4, %xmm1, %xmm5 +; AVX2-NEXT: vpcmpeqb %xmm5, %xmm1, %xmm1 +; AVX2-NEXT: vpmaxub %xmm4, %xmm2, %xmm5 +; AVX2-NEXT: vpcmpeqb %xmm5, %xmm2, %xmm2 +; AVX2-NEXT: vpmaxub %xmm4, %xmm3, %xmm4 +; AVX2-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm3 +; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-NEXT: vpmovmskb %ymm0, %ecx +; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm0 +; AVX2-NEXT: vpmovmskb %ymm0, %eax +; AVX2-NEXT: shlq $32, %rax +; AVX2-NEXT: orq %rcx, %rax +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: concat_icmp_v64i8_v16i8: +; AVX512: # %bb.0: +; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512-NEXT: vpcmpnleub %xmm4, %xmm0, %k0 +; AVX512-NEXT: vpcmpnleub %xmm4, %xmm1, %k1 +; AVX512-NEXT: vpcmpnleub %xmm4, %xmm2, %k2 +; AVX512-NEXT: vpcmpnleub %xmm4, %xmm3, %k3 +; AVX512-NEXT: kunpckwd %k0, %k1, %k0 +; AVX512-NEXT: kunpckwd %k2, %k3, %k1 +; AVX512-NEXT: kunpckdq %k0, %k1, %k0 +; AVX512-NEXT: kmovq %k0, %rax +; AVX512-NEXT: retq + %v0 = icmp ugt <16 x i8> %a0, splat (i8 15) + %v1 = icmp ugt <16 x i8> %a1, splat (i8 15) + %v2 = icmp ugt <16 x i8> %a2, splat (i8 15) + %v3 = icmp ugt <16 x i8> %a3, splat (i8 15) %v01 = shufflevector <16 x i1> %v0, <16 x i1> %v1, <32 x i32> %v23 = shufflevector <16 x i1> %v2, <16 x i1> %v3, <32 x i32> %v = shufflevector <32 x i1> %v01, <32 x i1> %v23, <64 x i32>