[X86] getTargetConstantBitsFromNode: extract bits from FP ConstantDataSequential

When the constant is a ConstantDataSequential, only integer elements can be
read with getElementAsAPInt. Read half/float/double elements through
getElementAsAPFloat().bitcastToAPInt() instead, and bail out (return false)
for any other element type rather than attempting the extraction.

The new test covers a non-uniform <2 x float> constant that is broadcast
with vmovddup.

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9b433280d5d9..a89ab94c9e0d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7453,9 +7453,19 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
   if (auto *CDS = dyn_cast<ConstantDataSequential>(Cst)) {
     Type *Ty = CDS->getType();
     Mask = APInt::getZero(Ty->getPrimitiveSizeInBits());
-    unsigned EltBits = CDS->getElementType()->getPrimitiveSizeInBits();
+    Type *EltTy = CDS->getElementType();
+    bool IsInteger = EltTy->isIntegerTy();
+    bool IsFP =
+        EltTy->isHalfTy() || EltTy->isFloatTy() || EltTy->isDoubleTy();
+    if (!IsInteger && !IsFP)
+      return false;
+    unsigned EltBits = EltTy->getPrimitiveSizeInBits();
     for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I)
-      Mask.insertBits(CDS->getElementAsAPInt(I), I * EltBits);
+      if (IsInteger)
+        Mask.insertBits(CDS->getElementAsAPInt(I), I * EltBits);
+      else
+        Mask.insertBits(CDS->getElementAsAPFloat(I).bitcastToAPInt(),
+                        I * EltBits);
     return true;
   }
   return false;
diff --git a/llvm/test/CodeGen/X86/avx-vbroadcast.ll b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
index 54bce767f1fc..b442a6337e3b 100644
--- a/llvm/test/CodeGen/X86/avx-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
@@ -914,6 +914,48 @@ define double @broadcast_scale_xyz(ptr nocapture readonly, ptr nocapture readonl
   ret double %14
 }
 
+;
+; Broadcast v2f32 non-uniform constant via vmovddup
+;
+define void @fmul_by_v2f32_broadcast() nounwind {
+; X86-LABEL: fmul_by_v2f32_broadcast:
+; X86: ## %bb.0:
+; X86-NEXT: vmovddup {{.*#+}} xmm0 = [3.1E+1,0.0E+0,3.1E+1,0.0E+0]
+; X86-NEXT: ## xmm0 = mem[0,0]
+; X86-NEXT: ## implicit-def: $xmm1
+; X86-NEXT: .p2align 4, 0x90
+; X86-NEXT: LBB42_1: ## =>This Inner Loop Header: Depth=1
+; X86-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
+; X86-NEXT: vmulps %xmm0, %xmm2, %xmm2
+; X86-NEXT: vmovlps %xmm2, (%eax)
+; X86-NEXT: vmulps %xmm0, %xmm1, %xmm1
+; X86-NEXT: vmovlps %xmm1, (%eax)
+; X86-NEXT: jmp LBB42_1
+;
+; X64-LABEL: fmul_by_v2f32_broadcast:
+; X64: ## %bb.0:
+; X64-NEXT: vmovddup {{.*#+}} xmm0 = [3.1E+1,0.0E+0,3.1E+1,0.0E+0]
+; X64-NEXT: ## xmm0 = mem[0,0]
+; X64-NEXT: ## implicit-def: $xmm1
+; X64-NEXT: .p2align 4, 0x90
+; X64-NEXT: LBB42_1: ## =>This Inner Loop Header: Depth=1
+; X64-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
+; X64-NEXT: vmulps %xmm0, %xmm2, %xmm2
+; X64-NEXT: vmovlps %xmm2, (%rax)
+; X64-NEXT: vmulps %xmm0, %xmm1, %xmm1
+; X64-NEXT: vmovlps %xmm1, (%rax)
+; X64-NEXT: jmp LBB42_1
+  br label %1
+1:
+  %2 = phi <2 x float> [ undef, %0 ], [ %5, %1 ]
+  %3 = load <2 x float>, ptr poison, align 8
+  %4 = fmul <2 x float> %3, <float 3.100000e+01, float 0.000000e+00>
+  store <2 x float> %4, ptr poison, align 8
+  %5 = fmul <2 x float> %2, <float 3.100000e+01, float 0.000000e+00>
+  store <2 x float> %5, ptr poison, align 8
+  br label %1
+}
+
 ;
 ; When VBROADCAST replaces an existing load, ensure it still respects lifetime dependencies.
 ;