[X86] getTargetConstantBitsFromNode - support extracting fp data from ConstantDataSequential

Fixes an issue introduced by 0f8e0f4228 where SimplifyDemandedBits could crash when trying to extract fp data from broadcasted constants.
This commit is contained in:
Simon Pilgrim
2023-05-30 11:38:20 +01:00
parent d0a4dcf52f
commit 95661b9c75
2 changed files with 54 additions and 2 deletions

View File

@@ -7453,9 +7453,19 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
if (auto *CDS = dyn_cast<ConstantDataSequential>(Cst)) {
Type *Ty = CDS->getType();
Mask = APInt::getZero(Ty->getPrimitiveSizeInBits());
unsigned EltBits = CDS->getElementType()->getPrimitiveSizeInBits();
Type *EltTy = CDS->getElementType();
bool IsInteger = EltTy->isIntegerTy();
bool IsFP =
EltTy->isHalfTy() || EltTy->isFloatTy() || EltTy->isDoubleTy();
if (!IsInteger && !IsFP)
return false;
unsigned EltBits = EltTy->getPrimitiveSizeInBits();
for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I)
Mask.insertBits(CDS->getElementAsAPInt(I), I * EltBits);
if (IsInteger)
Mask.insertBits(CDS->getElementAsAPInt(I), I * EltBits);
else
Mask.insertBits(CDS->getElementAsAPFloat(I).bitcastToAPInt(),
I * EltBits);
return true;
}
return false;

View File

@@ -914,6 +914,48 @@ define double @broadcast_scale_xyz(ptr nocapture readonly, ptr nocapture readonl
ret double %14
}
;
; Broadcast v2f32 non-uniform constant via vmovddup
;
define void @fmul_by_v2f32_broadcast() nounwind {
; X86-LABEL: fmul_by_v2f32_broadcast:
; X86: ## %bb.0:
; X86-NEXT: vmovddup {{.*#+}} xmm0 = [3.1E+1,0.0E+0,3.1E+1,0.0E+0]
; X86-NEXT: ## xmm0 = mem[0,0]
; X86-NEXT: ## implicit-def: $xmm1
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: LBB42_1: ## =>This Inner Loop Header: Depth=1
; X86-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT: vmulps %xmm0, %xmm2, %xmm2
; X86-NEXT: vmovlps %xmm2, (%eax)
; X86-NEXT: vmulps %xmm0, %xmm1, %xmm1
; X86-NEXT: vmovlps %xmm1, (%eax)
; X86-NEXT: jmp LBB42_1
;
; X64-LABEL: fmul_by_v2f32_broadcast:
; X64: ## %bb.0:
; X64-NEXT: vmovddup {{.*#+}} xmm0 = [3.1E+1,0.0E+0,3.1E+1,0.0E+0]
; X64-NEXT: ## xmm0 = mem[0,0]
; X64-NEXT: ## implicit-def: $xmm1
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: LBB42_1: ## =>This Inner Loop Header: Depth=1
; X64-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
; X64-NEXT: vmulps %xmm0, %xmm2, %xmm2
; X64-NEXT: vmovlps %xmm2, (%rax)
; X64-NEXT: vmulps %xmm0, %xmm1, %xmm1
; X64-NEXT: vmovlps %xmm1, (%rax)
; X64-NEXT: jmp LBB42_1
; The entry block branches unconditionally into block 1, which only ever
; branches back to itself, so the function never returns.
br label %1
1:
; Loop-carried value: undef when entered from the entry block, otherwise the
; previous iteration's %5.
%2 = phi <2 x float> [ undef, %0 ], [ %5, %1 ]
; The load/store addresses are poison; presumably only the fmul-by-constant
; pattern matters to this test, not the memory locations.
%3 = load <2 x float>, ptr poison, align 8
; Both fmuls below use the same non-uniform constant <31.0, 0.0>. The check
; lines above expect it to be loaded once with vmovddup, as the 4-element
; pattern [31, 0, 31, 0], and reused by both vmulps instructions.
%4 = fmul <2 x float> %3, <float 3.100000e+01, float 0.000000e+00>
store <2 x float> %4, ptr poison, align 8
%5 = fmul <2 x float> %2, <float 3.100000e+01, float 0.000000e+00>
store <2 x float> %5, ptr poison, align 8
br label %1
}
;
; When VBROADCAST replaces an existing load, ensure it still respects lifetime dependencies.
;