mirror of
https://github.com/intel/llvm.git
synced 2026-01-26 12:26:52 +08:00
[X86] getTargetConstantBitsFromNode - support extracting fp data from ConstantDataSequential
Fixes an issue introduced by 0f8e0f4228 where SimplifyDemandedBits could crash when trying to extract floating-point data from broadcast constants.
This commit is contained in:
@@ -7453,9 +7453,19 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
|
||||
if (auto *CDS = dyn_cast<ConstantDataSequential>(Cst)) {
|
||||
Type *Ty = CDS->getType();
|
||||
Mask = APInt::getZero(Ty->getPrimitiveSizeInBits());
|
||||
unsigned EltBits = CDS->getElementType()->getPrimitiveSizeInBits();
|
||||
Type *EltTy = CDS->getElementType();
|
||||
bool IsInteger = EltTy->isIntegerTy();
|
||||
bool IsFP =
|
||||
EltTy->isHalfTy() || EltTy->isFloatTy() || EltTy->isDoubleTy();
|
||||
if (!IsInteger && !IsFP)
|
||||
return false;
|
||||
unsigned EltBits = EltTy->getPrimitiveSizeInBits();
|
||||
for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I)
|
||||
Mask.insertBits(CDS->getElementAsAPInt(I), I * EltBits);
|
||||
if (IsInteger)
|
||||
Mask.insertBits(CDS->getElementAsAPInt(I), I * EltBits);
|
||||
else
|
||||
Mask.insertBits(CDS->getElementAsAPFloat(I).bitcastToAPInt(),
|
||||
I * EltBits);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
||||
@@ -914,6 +914,48 @@ define double @broadcast_scale_xyz(ptr nocapture readonly, ptr nocapture readonl
|
||||
ret double %14
|
||||
}
|
||||
|
||||
;
|
||||
; Broadcast v2f32 non-uniform constant via vmovddup
|
||||
;
|
||||
define void @fmul_by_v2f32_broadcast() nounwind {
|
||||
; X86-LABEL: fmul_by_v2f32_broadcast:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: vmovddup {{.*#+}} xmm0 = [3.1E+1,0.0E+0,3.1E+1,0.0E+0]
|
||||
; X86-NEXT: ## xmm0 = mem[0,0]
|
||||
; X86-NEXT: ## implicit-def: $xmm1
|
||||
; X86-NEXT: .p2align 4, 0x90
|
||||
; X86-NEXT: LBB42_1: ## =>This Inner Loop Header: Depth=1
|
||||
; X86-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
|
||||
; X86-NEXT: vmulps %xmm0, %xmm2, %xmm2
|
||||
; X86-NEXT: vmovlps %xmm2, (%eax)
|
||||
; X86-NEXT: vmulps %xmm0, %xmm1, %xmm1
|
||||
; X86-NEXT: vmovlps %xmm1, (%eax)
|
||||
; X86-NEXT: jmp LBB42_1
|
||||
;
|
||||
; X64-LABEL: fmul_by_v2f32_broadcast:
|
||||
; X64: ## %bb.0:
|
||||
; X64-NEXT: vmovddup {{.*#+}} xmm0 = [3.1E+1,0.0E+0,3.1E+1,0.0E+0]
|
||||
; X64-NEXT: ## xmm0 = mem[0,0]
|
||||
; X64-NEXT: ## implicit-def: $xmm1
|
||||
; X64-NEXT: .p2align 4, 0x90
|
||||
; X64-NEXT: LBB42_1: ## =>This Inner Loop Header: Depth=1
|
||||
; X64-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
|
||||
; X64-NEXT: vmulps %xmm0, %xmm2, %xmm2
|
||||
; X64-NEXT: vmovlps %xmm2, (%rax)
|
||||
; X64-NEXT: vmulps %xmm0, %xmm1, %xmm1
|
||||
; X64-NEXT: vmovlps %xmm1, (%rax)
|
||||
; X64-NEXT: jmp LBB42_1
|
||||
br label %1
|
||||
1:
|
||||
%2 = phi <2 x float> [ undef, %0 ], [ %5, %1 ]
|
||||
%3 = load <2 x float>, ptr poison, align 8
|
||||
%4 = fmul <2 x float> %3, <float 3.100000e+01, float 0.000000e+00>
|
||||
store <2 x float> %4, ptr poison, align 8
|
||||
%5 = fmul <2 x float> %2, <float 3.100000e+01, float 0.000000e+00>
|
||||
store <2 x float> %5, ptr poison, align 8
|
||||
br label %1
|
||||
}
|
||||
|
||||
;
|
||||
; When VBROADCAST replaces an existing load, ensure it still respects lifetime dependencies.
|
||||
;
|
||||
|
||||
Reference in New Issue
Block a user