GlobalISel: Handle lowering non-power-of-2 extloads

This commit is contained in:
Matt Arsenault
2021-06-10 09:28:20 -04:00
parent eac1670739
commit 47269da5d8
5 changed files with 446 additions and 54 deletions

View File

@@ -2849,60 +2849,62 @@ LegalizerHelper::lowerLoad(MachineInstr &MI) {
return Legalized;
}
if (DstTy.getSizeInBits() != MMO.getSizeInBits())
// This load needs splitting into power of 2 sized loads.
if (DstTy.isVector())
return UnableToLegalize;
if (isPowerOf2_32(MemSizeInBits))
return UnableToLegalize; // Don't know what we're being asked to do.
// Big endian lowering not implemented.
if (MIRBuilder.getDataLayout().isBigEndian())
return UnableToLegalize;
if (MI.getOpcode() == TargetOpcode::G_LOAD) {
// This load needs splitting into power of 2 sized loads.
if (DstTy.isVector())
return UnableToLegalize;
if (isPowerOf2_32(DstTy.getSizeInBits()))
return UnableToLegalize; // Don't know what we're being asked to do.
// Our strategy here is to generate anyextending loads for the smaller
// types up to next power-2 result type, and then combine the two larger
// result values together, before truncating back down to the non-pow-2
// type.
// E.g. v1 = i24 load =>
// v2 = i32 zextload (2 byte)
// v3 = i32 load (1 byte)
// v4 = i32 shl v3, 16
// v5 = i32 or v4, v2
// v1 = i24 trunc v5
// By doing this we generate the correct truncate which should get
// combined away as an artifact with a matching extend.
uint64_t LargeSplitSize = PowerOf2Floor(MemSizeInBits);
uint64_t SmallSplitSize = MemSizeInBits - LargeSplitSize;
// Our strategy here is to generate anyextending loads for the smaller
// types up to next power-2 result type, and then combine the two larger
// result values together, before truncating back down to the non-pow-2
// type.
// E.g. v1 = i24 load =>
// v2 = i32 zextload (2 byte)
// v3 = i32 load (1 byte)
// v4 = i32 shl v3, 16
// v5 = i32 or v4, v2
// v1 = i24 trunc v5
// By doing this we generate the correct truncate which should get
// combined away as an artifact with a matching extend.
uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
MachineMemOperand *LargeMMO =
MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
MachineFunction &MF = MIRBuilder.getMF();
MachineMemOperand *LargeMMO =
MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
LLT PtrTy = MRI.getType(PtrReg);
unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
LLT AnyExtTy = LLT::scalar(AnyExtSize);
auto LargeLoad = MIRBuilder.buildLoadInstr(
TargetOpcode::G_ZEXTLOAD, AnyExtTy, PtrReg, *LargeMMO);
LLT PtrTy = MRI.getType(PtrReg);
unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
LLT AnyExtTy = LLT::scalar(AnyExtSize);
auto LargeLoad = MIRBuilder.buildLoadInstr(
TargetOpcode::G_ZEXTLOAD, AnyExtTy, PtrReg, *LargeMMO);
auto OffsetCst = MIRBuilder.buildConstant(
LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
auto SmallPtr =
MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
auto SmallLoad = MIRBuilder.buildLoadInstr(
MI.getOpcode(), AnyExtTy, SmallPtr, *SmallMMO);
auto OffsetCst = MIRBuilder.buildConstant(
LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
auto SmallPtr =
MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
auto SmallLoad = MIRBuilder.buildLoad(AnyExtTy, SmallPtr,
*SmallMMO);
auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
if (AnyExtTy == DstTy)
MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
else {
auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
MIRBuilder.buildTrunc(DstReg, {Or});
MI.eraseFromParent();
return Legalized;
}
return UnableToLegalize;
MI.eraseFromParent();
return Legalized;
}
LegalizerHelper::LegalizeResult

View File

@@ -1274,6 +1274,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.lower();
}
// FIXME: Unaligned accesses not lowered.
auto &ExtLoads = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
.legalForTypesWithMemDesc({{S32, GlobalPtr, S8, 8},
{S32, GlobalPtr, S16, 2 * 8},
@@ -1302,7 +1303,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
ExtLoads.clampScalar(0, S32, S32)
.widenScalarToNextPow2(0)
.unsupportedIfMemSizeNotPow2()
.lower();
auto &Atomics = getActionDefinitionsBuilder(

View File

@@ -15721,6 +15721,374 @@ body: |
$vgpr0 = COPY %1
...
---
name: test_ext_load_global_s32_from_s24_align1
body: |
bb.0:
liveins: $vgpr0_vgpr1
; SI-LABEL: name: test_ext_load_global_s32_from_s24_align1
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
; SI: $vgpr0 = COPY [[COPY4]](s32)
; CI-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align1
; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
; CI-HSA: $vgpr0 = COPY [[OR]](s32)
; CI-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align1
; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
; CI-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CI-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
; CI-MESA: $vgpr0 = COPY [[COPY4]](s32)
; VI-LABEL: name: test_ext_load_global_s32_from_s24_align1
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
; VI: $vgpr0 = COPY [[COPY1]](s32)
; GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align1
; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
; GFX9-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
; GFX9-HSA: $vgpr0 = COPY [[OR]](s32)
; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align1
; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = G_LOAD %0 :: (load (s24), align 1, addrspace 1)
$vgpr0 = COPY %1
...
---
name: test_ext_load_global_s32_from_s24_align2
body: |
bb.0:
liveins: $vgpr0_vgpr1
; SI-LABEL: name: test_ext_load_global_s32_from_s24_align2
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; SI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32)
; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
; SI: $vgpr0 = COPY [[COPY5]](s32)
; CI-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align2
; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1)
; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
; CI-HSA: $vgpr0 = COPY [[OR]](s32)
; CI-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align2
; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
; CI-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CI-MESA: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32)
; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
; CI-MESA: $vgpr0 = COPY [[COPY5]](s32)
; VI-LABEL: name: test_ext_load_global_s32_from_s24_align2
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; VI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
; VI: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16)
; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]]
; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16)
; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
; VI: $vgpr0 = COPY [[COPY1]](s32)
; GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align2
; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1)
; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
; GFX9-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
; GFX9-HSA: $vgpr0 = COPY [[OR]](s32)
; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align2
; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9-MESA: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
; GFX9-MESA: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16)
; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]]
; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16)
; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = G_LOAD %0 :: (load (s24), align 2, addrspace 1)
$vgpr0 = COPY %1
...
---
name: test_ext_load_global_s32_from_s24_align4
body: |
bb.0:
liveins: $vgpr0_vgpr1
; SI-LABEL: name: test_ext_load_global_s32_from_s24_align4
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
; SI: $vgpr0 = COPY [[LOAD]](s32)
; CI-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align4
; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
; CI-HSA: $vgpr0 = COPY [[LOAD]](s32)
; CI-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align4
; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
; CI-MESA: $vgpr0 = COPY [[LOAD]](s32)
; VI-LABEL: name: test_ext_load_global_s32_from_s24_align4
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
; VI: $vgpr0 = COPY [[LOAD]](s32)
; GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align4
; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
; GFX9-HSA: $vgpr0 = COPY [[LOAD]](s32)
; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align4
; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
; GFX9-MESA: $vgpr0 = COPY [[LOAD]](s32)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = G_LOAD %0 :: (load (s24), align 4, addrspace 1)
$vgpr0 = COPY %1
...
---
name: test_ext_load_global_s64_from_1_align4
body: |

View File

@@ -6,7 +6,6 @@
# FIXME: Run with and without unaligned access turned on
# ERR-NOT: remark
# ERR: remark: <unknown>:0:0: unable to legalize instruction: %1:_(s32) = G_SEXTLOAD %0:_(p1) :: (load (s24), align 4, addrspace 1) (in function: test_sextload_global_i32_i24)
# ERR: remark: <unknown>:0:0: unable to legalize instruction: %1:_(<2 x s16>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x s8>), addrspace 1) (in function: test_sextload_global_v2i16_from_v2s8)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x s8>), addrspace 1) (in function: test_sextload_global_v2i32_from_v2s8)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x s16>), addrspace 1) (in function: test_sextload_global_v2i32_from_v2s16)
@@ -64,12 +63,24 @@ body: |
; GFX8-LABEL: name: test_sextload_global_i32_i24
; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s24), align 4, addrspace 1)
; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32)
; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; GFX8: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SEXTLOAD]], [[C1]](s32)
; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
; GFX8: $vgpr0 = COPY [[OR]](s32)
; GFX6-LABEL: name: test_sextload_global_i32_i24
; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s24), align 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32)
; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; GFX6: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SEXTLOAD]], [[C1]](s32)
; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
; GFX6: $vgpr0 = COPY [[OR]](s32)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = G_SEXTLOAD %0 :: (load (s24), addrspace 1)
$vgpr0 = COPY %1

View File

@@ -6,7 +6,6 @@
# FIXME: Run with and without unaligned access turned on
# ERR-NOT: remark
# ERR: remark: <unknown>:0:0: unable to legalize instruction: %1:_(s32) = G_ZEXTLOAD %0:_(p1) :: (load (s24), align 4, addrspace 1) (in function: test_zextload_global_i32_i24)
# ERR: remark: <unknown>:0:0: unable to legalize instruction: %1:_(<2 x s16>) = G_ZEXTLOAD %0:_(p1) :: (load (s16), addrspace 1) (in function: test_zextload_global_v2i16_from_2)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_ZEXTLOAD %0:_(p1) :: (load (s16), addrspace 1) (in function: test_zextload_global_v2i32_from_2)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_ZEXTLOAD %0:_(p1) :: (load (s32), addrspace 1) (in function: test_zextload_global_v2i32_from_4)
@@ -65,12 +64,24 @@ body: |
; GFX8-LABEL: name: test_zextload_global_i32_i24
; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s24), align 4, addrspace 1)
; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32)
; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; GFX8: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; GFX8: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
; GFX8: $vgpr0 = COPY [[OR]](s32)
; GFX6-LABEL: name: test_zextload_global_i32_i24
; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s24), align 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32)
; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; GFX6: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; GFX6: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
; GFX6: $vgpr0 = COPY [[OR]](s32)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = G_ZEXTLOAD %0 :: (load (s24), addrspace 1)
$vgpr0 = COPY %1