From 8f28e8069c4ba1110daee8bddc4d5049b6d4646e Mon Sep 17 00:00:00 2001
From: Eduard Zingerman
Date: Thu, 29 Dec 2022 17:45:45 +0200
Subject: [PATCH] [BPF] support for BPF_ST instruction in codegen

Generate store immediate instruction when CPUv4 is enabled.
For example:

    $ cat test.c
    struct foo {
      unsigned char b;
      unsigned short h;
      unsigned int w;
      unsigned long d;
    };
    void bar(volatile struct foo *p) {
      p->b = 1;
      p->h = 2;
      p->w = 3;
      p->d = 4;
    }

    $ clang -O2 --target=bpf -mcpu=v4 test.c -c -o - | llvm-objdump -d -
    ...
    0000000000000000 <bar>:
           0: 72 01 00 00 01 00 00 00 *(u8 *)(r1 + 0x0) = 0x1
           1: 6a 01 02 00 02 00 00 00 *(u16 *)(r1 + 0x2) = 0x2
           2: 62 01 04 00 03 00 00 00 *(u32 *)(r1 + 0x4) = 0x3
           3: 7a 01 08 00 04 00 00 00 *(u64 *)(r1 + 0x8) = 0x4
           4: 95 00 00 00 00 00 00 00 exit

Take special care to:
- apply `BPFMISimplifyPatchable::checkADDrr` rewrite for BPF_ST
- validate immediate value when BPF_ST write is 64-bit:
  BPF interprets `(BPF_ST | BPF_MEM | BPF_DW)` writes as writes with
  sign extension. Thus it is fine to generate such write when
  immediate is -1, but it is incorrect to generate such write when
  immediate is +0xffff_ffff.

This commit was previously reverted in e66affa17e32.

The reason for the revert was an unrelated bug in the BPF backend,
triggered by the test case added in this commit if LLVM is built with
LLVM_ENABLE_EXPENSIVE_CHECKS. The bug was fixed in D157806.

Differential Revision: https://reviews.llvm.org/D140804 --- llvm/lib/Target/BPF/BPFInstrInfo.td | 55 +++++- .../lib/Target/BPF/BPFMISimplifyPatchable.cpp | 7 +- llvm/lib/Target/BPF/BPFSubtarget.cpp | 5 + llvm/lib/Target/BPF/BPFSubtarget.h | 3 +- .../CodeGen/BPF/CORE/field-reloc-st-imm.ll | 156 ++++++++++++++++++ llvm/test/CodeGen/BPF/store_imm.ll | 104 ++++++++++++ 6 files changed, 326 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/BPF/CORE/field-reloc-st-imm.ll create mode 100644 llvm/test/CodeGen/BPF/store_imm.ll diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td index ed2caeba1892..fd5a4dee2952 100644 --- a/llvm/lib/Target/BPF/BPFInstrInfo.td +++ b/llvm/lib/Target/BPF/BPFInstrInfo.td @@ -59,6 +59,7 @@ def BPFHasBswap : Predicate<"Subtarget->hasBswap()">; def BPFHasSdivSmod : Predicate<"Subtarget->hasSdivSmod()">; def BPFNoMovsx : Predicate<"!Subtarget->hasMovsx()">; def BPFNoBswap : Predicate<"!Subtarget->hasBswap()">; +def BPFHasStoreImm : Predicate<"Subtarget->hasStoreImm()">; def brtarget : Operand { let PrintMethod = "printBrTargetOperand"; @@ -75,6 +76,12 @@ def i64immSExt32 : PatLeaf<(i64 imm), [{return isInt<32>(N->getSExtValue()); }]>; def i32immSExt32 : PatLeaf<(i32 imm), [{return isInt<32>(N->getSExtValue()); }]>; +def i64immZExt32 : PatLeaf<(i64 imm), + [{return isUInt<32>(N->getZExtValue()); }]>; + +def imm_to_i64 : SDNodeXFormgetTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i64); +}]>; // Addressing modes. 
def ADDRri : ComplexPattern; @@ -449,7 +456,7 @@ class STORE Pattern> } class STOREi64 - : STORE; + : STORE; let Predicates = [BPFNoALU32] in { def STW : STOREi64; @@ -458,6 +465,50 @@ let Predicates = [BPFNoALU32] in { } def STD : STOREi64; +class STORE_imm + : TYPE_LD_ST { + bits<20> addr; + bits<32> imm; + + let Inst{51-48} = addr{19-16}; // base reg + let Inst{47-32} = addr{15-0}; // offset + let Inst{31-0} = imm; + let BPFClass = BPF_ST; +} + +let Predicates = [BPFHasStoreImm] in { + // Opcode (BPF_ST | BPF_MEM | BPF_DW) implies sign extension for + // value stored to memory: + // - it is fine to generate such write when immediate is -1 + // - it is incorrect to generate such write when immediate is + // +0xffff_ffff. + // + // In the latter case two instructions would be generated instead of + // one BPF_ST: + // rA = 0xffffffff ll ; LD_imm64 + // *(u64 *)(rB + 0) = rA ; STX + // + // For BPF_{B,H,W} the size of value stored matches size of the immediate. + def STD_imm : STORE_imm; + def STW_imm : STORE_imm; + def STH_imm : STORE_imm; + def STB_imm : STORE_imm; +} + +let Predicates = [BPFHasALU32, BPFHasStoreImm] in { + def : Pat<(store (i32 imm:$src), ADDRri:$dst), + (STW_imm (imm_to_i64 $src), ADDRri:$dst)>; + def : Pat<(truncstorei16 (i32 imm:$src), ADDRri:$dst), + (STH_imm (imm_to_i64 imm:$src), ADDRri:$dst)>; + def : Pat<(truncstorei8 (i32 imm:$src), ADDRri:$dst), + (STB_imm (imm_to_i64 imm:$src), ADDRri:$dst)>; +} + // LOAD instructions class LOAD Pattern> : TYPE_LD_ST Pattern> } class STOREi32 - : STORE32; + : STORE32; let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in { def STW32 : STOREi32; diff --git a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp index 514b605f6fa6..2af150ad45c2 100644 --- a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp +++ b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp @@ -93,6 +93,11 @@ void BPFMISimplifyPatchable::initialize(MachineFunction &MFParm) { 
LLVM_DEBUG(dbgs() << "*** BPF simplify patchable insts pass ***\n\n"); } +static bool isST(unsigned Opcode) { + return Opcode == BPF::STB_imm || Opcode == BPF::STH_imm || + Opcode == BPF::STW_imm || Opcode == BPF::STD_imm; +} + static bool isSTX32(unsigned Opcode) { return Opcode == BPF::STB32 || Opcode == BPF::STH32 || Opcode == BPF::STW32; } @@ -141,7 +146,7 @@ void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI, COREOp = BPF::CORE_LD64; else if (isLDX32(Opcode)) COREOp = BPF::CORE_LD32; - else if (isSTX64(Opcode) || isSTX32(Opcode)) + else if (isSTX64(Opcode) || isSTX32(Opcode) || isST(Opcode)) COREOp = BPF::CORE_ST; else continue; diff --git a/llvm/lib/Target/BPF/BPFSubtarget.cpp b/llvm/lib/Target/BPF/BPFSubtarget.cpp index b99f9069523e..ce02c831828e 100644 --- a/llvm/lib/Target/BPF/BPFSubtarget.cpp +++ b/llvm/lib/Target/BPF/BPFSubtarget.cpp @@ -33,6 +33,9 @@ static cl::opt Disable_sdiv_smod("disable-sdiv-smod", cl::Hidden, cl::init(false), cl::desc("Disable sdiv/smod insns")); static cl::opt Disable_gotol("disable-gotol", cl::Hidden, cl::init(false), cl::desc("Disable gotol insn")); +static cl::opt + Disable_StoreImm("disable-storeimm", cl::Hidden, cl::init(false), + cl::desc("Disable BPF_ST (immediate store) insn")); void BPFSubtarget::anchor() {} @@ -54,6 +57,7 @@ void BPFSubtarget::initializeEnvironment() { HasBswap = false; HasSdivSmod = false; HasGotol = false; + HasStoreImm = false; } void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { @@ -80,6 +84,7 @@ void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { HasBswap = !Disable_bswap; HasSdivSmod = !Disable_sdiv_smod; HasGotol = !Disable_gotol; + HasStoreImm = !Disable_StoreImm; return; } } diff --git a/llvm/lib/Target/BPF/BPFSubtarget.h b/llvm/lib/Target/BPF/BPFSubtarget.h index 12749dd739e2..6e81daa4d955 100644 --- a/llvm/lib/Target/BPF/BPFSubtarget.h +++ b/llvm/lib/Target/BPF/BPFSubtarget.h @@ -57,7 +57,7 @@ protected: bool UseDwarfRIS; // whether cpu 
v4 insns are enabled. - bool HasLdsx, HasMovsx, HasBswap, HasSdivSmod, HasGotol; + bool HasLdsx, HasMovsx, HasBswap, HasSdivSmod, HasGotol, HasStoreImm; public: // This constructor initializes the data members to match that @@ -79,6 +79,7 @@ public: bool hasBswap() const { return HasBswap; } bool hasSdivSmod() const { return HasSdivSmod; } bool hasGotol() const { return HasGotol; } + bool hasStoreImm() const { return HasStoreImm; } const BPFInstrInfo *getInstrInfo() const override { return &InstrInfo; } const BPFFrameLowering *getFrameLowering() const override { diff --git a/llvm/test/CodeGen/BPF/CORE/field-reloc-st-imm.ll b/llvm/test/CodeGen/BPF/CORE/field-reloc-st-imm.ll new file mode 100644 index 000000000000..73a8903298a1 --- /dev/null +++ b/llvm/test/CodeGen/BPF/CORE/field-reloc-st-imm.ll @@ -0,0 +1,156 @@ +; RUN: llc -march=bpfel -mcpu=v4 < %s | FileCheck %s + +; Make sure that CO-RE relocations had been generated correctly for +; BPF_ST (store immediate) instructions and that +; BPFMISimplifyPatchable optimizations had been applied. 
+; +; Generated from the following source code: +; +; #define __pai __attribute__((preserve_access_index)) +; +; struct foo { +; unsigned char b; +; unsigned short h; +; unsigned int w; +; unsigned long d; +; } __pai; +; +; void bar(volatile struct foo *p) { +; p->b = 1; +; p->h = 2; +; p->w = 3; +; p->d = 4; +; } +; +; Using the following command: +; +; clang -g -O2 -S -emit-llvm -mcpu=v4 --target=bpfel test.c + +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" + +@"llvm.foo:0:0$0:0" = external global i64, !llvm.preserve.access.index !0 #0 +@"llvm.foo:0:2$0:1" = external global i64, !llvm.preserve.access.index !0 #0 +@"llvm.foo:0:4$0:2" = external global i64, !llvm.preserve.access.index !0 #0 +@"llvm.foo:0:8$0:3" = external global i64, !llvm.preserve.access.index !0 #0 + +; Function Attrs: nofree nounwind +define dso_local void @bar(ptr noundef %p) local_unnamed_addr #1 !dbg !18 { +entry: + call void @llvm.dbg.value(metadata ptr %p, metadata !24, metadata !DIExpression()), !dbg !25 + %0 = load i64, ptr @"llvm.foo:0:0$0:0", align 8 + %1 = getelementptr i8, ptr %p, i64 %0 + %2 = tail call ptr @llvm.bpf.passthrough.p0.p0(i32 0, ptr %1) + store volatile i8 1, ptr %2, align 8, !dbg !26, !tbaa !27 + %3 = load i64, ptr @"llvm.foo:0:2$0:1", align 8 + %4 = getelementptr i8, ptr %p, i64 %3 + %5 = tail call ptr @llvm.bpf.passthrough.p0.p0(i32 1, ptr %4) + store volatile i16 2, ptr %5, align 2, !dbg !34, !tbaa !35 + %6 = load i64, ptr @"llvm.foo:0:4$0:2", align 8 + %7 = getelementptr i8, ptr %p, i64 %6 + %8 = tail call ptr @llvm.bpf.passthrough.p0.p0(i32 2, ptr %7) + store volatile i32 3, ptr %8, align 4, !dbg !36, !tbaa !37 + %9 = load i64, ptr @"llvm.foo:0:8$0:3", align 8 + %10 = getelementptr i8, ptr %p, i64 %9 + %11 = tail call ptr @llvm.bpf.passthrough.p0.p0(i32 3, ptr %10) + store volatile i64 4, ptr %11, align 8, !dbg !38, !tbaa !39 + ret void, !dbg !40 +} + +; CHECK: [[L0:.Ltmp.*]]: +; CHECK: *(u8 *)(r1 + 0) = 1 +; CHECK: [[L2:.Ltmp.*]]: +; CHECK: *(u16 
*)(r1 + 2) = 2 +; CHECK: [[L4:.Ltmp.*]]: +; CHECK: *(u32 *)(r1 + 4) = 3 +; CHECK: [[L6:.Ltmp.*]]: +; CHECK: *(u64 *)(r1 + 8) = 4 + +; CHECK: .section .BTF +; ... +; CHECK: .long [[FOO:.*]] # BTF_KIND_STRUCT(id = [[FOO_ID:.*]]) +; ... +; CHECK: .ascii "foo" # string offset=[[FOO]] +; CHECK: .ascii ".text" # string offset=[[TEXT:.*]] +; CHECK: .ascii "0:0" # string offset=[[S1:.*]] +; CHECK: .ascii "0:1" # string offset=[[S2:.*]] +; CHECK: .ascii "0:2" # string offset=[[S3:.*]] +; CHECK: .ascii "0:3" # string offset=[[S4:.*]] + +; CHECK: .section .BTF.ext +; ... +; CHECK: .long [[#]] # FieldReloc +; CHECK-NEXT: .long [[TEXT]] # Field reloc section string offset=[[TEXT]] +; CHECK-NEXT: .long [[#]] +; CHECK-NEXT: .long [[L0]] +; CHECK-NEXT: .long [[FOO_ID]] +; CHECK-NEXT: .long [[S1]] +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long [[L2]] +; CHECK-NEXT: .long [[FOO_ID]] +; CHECK-NEXT: .long [[S2]] +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long [[L4]] +; CHECK-NEXT: .long [[FOO_ID]] +; CHECK-NEXT: .long [[S3]] +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long [[L6]] +; CHECK-NEXT: .long [[FOO_ID]] +; CHECK-NEXT: .long [[S4]] +; CHECK-NEXT: .long 0 + +; Function Attrs: nofree nosync nounwind memory(none) +declare ptr @llvm.bpf.passthrough.p0.p0(i32, ptr) #2 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare void @llvm.dbg.value(metadata, metadata, metadata) #3 + +attributes #0 = { "btf_ama" } +attributes #1 = { nofree nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="v4" } +attributes #2 = { nofree nosync nounwind memory(none) } +attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } + +!llvm.dbg.cu = !{!11} +!llvm.module.flags = !{!12, !13, !14, !15, !16} +!llvm.ident = !{!17} + +!0 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "foo", file: !1, line: 3, size: 128, elements: !2) +!1 = !DIFile(filename: "some-file.c", directory: 
"/some/dir", checksumkind: CSK_MD5, checksum: "e5d03b4d39dfffadc6c607e956c37996") +!2 = !{!3, !5, !7, !9} +!3 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !0, file: !1, line: 4, baseType: !4, size: 8) +!4 = !DIBasicType(name: "unsigned char", size: 8, encoding: DW_ATE_unsigned_char) +!5 = !DIDerivedType(tag: DW_TAG_member, name: "h", scope: !0, file: !1, line: 5, baseType: !6, size: 16, offset: 16) +!6 = !DIBasicType(name: "unsigned short", size: 16, encoding: DW_ATE_unsigned) +!7 = !DIDerivedType(tag: DW_TAG_member, name: "w", scope: !0, file: !1, line: 6, baseType: !8, size: 32, offset: 32) +!8 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) +!9 = !DIDerivedType(tag: DW_TAG_member, name: "d", scope: !0, file: !1, line: 7, baseType: !10, size: 64, offset: 64) +!10 = !DIBasicType(name: "unsigned long", size: 64, encoding: DW_ATE_unsigned) +!11 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 18.0.0 ...", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!12 = !{i32 7, !"Dwarf Version", i32 5} +!13 = !{i32 2, !"Debug Info Version", i32 3} +!14 = !{i32 1, !"wchar_size", i32 4} +!15 = !{i32 7, !"frame-pointer", i32 2} +!16 = !{i32 7, !"debug-info-assignment-tracking", i1 true} +!17 = !{!"clang version 18.0.0 ..."} +!18 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 10, type: !19, scopeLine: 10, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !11, retainedNodes: !23) +!19 = !DISubroutineType(types: !20) +!20 = !{null, !21} +!21 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !22, size: 64) +!22 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !0) +!23 = !{!24} +!24 = !DILocalVariable(name: "p", arg: 1, scope: !18, file: !1, line: 10, type: !21) +!25 = !DILocation(line: 0, scope: !18) +!26 = !DILocation(line: 11, column: 8, scope: !18) +!27 = !{!28, !29, 
i64 0} +!28 = !{!"foo", !29, i64 0, !31, i64 2, !32, i64 4, !33, i64 8} +!29 = !{!"omnipotent char", !30, i64 0} +!30 = !{!"Simple C/C++ TBAA"} +!31 = !{!"short", !29, i64 0} +!32 = !{!"int", !29, i64 0} +!33 = !{!"long", !29, i64 0} +!34 = !DILocation(line: 12, column: 8, scope: !18) +!35 = !{!28, !31, i64 2} +!36 = !DILocation(line: 13, column: 8, scope: !18) +!37 = !{!28, !32, i64 4} +!38 = !DILocation(line: 14, column: 8, scope: !18) +!39 = !{!28, !33, i64 8} +!40 = !DILocation(line: 15, column: 1, scope: !18) diff --git a/llvm/test/CodeGen/BPF/store_imm.ll b/llvm/test/CodeGen/BPF/store_imm.ll new file mode 100644 index 000000000000..778beca176f9 --- /dev/null +++ b/llvm/test/CodeGen/BPF/store_imm.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -march=bpfel -mcpu=v4 -show-mc-encoding | FileCheck %s + +target triple = "bpf" + +define void @byte(ptr %p0) { +; CHECK-LABEL: byte: +; CHECK: # %bb.0: +; CHECK-NEXT: *(u8 *)(r1 + 0) = 1 # encoding: [0x72,0x01,0x00,0x00,0x01,0x00,0x00,0x00] +; CHECK-NEXT: *(u8 *)(r1 + 1) = 255 # encoding: [0x72,0x01,0x01,0x00,0xff,0x00,0x00,0x00] + %p1 = getelementptr i8, ptr %p0, i32 1 + + store volatile i8 1, ptr %p0, align 1 + store volatile i8 -1, ptr %p1, align 1 + + unreachable +} + +define void @half(ptr, ptr %p0) { +; CHECK-LABEL: half: +; CHECK: # %bb.0: +; CHECK-NEXT: *(u16 *)(r2 + 0) = 1 # encoding: [0x6a,0x02,0x00,0x00,0x01,0x00,0x00,0x00] +; CHECK-NEXT: *(u16 *)(r2 + 2) = 65535 # encoding: [0x6a,0x02,0x02,0x00,0xff,0xff,0x00,0x00] + %p1 = getelementptr i8, ptr %p0, i32 2 + + store volatile i16 1, ptr %p0, align 2 + store volatile i16 -1, ptr %p1, align 2 + + unreachable +} + +define void @word(ptr, ptr, ptr %p0) { +; CHECK-LABEL: word: +; CHECK: # %bb.0: +; CHECK-NEXT: *(u32 *)(r3 + 0) = 1 # encoding: [0x62,0x03,0x00,0x00,0x01,0x00,0x00,0x00] +; CHECK-NEXT: *(u32 *)(r3 + 4) = -1 # encoding: [0x62,0x03,0x04,0x00,0xff,0xff,0xff,0xff] +; CHECK-NEXT: *(u32 
*)(r3 + 8) = -2000000000 # encoding: [0x62,0x03,0x08,0x00,0x00,0x6c,0xca,0x88] +; CHECK-NEXT: *(u32 *)(r3 + 12) = -1 # encoding: [0x62,0x03,0x0c,0x00,0xff,0xff,0xff,0xff] +; CHECK-NEXT: *(u32 *)(r3 + 12) = 0 # encoding: [0x62,0x03,0x0c,0x00,0x00,0x00,0x00,0x00] + %p1 = getelementptr i8, ptr %p0, i32 4 + %p2 = getelementptr i8, ptr %p0, i32 8 + %p3 = getelementptr i8, ptr %p0, i32 12 + + store volatile i32 1, ptr %p0, align 4 + store volatile i32 -1, ptr %p1, align 4 + store volatile i32 -2000000000, ptr %p2, align 4 + store volatile i32 4294967295, ptr %p3, align 4 + store volatile i32 4294967296, ptr %p3, align 4 + + unreachable +} + +define void @dword(ptr, ptr, ptr, ptr %p0) { +; CHECK-LABEL: dword: +; CHECK: # %bb.0: +; CHECK-NEXT: *(u64 *)(r4 + 0) = 1 # encoding: [0x7a,0x04,0x00,0x00,0x01,0x00,0x00,0x00] +; CHECK-NEXT: *(u64 *)(r4 + 8) = -1 # encoding: [0x7a,0x04,0x08,0x00,0xff,0xff,0xff,0xff] +; CHECK-NEXT: *(u64 *)(r4 + 16) = 2000000000 # encoding: [0x7a,0x04,0x10,0x00,0x00,0x94,0x35,0x77] +; CHECK-NEXT: *(u64 *)(r4 + 16) = -2000000000 # encoding: [0x7a,0x04,0x10,0x00,0x00,0x6c,0xca,0x88] +; CHECK-NEXT: r1 = 4294967295 ll # encoding: [0x18,0x01,0x00,0x00,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] +; CHECK-NEXT: *(u64 *)(r4 + 24) = r1 # encoding: [0x7b,0x14,0x18,0x00,0x00,0x00,0x00,0x00] + %p1 = getelementptr i8, ptr %p0, i32 8 + %p2 = getelementptr i8, ptr %p0, i32 16 + %p3 = getelementptr i8, ptr %p0, i32 24 + + store volatile i64 1, ptr %p0, align 8 + store volatile i64 -1, ptr %p1, align 8 + store volatile i64 2000000000, ptr %p2, align 8 + store volatile i64 -2000000000, ptr %p2, align 8 + store volatile i64 4294967295, ptr %p3, align 8 + + unreachable +} + +define void @unaligned(ptr %p0) { +; CHECK-LABEL: unaligned: +; CHECK: # %bb.0: +; CHECK-NEXT: *(u8 *)(r1 + 1) = 255 # encoding: [0x72,0x01,0x01,0x00,0xff,0x00,0x00,0x00] +; CHECK-NEXT: *(u8 *)(r1 + 0) = 254 # encoding: [0x72,0x01,0x00,0x00,0xfe,0x00,0x00,0x00] +; CHECK-NEXT: *(u16 
*)(r1 + 10) = 65535 # encoding: [0x6a,0x01,0x0a,0x00,0xff,0xff,0x00,0x00] +; CHECK-NEXT: *(u16 *)(r1 + 8) = 65534 # encoding: [0x6a,0x01,0x08,0x00,0xfe,0xff,0x00,0x00] +; CHECK-NEXT: *(u32 *)(r1 + 20) = -1 # encoding: [0x62,0x01,0x14,0x00,0xff,0xff,0xff,0xff] +; CHECK-NEXT: *(u32 *)(r1 + 16) = -2 # encoding: [0x62,0x01,0x10,0x00,0xfe,0xff,0xff,0xff] + %p1 = getelementptr i8, ptr %p0, i32 8 + %p2 = getelementptr i8, ptr %p0, i32 16 + + store volatile i16 -2, ptr %p0, align 1 + store volatile i32 -2, ptr %p1, align 2 + store volatile i64 -2, ptr %p2, align 4 + + unreachable +} + +define void @inline_asm(ptr %p0) { +; CHECK-LABEL: inline_asm: +; CHECK: # %bb.0: +; CHECK-NEXT: #APP +; CHECK-NEXT: *(u32 *)(r0 + 42) = 7 # encoding: [0x62,0x00,0x2a,0x00,0x07,0x00,0x00,0x00] +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP + call void asm "*(u32 *)(r0 + 42) = 7;", "~{r0},~{mem}"() + + unreachable +}