From 1f72fa29ecb4b283f449c8bf931fcaf0fa1069ee Mon Sep 17 00:00:00 2001 From: weiwei chen Date: Fri, 4 Apr 2025 22:44:07 -0400 Subject: [PATCH] [X86Backend][M68KBackend] Make Ctx in X86MCInstLower (M68KInstLower) the same as AsmPrinter.OutContext (#133352) In `X86MCInstLower::LowerMachineOperand`, a new `MCSymbol` can be created in `GetSymbolFromOperand(MO)` where `MO.getType()` is `MachineOperand::MO_ExternalSymbol` ``` case MachineOperand::MO_ExternalSymbol: return LowerSymbolOperand(MO, GetSymbolFromOperand(MO)); ``` at https://github.com/llvm/llvm-project/blob/725a7b664b92cd2e884806de5a08900b43d43cce/llvm/lib/Target/X86/X86MCInstLower.cpp#L196 However, this newly created symbol will not be marked properly with its `IsExternal` field since `Ctx.getOrCreateSymbol(Name)` doesn't know if the newly created `MCSymbol` is for `MachineOperand::MO_ExternalSymbol`. Looking at other backends, for example `Arch64MCInstLower` is doing for handling `MC_ExternalSymbol` https://github.com/llvm/llvm-project/blob/14c36db16fc090ef494ff6d8207562c414b40e30/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp#L366-L367 https://github.com/llvm/llvm-project/blob/14c36db16fc090ef494ff6d8207562c414b40e30/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp#L145-L148 It creates/gets the MCSymbol from `AsmPrinter.OutContext` instead of from `Ctx`. Moreover, `Ctx` for `AArch64MCLower` is the same as `AsmPrinter.OutContext`. https://github.com/llvm/llvm-project/blob/8e7d6baf0e013408be932758b4a5334c14a34086/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp#L100. This applies to almost all the other backends except X86 and M68k. ``` $git grep "MCInstLowering(" lib/Target/AArch64/AArch64AsmPrinter.cpp:100: : AsmPrinter(TM, std::move(Streamer)), MCInstLowering(OutContext, *this), lib/Target/AMDGPU/AMDGPUMCInstLower.cpp:223: AMDGPUMCInstLower MCInstLowering(OutContext, STI, *this); lib/Target/AMDGPU/AMDGPUMCInstLower.cpp:257: AMDGPUMCInstLower MCInstLowering(OutContext, STI, *this); lib/Target/AMDGPU/R600MCInstLower.cpp:52: R600MCInstLower MCInstLowering(OutContext, STI, *this); lib/Target/ARC/ARCAsmPrinter.cpp:41: MCInstLowering(&OutContext, *this) {} lib/Target/AVR/AVRAsmPrinter.cpp:196: AVRMCInstLower MCInstLowering(OutContext, *this); lib/Target/BPF/BPFAsmPrinter.cpp:144: BPFMCInstLower MCInstLowering(OutContext, *this); lib/Target/CSKY/CSKYAsmPrinter.cpp:41: : AsmPrinter(TM, std::move(Streamer)), MCInstLowering(OutContext, *this) {} lib/Target/Lanai/LanaiAsmPrinter.cpp:147: LanaiMCInstLower MCInstLowering(OutContext, *this); lib/Target/Lanai/LanaiAsmPrinter.cpp:184: LanaiMCInstLower MCInstLowering(OutContext, *this); lib/Target/MSP430/MSP430AsmPrinter.cpp:149: MSP430MCInstLower MCInstLowering(OutContext, *this); lib/Target/Mips/MipsAsmPrinter.h:126: : AsmPrinter(TM, std::move(Streamer)), MCInstLowering(*this) {} lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp:695: WebAssemblyMCInstLower MCInstLowering(OutContext, *this); lib/Target/X86/X86MCInstLower.cpp:2200: X86MCInstLower MCInstLowering(*MF, *this); ``` This patch makes `X86MCInstLower` and `M68KInstLower` to have their `Ctx` from `AsmPrinter.OutContext` instead of getting it from `MF.getContext()` to be consistent with all the other backends. I think since normal use case (probably anything other than our un-conventional case) only handles one llvm module all the way through in the codegen pipeline till the end of code emission (AsmPrint), `AsmPrinter.OutContext` is the same as MachineFunction's MCContext, so this change is an NFC. ---- This fixes an error while running the generated code in ORC JIT for our use case with [MCLinker](https://youtu.be/yuSBEXkjfEA?si=HjgjkxJ9hLfnSvBj&t=813) (see more details below): https://github.com/llvm/llvm-project/pull/133291#issuecomment-2759200983 We (Mojo) are trying to do a MC level linking so that we break llvm module into multiple submodules to compile and codegen in parallel (technically into *.o files with symbol linkage type change), but instead of archive all of them into one `.a` file, we want to fix the symbol linkage type and still produce one *.o file. The parallel codegen pipeline generates the codegen data structures in their own `MCContext` (which is `Ctx` here). So if function `f` and `g` got split into different submodules, they will have different `Ctx`. And when we try to create an external symbol with the same name for each of them with `Ctx.getOrCreate(SymName)`, we will get two different `MCSymbol*` because `f` and `g`'s `MCContext` are different and they can't see each other. This is unfortunately not what we want for external symbols. Using `AsmPrinter.OutContext` helps, since it is shared, if we try to get or create the `MCSymbol` there, we'll be able to deduplicate. --- llvm/lib/Target/M68k/M68kMCInstLower.cpp | 2 +- llvm/lib/Target/X86/X86MCInstLower.cpp | 4 +- llvm/unittests/CodeGen/CMakeLists.txt | 1 + llvm/unittests/CodeGen/X86MCInstLowerTest.cpp | 174 ++++++++++++++++++ 4 files changed, 178 insertions(+), 3 deletions(-) create mode 100644 llvm/unittests/CodeGen/X86MCInstLowerTest.cpp diff --git a/llvm/lib/Target/M68k/M68kMCInstLower.cpp b/llvm/lib/Target/M68k/M68kMCInstLower.cpp index 957c0f9d3da8..8698fc0de471 100644 --- a/llvm/lib/Target/M68k/M68kMCInstLower.cpp +++ b/llvm/lib/Target/M68k/M68kMCInstLower.cpp @@ -33,7 +33,7 @@ using namespace llvm; #define DEBUG_TYPE "m68k-mc-inst-lower" M68kMCInstLower::M68kMCInstLower(MachineFunction &MF, M68kAsmPrinter &AP) - : Ctx(MF.getContext()), MF(MF), TM(MF.getTarget()), MAI(*TM.getMCAsmInfo()), + : Ctx(AP.OutContext), MF(MF), TM(MF.getTarget()), MAI(*TM.getMCAsmInfo()), AsmPrinter(AP) {} MCSymbol * diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index 52332c46851c..d9945bdf2db6 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -142,8 +142,8 @@ void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) { X86MCInstLower::X86MCInstLower(const MachineFunction &mf, X86AsmPrinter &asmprinter) - : Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()), MAI(*TM.getMCAsmInfo()), - AsmPrinter(asmprinter) {} + : Ctx(asmprinter.OutContext), MF(mf), TM(mf.getTarget()), + MAI(*TM.getMCAsmInfo()), AsmPrinter(asmprinter) {} MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const { return AsmPrinter.MMI->getObjFileInfo(); diff --git a/llvm/unittests/CodeGen/CMakeLists.txt b/llvm/unittests/CodeGen/CMakeLists.txt index 4f580e7539f4..a972dc32c40a 100644 --- a/llvm/unittests/CodeGen/CMakeLists.txt +++ b/llvm/unittests/CodeGen/CMakeLists.txt @@ -47,6 +47,7 @@ add_llvm_unittest(CodeGenTests TargetOptionsTest.cpp TestAsmPrinter.cpp MLRegAllocDevelopmentFeatures.cpp + X86MCInstLowerTest.cpp ) add_subdirectory(GlobalISel) diff --git a/llvm/unittests/CodeGen/X86MCInstLowerTest.cpp b/llvm/unittests/CodeGen/X86MCInstLowerTest.cpp new file mode 100644 index 000000000000..f5a59b16b448 --- /dev/null +++ b/llvm/unittests/CodeGen/X86MCInstLowerTest.cpp @@ -0,0 +1,174 @@ +//===- llvm/unittest/CodeGen/X86MCInstLowerTest.cpp +//-------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "TestAsmPrinter.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "gtest/gtest.h" + +namespace llvm { + +class X86MCInstLowerTest : public testing::Test { +protected: + static void SetUpTestCase() { + InitializeAllTargetMCs(); + InitializeAllTargetInfos(); + InitializeAllTargets(); + InitializeAllAsmPrinters(); + } + + // Function to setup codegen pipeline and returns the AsmPrinter. + AsmPrinter *addPassesToEmitFile(llvm::legacy::PassManagerBase &PM, + llvm::raw_pwrite_stream &Out, + llvm::CodeGenFileType FileType, + llvm::MachineModuleInfoWrapperPass *MMIWP) { + TargetPassConfig *PassConfig = TM->createPassConfig(PM); + + PassConfig->setDisableVerify(true); + PM.add(PassConfig); + PM.add(MMIWP); + + if (PassConfig->addISelPasses()) + return nullptr; + + PassConfig->addMachinePasses(); + PassConfig->setInitialized(); + + MC.reset(new MCContext(TM->getTargetTriple(), TM->getMCAsmInfo(), + TM->getMCRegisterInfo(), TM->getMCSubtargetInfo())); + MC->setObjectFileInfo(TM->getObjFileLowering()); + TM->getObjFileLowering()->Initialize(*MC, *TM); + MC->setObjectFileInfo(TM->getObjFileLowering()); + + // Use a new MCContext for AsmPrinter for testing. + // AsmPrinter.OutContext will be different from + // MachineFunction's MCContext in MMIWP. + Expected> MCStreamerOrErr = + TM->createMCStreamer(Out, nullptr, FileType, *MC); + + if (auto Err = MCStreamerOrErr.takeError()) + return nullptr; + + AsmPrinter *Printer = + TM->getTarget().createAsmPrinter(*TM, std::move(*MCStreamerOrErr)); + + if (!Printer) + return nullptr; + + PM.add(Printer); + + return Printer; + } + + void SetUp() override { + // Module to compile. + const char *FooStr = R""""( + @G = external global i32 + + define i32 @foo() { + %1 = load i32, i32* @G; load the global variable + %2 = call i32 @f() + %3 = mul i32 %1, %2 + ret i32 %3 + } + + declare i32 @f() #0 + )""""; + StringRef AssemblyF(FooStr); + + // Get target triple for X86_64 + Triple TargetTriple("x86_64--"); + std::string Error; + const Target *T = TargetRegistry::lookupTarget("", TargetTriple, Error); + // Skip the test if target is not built. + if (!T) + GTEST_SKIP(); + + // Get TargetMachine. + // Use Reloc::Model::PIC_ and CodeModel::Model::Large + // to get GOT during codegen as MO_ExternalSymbol. + TargetOptions Options; + TM = std::unique_ptr(T->createTargetMachine( + TargetTriple, "", "", Options, Reloc::Model::PIC_, + CodeModel::Model::Large, CodeGenOptLevel::Default)); + if (!TM) + GTEST_SKIP(); + + SMDiagnostic SMError; + + // Parse the module. + M = parseAssemblyString(AssemblyF, SMError, Context); + if (!M) + report_fatal_error(SMError.getMessage()); + M->setDataLayout(TM->createDataLayout()); + + // Get llvm::Function from M + Foo = M->getFunction("foo"); + if (!Foo) + report_fatal_error("foo?"); + + // Prepare the MCContext for codegen M. + // MachineFunction for Foo will have this MCContext. + MCFoo.reset(new MCContext(TargetTriple, TM->getMCAsmInfo(), + TM->getMCRegisterInfo(), + TM->getMCSubtargetInfo())); + MCFoo->setObjectFileInfo(TM->getObjFileLowering()); + TM->getObjFileLowering()->Initialize(*MCFoo, *TM); + MCFoo->setObjectFileInfo(TM->getObjFileLowering()); + } + + LLVMContext Context; + std::unique_ptr TM; + std::unique_ptr M; + + std::unique_ptr MC; + std::unique_ptr MCFoo; + + Function *Foo; + std::unique_ptr MFFoo; +}; + +TEST_F(X86MCInstLowerTest, moExternalSymbol_MCSYMBOL) { + + MachineModuleInfoWrapperPass *MMIWP = + new MachineModuleInfoWrapperPass(TM.get(), &*MCFoo); + + legacy::PassManager PassMgrF; + SmallString<1024> Buf; + llvm::raw_svector_ostream OS(Buf); + AsmPrinter *Printer = + addPassesToEmitFile(PassMgrF, OS, CodeGenFileType::AssemblyFile, MMIWP); + PassMgrF.run(*M); + + // Check GOT MCSymbol is from Printer.OutContext. + MCSymbol *GOTPrinterPtr = + Printer->OutContext.lookupSymbol("_GLOBAL_OFFSET_TABLE_"); + + // Check GOT MCSymbol is NOT from MachineFunction's MCContext. + MCSymbol *GOTMFCtxPtr = + MMIWP->getMMI().getMachineFunction(*Foo)->getContext().lookupSymbol( + "_GLOBAL_OFFSET_TABLE_"); + + EXPECT_NE(GOTPrinterPtr, nullptr); + EXPECT_EQ(GOTMFCtxPtr, nullptr); +} + +} // end namespace llvm