Added a new disassembler plugin, DisassemblerLLVMC,

which uses the Disassembler.h interface to the LLVM
disassemblers rather than the EnhancedDisassembly.h
interface.  Disassembler.h is a better-maintained
API and will be more stable in the long term.

Currently the output from Disassembler.h does not
provide for symbolic disassembly in all the places
that the old disassembler did, so I have gated (and
disabled) the disassembler.  It'll be easy to flip
the switch later.

In the meantime, to enable the new disassembler,
uncomment "#define USE_NEW_DISASSEMBLER" in
lldb.cpp.

llvm-svn: 150772
This commit is contained in:
Sean Callanan
2012-02-17 00:53:45 +00:00
parent def9c61e4b
commit 95e5c63012
3 changed files with 716 additions and 0 deletions

View File

@@ -0,0 +1,580 @@
//===-- DisassemblerLLVMC.cpp -----------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "DisassemblerLLVMC.h"
#include "llvm-c/Disassembler.h"
#include "llvm/Support/TargetSelect.h"
#include "lldb/Core/Address.h"
#include "lldb/Core/DataExtractor.h"
#include "lldb/Core/Stream.h"
#include "lldb/Symbol/SymbolContext.h"
#include "lldb/Target/ExecutionContext.h"
#include "lldb/Target/Process.h"
#include "lldb/Target/RegisterContext.h"
#include "lldb/Target/Target.h"
#include "lldb/Target/StackFrame.h"
#include <regex.h>
#include <string.h>
using namespace lldb;
using namespace lldb_private;
class InstructionLLVMC : public lldb_private::Instruction
{
public:
InstructionLLVMC (DisassemblerLLVMC &disasm,
const lldb_private::Address &address,
lldb_private::AddressClass addr_class) :
Instruction(address, addr_class),
m_disasm(disasm),
m_is_valid(false),
m_no_comments(true),
m_comment_stream()
{
}
virtual
~InstructionLLVMC ()
{
}
static void
PadToWidth (lldb_private::StreamString &ss,
int new_width)
{
int old_width = ss.GetSize();
if (old_width < new_width)
{
ss.Printf("%*s", new_width - old_width, "");
}
}
virtual void
Dump (lldb_private::Stream *s,
uint32_t max_opcode_byte_size,
bool show_address,
bool show_bytes,
const lldb_private::ExecutionContext* exe_ctx,
bool raw)
{
const size_t opcode_column_width = 7;
const size_t operand_column_width = 25;
StreamString ss;
ExecutionContextScope *exe_scope = NULL;
if ((!raw) && exe_ctx)
{
exe_scope = exe_ctx->GetBestExecutionContextScope();
DataExtractor extractor(m_raw_bytes.data(),
m_raw_bytes.size(),
m_disasm.GetArchitecture().GetByteOrder(),
m_disasm.GetArchitecture().GetAddressByteSize());
Parse <true> (m_address,
m_address_class,
extractor,
0,
exe_scope);
}
if (show_address)
{
m_address.Dump(&ss,
exe_scope,
Address::DumpStyleLoadAddress,
Address::DumpStyleModuleWithFileAddress,
0);
ss.PutCString(": ");
}
if (show_bytes)
{
if (m_opcode.GetType() == Opcode::eTypeBytes)
{
// x86_64 and i386 are the only ones that use bytes right now so
// pad out the byte dump to be able to always show 15 bytes (3 chars each)
// plus a space
if (max_opcode_byte_size > 0)
m_opcode.Dump (&ss, max_opcode_byte_size * 3 + 1);
else
m_opcode.Dump (&ss, 15 * 3 + 1);
}
else
{
// Else, we have ARM which can show up to a uint32_t 0x00000000 (10 spaces)
// plus two for padding...
if (max_opcode_byte_size > 0)
m_opcode.Dump (&ss, max_opcode_byte_size * 3 + 1);
else
m_opcode.Dump (&ss, 12);
}
}
int size_before_inst = ss.GetSize();
ss.PutCString(m_opcode_name.c_str());
PadToWidth(ss, size_before_inst + opcode_column_width);
ss.PutCString(m_mnemocics.c_str());
PadToWidth(ss, size_before_inst + opcode_column_width + operand_column_width);
if (!m_comment.empty())
{
ss.PutCString(" ; ");
ss.PutCString(m_comment.c_str());
}
ss.Flush();
s->PutCString(ss.GetData());
}
virtual bool
DoesBranch () const
{
return false;
}
virtual size_t
Decode (const lldb_private::Disassembler &disassembler,
const lldb_private::DataExtractor &data,
uint32_t data_offset)
{
Parse <false> (m_address,
m_address_class,
data,
data_offset,
NULL);
return m_opcode.GetByteSize();
}
void
AddReferencedAddress (std::string &description)
{
if (m_no_comments)
m_comment_stream.PutCString(", ");
else
m_no_comments = true;
m_comment_stream.PutCString(description.c_str());
}
virtual void
CalculateMnemonicOperandsAndComment (lldb_private::ExecutionContextScope *exe_scope)
{
DataExtractor extractor(m_raw_bytes.data(),
m_raw_bytes.size(),
m_disasm.GetArchitecture().GetByteOrder(),
m_disasm.GetArchitecture().GetAddressByteSize());
Parse <true> (m_address,
m_address_class,
extractor,
0,
exe_scope);
}
bool
IsValid ()
{
return m_is_valid;
}
size_t
GetByteSize ()
{
return m_opcode.GetByteSize();
}
protected:
void PopulateOpcode (const DataExtractor &extractor,
uint32_t offset,
size_t inst_size)
{
llvm::Triple::ArchType arch = m_disasm.GetArchitecture().GetMachine();
switch (arch)
{
default:
case llvm::Triple::x86:
case llvm::Triple::x86_64:
m_opcode.SetOpcodeBytes(extractor.PeekData(offset, inst_size), inst_size);
break;
case llvm::Triple::arm:
case llvm::Triple::thumb:
switch (inst_size)
{
case 2:
{
m_opcode.SetOpcode16 (extractor.GetU16 (&offset));
break;
}
break;
case 4:
{
if (arch == llvm::Triple::arm &&
m_address_class == eAddressClassCodeAlternateISA)
{
// If it is a 32-bit THUMB instruction, we need to swap the upper & lower halves.
uint32_t orig_bytes = extractor.GetU32 (&offset);
uint16_t upper_bits = (orig_bytes >> 16) & ((1u << 16) - 1);
uint16_t lower_bits = orig_bytes & ((1u << 16) - 1);
uint32_t swapped = (lower_bits << 16) | upper_bits;
m_opcode.SetOpcode32 (swapped);
}
else
{
m_opcode.SetOpcode32 (extractor.GetU32 (&offset));
}
}
break;
default:
assert (!"Invalid ARM opcode size");
break;
}
break;
}
}
template <bool Reparse> bool Parse (const lldb_private::Address &address,
lldb_private::AddressClass addr_class,
const DataExtractor &extractor,
uint32_t data_offset,
lldb_private::ExecutionContextScope *exe_scope)
{
std::vector<char> out_string(256);
const uint8_t *data_start = extractor.GetDataStart();
m_disasm.Lock(this, exe_scope);
::LLVMDisasmContextRef disasm_context;
if (addr_class == eAddressClassCodeAlternateISA)
disasm_context = m_disasm.m_alternate_disasm_context;
else
disasm_context = m_disasm.m_disasm_context;
m_comment_stream.Clear();
size_t inst_size = ::LLVMDisasmInstruction(disasm_context,
const_cast<uint8_t*>(data_start) + data_offset,
extractor.GetByteSize() - data_offset,
address.GetFileAddress(),
out_string.data(),
out_string.size());
m_comment_stream.Flush();
m_no_comments = false;
m_comment.swap(m_comment_stream.GetString());
m_disasm.Unlock();
if (Reparse)
{
if (inst_size != m_raw_bytes.size())
return false;
}
else
{
if (!inst_size)
return false;
PopulateOpcode(extractor, data_offset, inst_size);
m_raw_bytes.resize(inst_size);
memcpy(m_raw_bytes.data(), data_start + data_offset, inst_size);
if (!s_regex_compiled)
{
::regcomp(&s_regex, "[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?", REG_EXTENDED);
s_regex_compiled = true;
}
::regmatch_t matches[3];
const char *out_data = out_string.data();
if (!::regexec(&s_regex, out_data, sizeof(matches) / sizeof(::regmatch_t), matches, 0))
{
if (matches[1].rm_so != -1)
m_opcode_name.assign(out_data + matches[1].rm_so, matches[1].rm_eo - matches[1].rm_so);
if (matches[2].rm_so != -1)
m_mnemocics.assign(out_data + matches[2].rm_so, matches[2].rm_eo - matches[2].rm_so);
}
m_is_valid = true;
}
return true;
}
bool m_is_valid;
DisassemblerLLVMC &m_disasm;
std::vector<uint8_t> m_raw_bytes;
bool m_no_comments;
StreamString m_comment_stream;
static bool s_regex_compiled;
static ::regex_t s_regex;
};
bool InstructionLLVMC::s_regex_compiled = false;
::regex_t InstructionLLVMC::s_regex;
// Plugin factory: builds a disassembler for "arch" and hands ownership to
// the caller, or returns NULL when no usable LLVM disassembler context
// could be created for that architecture.
Disassembler *
DisassemblerLLVMC::CreateInstance (const ArchSpec &arch)
{
    // The auto_ptr destroys the candidate automatically on the failure path.
    std::auto_ptr<DisassemblerLLVMC> candidate (new DisassemblerLLVMC(arch));

    const bool usable = (candidate.get() != NULL) && candidate->IsValid();
    if (!usable)
        return NULL;

    return candidate.release();
}
// Creates the LLVM disassembler context for "arch".  For ARM a second
// context using a Thumb triple is created as well; it is used for
// instructions whose address class is eAddressClassCodeAlternateISA.
//
// "this" is registered as the opaque DisInfo pointer so the static
// callbacks can forward to the OpInfo / SymbolLookup member functions.
DisassemblerLLVMC::DisassemblerLLVMC (const ArchSpec &arch) :
    Disassembler(arch),
    // The per-parse state is read by SymbolLookup(), so it must start out
    // NULL rather than indeterminate.
    m_exe_scope(NULL),
    m_inst(NULL),
    m_disasm_context(NULL),
    m_alternate_disasm_context(NULL)
{
    m_disasm_context = ::LLVMCreateDisasm(arch.GetTriple().getTriple().c_str(),
                                          (void*)this,
                                          /*TagType=*/1,
                                          DisassemblerLLVMC::OpInfoCallback,
                                          DisassemblerLLVMC::SymbolLookupCallback);

    if (arch.GetTriple().getArch() == llvm::Triple::arm)
    {
        m_alternate_disasm_context = ::LLVMCreateDisasm("thumbv7-apple-darwin",
                                                        (void*)this,
                                                        /*TagType=*/1,
                                                        DisassemblerLLVMC::OpInfoCallback,
                                                        DisassemblerLLVMC::SymbolLookupCallback);
    }
}
// Releases the LLVM disassembler contexts created by the constructor.
DisassemblerLLVMC::~DisassemblerLLVMC()
{
    if (m_disasm_context)
    {
        ::LLVMDisasmDispose(m_disasm_context);
        m_disasm_context = NULL;
    }

    // Only present for ARM targets.
    if (m_alternate_disasm_context)
    {
        ::LLVMDisasmDispose(m_alternate_disasm_context);
        m_alternate_disasm_context = NULL;
    }
}
// Decodes up to "num_instructions" instructions from "data", starting at
// "data_offset", and appends them to m_instruction_list (which is cleared
// first unless "append" is true).
//
// Decoding stops when the data is exhausted, the requested number of
// instructions has been produced, or an instruction fails to decode.
//
// Returns the number of bytes consumed, i.e. the distance between
// "data_offset" and the end of the last successfully decoded instruction.
size_t
DisassemblerLLVMC::DecodeInstructions (const Address &base_addr,
                                       const DataExtractor& data,
                                       uint32_t data_offset,
                                       uint32_t num_instructions,
                                       bool append)
{
    if (!append)
        m_instruction_list.Clear();

    if (!IsValid())
        return 0;

    uint32_t data_cursor = data_offset;
    const size_t data_byte_size = data.GetByteSize();
    uint32_t instructions_parsed = 0;

    // The cursor (not the fixed starting offset) must be compared with the
    // data size, otherwise running off the end of the data is never noticed
    // by the loop condition.
    while (data_cursor < data_byte_size && instructions_parsed < num_instructions)
    {
        // NOTE(review): the address is slid by the absolute cursor, which
        // assumes base_addr corresponds to offset 0 of "data" - confirm
        // against callers that pass a non-zero data_offset.
        Address instr_address = base_addr;
        instr_address.Slide(data_cursor);

        // The address class only matters when an alternate (Thumb) context exists.
        AddressClass address_class = eAddressClassUnknown;

        if (m_alternate_disasm_context)
            address_class = instr_address.GetAddressClass ();

        InstructionSP inst_sp(new InstructionLLVMC(*this,
                                                   instr_address,
                                                   address_class));

        const uint32_t inst_size = inst_sp->Decode(*this, data, data_cursor);

        // A size of zero means the bytes could not be decoded; stop here.
        if (!inst_size)
            break;

        m_instruction_list.Append(inst_sp);

        data_cursor += inst_size;
        instructions_parsed++;
    }

    return data_cursor - data_offset;
}
// One-time plugin setup: registers the factory with the plugin manager and
// initializes the LLVM target components used for disassembly.
void
DisassemblerLLVMC::Initialize()
{
    PluginManager::RegisterPlugin (GetPluginNameStatic(), GetPluginDescriptionStatic(), CreateInstance);

    // LLVM target registration.
    llvm::InitializeAllTargetInfos();
    llvm::InitializeAllTargetMCs();
    llvm::InitializeAllAsmParsers();
    llvm::InitializeAllDisassemblers();
}
// Plugin teardown: removes the factory registered by Initialize().
void
DisassemblerLLVMC::Terminate()
{
    PluginManager::UnregisterPlugin (&DisassemblerLLVMC::CreateInstance);
}
// Short name under which this plugin is registered.
const char *
DisassemblerLLVMC::GetPluginNameStatic()
{
    const char *plugin_name = "llvm";
    return plugin_name;
}
// Human readable description passed to the plugin manager on registration.
const char *
DisassemblerLLVMC::GetPluginDescriptionStatic()
{
    const char *description = "Disassembler that uses LLVM opcode tables to disassemble i386, x86_64 and ARM.";
    return description;
}
int DisassemblerLLVMC::OpInfoCallback (void *DisInfo,
uint64_t PC,
uint64_t Offset,
uint64_t Size,
int TagType,
void *TagBug)
{
return static_cast<DisassemblerLLVMC*>(DisInfo)->OpInfo(PC,
Offset,
Size,
TagType,
TagBug);
}
const char *DisassemblerLLVMC::SymbolLookupCallback(void *DisInfo,
uint64_t ReferenceValue,
uint64_t *ReferenceType,
uint64_t ReferencePC,
const char **ReferenceName)
{
return static_cast<DisassemblerLLVMC*>(DisInfo)->SymbolLookup(ReferenceValue,
ReferenceType,
ReferencePC,
ReferenceName);
}
// Operand info handler.  No symbolic operand information is provided: for
// TagType 1 the caller's LLVMOpInfo1 buffer is zeroed, other tag types are
// left untouched, and 0 is always returned.
//
// "TagBug" is the caller supplied buffer (it is only written when it is
// non-NULL).
int DisassemblerLLVMC::OpInfo (uint64_t PC,
                               uint64_t Offset,
                               uint64_t Size,
                               int TagType,
                               void *TagBug)
{
    // memset() is used instead of the legacy bzero(), which needs
    // <strings.h> and is not available on every platform.
    if (TagType == 1 && TagBug != NULL)
        ::memset (TagBug, 0, sizeof(::LLVMOpInfo1));

    return 0;
}
const char *DisassemblerLLVMC::SymbolLookup (uint64_t ReferenceValue,
uint64_t *ReferenceType,
uint64_t ReferencePC,
const char **ReferenceName)
{
const char *result_name = NULL;
uint64_t result_reference_type = LLVMDisassembler_ReferenceType_InOut_None;
const char *result_referred_name = NULL;
if (m_exe_scope && m_inst)
{
Address reference_address;
Target *target = m_exe_scope->CalculateTarget();
if (target)
{
if (!target->GetSectionLoadList().IsEmpty())
target->GetSectionLoadList().ResolveLoadAddress(ReferenceValue, reference_address);
else
target->GetImages().ResolveFileAddress(ReferenceValue, reference_address);
if (reference_address.IsValid())
{
SymbolContext reference_sc;
target->GetImages().ResolveSymbolContextForAddress(reference_address,
eSymbolContextFunction | eSymbolContextSymbol,
reference_sc);
StreamString ss;
const bool show_fullpaths = false;
const bool show_module = true;
const bool show_inlined_frames = false;
reference_sc.DumpStopContext(&ss,
m_exe_scope,
reference_address,
show_fullpaths,
show_module,
show_inlined_frames);
m_inst->AddReferencedAddress(ss.GetString());
}
}
}
*ReferenceType = result_reference_type;
*ReferenceName = result_referred_name;
return result_name;
}
//------------------------------------------------------------------
// PluginInterface protocol
//------------------------------------------------------------------

// Full name of this plugin.
const char *
DisassemblerLLVMC::GetPluginName()
{
    const char *full_name = "DisassemblerLLVMC";
    return full_name;
}
// Short plugin name; identical to the name used at registration time.
const char *
DisassemblerLLVMC::GetShortPluginName()
{
    const char *short_name = GetPluginNameStatic();
    return short_name;
}
// Version number reported through the PluginInterface protocol.
uint32_t
DisassemblerLLVMC::GetPluginVersion()
{
    const uint32_t plugin_version = 1;
    return plugin_version;
}

View File

@@ -0,0 +1,126 @@
//===-- DisassemblerLLVMC.h -------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef liblldb_DisassemblerLLVMC_h_
#define liblldb_DisassemblerLLVMC_h_
#include "llvm-c/Disassembler.h"
#include "lldb/Core/Address.h"
#include "lldb/Core/Disassembler.h"
#include "lldb/Core/PluginManager.h"
#include "lldb/Host/Mutex.h"
class InstructionLLVMC;
// Disassembler plugin built on the llvm-c Disassembler.h interface.
//
// The object owns one LLVM disassembler context for its architecture and,
// for ARM, an additional context created with a Thumb triple.  Decoded
// instructions are InstructionLLVMC objects, which are granted friend
// access so they can use the contexts and the Lock()/Unlock() pair below.
class DisassemblerLLVMC : public lldb_private::Disassembler
{
public:
//------------------------------------------------------------------
// Static Functions
//------------------------------------------------------------------
// Registers the plugin with the PluginManager and initializes the LLVM
// target components.
static void
Initialize();
// Unregisters the plugin from the PluginManager.
static void
Terminate();
static const char *
GetPluginNameStatic();
static const char *
GetPluginDescriptionStatic();
// Factory used by the PluginManager.  Returns a new disassembler for
// "arch", or NULL when no LLVM context could be created for it.
static lldb_private::Disassembler *
CreateInstance(const lldb_private::ArchSpec &arch);
DisassemblerLLVMC(const lldb_private::ArchSpec &arch);
virtual
~DisassemblerLLVMC();
// Decodes up to "num_instructions" instructions from "data" starting at
// "data_offset" into the instruction list (cleared first unless "append"
// is true).  Returns the number of bytes consumed.
size_t
DecodeInstructions (const lldb_private::Address &base_addr,
const lldb_private::DataExtractor& data,
uint32_t data_offset,
uint32_t num_instructions,
bool append);
//------------------------------------------------------------------
// PluginInterface protocol
//------------------------------------------------------------------
virtual const char *
GetPluginName();
virtual const char *
GetShortPluginName();
virtual uint32_t
GetPluginVersion();
protected:
friend class InstructionLLVMC;
// True when the primary LLVM disassembler context was created.
bool
IsValid()
{
return (m_disasm_context != NULL);
}
// Member-function side of OpInfoCallback.  "TagBug" is the caller
// supplied buffer that receives the operand information.
int OpInfo(uint64_t PC,
uint64_t Offset,
uint64_t Size,
int TagType,
void *TagBug);
// Member-function side of SymbolLookupCallback.  Uses m_inst and
// m_exe_scope, so it is only meaningful between Lock() and Unlock().
const char *SymbolLookup (uint64_t ReferenceValue,
uint64_t *ReferenceType,
uint64_t ReferencePC,
const char **ReferenceName);
// Static trampolines passed to LLVMCreateDisasm.  "DisInfo" is the
// DisassemblerLLVMC instance the call is forwarded to.
static int OpInfoCallback (void *DisInfo,
uint64_t PC,
uint64_t Offset,
uint64_t Size,
int TagType,
void *TagBug);
static const char *SymbolLookupCallback(void *DisInfo,
uint64_t ReferenceValue,
uint64_t *ReferenceType,
uint64_t ReferencePC,
const char **ReferenceName);
// Acquires the disassembler mutex and records which instruction and
// execution scope the LLVM callbacks should operate on.  Must be paired
// with Unlock().
void Lock(InstructionLLVMC *inst,
lldb_private::ExecutionContextScope *exe_scope)
{
m_mutex.Lock();
m_inst = inst;
m_exe_scope = exe_scope;
}
// Clears the per-call state set by Lock() and releases the mutex.
void Unlock()
{
m_exe_scope = NULL;
m_inst = NULL;
m_mutex.Unlock();
}
// Execution scope for the instruction currently being disassembled
// (set by Lock(), reset to NULL by Unlock()).
lldb_private::ExecutionContextScope *m_exe_scope;
// Instruction currently being disassembled (set by Lock(), reset to NULL
// by Unlock()).
InstructionLLVMC *m_inst;
// Held between Lock() and Unlock().
lldb_private::Mutex m_mutex;
// Context created from the architecture's triple.
::LLVMDisasmContextRef m_disasm_context;
// Thumb context; only created when the architecture is ARM, NULL otherwise.
::LLVMDisasmContextRef m_alternate_disasm_context;
};
#endif // liblldb_DisassemblerLLVMC_h_

View File

@@ -26,6 +26,7 @@
#include "Plugins/ABI/MacOSX-arm/ABIMacOSX_arm.h"
#include "Plugins/ABI/SysV-x86_64/ABISysV_x86_64.h"
#include "Plugins/Disassembler/llvm/DisassemblerLLVM.h"
#include "Plugins/Disassembler/llvm/DisassemblerLLVMC.h"
#include "Plugins/Instruction/ARM/EmulateInstructionARM.h"
#include "Plugins/SymbolVendor/MacOSX/SymbolVendorMacOSX.h"
#include "Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.h"
@@ -72,6 +73,7 @@
using namespace lldb;
using namespace lldb_private;
//#define USE_NEW_DISASSEMBLER
void
lldb_private::Initialize ()
@@ -91,7 +93,11 @@ lldb_private::Initialize ()
ABIMacOSX_i386::Initialize();
ABIMacOSX_arm::Initialize();
ABISysV_x86_64::Initialize();
#if defined (USE_NEW_DISASSEMBLER)
DisassemblerLLVMC::Initialize();
#else
DisassemblerLLVM::Initialize();
#endif
ObjectContainerBSDArchive::Initialize();
ObjectFileELF::Initialize();
SymbolFileDWARF::Initialize();
@@ -166,7 +172,11 @@ lldb_private::Terminate ()
ABIMacOSX_i386::Terminate();
ABIMacOSX_arm::Terminate();
ABISysV_x86_64::Terminate();
#if defined (USE_NEW_DISASSEMBLER)
DisassemblerLLVMC::Terminate();
#else
DisassemblerLLVM::Terminate();
#endif
ObjectContainerBSDArchive::Terminate();
ObjectFileELF::Terminate();
SymbolFileDWARF::Terminate();