2013-11-27 12:11:31 +08:00
|
|
|
# Capstone Python bindings, by Nguyen Anh Quynh <aquynh@gmail.com>
|
|
|
|
|
|
|
|
import arm, arm64, mips, x86
|
|
|
|
|
|
|
|
# Public API exported by a star-import of this module.
__all__ = [
    # classes
    'Cs',
    'CsInsn',
    # CsError is raised by Cs, CsInsn and cs_disasm_quick, so callers need
    # to be able to name it in an ``except`` clause.
    'CsError',

    # functions
    'cs_disasm_quick',
    'cs_version',

    # architectures
    'CS_ARCH_ARM',
    'CS_ARCH_ARM64',
    'CS_ARCH_MIPS',
    'CS_ARCH_X86',

    # disasm modes
    'CS_MODE_LITTLE_ENDIAN',
    'CS_MODE_BIG_ENDIAN',
    'CS_MODE_16',
    'CS_MODE_32',
    'CS_MODE_64',
    'CS_MODE_ARM',
    'CS_MODE_THUMB',
    'CS_MODE_MICRO',
    'CS_MODE_N64',

    # option type and option values
    'CS_OPT_SYNTAX',
    'CS_OPT_SYNTAX_INTEL',
    'CS_OPT_SYNTAX_ATT',

    # error codes
    'CS_ERR_OK',
    'CS_ERR_MEM',
    'CS_ERR_ARCH',
    'CS_ERR_HANDLE',
    'CS_ERR_CSH',
    'CS_ERR_MODE',
    'CS_ERR_OPTION',
]
|
|
|
|
|
2013-12-03 22:18:28 +08:00
|
|
|
# Capstone C interface
|
2013-11-27 12:11:31 +08:00
|
|
|
# architectures
CS_ARCH_ARM = 0
CS_ARCH_ARM64 = 1
CS_ARCH_MIPS = 2
CS_ARCH_X86 = 3

# disasm mode
# Note: some modes deliberately share a value because they apply to
# different architectures (ARM/LITTLE_ENDIAN are 0, THUMB/MICRO are 1 << 4).
CS_MODE_LITTLE_ENDIAN = 0       # little-endian mode (default mode)
CS_MODE_ARM = 0                 # ARM mode
CS_MODE_16 = 1 << 1             # 16-bit mode (for X86, Mips)
CS_MODE_32 = 1 << 2             # 32-bit mode (for X86, Mips)
CS_MODE_64 = 1 << 3             # 64-bit mode (for X86, Mips)
CS_MODE_THUMB = 1 << 4          # ARM's Thumb mode, including Thumb-2
CS_MODE_MICRO = 1 << 4          # MicroMips mode (MIPS architecture)
CS_MODE_N64 = 1 << 5            # Nintendo-64 mode (MIPS architecture)
CS_MODE_BIG_ENDIAN = 1 << 31    # big-endian mode

# Capstone option type
CS_OPT_SYNTAX = 1               # asm syntax selection; value is one of CS_OPT_SYNTAX_*

# Capstone option value
CS_OPT_SYNTAX_INTEL = 1         # Intel X86 asm syntax (CS_ARCH_X86 arch)
CS_OPT_SYNTAX_ATT = 2           # ATT asm syntax (CS_ARCH_X86 arch)

# Capstone error type
CS_ERR_OK = 0       # No error: everything was fine
CS_ERR_MEM = 1      # Out-Of-Memory error: cs_open(), cs_disasm_dyn()
CS_ERR_ARCH = 2     # Unsupported architecture: cs_open()
CS_ERR_HANDLE = 3   # Invalid handle: cs_op_count(), cs_op_index()
CS_ERR_CSH = 4      # Invalid csh argument: cs_close(), cs_errno(), cs_option()
CS_ERR_MODE = 5     # Invalid/unsupported mode: cs_open()
CS_ERR_OPTION = 6   # Invalid/unsupported option: cs_option()
|
2013-11-27 12:11:31 +08:00
|
|
|
|
|
|
|
|
|
|
|
import ctypes, ctypes.util
|
|
|
|
from os.path import split, join
|
|
|
|
import distutils.sysconfig
|
|
|
|
|
|
|
|
|
|
|
|
# Locate and load the Capstone shared library; the handle ends up in _cs.
# Candidate file names cover Windows (.dll), Linux (.so) and OSX (.dylib).
_lib_path = split(__file__)[0]
_all_libs = ['capstone.dll', 'libcapstone.so', 'libcapstone.dylib']
_found = False

# first try: the directory containing this module
for _lib in _all_libs:
    _lib_file = join(_lib_path, _lib)
    try:
        _cs = ctypes.cdll.LoadLibrary(_lib_file)
    except OSError:
        continue
    _found = True
    break

# second try: bare file name, i.e. whatever default paths the loader searches
if not _found:
    for _lib in _all_libs:
        try:
            _cs = ctypes.cdll.LoadLibrary(_lib)
        except OSError:
            continue
        _found = True
        break

# last try: the "capstone" directory inside the python lib directory
if not _found:
    _lib_path = distutils.sysconfig.get_python_lib()
    for _lib in _all_libs:
        _lib_file = join(_lib_path, 'capstone', _lib)
        try:
            _cs = ctypes.cdll.LoadLibrary(_lib_file)
        except OSError:
            continue
        _found = True
        break

# nothing worked: the module is unusable without the native library
if not _found:
    raise ImportError("ERROR: fail to load the dynamic library.")
|
|
|
|
|
|
|
|
|
|
|
|
class _cs_arch(ctypes.Union):
    """Union of the per-architecture instruction detail structures.

    Only one member is meaningful for a given instruction; CsInsn picks
    the member matching the architecture it was created for.
    """

    _fields_ = (
        ('arm64', arm64._cs_arm64),
        ('arm', arm._cs_arm),
        ('mips', mips._cs_mips),
        ('x86', x86._cs_x86),
    )
|
|
|
|
|
|
|
|
# low-level structure for C code
|
|
|
|
class _cs_insn(ctypes.Structure):
    """ctypes layout of one disassembled instruction as exchanged with the C library.

    Field order and types define the binary layout, so they must not be
    reordered or resized independently of the native library.
    """

    _fields_ = (
        # identification and location
        ('id', ctypes.c_uint),
        ('address', ctypes.c_uint64),
        ('size', ctypes.c_uint16),
        # raw bytes and textual form
        ('bytes', ctypes.c_ubyte * 16),
        ('mnemonic', ctypes.c_char * 32),
        ('op_str', ctypes.c_char * 96),
        # fixed-size arrays, each paired with the number of valid entries
        ('regs_read', ctypes.c_uint * 32),
        ('regs_read_count', ctypes.c_uint),
        ('regs_write', ctypes.c_uint * 32),
        ('regs_write_count', ctypes.c_uint),
        ('groups', ctypes.c_uint * 8),
        ('groups_count', ctypes.c_uint),
        # architecture-specific details
        ('arch', _cs_arch),
    )
|
|
|
|
|
|
|
|
# setup all the function prototype
|
|
|
|
def _setup_prototype(lib, fname, restype, *argtypes):
|
|
|
|
getattr(lib, fname).restype = restype
|
|
|
|
getattr(lib, fname).argtypes = argtypes
|
|
|
|
|
2013-11-29 18:46:03 +08:00
|
|
|
# Declare restype/argtypes for every library function used by this module.
# The handle (csh) is passed around as a size_t throughout.

# handle life cycle
_setup_prototype(_cs, "cs_open", ctypes.c_int,
                 ctypes.c_uint, ctypes.c_uint, ctypes.POINTER(ctypes.c_size_t))
_setup_prototype(_cs, "cs_disasm_dyn", ctypes.c_size_t,
                 ctypes.c_size_t, ctypes.c_char_p, ctypes.c_size_t,
                 ctypes.c_uint64, ctypes.c_size_t,
                 ctypes.POINTER(ctypes.POINTER(_cs_insn)))
_setup_prototype(_cs, "cs_free", None, ctypes.c_void_p)
_setup_prototype(_cs, "cs_close", ctypes.c_int, ctypes.c_size_t)

# name lookups
_setup_prototype(_cs, "cs_reg_name", ctypes.c_char_p,
                 ctypes.c_size_t, ctypes.c_uint)
_setup_prototype(_cs, "cs_insn_name", ctypes.c_char_p,
                 ctypes.c_size_t, ctypes.c_uint)

# per-instruction queries
_setup_prototype(_cs, "cs_insn_group", ctypes.c_bool,
                 ctypes.c_size_t, ctypes.POINTER(_cs_insn), ctypes.c_uint)
_setup_prototype(_cs, "cs_reg_read", ctypes.c_bool,
                 ctypes.c_size_t, ctypes.POINTER(_cs_insn), ctypes.c_uint)
_setup_prototype(_cs, "cs_reg_write", ctypes.c_bool,
                 ctypes.c_size_t, ctypes.POINTER(_cs_insn), ctypes.c_uint)
_setup_prototype(_cs, "cs_op_count", ctypes.c_int,
                 ctypes.c_size_t, ctypes.POINTER(_cs_insn), ctypes.c_uint)
_setup_prototype(_cs, "cs_op_index", ctypes.c_int,
                 ctypes.c_size_t, ctypes.POINTER(_cs_insn), ctypes.c_uint,
                 ctypes.c_uint)

# misc
_setup_prototype(_cs, "cs_version", None,
                 ctypes.POINTER(ctypes.c_int), ctypes.POINTER(ctypes.c_int))
_setup_prototype(_cs, "cs_errno", ctypes.c_int, ctypes.c_size_t)
_setup_prototype(_cs, "cs_option", ctypes.c_int,
                 ctypes.c_size_t, ctypes.c_int, ctypes.c_size_t)
|
2013-11-27 12:11:31 +08:00
|
|
|
|
|
|
|
|
|
|
|
def cs_version():
    """Return the library version as a ``(major, minor)`` tuple of ints."""
    # The C function fills two int out-parameters instead of returning a value.
    major, minor = ctypes.c_int(), ctypes.c_int()
    _cs.cs_version(ctypes.byref(major), ctypes.byref(minor))
    return (major.value, minor.value)
|
|
|
|
|
|
|
|
|
2013-12-06 15:26:07 +08:00
|
|
|
# access to error code via @errno of CsError
|
|
|
|
class CsError(Exception):
    """Exception raised when a Capstone library call reports a failure.

    The numeric error code passed to the constructor (the callers in this
    module pass CS_ERR_* status values) is available as the ``errno``
    attribute.
    """

    def __init__(self, errno):
        # Hand the code to Exception as well, so that args, str() and
        # repr() (and therefore tracebacks) actually show it.
        Exception.__init__(self, errno)
        self.errno = errno
|
|
|
|
|
|
|
|
|
2013-11-27 12:11:31 +08:00
|
|
|
# quick & dirty Python function to disasm raw binary code
|
|
|
|
def cs_disasm_quick(arch, mode, code, offset, count = 0):
    """Disassemble raw binary ``code`` without creating a Cs object.

    This is a generator: a temporary handle is opened when iteration
    starts and closed when the generator finishes or is closed.

    arch, mode -- values handed to cs_open() (CS_ARCH_* / CS_MODE_*).
    code       -- buffer to disassemble; its len() is passed as the size.
    offset     -- address value passed through to cs_disasm_dyn().
    count      -- passed through to cs_disasm_dyn() (default 0).

    Yields one _cs_insn structure per decoded instruction; yields nothing
    when no instruction was decoded.

    Raises CsError if cs_open() fails, or if cs_close() fails after a
    normal run.
    """
    csh = ctypes.c_size_t()
    status = _cs.cs_open(arch, mode, ctypes.byref(csh))
    if status != CS_ERR_OK:
        raise CsError(status)

    try:
        all_insn = ctypes.POINTER(_cs_insn)()
        res = _cs.cs_disasm_dyn(csh, code, len(code), offset, count, ctypes.byref(all_insn))
        if res > 0:
            try:
                for i in xrange(res):
                    # all_insn[i] is only a view into the library-owned
                    # buffer released by cs_free() below; hand out a private
                    # copy so the result stays valid after the generator ends.
                    insn = _cs_insn()
                    ctypes.memmove(ctypes.byref(insn), ctypes.byref(all_insn[i]),
                                   ctypes.sizeof(_cs_insn))
                    yield insn
            finally:
                # runs even if the consumer stops early or raises
                _cs.cs_free(all_insn)
    finally:
        # always release the handle, whatever happened above
        status = _cs.cs_close(csh)

    # only reached on normal completion, so a close failure never masks
    # an exception that is already propagating
    if status != CS_ERR_OK:
        raise CsError(status)
|
2013-11-27 12:11:31 +08:00
|
|
|
|
|
|
|
|
|
|
|
# Python-style class to disasm code
|
2013-12-06 15:26:07 +08:00
|
|
|
class CsInsn(object):
    """Python-style view of one disassembled instruction.

    The constructor copies the generic fields of the low-level structure
    into plain attributes (id, address, size, mnemonic, op_str, regs_read,
    regs_write, groups, bytes) and adds architecture-specific attributes
    obtained from the matching arch module's get_arch_info().
    """

    def __init__(self, csh, all_info, arch):
        """Build the instruction from the low-level structure ``all_info``.

        csh      -- handle the instruction was produced with.
        all_info -- _cs_insn structure describing the instruction.
        arch     -- CS_ARCH_* value selecting which arch details to decode.
        """
        self.id = all_info.id
        self.address = all_info.address
        self.size = all_info.size
        self.mnemonic = all_info.mnemonic
        self.op_str = all_info.op_str
        # the arrays are fixed-size; keep only the entries marked valid
        self.regs_read = all_info.regs_read[:all_info.regs_read_count]
        self.regs_write = all_info.regs_write[:all_info.regs_write_count]
        self.groups = all_info.groups[:all_info.groups_count]
        self.bytes = bytearray(all_info.bytes)[:self.size]

        if arch == CS_ARCH_ARM:
            (self.cc, self.update_flags, self.writeback, self.operands) = \
                arm.get_arch_info(all_info.arch.arm)
        elif arch == CS_ARCH_ARM64:
            (self.cc, self.update_flags, self.writeback, self.operands) = \
                arm64.get_arch_info(all_info.arch.arm64)
        elif arch == CS_ARCH_X86:
            (self.prefix, self.segment, self.opcode, self.op_size, self.addr_size, \
                self.disp_size, self.imm_size, self.modrm, self.sib, self.disp, \
                self.sib_index, self.sib_scale, self.sib_base, self.operands) = \
                x86.get_arch_info(all_info.arch.x86)
        elif arch == CS_ARCH_MIPS:
            self.operands = mips.get_arch_info(all_info.arch.mips)

        # Save the original insn for later use. Keep a private copy rather
        # than a reference: Cs.disasm() hands in a view into a buffer that it
        # releases with cs_free() once iteration is over, and the methods
        # below may be called after that point.
        self.raw_insn = _cs_insn()
        ctypes.memmove(ctypes.byref(self.raw_insn), ctypes.byref(all_info),
                       ctypes.sizeof(_cs_insn))
        self.csh = csh

    def errno(self):
        """Return the value of cs_errno() for this instruction's handle."""
        return _cs.cs_errno(self.csh)

    def reg_name(self, reg_id):
        """Return the library's name for register ``reg_id``."""
        return _cs.cs_reg_name(self.csh, reg_id)

    def insn_name(self):
        """Return the library's name for this instruction's id."""
        return _cs.cs_insn_name(self.csh, self.id)

    def group(self, group_id):
        """Return the result of cs_insn_group() for ``group_id``."""
        return _cs.cs_insn_group(self.csh, self.raw_insn, group_id)

    def reg_read(self, reg_id):
        """Return the result of cs_reg_read() for ``reg_id``."""
        return _cs.cs_reg_read(self.csh, self.raw_insn, reg_id)

    def reg_write(self, reg_id):
        """Return the result of cs_reg_write() for ``reg_id``."""
        return _cs.cs_reg_write(self.csh, self.raw_insn, reg_id)

    def op_count(self, op_type):
        """Return the number of operands having operand type ``op_type``.

        Raises CsError (carrying cs_errno()) if the library returns a
        negative result.
        """
        res = _cs.cs_op_count(self.csh, self.raw_insn, op_type)
        if res < 0:
            raise CsError(_cs.cs_errno(self.csh))
        return res

    def op_index(self, op_type, position):
        """Return the result of cs_op_index() for ``op_type`` and ``position``.

        Raises CsError (carrying cs_errno()) if the library returns a
        negative result.
        """
        res = _cs.cs_op_index(self.csh, self.raw_insn, op_type, position)
        if res < 0:
            raise CsError(_cs.cs_errno(self.csh))
        return res
|
|
|
|
|
2013-11-27 12:11:31 +08:00
|
|
|
|
2013-12-06 15:26:07 +08:00
|
|
|
class Cs(object):
    """Disassembler bound to one architecture/mode pair.

    Opens a library handle on construction and closes it when the object
    is garbage-collected.
    """

    def __init__(self, arch, mode):
        """Open a handle for ``arch`` / ``mode``.

        Raises CsError with the cs_open() status if the handle cannot be
        opened.
        """
        self.arch, self.mode = arch, mode
        self.csh = ctypes.c_size_t()
        status = _cs.cs_open(arch, mode, ctypes.byref(self.csh))
        if status != CS_ERR_OK:
            # No handle was opened: make sure __del__ will not try to close it.
            self.csh = None
            raise CsError(status)

        if arch == CS_ARCH_X86:
            # Intel syntax is default for X86
            self._syntax = CS_OPT_SYNTAX_INTEL
        else:
            self._syntax = None

    def __del__(self):
        # Nothing to release if construction failed before/while opening.
        if getattr(self, 'csh', None) is None:
            return
        status = _cs.cs_close(self.csh)
        if status != CS_ERR_OK:
            raise CsError(status)

    #def option(self, opt_type, opt_value):
    #    return _cs.cs_option(self.csh, opt_type, opt_value)

    @property
    def syntax(self):
        """Current syntax setting: a CS_OPT_SYNTAX_* value, or None.

        Starts as CS_OPT_SYNTAX_INTEL for CS_ARCH_X86 and None otherwise.
        """
        return self._syntax

    @syntax.setter
    def syntax(self, style):
        # Apply first; remember the new style only if the library accepted it.
        status = _cs.cs_option(self.csh, CS_OPT_SYNTAX, style)
        if status != CS_ERR_OK:
            raise CsError(status)
        # save syntax
        self._syntax = style

    def disasm(self, code, offset, count = 0):
        """Disassemble ``code`` and yield one CsInsn per decoded instruction.

        code   -- buffer to disassemble; its len() is passed as the size.
        offset -- address value passed through to cs_disasm_dyn().
        count  -- passed through to cs_disasm_dyn() (default 0).

        Yields nothing when no instruction was decoded. Raises CsError if
        nothing was decoded and cs_errno() reports an error.
        """
        all_insn = ctypes.POINTER(_cs_insn)()
        res = _cs.cs_disasm_dyn(self.csh, code, len(code), offset, count, ctypes.byref(all_insn))
        if res > 0:
            try:
                for i in xrange(res):
                    yield CsInsn(self.csh, all_insn[i], self.arch)
            finally:
                # Release the buffer even if the consumer stops iterating
                # early or raises inside its loop body.
                _cs.cs_free(all_insn)
        else:
            status = _cs.cs_errno(self.csh)
            if status != CS_ERR_OK:
                raise CsError(status)
            # Nothing decoded and no error: end the generator without
            # yielding, so callers never receive a non-instruction item.
|
2013-12-06 15:26:07 +08:00
|
|
|
|