capstone/bindings/const_generator.py

169 lines
6.1 KiB
Python
Raw Normal View History

# Capstone Disassembler Engine
# By Dang Hoang Vu, 2013
2014-04-11 17:00:33 +08:00
from __future__ import print_function
import sys, re
INCL_DIR = '../include/capstone/'
2018-03-31 17:29:22 +08:00
include = [ 'arm.h', 'arm64.h', 'm68k.h', 'mips.h', 'x86.h', 'ppc.h', 'sparc.h', 'systemz.h', 'xcore.h', 'tms320c64x.h', 'm680x.h', 'evm.h' ]
template = {
'java': {
'header': "// For Capstone Engine. AUTO-GENERATED FILE, DO NOT EDIT\npackage capstone;\n\npublic class %s_const {\n",
'footer': "}",
'line_format': '\tpublic static final int %s = %s;\n',
'out_file': './java/capstone/%s_const.java',
# prefixes for constant filenames of all archs - case sensitive
'arm.h': 'Arm',
'arm64.h': 'Arm64',
'm68k.h': 'M68k',
'mips.h': 'Mips',
'x86.h': 'X86',
'ppc.h': 'Ppc',
2014-03-10 14:37:08 +08:00
'sparc.h': 'Sparc',
2014-03-23 11:17:24 +08:00
'systemz.h': 'Sysz',
2014-05-26 23:47:04 +08:00
'xcore.h': 'Xcore',
'tms320c64x.h': 'TMS320C64x',
M680X: Target ready for pull request (#1034) * Added new M680X target. Supports M6800/1/2/3/9, HD6301 * M680X: Reformat for coding guide lines. Set alphabetical order in HACK.TXT * M680X: Prepare for python binding. Move cs_m680x, m680x_insn to m680x_info. Chec > k cpu type, no default. * M680X: Add python bindings. Added python tests. * M680X: Added cpu types to usage message. * cstool: Avoid segfault for invalid <arch+mode>. * Make test_m680x.c/test_m680x.py output comparable (diff params: -bu). Keep xprint.py untouched. * M680X: Update CMake/make for m680x support. Update .gitignore. * M680X: Reduce compiler warnings. * M680X: Reduce compiler warnings. * M680X: Reduce compiler warnings. * M680X: Make test_m680x.c/test_m680x.py output comparable (diff params: -bu). * M680X: Add ocaml bindings and tests. * M680X: Add java bindings and tests. * M680X: Added tests for all indexed addressing modes. C/Python/Ocaml * M680X: Naming, use page1 for PAGE1 instructions (without prefix). * M680X: Naming, use page1 for PAGE1 instructions (without prefix). * M680X: Used M680X_FIRST_OP_IN_MNEM in tests C/python/java/ocaml. * M680X: Added access property to cs_m680x_op. * M680X: Added operand size. * M680X: Remove compiler warnings. * M680X: Added READ/WRITE access property per operator. * M680X: Make reg_inherent_hdlr independent of CPU type. * M680X: Add HD6309 support + bug fixes * M680X: Remove errors and warning. * M680X: Add Bcc/LBcc to group BRAREL (relative branch). * M680X: Add group JUMP to BVS/BVC/LBVS/LBVC. Remove BRAREL from BRN/LBRN. * M680X: Remove LBRN from group BRAREL. * M680X: Refactored cpu_type initialization for better readability. * M680X: Add two operands for insn having two reg. in mnemonic. e.g. ABX. * M680X: Remove typo in cstool.c * M680X: Some format improvements in changed_regs. * M680X: Remove insn id string list from tests (C/python/java/ocaml). * M680X: SEXW, set access of reg. D to WRITE. * M680X: Sort changed_regs in increasing m680x_insn order. * M680X: Add M68HC11 support + Reduced from two to one INDEXED operand. * M680X: cstool, also write '(in mnemonic)' for second reg. operand. * M680X: Add BRN/LBRN to group JUMP and BRAREL. * M680X: For Bcc/LBcc/BRSET/BRCLR set reg. CC to read access. * M680X: Correctly print negative immediate values with option CS_OPT_UNSIGNED. * M680X: Rename some instruction handlers. * M680X: Add M68HC05 support. * M680X: Dont print prefix '<' for direct addr. mode. * M680X: Add M68HC08 support + resorted tables + bug fixes. * M680X: Add Freescale HCS08 support. * M680X: Changed group names, avoid spaces. * M680X: Refactoring, rename addessing mode handlers. * M680X: indexed addr. mode, changed pre/post inc-/decrement representation. * M680X: Rename some M6809/HD6309 specific functions. * M680X: Add CPU12 (68HC12/HCS12) support. * M680X: Correctly display illegal instruction as FCB . * M680X: bugfix: BRA/BRN/BSR/LBRA/LBRN/LBSR does not read CC reg. * M680X: bugfix: Correctly check for sufficient code size for M6809 indexed addressing. * M680X: Better support for changing insn id within handler for addessing mode. * M680X: Remove warnings. * M680X: In set_changed_regs_read_write_counts use own access_mode. * M680X: Split cpu specific tables into separate *.inc files. * M680X: Remove warnings. * M680X: Removed address_mode. Addressing mode is available in operand.type * M680X: Bugfix: BSET/BCLR/BRSET/BRCLR correct read/modify CC reg. * M680X: Remove register TMP1. It is first visible in CPU12X. * M680X: Performance improvement + bug fixes. * M680X: Performance improvement, make cpu_tables const static. * M680X: Simplify operand decoding by using two handlers. * M680X: Replace M680X_OP_INDEX by M680X_OP_CONSTANT + bugfix in java/python/ocaml bindings. * M680X: Format with astyle. * M680X: Update documentation. * M680X: Corrected author for m680x specific files. * M680X: Make max. number of architectures single source.
2017-10-21 21:44:36 +08:00
'm680x.h': 'M680x',
2018-03-31 17:29:22 +08:00
'evm.h': 'Evm',
'comment_open': '\t//',
'comment_close': '',
},
'python': {
'header': "# For Capstone Engine. AUTO-GENERATED FILE, DO NOT EDIT [%s_const.py]\n",
'footer': "",
'line_format': '%s = %s\n',
'out_file': './python/capstone/%s_const.py',
# prefixes for constant filenames of all archs - case sensitive
'arm.h': 'arm',
'arm64.h': 'arm64',
2015-08-04 00:45:08 +08:00
'm68k.h': 'm68k',
'mips.h': 'mips',
'x86.h': 'x86',
'ppc.h': 'ppc',
2014-03-10 14:37:08 +08:00
'sparc.h': 'sparc',
2014-03-23 11:17:24 +08:00
'systemz.h': 'sysz',
2014-05-26 23:47:04 +08:00
'xcore.h': 'xcore',
'tms320c64x.h': 'tms320c64x',
M680X: Target ready for pull request (#1034) * Added new M680X target. Supports M6800/1/2/3/9, HD6301 * M680X: Reformat for coding guide lines. Set alphabetical order in HACK.TXT * M680X: Prepare for python binding. Move cs_m680x, m680x_insn to m680x_info. Chec > k cpu type, no default. * M680X: Add python bindings. Added python tests. * M680X: Added cpu types to usage message. * cstool: Avoid segfault for invalid <arch+mode>. * Make test_m680x.c/test_m680x.py output comparable (diff params: -bu). Keep xprint.py untouched. * M680X: Update CMake/make for m680x support. Update .gitignore. * M680X: Reduce compiler warnings. * M680X: Reduce compiler warnings. * M680X: Reduce compiler warnings. * M680X: Make test_m680x.c/test_m680x.py output comparable (diff params: -bu). * M680X: Add ocaml bindings and tests. * M680X: Add java bindings and tests. * M680X: Added tests for all indexed addressing modes. C/Python/Ocaml * M680X: Naming, use page1 for PAGE1 instructions (without prefix). * M680X: Naming, use page1 for PAGE1 instructions (without prefix). * M680X: Used M680X_FIRST_OP_IN_MNEM in tests C/python/java/ocaml. * M680X: Added access property to cs_m680x_op. * M680X: Added operand size. * M680X: Remove compiler warnings. * M680X: Added READ/WRITE access property per operator. * M680X: Make reg_inherent_hdlr independent of CPU type. * M680X: Add HD6309 support + bug fixes * M680X: Remove errors and warning. * M680X: Add Bcc/LBcc to group BRAREL (relative branch). * M680X: Add group JUMP to BVS/BVC/LBVS/LBVC. Remove BRAREL from BRN/LBRN. * M680X: Remove LBRN from group BRAREL. * M680X: Refactored cpu_type initialization for better readability. * M680X: Add two operands for insn having two reg. in mnemonic. e.g. ABX. * M680X: Remove typo in cstool.c * M680X: Some format improvements in changed_regs. * M680X: Remove insn id string list from tests (C/python/java/ocaml). * M680X: SEXW, set access of reg. D to WRITE. * M680X: Sort changed_regs in increasing m680x_insn order. * M680X: Add M68HC11 support + Reduced from two to one INDEXED operand. * M680X: cstool, also write '(in mnemonic)' for second reg. operand. * M680X: Add BRN/LBRN to group JUMP and BRAREL. * M680X: For Bcc/LBcc/BRSET/BRCLR set reg. CC to read access. * M680X: Correctly print negative immediate values with option CS_OPT_UNSIGNED. * M680X: Rename some instruction handlers. * M680X: Add M68HC05 support. * M680X: Dont print prefix '<' for direct addr. mode. * M680X: Add M68HC08 support + resorted tables + bug fixes. * M680X: Add Freescale HCS08 support. * M680X: Changed group names, avoid spaces. * M680X: Refactoring, rename addessing mode handlers. * M680X: indexed addr. mode, changed pre/post inc-/decrement representation. * M680X: Rename some M6809/HD6309 specific functions. * M680X: Add CPU12 (68HC12/HCS12) support. * M680X: Correctly display illegal instruction as FCB . * M680X: bugfix: BRA/BRN/BSR/LBRA/LBRN/LBSR does not read CC reg. * M680X: bugfix: Correctly check for sufficient code size for M6809 indexed addressing. * M680X: Better support for changing insn id within handler for addessing mode. * M680X: Remove warnings. * M680X: In set_changed_regs_read_write_counts use own access_mode. * M680X: Split cpu specific tables into separate *.inc files. * M680X: Remove warnings. * M680X: Removed address_mode. Addressing mode is available in operand.type * M680X: Bugfix: BSET/BCLR/BRSET/BRCLR correct read/modify CC reg. * M680X: Remove register TMP1. It is first visible in CPU12X. * M680X: Performance improvement + bug fixes. * M680X: Performance improvement, make cpu_tables const static. * M680X: Simplify operand decoding by using two handlers. * M680X: Replace M680X_OP_INDEX by M680X_OP_CONSTANT + bugfix in java/python/ocaml bindings. * M680X: Format with astyle. * M680X: Update documentation. * M680X: Corrected author for m680x specific files. * M680X: Make max. number of architectures single source.
2017-10-21 21:44:36 +08:00
'm680x.h': 'm680x',
2018-03-31 17:29:22 +08:00
'evm.h': 'evm',
'comment_open': '#',
'comment_close': '',
},
'ocaml': {
'header': "(* For Capstone Engine. AUTO-GENERATED FILE, DO NOT EDIT [%s_const.ml] *)\n",
'footer': "",
'line_format': 'let _%s = %s;;\n',
'out_file': './ocaml/%s_const.ml',
# prefixes for constant filenames of all archs - case sensitive
'arm.h': 'arm',
'arm64.h': 'arm64',
'mips.h': 'mips',
'm68k.h': 'm68k',
'x86.h': 'x86',
'ppc.h': 'ppc',
'sparc.h': 'sparc',
'systemz.h': 'sysz',
'xcore.h': 'xcore',
'tms320c64x.h': 'tms320c64x',
M680X: Target ready for pull request (#1034) * Added new M680X target. Supports M6800/1/2/3/9, HD6301 * M680X: Reformat for coding guide lines. Set alphabetical order in HACK.TXT * M680X: Prepare for python binding. Move cs_m680x, m680x_insn to m680x_info. Chec > k cpu type, no default. * M680X: Add python bindings. Added python tests. * M680X: Added cpu types to usage message. * cstool: Avoid segfault for invalid <arch+mode>. * Make test_m680x.c/test_m680x.py output comparable (diff params: -bu). Keep xprint.py untouched. * M680X: Update CMake/make for m680x support. Update .gitignore. * M680X: Reduce compiler warnings. * M680X: Reduce compiler warnings. * M680X: Reduce compiler warnings. * M680X: Make test_m680x.c/test_m680x.py output comparable (diff params: -bu). * M680X: Add ocaml bindings and tests. * M680X: Add java bindings and tests. * M680X: Added tests for all indexed addressing modes. C/Python/Ocaml * M680X: Naming, use page1 for PAGE1 instructions (without prefix). * M680X: Naming, use page1 for PAGE1 instructions (without prefix). * M680X: Used M680X_FIRST_OP_IN_MNEM in tests C/python/java/ocaml. * M680X: Added access property to cs_m680x_op. * M680X: Added operand size. * M680X: Remove compiler warnings. * M680X: Added READ/WRITE access property per operator. * M680X: Make reg_inherent_hdlr independent of CPU type. * M680X: Add HD6309 support + bug fixes * M680X: Remove errors and warning. * M680X: Add Bcc/LBcc to group BRAREL (relative branch). * M680X: Add group JUMP to BVS/BVC/LBVS/LBVC. Remove BRAREL from BRN/LBRN. * M680X: Remove LBRN from group BRAREL. * M680X: Refactored cpu_type initialization for better readability. * M680X: Add two operands for insn having two reg. in mnemonic. e.g. ABX. * M680X: Remove typo in cstool.c * M680X: Some format improvements in changed_regs. * M680X: Remove insn id string list from tests (C/python/java/ocaml). * M680X: SEXW, set access of reg. D to WRITE. * M680X: Sort changed_regs in increasing m680x_insn order. * M680X: Add M68HC11 support + Reduced from two to one INDEXED operand. * M680X: cstool, also write '(in mnemonic)' for second reg. operand. * M680X: Add BRN/LBRN to group JUMP and BRAREL. * M680X: For Bcc/LBcc/BRSET/BRCLR set reg. CC to read access. * M680X: Correctly print negative immediate values with option CS_OPT_UNSIGNED. * M680X: Rename some instruction handlers. * M680X: Add M68HC05 support. * M680X: Dont print prefix '<' for direct addr. mode. * M680X: Add M68HC08 support + resorted tables + bug fixes. * M680X: Add Freescale HCS08 support. * M680X: Changed group names, avoid spaces. * M680X: Refactoring, rename addessing mode handlers. * M680X: indexed addr. mode, changed pre/post inc-/decrement representation. * M680X: Rename some M6809/HD6309 specific functions. * M680X: Add CPU12 (68HC12/HCS12) support. * M680X: Correctly display illegal instruction as FCB . * M680X: bugfix: BRA/BRN/BSR/LBRA/LBRN/LBSR does not read CC reg. * M680X: bugfix: Correctly check for sufficient code size for M6809 indexed addressing. * M680X: Better support for changing insn id within handler for addessing mode. * M680X: Remove warnings. * M680X: In set_changed_regs_read_write_counts use own access_mode. * M680X: Split cpu specific tables into separate *.inc files. * M680X: Remove warnings. * M680X: Removed address_mode. Addressing mode is available in operand.type * M680X: Bugfix: BSET/BCLR/BRSET/BRCLR correct read/modify CC reg. * M680X: Remove register TMP1. It is first visible in CPU12X. * M680X: Performance improvement + bug fixes. * M680X: Performance improvement, make cpu_tables const static. * M680X: Simplify operand decoding by using two handlers. * M680X: Replace M680X_OP_INDEX by M680X_OP_CONSTANT + bugfix in java/python/ocaml bindings. * M680X: Format with astyle. * M680X: Update documentation. * M680X: Corrected author for m680x specific files. * M680X: Make max. number of architectures single source.
2017-10-21 21:44:36 +08:00
'm680x.h': 'm680x',
2018-03-31 17:29:22 +08:00
'evm.h': 'evm',
'comment_open': '(*',
'comment_close': ' *)',
},
}
# markup for comments to be added to autogen files
MARKUP = '//>'
def gen(lang):
global include, INCL_DIR
print('Generating bindings for', lang)
2014-09-22 00:07:58 +08:00
templ = template[lang]
2015-08-03 02:09:41 +08:00
print('Generating bindings for', lang)
for target in include:
prefix = templ[target]
outfile = open(templ['out_file'] %(prefix), 'wb') # open as binary prevents windows newlines
outfile.write((templ['header'] % (prefix)).encode("utf-8"))
lines = open(INCL_DIR + target).readlines()
count = 0
for line in lines:
line = line.strip()
if line.startswith(MARKUP): # markup for comments
outfile.write(("\n%s%s%s\n" %(templ['comment_open'], \
line.replace(MARKUP, ''), \
templ['comment_close']) ).encode("utf-8"))
continue
if line == '' or line.startswith('//'):
continue
if line.startswith('#define '):
line = line[8:] #cut off define
xline = re.split('\s+', line, 1) #split to at most 2 express
if len(xline) != 2:
continue
if '(' in xline[0] or ')' in xline[0]: #does it look like a function
continue
xline.insert(1, '=') # insert an = so the expression below can parse it
line = ' '.join(xline)
if not line.startswith(prefix.upper()):
continue
tmp = line.strip().split(',')
for t in tmp:
t = t.strip()
if not t or t.startswith('//'): continue
# hacky: remove type cast (uint64_t)
t = t.replace('(uint64_t)', '')
t = re.sub(r'\((\d+)ULL << (\d+)\)', r'\1 << \2', t) # (1ULL<<1) to 1 << 1
f = re.split('\s+', t)
if f[0].startswith(prefix.upper()):
if len(f) > 1 and f[1] not in ('//', '///<', '='):
2014-04-11 17:00:33 +08:00
print("Error: Unable to convert %s" % f)
continue
elif len(f) > 1 and f[1] == '=':
rhs = ''.join(f[2:])
else:
rhs = str(count)
count += 1
try:
count = int(rhs) + 1
if (count == 1):
outfile.write(("\n").encode("utf-8"))
except ValueError:
if lang == 'ocaml':
2014-09-22 00:07:58 +08:00
# ocaml uses lsl for '<<', lor for '|'
rhs = rhs.replace('<<', ' lsl ')
rhs = rhs.replace('|', ' lor ')
2014-09-22 00:07:58 +08:00
# ocaml variable has _ as prefix
if rhs[0].isalpha():
rhs = '_' + rhs
outfile.write((templ['line_format'] %(f[0].strip(), rhs)).encode("utf-8"))
outfile.write((templ['footer']).encode("utf-8"))
outfile.close()
def main():
try:
2015-08-03 02:09:41 +08:00
if sys.argv[1] == 'all':
for key in template.keys():
gen(key)
else:
gen(sys.argv[1])
except:
raise RuntimeError("Unsupported binding %s" % sys.argv[1])
if __name__ == "__main__":
if len(sys.argv) < 2:
2015-08-03 02:09:41 +08:00
print("Usage:", sys.argv[0], " <bindings: java|python|ocaml|all>")
sys.exit(1)
main()