mirror of
https://gitlab.com/qemu-project/capstone.git
synced 2025-09-17 02:01:15 +08:00
suite: add testsuite tool 'test_mc.sh' to compare output of Capstone & LLVM
This commit is contained in:
16
suite/README
16
suite/README
@ -2,6 +2,12 @@ This directory contains some tools used by developers of Capstone project.
|
|||||||
Average users should ignore all the contents here.
|
Average users should ignore all the contents here.
|
||||||
|
|
||||||
|
|
||||||
|
- arm/
|
||||||
|
Test some ARM's special input.
|
||||||
|
|
||||||
|
- MC/
|
||||||
|
Input used to test various architectures & modes.
|
||||||
|
|
||||||
- benchmark.py
|
- benchmark.py
|
||||||
This script benchmarks Python binding by disassembling some random code.
|
This script benchmarks Python binding by disassembling some random code.
|
||||||
|
|
||||||
@ -17,3 +23,13 @@ Average users should ignore all the contents here.
|
|||||||
- fuzz.py
|
- fuzz.py
|
||||||
This simple script disassembles random code for all archs (or selected arch)
|
This simple script disassembles random code for all archs (or selected arch)
|
||||||
in order to find segfaults.
|
in order to find segfaults.
|
||||||
|
|
||||||
|
- test_mc.sh
|
||||||
|
This script compares the output of Capstone with LLVM's llvm-mc with the
|
||||||
|
input coming from MC/. This relies on test_mc.py to do all the hard works.
|
||||||
|
|
||||||
|
- x86odd.py
|
||||||
|
Test some tricky X86 instructions.
|
||||||
|
|
||||||
|
- ppcbranch.py
|
||||||
|
Test some tricky branch PPC instructions.
|
||||||
|
167
suite/test_mc.py
Executable file
167
suite/test_mc.py
Executable file
@ -0,0 +1,167 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
|
||||||
|
import array, os.path, sys
|
||||||
|
from subprocess import Popen, PIPE, STDOUT
|
||||||
|
from capstone import *
|
||||||
|
|
||||||
|
def run_mc(arch, hexcode, option, syntax=None):
|
||||||
|
def normalize(text):
|
||||||
|
# remove tabs
|
||||||
|
items = text.split()
|
||||||
|
text = ' '.join(items)
|
||||||
|
# remove comment after #
|
||||||
|
if arch == CS_ARCH_X86:
|
||||||
|
i = text.find('# ')
|
||||||
|
if i != -1:
|
||||||
|
return text[:i].lower()
|
||||||
|
return text.lower()
|
||||||
|
|
||||||
|
#print("Trying to decode: %s" %hexcode)
|
||||||
|
if syntax:
|
||||||
|
if arch == CS_ARCH_MIPS:
|
||||||
|
p = Popen(['llvm-mc', '-disassemble', '-print-imm-hex', '-mattr=+msa', syntax] + option, stdout=PIPE, stdin=PIPE, stderr=STDOUT)
|
||||||
|
else:
|
||||||
|
p = Popen(['llvm-mc', '-disassemble', '-print-imm-hex', syntax] + option, stdout=PIPE, stdin=PIPE, stderr=STDOUT)
|
||||||
|
else:
|
||||||
|
if arch == CS_ARCH_MIPS:
|
||||||
|
#p = Popen(['llvm-mc', '-disassemble', '-print-imm-hex', '-mattr=+msa,micromips', option], stdout=PIPE, stdin=PIPE, stderr=STDOUT)
|
||||||
|
#p = Popen(['llvm-mc', '-disassemble', '-print-imm-hex', '-mattr=+msa', option], stdout=PIPE, stdin=PIPE, stderr=STDOUT)
|
||||||
|
p = Popen(['llvm-mc', '-disassemble', '-print-imm-hex', '-mattr=+msa'] + option, stdout=PIPE, stdin=PIPE, stderr=STDOUT)
|
||||||
|
else:
|
||||||
|
p = Popen(['llvm-mc', '-disassemble', '-print-imm-hex'] + option, stdout=PIPE, stdin=PIPE, stderr=STDOUT)
|
||||||
|
output = p.communicate(input=hexcode)[0]
|
||||||
|
lines = output.split('\n')
|
||||||
|
#print lines
|
||||||
|
if 'invalid' in lines[0]:
|
||||||
|
#print 'invalid ----'
|
||||||
|
return 'FAILED to disassemble'
|
||||||
|
else:
|
||||||
|
#print 'OK:', lines[1]
|
||||||
|
return normalize(lines[1].strip())
|
||||||
|
|
||||||
|
def test_file(fname):
|
||||||
|
print("Test %s" %fname);
|
||||||
|
f = open(fname)
|
||||||
|
lines = f.readlines()
|
||||||
|
f.close()
|
||||||
|
|
||||||
|
if not lines[0].startswith('# '):
|
||||||
|
print("ERROR: decoding information is missing")
|
||||||
|
return
|
||||||
|
|
||||||
|
# skip '# ' at the front, then split line to get out hexcode
|
||||||
|
# Note: option can be '', or 'None'
|
||||||
|
#print lines[0]
|
||||||
|
#print lines[0][2:].split(', ')
|
||||||
|
(arch, mode, option) = lines[0][2:].split(', ')
|
||||||
|
mode = mode.replace(' ', '')
|
||||||
|
option = option.strip()
|
||||||
|
|
||||||
|
archs = {
|
||||||
|
"CS_ARCH_ARM": CS_ARCH_ARM,
|
||||||
|
"CS_ARCH_ARM64": CS_ARCH_ARM64,
|
||||||
|
"CS_ARCH_MIPS": CS_ARCH_MIPS,
|
||||||
|
"CS_ARCH_PPC": CS_ARCH_PPC,
|
||||||
|
"CS_ARCH_SPARC": CS_ARCH_SPARC,
|
||||||
|
"CS_ARCH_SYSZ": CS_ARCH_SYSZ,
|
||||||
|
"CS_ARCH_X86": CS_ARCH_X86,
|
||||||
|
"CS_ARCH_XCORE": CS_ARCH_XCORE,
|
||||||
|
}
|
||||||
|
|
||||||
|
modes = {
|
||||||
|
"CS_MODE_16": CS_MODE_16,
|
||||||
|
"CS_MODE_32": CS_MODE_32,
|
||||||
|
"CS_MODE_64": CS_MODE_64,
|
||||||
|
"0": CS_MODE_ARM,
|
||||||
|
"CS_MODE_ARM": CS_MODE_ARM,
|
||||||
|
"CS_MODE_THUMB": CS_MODE_THUMB,
|
||||||
|
"CS_MODE_LITTLE_ENDIAN": CS_MODE_LITTLE_ENDIAN,
|
||||||
|
"CS_MODE_BIG_ENDIAN": CS_MODE_BIG_ENDIAN,
|
||||||
|
"CS_MODE_32+CS_MODE_BIG_ENDIAN": CS_MODE_32+CS_MODE_BIG_ENDIAN,
|
||||||
|
"CS_MODE_32+CS_MODE_LITTLE_ENDIAN": CS_MODE_32+CS_MODE_LITTLE_ENDIAN,
|
||||||
|
"CS_MODE_64+CS_MODE_LITTLE_ENDIAN": CS_MODE_64+CS_MODE_LITTLE_ENDIAN,
|
||||||
|
"CS_MODE_64+CS_MODE_BIG_ENDIAN": CS_MODE_64+CS_MODE_BIG_ENDIAN,
|
||||||
|
"CS_MODE_32+CS_MODE_MICRO": CS_MODE_32+CS_MODE_MICRO,
|
||||||
|
"CS_MODE_32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN": CS_MODE_32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN,
|
||||||
|
"CS_MODE_32+CS_MODE_BIG_ENDIAN+CS_MODE_MICRO": CS_MODE_32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN,
|
||||||
|
"CS_MODE_BIG_ENDIAN+CS_MODE_V9": CS_MODE_BIG_ENDIAN + CS_MODE_V9,
|
||||||
|
}
|
||||||
|
|
||||||
|
options = {
|
||||||
|
"CS_OPT_SYNTAX_ATT": CS_OPT_SYNTAX_ATT,
|
||||||
|
"CS_OPT_SYNTAX_NOREGNAME": CS_OPT_SYNTAX_NOREGNAME,
|
||||||
|
}
|
||||||
|
|
||||||
|
mc_modes = {
|
||||||
|
("CS_ARCH_X86", "CS_MODE_32"): ['-triple=i386'],
|
||||||
|
("CS_ARCH_X86", "CS_MODE_64"): ['-triple=x86_64'],
|
||||||
|
("CS_ARCH_ARM", "CS_MODE_ARM"): ['-triple=armv7'],
|
||||||
|
("CS_ARCH_ARM", "CS_MODE_THUMB"): ['-triple=armv7'],
|
||||||
|
("CS_ARCH_ARM64", "0"): ['-triple=aarch64'],
|
||||||
|
("CS_ARCH_MIPS", "CS_MODE_32+CS_MODE_BIG_ENDIAN"): ['-triple=mips'],
|
||||||
|
("CS_ARCH_MIPS", "CS_MODE_32+CS_MODE_MICRO"): ['-triple=mipsel', '-mattr=+micromips'],
|
||||||
|
("CS_ARCH_MIPS", "CS_MODE_64"): ['-triple=mips64el'],
|
||||||
|
("CS_ARCH_MIPS", "CS_MODE_32"): ['-triple=mipsel'],
|
||||||
|
("CS_ARCH_MIPS", "CS_MODE_64+CS_MODE_BIG_ENDIAN"): ['-triple=mips64'],
|
||||||
|
("CS_ARCH_MIPS", "CS_MODE_32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN"): ['-triple=mips', '-mattr=+micromips'],
|
||||||
|
("CS_ARCH_MIPS", "CS_MODE_32+CS_MODE_BIG_ENDIAN+CS_MODE_MICRO"): ['-triple=mips', '-mattr=+micromips'],
|
||||||
|
("CS_ARCH_PPC", "CS_MODE_BIG_ENDIAN"): ['-triple=powerpc64'],
|
||||||
|
('CS_ARCH_SPARC', 'CS_MODE_BIG_ENDIAN'): ['-triple=sparc'],
|
||||||
|
('CS_ARCH_SPARC', 'CS_MODE_BIG_ENDIAN+CS_MODE_V9'): ['-triple=sparcv9'],
|
||||||
|
('CS_ARCH_SYSZ', '0'): ['-triple=s390x'],
|
||||||
|
}
|
||||||
|
|
||||||
|
#if not option in ('', 'None'):
|
||||||
|
# print archs[arch], modes[mode], options[option]
|
||||||
|
|
||||||
|
#print(arch, mode, option)
|
||||||
|
md = Cs(archs[arch], modes[mode])
|
||||||
|
|
||||||
|
mc_option = None
|
||||||
|
if arch == 'CS_ARCH_X86':
|
||||||
|
mc_option = '-output-asm-variant=1'
|
||||||
|
|
||||||
|
if arch == 'CS_ARCH_ARM':
|
||||||
|
md.syntax = CS_OPT_SYNTAX_NOREGNAME
|
||||||
|
|
||||||
|
#if not option in ('', 'None'): # ATT syntax?
|
||||||
|
# #md.syntax = options[option]
|
||||||
|
|
||||||
|
for line in lines[1:]:
|
||||||
|
if line.startswith('#'):
|
||||||
|
continue
|
||||||
|
#print("Check %s" %line)
|
||||||
|
code = line.split(' = ')[0]
|
||||||
|
hex_code = code.replace('0x', '')
|
||||||
|
hex_code = hex_code.replace(',', '')
|
||||||
|
hex_data = hex_code.decode('hex')
|
||||||
|
#hex_bytes = array.array('B', hex_data)
|
||||||
|
|
||||||
|
x = list(md.disasm(hex_data, 0))
|
||||||
|
if len(x) > 0:
|
||||||
|
if x[0].op_str != '':
|
||||||
|
cs_output = "%s %s" %(x[0].mnemonic, x[0].op_str)
|
||||||
|
else:
|
||||||
|
cs_output = x[0].mnemonic
|
||||||
|
else:
|
||||||
|
cs_output = 'FAILED to disassemble'
|
||||||
|
|
||||||
|
mc_output = run_mc(archs[arch], code, mc_modes[(arch, mode)], mc_option)
|
||||||
|
if (cs_output != mc_output):
|
||||||
|
print("Mismatch: %s" %code)
|
||||||
|
print("\tMC = %s" %mc_output)
|
||||||
|
print("\tCS = %s" %cs_output)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
if len(sys.argv) == 1:
|
||||||
|
fnames = sys.stdin.readlines()
|
||||||
|
for fname in fnames:
|
||||||
|
test_file(fname.strip())
|
||||||
|
else:
|
||||||
|
#print("Usage: ./test_mc.py <input-file.s.cs>")
|
||||||
|
test_file(sys.argv[1])
|
||||||
|
#run_mc('0x33', '-triple=i386')
|
||||||
|
#run_mc('0x0f,0x0e', '-triple=i386')
|
||||||
|
#run_mc('0x0f,0x0f,0xca,0xae', '-triple=i386', '-output-asm-variant=1')
|
||||||
|
|
15
suite/test_mc.sh
Executable file
15
suite/test_mc.sh
Executable file
@ -0,0 +1,15 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
# This script test all architectures by default.
|
||||||
|
# At the output are all the mismatches between Capstone (CS) & LLVM (MC).
|
||||||
|
# While most differences coming from the fact that Capstone uses more friendly
|
||||||
|
# number format, some mismatches might be because Capstone is based on older
|
||||||
|
# version of LLVM (which should be fixed in the next release)
|
||||||
|
|
||||||
|
find MC/ -name *.cs | ./test_mc.py
|
||||||
|
|
||||||
|
# To test just one architecture, specify the corresponsing dir:
|
||||||
|
# $ find MC/X86 -name *.cs | ./test_mc.py
|
||||||
|
|
||||||
|
# To test just one input file, run test_mc.py with that file:
|
||||||
|
# $ ./test_mc.py MC/X86/x86-32-fma3.s.cs
|
Reference in New Issue
Block a user