suite: update test_mc.py to better handle output of different formats of MC & CS
This commit is contained in:
parent
6999d22892
commit
df7dde26c9
111
suite/test_mc.py
111
suite/test_mc.py
|
@ -4,17 +4,44 @@ import array, os.path, sys
|
||||||
from subprocess import Popen, PIPE, STDOUT
|
from subprocess import Popen, PIPE, STDOUT
|
||||||
from capstone import *
|
from capstone import *
|
||||||
|
|
||||||
|
|
||||||
|
# convert all hex numbers to decimal numbers in a text
|
||||||
|
def normalize_hex(a):
    """Return text *a* with every ``0x``-prefixed hex number in decimal.

    Each run of hex digits following a lowercase ``0x`` prefix is replaced
    by the decimal spelling of its value, e.g. ``'mov eax, 0x10'`` becomes
    ``'mov eax, 16'``.  A sign character in front of the prefix is left in
    place, so ``'-0x4'`` becomes ``'-4'``.

    Each literal is converted independently and in a single pass, so a
    short literal that is a textual prefix of a longer one (``0x1`` versus
    ``0x10``) cannot corrupt the longer one.  A bare ``0x`` that is not
    followed by any hex digit is left unchanged instead of raising
    ``ValueError``.

    :param a: text to normalize (may be empty).
    :return: a new string with the hex numbers rewritten in decimal.
    """
    # Imported locally so this function does not depend on the module's
    # top-of-file import list.
    import re

    def to_decimal(match):
        # group(1) holds only the digits, without the '0x' prefix.
        return str(int(match.group(1), 16))

    # Only a lowercase '0x' prefix is recognised; the digits themselves
    # may be of either case.
    return re.sub(r'0x([0-9a-fA-F]+)', to_decimal, a)
|
||||||
|
|
||||||
|
|
||||||
def run_mc(arch, hexcode, option, syntax=None):
|
def run_mc(arch, hexcode, option, syntax=None):
|
||||||
def normalize(text):
|
def normalize(text):
|
||||||
# remove tabs
|
# remove tabs
|
||||||
|
text = text.lower()
|
||||||
items = text.split()
|
items = text.split()
|
||||||
text = ' '.join(items)
|
text = ' '.join(items)
|
||||||
if arch == CS_ARCH_X86:
|
if arch == CS_ARCH_X86:
|
||||||
# remove comment after #
|
# remove comment after #
|
||||||
i = text.find('# ')
|
i = text.find('# ')
|
||||||
if i != -1:
|
if i != -1:
|
||||||
return text[:i].lower()
|
return text[:i].strip()
|
||||||
return text.lower()
|
if arch == CS_ARCH_ARM64:
|
||||||
|
# remove comment after //
|
||||||
|
i = text.find('// ')
|
||||||
|
if i != -1:
|
||||||
|
return text[:i].strip()
|
||||||
|
# remove some redundant spaces
|
||||||
|
text = text.replace('{ ', '{')
|
||||||
|
text = text.replace(' }', '}')
|
||||||
|
return text.strip()
|
||||||
|
|
||||||
#print("Trying to decode: %s" %hexcode)
|
#print("Trying to decode: %s" %hexcode)
|
||||||
if syntax:
|
if syntax:
|
||||||
|
@ -32,7 +59,7 @@ def run_mc(arch, hexcode, option, syntax=None):
|
||||||
#print lines
|
#print lines
|
||||||
if 'invalid' in lines[0]:
|
if 'invalid' in lines[0]:
|
||||||
#print 'invalid ----'
|
#print 'invalid ----'
|
||||||
return 'FAILED to disassemble'
|
return 'FAILED to disassemble (MC)'
|
||||||
else:
|
else:
|
||||||
#print 'OK:', lines[1]
|
#print 'OK:', lines[1]
|
||||||
return normalize(lines[1].strip())
|
return normalize(lines[1].strip())
|
||||||
|
@ -73,6 +100,9 @@ def test_file(fname):
|
||||||
"0": CS_MODE_ARM,
|
"0": CS_MODE_ARM,
|
||||||
"CS_MODE_ARM": CS_MODE_ARM,
|
"CS_MODE_ARM": CS_MODE_ARM,
|
||||||
"CS_MODE_THUMB": CS_MODE_THUMB,
|
"CS_MODE_THUMB": CS_MODE_THUMB,
|
||||||
|
"CS_MODE_ARM+CS_MODE_V8": CS_MODE_ARM+CS_MODE_V8,
|
||||||
|
"CS_MODE_THUMB+CS_MODE_V8": CS_MODE_THUMB+CS_MODE_V8,
|
||||||
|
"CS_MODE_THUMB+CS_MODE_MCLASS": CS_MODE_THUMB+CS_MODE_MCLASS,
|
||||||
"CS_MODE_LITTLE_ENDIAN": CS_MODE_LITTLE_ENDIAN,
|
"CS_MODE_LITTLE_ENDIAN": CS_MODE_LITTLE_ENDIAN,
|
||||||
"CS_MODE_BIG_ENDIAN": CS_MODE_BIG_ENDIAN,
|
"CS_MODE_BIG_ENDIAN": CS_MODE_BIG_ENDIAN,
|
||||||
"CS_MODE_32+CS_MODE_BIG_ENDIAN": CS_MODE_32+CS_MODE_BIG_ENDIAN,
|
"CS_MODE_32+CS_MODE_BIG_ENDIAN": CS_MODE_32+CS_MODE_BIG_ENDIAN,
|
||||||
|
@ -94,7 +124,10 @@ def test_file(fname):
|
||||||
("CS_ARCH_X86", "CS_MODE_32"): ['-triple=i386'],
|
("CS_ARCH_X86", "CS_MODE_32"): ['-triple=i386'],
|
||||||
("CS_ARCH_X86", "CS_MODE_64"): ['-triple=x86_64'],
|
("CS_ARCH_X86", "CS_MODE_64"): ['-triple=x86_64'],
|
||||||
("CS_ARCH_ARM", "CS_MODE_ARM"): ['-triple=armv7'],
|
("CS_ARCH_ARM", "CS_MODE_ARM"): ['-triple=armv7'],
|
||||||
("CS_ARCH_ARM", "CS_MODE_THUMB"): ['-triple=armv7'],
|
("CS_ARCH_ARM", "CS_MODE_THUMB"): ['-triple=thumbv7'],
|
||||||
|
("CS_ARCH_ARM", "CS_MODE_ARM+CS_MODE_V8"): ['-triple=armv8'],
|
||||||
|
("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_V8"): ['-triple=thumbv8'],
|
||||||
|
("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_MCLASS"): ['-triple=thumbv7m'],
|
||||||
("CS_ARCH_ARM64", "0"): ['-triple=aarch64'],
|
("CS_ARCH_ARM64", "0"): ['-triple=aarch64'],
|
||||||
("CS_ARCH_MIPS", "CS_MODE_32+CS_MODE_BIG_ENDIAN"): ['-triple=mips'],
|
("CS_ARCH_MIPS", "CS_MODE_32+CS_MODE_BIG_ENDIAN"): ['-triple=mips'],
|
||||||
("CS_ARCH_MIPS", "CS_MODE_32+CS_MODE_MICRO"): ['-triple=mipsel', '-mattr=+micromips'],
|
("CS_ARCH_MIPS", "CS_MODE_32+CS_MODE_MICRO"): ['-triple=mipsel', '-mattr=+micromips'],
|
||||||
|
@ -106,7 +139,7 @@ def test_file(fname):
|
||||||
("CS_ARCH_PPC", "CS_MODE_BIG_ENDIAN"): ['-triple=powerpc64'],
|
("CS_ARCH_PPC", "CS_MODE_BIG_ENDIAN"): ['-triple=powerpc64'],
|
||||||
('CS_ARCH_SPARC', 'CS_MODE_BIG_ENDIAN'): ['-triple=sparc'],
|
('CS_ARCH_SPARC', 'CS_MODE_BIG_ENDIAN'): ['-triple=sparc'],
|
||||||
('CS_ARCH_SPARC', 'CS_MODE_BIG_ENDIAN+CS_MODE_V9'): ['-triple=sparcv9'],
|
('CS_ARCH_SPARC', 'CS_MODE_BIG_ENDIAN+CS_MODE_V9'): ['-triple=sparcv9'],
|
||||||
('CS_ARCH_SYSZ', '0'): ['-triple=s390x'],
|
('CS_ARCH_SYSZ', '0'): ['-triple=s390x', '-mcpu=z196'],
|
||||||
}
|
}
|
||||||
|
|
||||||
#if not option in ('', 'None'):
|
#if not option in ('', 'None'):
|
||||||
|
@ -120,15 +153,19 @@ def test_file(fname):
|
||||||
# tell llvm-mc to use Intel syntax
|
# tell llvm-mc to use Intel syntax
|
||||||
mc_option = '-output-asm-variant=1'
|
mc_option = '-output-asm-variant=1'
|
||||||
|
|
||||||
if arch == 'CS_ARCH_ARM':
|
if arch == 'CS_ARCH_ARM' or arch == 'CS_ARCH_PPC' :
|
||||||
md.syntax = CS_OPT_SYNTAX_NOREGNAME
|
md.syntax = CS_OPT_SYNTAX_NOREGNAME
|
||||||
|
|
||||||
|
if fname.endswith('3DNow.s.cs'):
|
||||||
|
md.syntax = CS_OPT_SYNTAX_ATT
|
||||||
|
|
||||||
for line in lines[1:]:
|
for line in lines[1:]:
|
||||||
# ignore all the input lines having # in front.
|
# ignore all the input lines having # in front.
|
||||||
if line.startswith('#'):
|
if line.startswith('#'):
|
||||||
continue
|
continue
|
||||||
#print("Check %s" %line)
|
#print("Check %s" %line)
|
||||||
code = line.split(' = ')[0]
|
code = line.split(' = ')[0]
|
||||||
|
asm = ''.join(line.split(' = ')[1:])
|
||||||
hex_code = code.replace('0x', '')
|
hex_code = code.replace('0x', '')
|
||||||
hex_code = hex_code.replace(',', '')
|
hex_code = hex_code.replace(',', '')
|
||||||
hex_data = hex_code.decode('hex')
|
hex_data = hex_code.decode('hex')
|
||||||
|
@ -143,11 +180,63 @@ def test_file(fname):
|
||||||
else:
|
else:
|
||||||
cs_output = 'FAILED to disassemble'
|
cs_output = 'FAILED to disassemble'
|
||||||
|
|
||||||
mc_output = run_mc(archs[arch], code, mc_modes[(arch, mode)], mc_option)
|
cs_output2 = normalize_hex(cs_output)
|
||||||
if (cs_output != mc_output):
|
cs_output2 = cs_output2.replace(' ', '')
|
||||||
print("Mismatch: %s" %code)
|
|
||||||
print("\tMC = %s" %mc_output)
|
if arch == 'CS_ARCH_MIPS':
|
||||||
print("\tCS = %s" %cs_output)
|
# normalize register alias names
|
||||||
|
cs_output2 = cs_output2.replace('$at', '$1')
|
||||||
|
cs_output2 = cs_output2.replace('$v0', '$2')
|
||||||
|
cs_output2 = cs_output2.replace('$v1', '$3')
|
||||||
|
|
||||||
|
cs_output2 = cs_output2.replace('$a0', '$4')
|
||||||
|
cs_output2 = cs_output2.replace('$a1', '$5')
|
||||||
|
cs_output2 = cs_output2.replace('$a2', '$6')
|
||||||
|
cs_output2 = cs_output2.replace('$a3', '$7')
|
||||||
|
|
||||||
|
cs_output2 = cs_output2.replace('$t0', '$8')
|
||||||
|
cs_output2 = cs_output2.replace('$t1', '$9')
|
||||||
|
cs_output2 = cs_output2.replace('$t2', '$10')
|
||||||
|
cs_output2 = cs_output2.replace('$t3', '$11')
|
||||||
|
cs_output2 = cs_output2.replace('$t4', '$12')
|
||||||
|
cs_output2 = cs_output2.replace('$t5', '$13')
|
||||||
|
cs_output2 = cs_output2.replace('$t6', '$14')
|
||||||
|
cs_output2 = cs_output2.replace('$t7', '$15')
|
||||||
|
cs_output2 = cs_output2.replace('$t8', '$24')
|
||||||
|
cs_output2 = cs_output2.replace('$t9', '$25')
|
||||||
|
|
||||||
|
cs_output2 = cs_output2.replace('$s0', '$16')
|
||||||
|
cs_output2 = cs_output2.replace('$s1', '$17')
|
||||||
|
cs_output2 = cs_output2.replace('$s2', '$18')
|
||||||
|
cs_output2 = cs_output2.replace('$s3', '$19')
|
||||||
|
cs_output2 = cs_output2.replace('$s4', '$20')
|
||||||
|
cs_output2 = cs_output2.replace('$s5', '$21')
|
||||||
|
cs_output2 = cs_output2.replace('$s6', '$22')
|
||||||
|
cs_output2 = cs_output2.replace('$s7', '$23')
|
||||||
|
|
||||||
|
cs_output2 = cs_output2.replace('$k0', '$26')
|
||||||
|
cs_output2 = cs_output2.replace('$k1', '$27')
|
||||||
|
|
||||||
|
#print("Running MC ...")
|
||||||
|
if fname.endswith('thumb-fp-armv8.s.cs'):
|
||||||
|
mc_output = run_mc(archs[arch], code, ['-triple=thumbv8'], mc_option)
|
||||||
|
elif fname.endswith('mips64-alu-instructions.s.cs'):
|
||||||
|
mc_output = run_mc(archs[arch], code, ['-triple=mips64el', '-mcpu=mips64r2'], mc_option)
|
||||||
|
else:
|
||||||
|
mc_output = run_mc(archs[arch], code, mc_modes[(arch, mode)], mc_option)
|
||||||
|
mc_output2 = normalize_hex(mc_output)
|
||||||
|
if arch == 'CS_ARCH_MIPS':
|
||||||
|
mc_output2 = mc_output2.replace(' 0(', '(')
|
||||||
|
mc_output2 = mc_output2.replace(' ', '')
|
||||||
|
mc_output2 = mc_output2.replace('opaque', '')
|
||||||
|
|
||||||
|
|
||||||
|
if (cs_output2 != mc_output2):
|
||||||
|
asm = asm.replace(' ', '').strip().lower()
|
||||||
|
if asm != cs_output2:
|
||||||
|
print("Mismatch: %s" %line.strip())
|
||||||
|
print("\tMC = %s" %mc_output)
|
||||||
|
print("\tCS = %s" %cs_output)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
Loading…
Reference in New Issue