diff --git a/arch/AArch64/AArch64Disassembler.c b/arch/AArch64/AArch64Disassembler.c index 9bcac860..2bf51363 100644 --- a/arch/AArch64/AArch64Disassembler.c +++ b/arch/AArch64/AArch64Disassembler.c @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -/* Capstone Disassembler Engine */ -/* By Nguyen Anh Quynh , 2013> */ +/* Capstone Disassembly Engine */ +/* By Nguyen Anh Quynh , 2013-2014 */ #include // DEBUG #include @@ -289,7 +289,8 @@ static DecodeStatus _getInstruction(cs_struct *ud, MCInst *MI, return MCDisassembler_Fail; } -bool AArch64_getInstruction(csh ud, unsigned char *code, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info) +bool AArch64_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, + MCInst *instr, uint16_t *size, uint64_t address, void *info) { DecodeStatus status = _getInstruction((cs_struct *)ud, instr, code, code_len, diff --git a/arch/AArch64/AArch64Disassembler.h b/arch/AArch64/AArch64Disassembler.h index 1318a681..a8c1d652 100644 --- a/arch/AArch64/AArch64Disassembler.h +++ b/arch/AArch64/AArch64Disassembler.h @@ -1,5 +1,5 @@ -/* Capstone Disassembler Engine */ -/* By Nguyen Anh Quynh , 2013> */ +/* Capstone Disassembly Engine */ +/* By Nguyen Anh Quynh , 2013-2014 */ #ifndef CS_AARCH64_DISASSEMBLER_H #define CS_AARCH64_DISASSEMBLER_H @@ -12,7 +12,7 @@ void AArch64_init(MCRegisterInfo *MRI); -bool AArch64_getInstruction(csh ud, const uint8_t *code, size_t code_len, +bool AArch64_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info); #endif diff --git a/arch/ARM/ARMDisassembler.c b/arch/ARM/ARMDisassembler.c index f8b460c7..cc44c09d 100644 --- a/arch/ARM/ARMDisassembler.c +++ b/arch/ARM/ARMDisassembler.c @@ -854,7 +854,7 @@ static DecodeStatus _Thumb_getInstruction(cs_struct *ud, MCInst *MI, const uint8 return MCDisassembler_Fail; } -bool 
Thumb_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst *instr, +bool Thumb_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info) { DecodeStatus status = _Thumb_getInstruction((cs_struct *)ud, instr, code, code_len, size, address); @@ -863,7 +863,7 @@ bool Thumb_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst * return status != MCDisassembler_Fail; } -bool ARM_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst *instr, +bool ARM_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info) { DecodeStatus status = _ARM_getInstruction((cs_struct *)ud, instr, code, code_len, size, address); diff --git a/arch/ARM/ARMDisassembler.h b/arch/ARM/ARMDisassembler.h index 585c6a44..606481e7 100644 --- a/arch/ARM/ARMDisassembler.h +++ b/arch/ARM/ARMDisassembler.h @@ -9,9 +9,9 @@ void ARM_init(MCRegisterInfo *MRI); -bool ARM_getInstruction(csh handle, const uint8_t *code, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info); +bool ARM_getInstruction(csh handle, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info); -bool Thumb_getInstruction(csh handle, const uint8_t *code, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info); +bool Thumb_getInstruction(csh handle, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info); uint64_t ARM_getFeatureBits(int mode); diff --git a/arch/Mips/MipsDisassembler.c b/arch/Mips/MipsDisassembler.c index f880a7f0..e14a67be 100644 --- a/arch/Mips/MipsDisassembler.c +++ b/arch/Mips/MipsDisassembler.c @@ -280,7 +280,7 @@ static DecodeStatus MipsDisassembler_getInstruction(int mode, MCInst *instr, return MCDisassembler_Fail; } -bool Mips_getInstruction(csh ud, 
const uint8_t *code, size_t code_len, MCInst *instr, +bool Mips_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info) { cs_struct *handle = (cs_struct *)(uintptr_t)ud; @@ -294,7 +294,7 @@ bool Mips_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst *i } static DecodeStatus Mips64Disassembler_getInstruction(int mode, MCInst *instr, - unsigned char *code, size_t code_len, + const uint8_t *code, size_t code_len, uint16_t *Size, uint64_t Address, bool isBigEndian, MCRegisterInfo *MRI) { @@ -320,7 +320,7 @@ static DecodeStatus Mips64Disassembler_getInstruction(int mode, MCInst *instr, return MCDisassembler_Fail; } -bool Mips64_getInstruction(csh ud, unsigned char *code, size_t code_len, MCInst *instr, +bool Mips64_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info) { cs_struct *handle = (cs_struct *)(uintptr_t)ud; diff --git a/arch/Mips/MipsDisassembler.h b/arch/Mips/MipsDisassembler.h index df50d46c..8f19ba53 100644 --- a/arch/Mips/MipsDisassembler.h +++ b/arch/Mips/MipsDisassembler.h @@ -11,10 +11,10 @@ void Mips_init(MCRegisterInfo *MRI); -bool Mips_getInstruction(csh handle, const uint8_t *code, size_t code_len, +bool Mips_getInstruction(csh handle, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info); -bool Mips64_getInstruction(csh handle, const uint8_t *code, size_t code_len, +bool Mips64_getInstruction(csh handle, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info); #endif diff --git a/arch/PowerPC/PPCDisassembler.c b/arch/PowerPC/PPCDisassembler.c index 518a4a3d..da4aedae 100644 --- a/arch/PowerPC/PPCDisassembler.c +++ b/arch/PowerPC/PPCDisassembler.c @@ -7,8 +7,8 @@ // 
//===----------------------------------------------------------------------===// -/* Capstone Disassembler Engine */ -/* By Nguyen Anh Quynh , 2013> */ +/* Capstone Disassembly Engine */ +/* By Nguyen Anh Quynh , 2013-2014 */ #include // DEBUG #include @@ -299,7 +299,8 @@ static DecodeStatus getInstruction(MCInst *MI, return MCDisassembler_Fail; } -bool PPC_getInstruction(csh ud, unsigned char *code, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info) +bool PPC_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, + MCInst *instr, uint16_t *size, uint64_t address, void *info) { DecodeStatus status = getInstruction(instr, code, code_len, diff --git a/arch/PowerPC/PPCDisassembler.h b/arch/PowerPC/PPCDisassembler.h index f9e670a7..f05522a5 100644 --- a/arch/PowerPC/PPCDisassembler.h +++ b/arch/PowerPC/PPCDisassembler.h @@ -1,5 +1,5 @@ -/* Capstone Disassembler Engine */ -/* By Nguyen Anh Quynh , 2013> */ +/* Capstone Disassembly Engine */ +/* By Nguyen Anh Quynh , 2013-2014 */ #ifndef CS_PPCDISASSEMBLER_H #define CS_PPCDISASSEMBLER_H @@ -12,7 +12,7 @@ void PPC_init(MCRegisterInfo *MRI); -bool PPC_getInstruction(csh ud, const uint8_t *code, size_t code_len, +bool PPC_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info); #endif diff --git a/arch/Sparc/SparcDisassembler.c b/arch/Sparc/SparcDisassembler.c index 77821b4d..1601c5d0 100644 --- a/arch/Sparc/SparcDisassembler.c +++ b/arch/Sparc/SparcDisassembler.c @@ -200,7 +200,7 @@ static DecodeStatus DecodeSWAP(MCInst *Inst, unsigned insn, uint64_t Address, #include "SparcGenDisassemblerTables.inc" /// readInstruction - read four bytes and return 32 bit word. 
-static DecodeStatus readInstruction32(unsigned char *code, size_t len, uint32_t *Insn) +static DecodeStatus readInstruction32(const uint8_t *code, size_t len, uint32_t *Insn) { uint8_t Bytes[4]; @@ -219,7 +219,7 @@ static DecodeStatus readInstruction32(unsigned char *code, size_t len, uint32_t return MCDisassembler_Success; } -bool Sparc_getInstruction(csh ud, unsigned char *code, size_t code_len, MCInst *MI, +bool Sparc_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *MI, uint16_t *size, uint64_t address, void *info) { uint32_t Insn; diff --git a/arch/Sparc/SparcDisassembler.h b/arch/Sparc/SparcDisassembler.h index bafa2307..d36735fc 100644 --- a/arch/Sparc/SparcDisassembler.h +++ b/arch/Sparc/SparcDisassembler.h @@ -12,7 +12,7 @@ void Sparc_init(MCRegisterInfo *MRI); -bool Sparc_getInstruction(csh ud, const uint8_t *code, size_t code_len, +bool Sparc_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info); #endif diff --git a/arch/SystemZ/SystemZDisassembler.c b/arch/SystemZ/SystemZDisassembler.c index af01f749..f86c5ad4 100644 --- a/arch/SystemZ/SystemZDisassembler.c +++ b/arch/SystemZ/SystemZDisassembler.c @@ -295,7 +295,7 @@ static DecodeStatus decodeBDLAddr64Disp12Len8Operand(MCInst *Inst, uint64_t Fiel #define GET_SUBTARGETINFO_ENUM #include "SystemZGenSubtargetInfo.inc" #include "SystemZGenDisassemblerTables.inc" -bool SystemZ_getInstruction(csh ud, unsigned char *code, size_t code_len, MCInst *MI, +bool SystemZ_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *MI, uint16_t *size, uint64_t address, void *info) { uint64_t Inst; diff --git a/arch/SystemZ/SystemZDisassembler.h b/arch/SystemZ/SystemZDisassembler.h index e77a7385..37527914 100644 --- a/arch/SystemZ/SystemZDisassembler.h +++ b/arch/SystemZ/SystemZDisassembler.h @@ -12,7 +12,7 @@ void SystemZ_init(MCRegisterInfo *MRI); -bool 
SystemZ_getInstruction(csh ud, const uint8_t *code, size_t code_len, +bool SystemZ_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info); #endif diff --git a/arch/X86/X86Disassembler.c b/arch/X86/X86Disassembler.c index 6c0b2e06..223d4d08 100644 --- a/arch/X86/X86Disassembler.c +++ b/arch/X86/X86Disassembler.c @@ -720,16 +720,104 @@ static void update_pub_insn(cs_insn_flat *pub, InternalInstruction *inter) pub->x86.sib_base = x86_map_sib_base(inter->sibBase); } +// classify a byte into its prefix group (or 0 if it is not a prefix) +static uint8_t prefix_group(uint8_t c) +{ + switch (c) { + default: + return 0; + case 0xf0: // lock + case 0xf2: // repne + case 0xf3: // rep + return 1; + case 0x2e: // CS segment override, or branch not taken (Jcc) + case 0x36: // SS segment override + case 0x3e: // DS segment override, or branch taken (Jcc) + case 0x26: // ES segment override + case 0x64: // FS segment override + case 0x65: // GS segment override + return 2; + case 0x66: // operand-size override + return 3; + case 0x67: // address-size override + return 4; + } +} + // Public interface for the disassembler -bool X86_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *_info) +bool X86_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, + MCInst *instr, uint16_t *size, uint64_t address, void *_info) { cs_struct *handle = (cs_struct *)(uintptr_t)ud; InternalInstruction insn; struct reader_info info; int ret; bool result; + size_t i; + int count = 0; + uint8_t p; + uint8_t *buffer; - info.code = code; + // hack: shuffle LOCK/REP/REPNE prefixes to the front. + // this is because LLVM makes a cut at these prefixes to create a new insn. 
+ if (*modcode != NULL) + // so we actually work on the modified buffer + buffer = *modcode; + else + buffer = (uint8_t *)code; + + // find the first non-prefix byte + for (i = 0; i < code_len; i++) { + p = prefix_group(buffer[i]); + if (p == 1) + count++; + else if (p == 0) { + // the first ever non-prefix byte + // ignore if there is no prefix from Group 1 (LOCK/REP/REPNE) + if (i == 0 || count == 0) + break; + else { + // x86 instruction has no more than 16 bytes + uint8_t b1, b2; + size_t j; + uint8_t *prefixes; + + // create @modcode for modifying if we didn't do that before + if (*modcode == NULL) { + uint8_t *tmpbuf = cs_mem_malloc(code_len); + // copy @code to @modcode + memcpy(tmpbuf, code, code_len); + buffer = tmpbuf; + *modcode = tmpbuf; + } + + // save all prefix bytes in original code + prefixes = cs_mem_malloc(i); + memcpy(prefixes, buffer, i); + + b1 = 0; + b2 = count; + for (j = 0; j < i; j++) { + if (prefix_group(prefixes[j]) == 1) { + // this is one of LOCK/REP/REPNE, so put it at the front + buffer[b1] = prefixes[j]; + b1++; + } else { + // put this prefix at the back, after LOCK/REP/REPNE + buffer[b2] = prefixes[j]; + b2++; + } + } + + cs_mem_free(prefixes); + + // done, break out of this loop + break; + } + } + } + + info.code = buffer; info.size = code_len; info.offset = address; @@ -753,6 +841,7 @@ bool X86_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst *in if (ret) { *size = (uint16_t)(insn.readerCursor - address); + return false; } else { *size = (uint16_t)insn.length; diff --git a/arch/X86/X86Disassembler.h b/arch/X86/X86Disassembler.h index 7c1f47c0..8a851909 100644 --- a/arch/X86/X86Disassembler.h +++ b/arch/X86/X86Disassembler.h @@ -95,7 +95,7 @@ #undef INSTRUCTION_SPECIFIER_FIELDS #undef INSTRUCTION_IDS -bool X86_getInstruction(csh handle, const uint8_t *code, size_t code_len, +bool X86_getInstruction(csh handle, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr, uint16_t *size, uint64_t 
address, void *info); #endif diff --git a/cs.c b/cs.c index c9bc135b..4cd9e5f6 100644 --- a/cs.c +++ b/cs.c @@ -419,6 +419,7 @@ size_t cs_disasm_ex(csh ud, const uint8_t *buffer, size_t size, uint64_t offset, void *tmp; size_t skipdata_bytes; uint64_t offset_org; + uint8_t *tmpbuf = NULL, *org_tmpbuf = NULL; if (!handle) { // FIXME: how to handle this case: @@ -433,12 +434,14 @@ size_t cs_disasm_ex(csh ud, const uint8_t *buffer, size_t size, uint64_t offset, memset(insn_cache, 0, sizeof(insn_cache)); + // save the original offset for SKIPDATA offset_org = offset; + while (size > 0) { MCInst_Init(&mci); mci.csh = handle; - r = handle->disasm(ud, buffer, size, &mci, &insn_size, offset, handle->getinsn_info); + r = handle->disasm(ud, buffer, &tmpbuf, size, &mci, &insn_size, offset, handle->getinsn_info); if (r) { SStream ss; SStream_Init(&ss); @@ -486,6 +489,14 @@ size_t cs_disasm_ex(csh ud, const uint8_t *buffer, size_t size, uint64_t offset, } buffer += insn_size; + if (tmpbuf != NULL) { + // save the original tmpbuf to free it later + if (org_tmpbuf == NULL) + org_tmpbuf = tmpbuf; + + tmpbuf += insn_size; + } + size -= insn_size; offset += insn_size; @@ -564,6 +575,10 @@ size_t cs_disasm_ex(csh ud, const uint8_t *buffer, size_t size, uint64_t offset, } } + // free tmpbuf if it was allocated in @disasm + if (org_tmpbuf) + cs_mem_free(org_tmpbuf); + if (f) { // resize total to contain newly disasm insns void *tmp = cs_mem_realloc(total, total_size + f * sizeof(insn_cache[0])); diff --git a/cs_priv.h b/cs_priv.h index f2fec45a..983d43c1 100644 --- a/cs_priv.h +++ b/cs_priv.h @@ -15,7 +15,7 @@ typedef void (*Printer_t)(MCInst *MI, SStream *OS, void *info); // this is the best time to gather insn's characteristics typedef void (*PostPrinter_t)(csh handle, cs_insn *, char *mnem); -typedef bool (*Disasm_t)(csh handle, const uint8_t *code, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info); +typedef bool (*Disasm_t)(csh handle, const uint8_t 
*code, uint8_t **modcode, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info); typedef const char *(*GetName_t)(csh handle, unsigned int reg);