x86: handle instructions that have a LOCK/REP/REPNE prefix after other prefixes. Bear with this until we have a better approach.
This commit is contained in:
parent
a88c1164bf
commit
2cff6f61fc
|
@ -13,8 +13,8 @@
|
|||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/* Capstone Disassembler Engine */
|
||||
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013> */
|
||||
/* Capstone Disassembly Engine */
|
||||
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2014 */
|
||||
|
||||
#include <stdio.h> // DEBUG
|
||||
#include <stdlib.h>
|
||||
|
@ -289,7 +289,8 @@ static DecodeStatus _getInstruction(cs_struct *ud, MCInst *MI,
|
|||
return MCDisassembler_Fail;
|
||||
}
|
||||
|
||||
bool AArch64_getInstruction(csh ud, unsigned char *code, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info)
|
||||
bool AArch64_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len,
|
||||
MCInst *instr, uint16_t *size, uint64_t address, void *info)
|
||||
{
|
||||
DecodeStatus status = _getInstruction((cs_struct *)ud, instr,
|
||||
code, code_len,
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Capstone Disassembler Engine */
|
||||
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013> */
|
||||
/* Capstone Disassembly Engine */
|
||||
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2014 */
|
||||
|
||||
#ifndef CS_AARCH64_DISASSEMBLER_H
|
||||
#define CS_AARCH64_DISASSEMBLER_H
|
||||
|
@ -12,7 +12,7 @@
|
|||
|
||||
void AArch64_init(MCRegisterInfo *MRI);
|
||||
|
||||
bool AArch64_getInstruction(csh ud, const uint8_t *code, size_t code_len,
|
||||
bool AArch64_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len,
|
||||
MCInst *instr, uint16_t *size, uint64_t address, void *info);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -854,7 +854,7 @@ static DecodeStatus _Thumb_getInstruction(cs_struct *ud, MCInst *MI, const uint8
|
|||
return MCDisassembler_Fail;
|
||||
}
|
||||
|
||||
bool Thumb_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst *instr,
|
||||
bool Thumb_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr,
|
||||
uint16_t *size, uint64_t address, void *info)
|
||||
{
|
||||
DecodeStatus status = _Thumb_getInstruction((cs_struct *)ud, instr, code, code_len, size, address);
|
||||
|
@ -863,7 +863,7 @@ bool Thumb_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst *
|
|||
return status != MCDisassembler_Fail;
|
||||
}
|
||||
|
||||
bool ARM_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst *instr,
|
||||
bool ARM_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr,
|
||||
uint16_t *size, uint64_t address, void *info)
|
||||
{
|
||||
DecodeStatus status = _ARM_getInstruction((cs_struct *)ud, instr, code, code_len, size, address);
|
||||
|
|
|
@ -9,9 +9,9 @@
|
|||
|
||||
void ARM_init(MCRegisterInfo *MRI);
|
||||
|
||||
bool ARM_getInstruction(csh handle, const uint8_t *code, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info);
|
||||
bool ARM_getInstruction(csh handle, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info);
|
||||
|
||||
bool Thumb_getInstruction(csh handle, const uint8_t *code, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info);
|
||||
bool Thumb_getInstruction(csh handle, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info);
|
||||
|
||||
uint64_t ARM_getFeatureBits(int mode);
|
||||
|
||||
|
|
|
@ -280,7 +280,7 @@ static DecodeStatus MipsDisassembler_getInstruction(int mode, MCInst *instr,
|
|||
return MCDisassembler_Fail;
|
||||
}
|
||||
|
||||
bool Mips_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst *instr,
|
||||
bool Mips_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr,
|
||||
uint16_t *size, uint64_t address, void *info)
|
||||
{
|
||||
cs_struct *handle = (cs_struct *)(uintptr_t)ud;
|
||||
|
@ -294,7 +294,7 @@ bool Mips_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst *i
|
|||
}
|
||||
|
||||
static DecodeStatus Mips64Disassembler_getInstruction(int mode, MCInst *instr,
|
||||
unsigned char *code, size_t code_len,
|
||||
const uint8_t *code, size_t code_len,
|
||||
uint16_t *Size,
|
||||
uint64_t Address, bool isBigEndian, MCRegisterInfo *MRI)
|
||||
{
|
||||
|
@ -320,7 +320,7 @@ static DecodeStatus Mips64Disassembler_getInstruction(int mode, MCInst *instr,
|
|||
return MCDisassembler_Fail;
|
||||
}
|
||||
|
||||
bool Mips64_getInstruction(csh ud, unsigned char *code, size_t code_len, MCInst *instr,
|
||||
bool Mips64_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr,
|
||||
uint16_t *size, uint64_t address, void *info)
|
||||
{
|
||||
cs_struct *handle = (cs_struct *)(uintptr_t)ud;
|
||||
|
|
|
@ -11,10 +11,10 @@
|
|||
|
||||
void Mips_init(MCRegisterInfo *MRI);
|
||||
|
||||
bool Mips_getInstruction(csh handle, const uint8_t *code, size_t code_len,
|
||||
bool Mips_getInstruction(csh handle, const uint8_t *code, uint8_t **modcode, size_t code_len,
|
||||
MCInst *instr, uint16_t *size, uint64_t address, void *info);
|
||||
|
||||
bool Mips64_getInstruction(csh handle, const uint8_t *code, size_t code_len,
|
||||
bool Mips64_getInstruction(csh handle, const uint8_t *code, uint8_t **modcode, size_t code_len,
|
||||
MCInst *instr, uint16_t *size, uint64_t address, void *info);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -7,8 +7,8 @@
|
|||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/* Capstone Disassembler Engine */
|
||||
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013> */
|
||||
/* Capstone Disassembly Engine */
|
||||
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2014 */
|
||||
|
||||
#include <stdio.h> // DEBUG
|
||||
#include <stdlib.h>
|
||||
|
@ -299,7 +299,8 @@ static DecodeStatus getInstruction(MCInst *MI,
|
|||
return MCDisassembler_Fail;
|
||||
}
|
||||
|
||||
bool PPC_getInstruction(csh ud, unsigned char *code, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info)
|
||||
bool PPC_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len,
|
||||
MCInst *instr, uint16_t *size, uint64_t address, void *info)
|
||||
{
|
||||
DecodeStatus status = getInstruction(instr,
|
||||
code, code_len,
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Capstone Disassembler Engine */
|
||||
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013> */
|
||||
/* Capstone Disassembly Engine */
|
||||
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2014 */
|
||||
|
||||
#ifndef CS_PPCDISASSEMBLER_H
|
||||
#define CS_PPCDISASSEMBLER_H
|
||||
|
@ -12,7 +12,7 @@
|
|||
|
||||
void PPC_init(MCRegisterInfo *MRI);
|
||||
|
||||
bool PPC_getInstruction(csh ud, const uint8_t *code, size_t code_len,
|
||||
bool PPC_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len,
|
||||
MCInst *instr, uint16_t *size, uint64_t address, void *info);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -200,7 +200,7 @@ static DecodeStatus DecodeSWAP(MCInst *Inst, unsigned insn, uint64_t Address,
|
|||
#include "SparcGenDisassemblerTables.inc"
|
||||
|
||||
/// readInstruction32 - read four bytes and return a 32-bit word.
|
||||
static DecodeStatus readInstruction32(unsigned char *code, size_t len, uint32_t *Insn)
|
||||
static DecodeStatus readInstruction32(const uint8_t *code, size_t len, uint32_t *Insn)
|
||||
{
|
||||
uint8_t Bytes[4];
|
||||
|
||||
|
@ -219,7 +219,7 @@ static DecodeStatus readInstruction32(unsigned char *code, size_t len, uint32_t
|
|||
return MCDisassembler_Success;
|
||||
}
|
||||
|
||||
bool Sparc_getInstruction(csh ud, unsigned char *code, size_t code_len, MCInst *MI,
|
||||
bool Sparc_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *MI,
|
||||
uint16_t *size, uint64_t address, void *info)
|
||||
{
|
||||
uint32_t Insn;
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
|
||||
void Sparc_init(MCRegisterInfo *MRI);
|
||||
|
||||
bool Sparc_getInstruction(csh ud, const uint8_t *code, size_t code_len,
|
||||
bool Sparc_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len,
|
||||
MCInst *instr, uint16_t *size, uint64_t address, void *info);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -295,7 +295,7 @@ static DecodeStatus decodeBDLAddr64Disp12Len8Operand(MCInst *Inst, uint64_t Fiel
|
|||
#define GET_SUBTARGETINFO_ENUM
|
||||
#include "SystemZGenSubtargetInfo.inc"
|
||||
#include "SystemZGenDisassemblerTables.inc"
|
||||
bool SystemZ_getInstruction(csh ud, unsigned char *code, size_t code_len, MCInst *MI,
|
||||
bool SystemZ_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *MI,
|
||||
uint16_t *size, uint64_t address, void *info)
|
||||
{
|
||||
uint64_t Inst;
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
|
||||
void SystemZ_init(MCRegisterInfo *MRI);
|
||||
|
||||
bool SystemZ_getInstruction(csh ud, const uint8_t *code, size_t code_len,
|
||||
bool SystemZ_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len,
|
||||
MCInst *instr, uint16_t *size, uint64_t address, void *info);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -720,16 +720,104 @@ static void update_pub_insn(cs_insn_flat *pub, InternalInstruction *inter)
|
|||
pub->x86.sib_base = x86_map_sib_base(inter->sibBase);
|
||||
}
|
||||
|
||||
// Classify byte @c into its x86 prefix group: 1 = LOCK/REPNE/REP, 2 = segment override (or Jcc branch hint), 3 = operand-size override, 4 = address-size override. Returns 0 if @c is not one of these prefix bytes.
|
||||
static uint8_t prefix_group(uint8_t c)
|
||||
{
|
||||
switch (c) {
|
||||
default: // any byte not listed below is treated as a non-prefix byte
|
||||
return 0;
|
||||
case 0xf0: // LOCK
|
||||
case 0xf2: // REPNE
|
||||
case 0xf3: // REP
|
||||
return 1; // group 1: LOCK/REPNE/REP
|
||||
case 0x2e: // CS segment override, or branch not taken (Jcc)
|
||||
case 0x36: // SS segment override
|
||||
case 0x3e: // DS segment override, or branch taken (Jcc)
|
||||
case 0x26: // ES segment override
|
||||
case 0x64: // FS segment override
|
||||
case 0x65: // GS segment override
|
||||
return 2; // group 2: segment overrides / branch hints
|
||||
case 0x66: // operand-size override
|
||||
return 3; // group 3: operand-size override
|
||||
case 0x67: // address-size override
|
||||
return 4; // group 4: address-size override
|
||||
}
|
||||
}
|
||||
|
||||
// Public interface for the disassembler
|
||||
bool X86_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *_info)
|
||||
bool X86_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len,
|
||||
MCInst *instr, uint16_t *size, uint64_t address, void *_info)
|
||||
{
|
||||
cs_struct *handle = (cs_struct *)(uintptr_t)ud;
|
||||
InternalInstruction insn;
|
||||
struct reader_info info;
|
||||
int ret;
|
||||
bool result;
|
||||
size_t i;
|
||||
int count = 0;
|
||||
uint8_t p;
|
||||
uint8_t *buffer;
|
||||
|
||||
info.code = code;
|
||||
// hack: shuffle LOCK/REP/REPNE prefixes to the front.
|
||||
// this is because LLVM makes a cut at these prefixes to create a new insn.
|
||||
if (*modcode != NULL)
|
||||
// so we actually work on the modified buffer
|
||||
buffer = *modcode;
|
||||
else
|
||||
buffer = (uint8_t *)code;
|
||||
|
||||
// find the first non-prefix byte
|
||||
for (i = 0; i < code_len; i++) {
|
||||
p = prefix_group(buffer[i]);
|
||||
if (p == 1)
|
||||
count++;
|
||||
else if (p == 0) {
|
||||
// the first ever non-prefix byte
|
||||
// ignore if there is no prefix from Group 1 (LOCK/REP/REPNE)
|
||||
if (i == 0 || count == 0)
|
||||
break;
|
||||
else {
|
||||
// x86 instruction has no more than 16 bytes
|
||||
uint8_t b1, b2;
|
||||
size_t j;
|
||||
uint8_t *prefixes;
|
||||
|
||||
// create @modcode for modification if we didn't do that before
|
||||
if (*modcode == NULL) {
|
||||
uint8_t *tmpbuf = cs_mem_malloc(code_len);
|
||||
// copy @code to @modcode
|
||||
memcpy(tmpbuf, code, code_len);
|
||||
buffer = tmpbuf;
|
||||
*modcode = tmpbuf;
|
||||
}
|
||||
|
||||
// save all prefix bytes in original code
|
||||
prefixes = cs_mem_malloc(i);
|
||||
memcpy(prefixes, buffer, i);
|
||||
|
||||
b1 = 0;
|
||||
b2 = count;
|
||||
for (j = 0; j < i; j++) {
|
||||
if (prefix_group(prefixes[j]) == 1) {
|
||||
// this is one of LOCK/REP/REPNE, so put it at the front
|
||||
buffer[b1] = prefixes[j];
|
||||
b1++;
|
||||
} else {
|
||||
// put this prefix at the back, after LOCK/REP/REPNE
|
||||
buffer[b2] = prefixes[j];
|
||||
b2++;
|
||||
}
|
||||
}
|
||||
|
||||
cs_mem_free(prefixes);
|
||||
|
||||
// done, break out of this loop
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
info.code = buffer;
|
||||
info.size = code_len;
|
||||
info.offset = address;
|
||||
|
||||
|
@ -753,6 +841,7 @@ bool X86_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst *in
|
|||
|
||||
if (ret) {
|
||||
*size = (uint16_t)(insn.readerCursor - address);
|
||||
|
||||
return false;
|
||||
} else {
|
||||
*size = (uint16_t)insn.length;
|
||||
|
|
|
@ -95,7 +95,7 @@
|
|||
#undef INSTRUCTION_SPECIFIER_FIELDS
|
||||
#undef INSTRUCTION_IDS
|
||||
|
||||
bool X86_getInstruction(csh handle, const uint8_t *code, size_t code_len,
|
||||
bool X86_getInstruction(csh handle, const uint8_t *code, uint8_t **modcode, size_t code_len,
|
||||
MCInst *instr, uint16_t *size, uint64_t address, void *info);
|
||||
|
||||
#endif
|
||||
|
|
17
cs.c
17
cs.c
|
@ -419,6 +419,7 @@ size_t cs_disasm_ex(csh ud, const uint8_t *buffer, size_t size, uint64_t offset,
|
|||
void *tmp;
|
||||
size_t skipdata_bytes;
|
||||
uint64_t offset_org;
|
||||
uint8_t *tmpbuf = NULL, *org_tmpbuf = NULL;
|
||||
|
||||
if (!handle) {
|
||||
// FIXME: how to handle this case:
|
||||
|
@ -433,12 +434,14 @@ size_t cs_disasm_ex(csh ud, const uint8_t *buffer, size_t size, uint64_t offset,
|
|||
|
||||
memset(insn_cache, 0, sizeof(insn_cache));
|
||||
|
||||
// save the original offset for SKIPDATA
|
||||
offset_org = offset;
|
||||
|
||||
while (size > 0) {
|
||||
MCInst_Init(&mci);
|
||||
mci.csh = handle;
|
||||
|
||||
r = handle->disasm(ud, buffer, size, &mci, &insn_size, offset, handle->getinsn_info);
|
||||
r = handle->disasm(ud, buffer, &tmpbuf, size, &mci, &insn_size, offset, handle->getinsn_info);
|
||||
if (r) {
|
||||
SStream ss;
|
||||
SStream_Init(&ss);
|
||||
|
@ -486,6 +489,14 @@ size_t cs_disasm_ex(csh ud, const uint8_t *buffer, size_t size, uint64_t offset,
|
|||
}
|
||||
|
||||
buffer += insn_size;
|
||||
if (tmpbuf != NULL) {
|
||||
// save the original tmpbuf to free it later
|
||||
if (org_tmpbuf == NULL)
|
||||
org_tmpbuf = tmpbuf;
|
||||
|
||||
tmpbuf += insn_size;
|
||||
}
|
||||
|
||||
size -= insn_size;
|
||||
offset += insn_size;
|
||||
|
||||
|
@ -564,6 +575,10 @@ size_t cs_disasm_ex(csh ud, const uint8_t *buffer, size_t size, uint64_t offset,
|
|||
}
|
||||
}
|
||||
|
||||
// free tmpbuf if it was allocated in @disasm
|
||||
if (org_tmpbuf)
|
||||
cs_mem_free(org_tmpbuf);
|
||||
|
||||
if (f) {
|
||||
// resize total to contain newly disasm insns
|
||||
void *tmp = cs_mem_realloc(total, total_size + f * sizeof(insn_cache[0]));
|
||||
|
|
|
@ -15,7 +15,7 @@ typedef void (*Printer_t)(MCInst *MI, SStream *OS, void *info);
|
|||
// this is the best time to gather insn's characteristics
|
||||
typedef void (*PostPrinter_t)(csh handle, cs_insn *, char *mnem);
|
||||
|
||||
typedef bool (*Disasm_t)(csh handle, const uint8_t *code, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info);
|
||||
typedef bool (*Disasm_t)(csh handle, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info);
|
||||
|
||||
typedef const char *(*GetName_t)(csh handle, unsigned int reg);
|
||||
|
||||
|
|
Loading…
Reference in New Issue