x86: handle instructions with LOCK/REP/REPNE prefix after other prefixes. bear with this until we have a better approach

This commit is contained in:
Nguyen Anh Quynh 2014-04-28 11:19:44 +08:00
parent a88c1164bf
commit 2cff6f61fc
16 changed files with 137 additions and 31 deletions

View File

@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
/* Capstone Disassembler Engine */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013> */
/* Capstone Disassembly Engine */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2014 */
#include <stdio.h> // DEBUG
#include <stdlib.h>
@ -289,7 +289,8 @@ static DecodeStatus _getInstruction(cs_struct *ud, MCInst *MI,
return MCDisassembler_Fail;
}
bool AArch64_getInstruction(csh ud, unsigned char *code, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info)
bool AArch64_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len,
MCInst *instr, uint16_t *size, uint64_t address, void *info)
{
DecodeStatus status = _getInstruction((cs_struct *)ud, instr,
code, code_len,

View File

@ -1,5 +1,5 @@
/* Capstone Disassembler Engine */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013> */
/* Capstone Disassembly Engine */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2014 */
#ifndef CS_AARCH64_DISASSEMBLER_H
#define CS_AARCH64_DISASSEMBLER_H
@ -12,7 +12,7 @@
void AArch64_init(MCRegisterInfo *MRI);
bool AArch64_getInstruction(csh ud, const uint8_t *code, size_t code_len,
bool AArch64_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len,
MCInst *instr, uint16_t *size, uint64_t address, void *info);
#endif

View File

@ -854,7 +854,7 @@ static DecodeStatus _Thumb_getInstruction(cs_struct *ud, MCInst *MI, const uint8
return MCDisassembler_Fail;
}
bool Thumb_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst *instr,
bool Thumb_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr,
uint16_t *size, uint64_t address, void *info)
{
DecodeStatus status = _Thumb_getInstruction((cs_struct *)ud, instr, code, code_len, size, address);
@ -863,7 +863,7 @@ bool Thumb_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst *
return status != MCDisassembler_Fail;
}
bool ARM_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst *instr,
bool ARM_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr,
uint16_t *size, uint64_t address, void *info)
{
DecodeStatus status = _ARM_getInstruction((cs_struct *)ud, instr, code, code_len, size, address);

View File

@ -9,9 +9,9 @@
void ARM_init(MCRegisterInfo *MRI);
bool ARM_getInstruction(csh handle, const uint8_t *code, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info);
bool ARM_getInstruction(csh handle, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info);
bool Thumb_getInstruction(csh handle, const uint8_t *code, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info);
bool Thumb_getInstruction(csh handle, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info);
uint64_t ARM_getFeatureBits(int mode);

View File

@ -280,7 +280,7 @@ static DecodeStatus MipsDisassembler_getInstruction(int mode, MCInst *instr,
return MCDisassembler_Fail;
}
bool Mips_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst *instr,
bool Mips_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr,
uint16_t *size, uint64_t address, void *info)
{
cs_struct *handle = (cs_struct *)(uintptr_t)ud;
@ -294,7 +294,7 @@ bool Mips_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst *i
}
static DecodeStatus Mips64Disassembler_getInstruction(int mode, MCInst *instr,
unsigned char *code, size_t code_len,
const uint8_t *code, size_t code_len,
uint16_t *Size,
uint64_t Address, bool isBigEndian, MCRegisterInfo *MRI)
{
@ -320,7 +320,7 @@ static DecodeStatus Mips64Disassembler_getInstruction(int mode, MCInst *instr,
return MCDisassembler_Fail;
}
bool Mips64_getInstruction(csh ud, unsigned char *code, size_t code_len, MCInst *instr,
bool Mips64_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr,
uint16_t *size, uint64_t address, void *info)
{
cs_struct *handle = (cs_struct *)(uintptr_t)ud;

View File

@ -11,10 +11,10 @@
void Mips_init(MCRegisterInfo *MRI);
bool Mips_getInstruction(csh handle, const uint8_t *code, size_t code_len,
bool Mips_getInstruction(csh handle, const uint8_t *code, uint8_t **modcode, size_t code_len,
MCInst *instr, uint16_t *size, uint64_t address, void *info);
bool Mips64_getInstruction(csh handle, const uint8_t *code, size_t code_len,
bool Mips64_getInstruction(csh handle, const uint8_t *code, uint8_t **modcode, size_t code_len,
MCInst *instr, uint16_t *size, uint64_t address, void *info);
#endif

View File

@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
/* Capstone Disassembler Engine */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013> */
/* Capstone Disassembly Engine */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2014 */
#include <stdio.h> // DEBUG
#include <stdlib.h>
@ -299,7 +299,8 @@ static DecodeStatus getInstruction(MCInst *MI,
return MCDisassembler_Fail;
}
bool PPC_getInstruction(csh ud, unsigned char *code, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info)
bool PPC_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len,
MCInst *instr, uint16_t *size, uint64_t address, void *info)
{
DecodeStatus status = getInstruction(instr,
code, code_len,

View File

@ -1,5 +1,5 @@
/* Capstone Disassembler Engine */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013> */
/* Capstone Disassembly Engine */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2014 */
#ifndef CS_PPCDISASSEMBLER_H
#define CS_PPCDISASSEMBLER_H
@ -12,7 +12,7 @@
void PPC_init(MCRegisterInfo *MRI);
bool PPC_getInstruction(csh ud, const uint8_t *code, size_t code_len,
bool PPC_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len,
MCInst *instr, uint16_t *size, uint64_t address, void *info);
#endif

View File

@ -200,7 +200,7 @@ static DecodeStatus DecodeSWAP(MCInst *Inst, unsigned insn, uint64_t Address,
#include "SparcGenDisassemblerTables.inc"
/// readInstruction - read four bytes and return 32 bit word.
static DecodeStatus readInstruction32(unsigned char *code, size_t len, uint32_t *Insn)
static DecodeStatus readInstruction32(const uint8_t *code, size_t len, uint32_t *Insn)
{
uint8_t Bytes[4];
@ -219,7 +219,7 @@ static DecodeStatus readInstruction32(unsigned char *code, size_t len, uint32_t
return MCDisassembler_Success;
}
bool Sparc_getInstruction(csh ud, unsigned char *code, size_t code_len, MCInst *MI,
bool Sparc_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *MI,
uint16_t *size, uint64_t address, void *info)
{
uint32_t Insn;

View File

@ -12,7 +12,7 @@
void Sparc_init(MCRegisterInfo *MRI);
bool Sparc_getInstruction(csh ud, const uint8_t *code, size_t code_len,
bool Sparc_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len,
MCInst *instr, uint16_t *size, uint64_t address, void *info);
#endif

View File

@ -295,7 +295,7 @@ static DecodeStatus decodeBDLAddr64Disp12Len8Operand(MCInst *Inst, uint64_t Fiel
#define GET_SUBTARGETINFO_ENUM
#include "SystemZGenSubtargetInfo.inc"
#include "SystemZGenDisassemblerTables.inc"
bool SystemZ_getInstruction(csh ud, unsigned char *code, size_t code_len, MCInst *MI,
bool SystemZ_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *MI,
uint16_t *size, uint64_t address, void *info)
{
uint64_t Inst;

View File

@ -12,7 +12,7 @@
void SystemZ_init(MCRegisterInfo *MRI);
bool SystemZ_getInstruction(csh ud, const uint8_t *code, size_t code_len,
bool SystemZ_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len,
MCInst *instr, uint16_t *size, uint64_t address, void *info);
#endif

View File

@ -720,16 +720,104 @@ static void update_pub_insn(cs_insn_flat *pub, InternalInstruction *inter)
pub->x86.sib_base = x86_map_sib_base(inter->sibBase);
}
// Classify a byte into its x86 legacy prefix group.
// Returns:
//   1 - LOCK (0xf0), REPNE (0xf2), REP (0xf3)
//   2 - segment overrides ES/CS/SS/DS/FS/GS (0x26/0x2e/0x36/0x3e/0x64/0x65);
//       0x2e and 0x3e also act as branch not-taken/taken hints on Jcc
//   3 - operand-size override (0x66)
//   4 - address-size override (0x67)
//   0 - the byte is not a prefix
static uint8_t prefix_group(uint8_t c)
{
	uint8_t group = 0;	// stays 0 when the byte is not a prefix

	if (c == 0xf0 || c == 0xf2 || c == 0xf3) {
		// lock / repne / rep
		group = 1;
	} else if (c == 0x26 || c == 0x2e || c == 0x36 ||
			c == 0x3e || c == 0x64 || c == 0x65) {
		// segment overrides
		group = 2;
	} else if (c == 0x66) {
		// operand-size override
		group = 3;
	} else if (c == 0x67) {
		// address-size override
		group = 4;
	}

	return group;
}
// Public interface for the disassembler
bool X86_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *_info)
bool X86_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len,
MCInst *instr, uint16_t *size, uint64_t address, void *_info)
{
cs_struct *handle = (cs_struct *)(uintptr_t)ud;
InternalInstruction insn;
struct reader_info info;
int ret;
bool result;
size_t i;
int count = 0;
uint8_t p;
uint8_t *buffer;
info.code = code;
// hack: shuffle LOCK/REP/REPNE prefixes to the front.
// this is because LLVM makes a cut at these prefixes to create a new insn.
if (*modcode != NULL)
// so we actually work on the modified buffer
buffer = *modcode;
else
buffer = (uint8_t *)code;
// find the first non-prefix byte
for (i = 0; i < code_len; i++) {
p = prefix_group(buffer[i]);
if (p == 1)
count++;
else if (p == 0) {
// the first ever non-prefix byte
// ignore if there is no prefix from Group 1 (LOCK/REP/REPNE)
if (i == 0 || count == 0)
break;
else {
// an x86 instruction is at most 15 bytes long
uint8_t b1, b2;
size_t j;
uint8_t *prefixes;
// create @modcode for modifying if we didn't do that before
if (*modcode == NULL) {
uint8_t *tmpbuf = cs_mem_malloc(code_len);
// copy @code to @modcode
memcpy(tmpbuf, code, code_len);
buffer = tmpbuf;
*modcode = tmpbuf;
}
// save all prefix bytes in original code
prefixes = cs_mem_malloc(i);
memcpy(prefixes, buffer, i);
b1 = 0;
b2 = count;
for (j = 0; j < i; j++) {
if (prefix_group(prefixes[j]) == 1) {
// this is one of LOCK/REP/REPNE, so put it at the front
buffer[b1] = prefixes[j];
b1++;
} else {
// put this prefix at the back, after LOCK/REP/REPNE
buffer[b2] = prefixes[j];
b2++;
}
}
cs_mem_free(prefixes);
// done, break out of this loop
break;
}
}
}
info.code = buffer;
info.size = code_len;
info.offset = address;
@ -753,6 +841,7 @@ bool X86_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst *in
if (ret) {
*size = (uint16_t)(insn.readerCursor - address);
return false;
} else {
*size = (uint16_t)insn.length;

View File

@ -95,7 +95,7 @@
#undef INSTRUCTION_SPECIFIER_FIELDS
#undef INSTRUCTION_IDS
bool X86_getInstruction(csh handle, const uint8_t *code, size_t code_len,
bool X86_getInstruction(csh handle, const uint8_t *code, uint8_t **modcode, size_t code_len,
MCInst *instr, uint16_t *size, uint64_t address, void *info);
#endif

17
cs.c
View File

@ -419,6 +419,7 @@ size_t cs_disasm_ex(csh ud, const uint8_t *buffer, size_t size, uint64_t offset,
void *tmp;
size_t skipdata_bytes;
uint64_t offset_org;
uint8_t *tmpbuf = NULL, *org_tmpbuf = NULL;
if (!handle) {
// FIXME: how to handle this case:
@ -433,12 +434,14 @@ size_t cs_disasm_ex(csh ud, const uint8_t *buffer, size_t size, uint64_t offset,
memset(insn_cache, 0, sizeof(insn_cache));
// save the original offset for SKIPDATA
offset_org = offset;
while (size > 0) {
MCInst_Init(&mci);
mci.csh = handle;
r = handle->disasm(ud, buffer, size, &mci, &insn_size, offset, handle->getinsn_info);
r = handle->disasm(ud, buffer, &tmpbuf, size, &mci, &insn_size, offset, handle->getinsn_info);
if (r) {
SStream ss;
SStream_Init(&ss);
@ -486,6 +489,14 @@ size_t cs_disasm_ex(csh ud, const uint8_t *buffer, size_t size, uint64_t offset,
}
buffer += insn_size;
if (tmpbuf != NULL) {
// save the original tmpbuf to free it later
if (org_tmpbuf == NULL)
org_tmpbuf = tmpbuf;
tmpbuf += insn_size;
}
size -= insn_size;
offset += insn_size;
@ -564,6 +575,10 @@ size_t cs_disasm_ex(csh ud, const uint8_t *buffer, size_t size, uint64_t offset,
}
}
// free tmpbuf if it was allocated in @disasm
if (org_tmpbuf)
cs_mem_free(org_tmpbuf);
if (f) {
// resize total to contain newly disasm insns
void *tmp = cs_mem_realloc(total, total_size + f * sizeof(insn_cache[0]));

View File

@ -15,7 +15,7 @@ typedef void (*Printer_t)(MCInst *MI, SStream *OS, void *info);
// this is the best time to gather insn's characteristics
typedef void (*PostPrinter_t)(csh handle, cs_insn *, char *mnem);
typedef bool (*Disasm_t)(csh handle, const uint8_t *code, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info);
typedef bool (*Disasm_t)(csh handle, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info);
typedef const char *(*GetName_t)(csh handle, unsigned int reg);