From 7772d859af6ff512debf2beb8b1cf74bc5a5e436 Mon Sep 17 00:00:00 2001 From: Nguyen Anh Quynh Date: Tue, 21 Jan 2014 11:49:25 +0800 Subject: [PATCH] x86: fix known issue with prefix by combining with previous prefix instruction. this is not perfect, but good enough for now --- arch/X86/X86Mapping.c | 43 +++++++++++++++++++++++++++++++++++++ arch/X86/X86Mapping.h | 5 +++++ arch/X86/X86Module.c | 2 ++ cs.c | 50 ++++++++++++++++++++++++++++++------------- cs_priv.h | 7 ++++++ 5 files changed, 92 insertions(+), 15 deletions(-) diff --git a/arch/X86/X86Mapping.c b/arch/X86/X86Mapping.c index 272e9758..37508815 100644 --- a/arch/X86/X86Mapping.c +++ b/arch/X86/X86Mapping.c @@ -6636,3 +6636,46 @@ unsigned int X86_get_insn_id2(unsigned int id) { return insn_reverse_id(insns, ARR_SIZE(insns), id); } + +// can this instruction combine with prev prefix instruction? +// this also updates h->prev_prefix if needed +bool X86_insn_check_combine(cs_struct *h, cs_insn *insn) +{ + // is this a prefix instruction? 
+ if (insn->id == X86_INS_LOCK || insn->id == X86_INS_REP || + insn->id == X86_INS_REPNE) { + // then save this as prev_prefix + h->prev_prefix = insn->id; + return false; + } + + // if the previous instruction is a prefix, then OK to combine with this + if (h->prev_prefix) { + return true; + } + + // cannot combine this with a prefix + return false; +} + +// combine this instruction with previous prefix instruction +void X86_insn_combine(cs_struct *h, cs_insn *insn, cs_insn *prev) +{ + // reset prev_prefix + h->prev_prefix = 0; + + // copy information from insn to prev + prev->id = insn->id; + prev->size += insn->size; + memmove(prev->bytes+1, insn->bytes, sizeof(insn->bytes) - 1); + strlcat(prev->mnemonic, " ", sizeof(insn->mnemonic)); + strlcat(prev->mnemonic, insn->mnemonic, sizeof(insn->mnemonic)); + strlcpy(prev->op_str, insn->op_str, sizeof(insn->op_str)); + + if (h->detail) { + // save old prefix to copy it back later + uint8_t prefix = prev->detail->x86.opcode[0]; + memmove(prev->detail, insn->detail, sizeof(cs_detail)); + prev->detail->x86.prefix[0] = prefix; + } +} diff --git a/arch/X86/X86Mapping.h b/arch/X86/X86Mapping.h index 1b7e7f27..586f3a22 100644 --- a/arch/X86/X86Mapping.h +++ b/arch/X86/X86Mapping.h @@ -38,4 +38,9 @@ unsigned int X86_get_insn_id2(unsigned int insn_id); // post printer for X86. 
void X86_post_printer(csh handle, cs_insn *pub_insn, char *insn_asm); +// handle X86 prefixes +bool X86_insn_check_combine(cs_struct *h, cs_insn *insn); + +void X86_insn_combine(cs_struct *h, cs_insn *insn, cs_insn *prev); + #endif diff --git a/arch/X86/X86Module.c b/arch/X86/X86Module.c index 411a7228..db57175f 100644 --- a/arch/X86/X86Module.c +++ b/arch/X86/X86Module.c @@ -19,6 +19,8 @@ static cs_err init(cs_struct *ud) ud->insn_id = X86_get_insn_id; ud->insn_name = X86_insn_name; ud->post_printer = X86_post_printer; + ud->check_combine = X86_insn_check_combine; + ud->combine = X86_insn_combine; return CS_ERR_OK; } diff --git a/cs.c b/cs.c index ce3d7bfe..4c1be11b 100644 --- a/cs.c +++ b/cs.c @@ -268,6 +268,18 @@ cs_err cs_option(csh ud, cs_opt_type type, size_t value) return arch_option[handle->arch](handle, type, value); } +// get previous instruction, which can be in the cache, or in total buffer +static cs_insn *get_prev_insn(cs_insn *cache, unsigned int f, void *total, size_t total_size) +{ + if (f == 0) { + if (total == NULL) + return NULL; + // get the trailing insn from total buffer + return (cs_insn *)(total + total_size - sizeof(cs_insn)); + } else + return &cache[f - 1]; +} + // dynamicly allocate memory to contain disasm insn // NOTE: caller must free() the allocated memory itself to avoid memory leaking size_t cs_disasm_ex(csh ud, const uint8_t *buffer, size_t size, uint64_t offset, size_t count, cs_insn **insn) @@ -275,7 +287,8 @@ size_t cs_disasm_ex(csh ud, const uint8_t *buffer, size_t size, uint64_t offset, cs_struct *handle = (cs_struct *)(uintptr_t)ud; MCInst mci; uint16_t insn_size; - size_t c = 0, f = 0; + size_t c = 0; + unsigned int f = 0; cs_insn insn_cache[64]; void *total = NULL; size_t total_size = 0; @@ -315,25 +328,32 @@ size_t cs_disasm_ex(csh ud, const uint8_t *buffer, size_t size, uint64_t offset, fill_insn(handle, &insn_cache[f], ss.buffer, &mci, handle->post_printer, buffer); - f++; + if (!handle->check_combine || 
!handle->check_combine(handle, &insn_cache[f])) { + f++; - if (f == ARR_SIZE(insn_cache)) { - // resize total to contain newly disasm insns - total_size += sizeof(insn_cache); - void *tmp = cs_mem_realloc(total, total_size); - if (tmp == NULL) { // insufficient memory - cs_mem_free(total); - handle->errnum = CS_ERR_MEM; - return 0; + if (f == ARR_SIZE(insn_cache)) { + // resize total to contain newly disasm insns + total_size += sizeof(insn_cache); + void *tmp = cs_mem_realloc(total, total_size); + if (tmp == NULL) { // insufficient memory + cs_mem_free(total); + handle->errnum = CS_ERR_MEM; + return 0; + } + + total = tmp; + memcpy(total + total_size - sizeof(insn_cache), insn_cache, sizeof(insn_cache)); + // reset f back to 0 + f = 0; } - total = tmp; - memcpy(total + total_size - sizeof(insn_cache), insn_cache, sizeof(insn_cache)); - // reset f back to 0 - f = 0; + c++; + } else { + // combine this instruction with previous prefix instruction + cs_insn *prev = get_prev_insn(insn_cache, f, total, total_size); + handle->combine(handle, &insn_cache[f], prev); } - c++; buffer += insn_size; size -= insn_size; offset += insn_size; diff --git a/cs_priv.h b/cs_priv.h index f959c77c..ee1b9816 100644 --- a/cs_priv.h +++ b/cs_priv.h @@ -21,6 +21,10 @@ typedef const char *(*GetName_t)(csh handle, unsigned int reg); typedef void (*GetID_t)(cs_struct *h, cs_insn *insn, unsigned int id); +typedef bool (*CheckCombineInsn_t)(cs_struct *h, cs_insn *insn); + +typedef void (*CombineInsn_t)(cs_struct *h, cs_insn *insn, cs_insn *prev); + // for ARM only typedef struct ARM_ITStatus { unsigned char ITStates[128]; // FIXME @@ -45,6 +49,9 @@ struct cs_struct { int syntax; // asm syntax for simple printer such as PPC bool doing_mem; // handling memory operand in InstPrinter code unsigned short *insn_cache; // index caching for mapping.c + CheckCombineInsn_t check_combine; + CombineInsn_t combine; + uint8_t prev_prefix; // save previous prefix for combining instructions - X86 only. 
}; #define MAX_ARCH 8