From ff7bba3d6d1398ace7765c74bb56d01f2a3927f5 Mon Sep 17 00:00:00 2001 From: Nguyen Anh Quynh Date: Mon, 3 Nov 2014 16:32:06 +0800 Subject: [PATCH] x86: print out immediate as positive number for logic arithmetic operations: AND, OR, XOR. only works for x86 Intel syntax so far. issue reported by Pancake --- MCInst.c | 3 +- MCInst.h | 5 +-- arch/X86/X86ATTInstPrinter.c | 4 +-- arch/X86/X86Disassembler.c | 2 +- arch/X86/X86IntelInstPrinter.c | 56 +++++++++++++++++++++++++++------- arch/X86/X86Mapping.c | 8 ++++- cs.c | 14 +++++++-- suite/x86odd.py | 16 +++++++++- 8 files changed, 86 insertions(+), 22 deletions(-) diff --git a/MCInst.c b/MCInst.c index cdd7a0da..ab9ded93 100644 --- a/MCInst.c +++ b/MCInst.c @@ -14,7 +14,8 @@ void MCInst_Init(MCInst *inst) { inst->OpcodePub = 0; inst->size = 0; - inst->has_imm = 0; + inst->has_imm = false; + inst->op1_size = 0; } void MCInst_clear(MCInst *inst) diff --git a/MCInst.h b/MCInst.h index 0cea40bc..97fe80be 100644 --- a/MCInst.h +++ b/MCInst.h @@ -91,8 +91,9 @@ MCOperand *MCOperand_CreateImm1(MCInst *inst, int64_t Val); /// instruction. struct MCInst { unsigned OpcodePub; - unsigned size; // number of operands - int has_imm; // indicate this instruction has an X86_OP_IMM operand - used for ATT syntax + uint8_t size; // number of operands + bool has_imm; // indicate this instruction has an X86_OP_IMM operand - used for ATT syntax + uint8_t op1_size; // size of 1st operand - for X86 Intel syntax unsigned Opcode; MCOperand Operands[48]; cs_insn *flat_insn; // insn to be exposed to public diff --git a/arch/X86/X86ATTInstPrinter.c b/arch/X86/X86ATTInstPrinter.c index 445b7edd..9a2f252b 100644 --- a/arch/X86/X86ATTInstPrinter.c +++ b/arch/X86/X86ATTInstPrinter.c @@ -454,7 +454,7 @@ static void printPCRelImm(MCInst *MI, unsigned OpNo, SStream *O) } if (MI->csh->detail) { MI->flat_insn->detail->x86.operands[MI->flat_insn->detail->x86.op_count].type = X86_OP_IMM; - MI->has_imm = 1; + MI->has_imm = true; MI->flat_insn->detail->x86.operands[MI->flat_insn->detail->x86.op_count].imm = imm; MI->flat_insn->detail->x86.op_count++; } @@ -497,7 +497,7 @@ static void printOperand(MCInst *MI, unsigned OpNo, SStream *O) MI->flat_insn->detail->x86.operands[MI->flat_insn->detail->x86.op_count].mem.disp = imm; } else { MI->flat_insn->detail->x86.operands[MI->flat_insn->detail->x86.op_count].type = X86_OP_IMM; - MI->has_imm = 1; + MI->has_imm = true; MI->flat_insn->detail->x86.operands[MI->flat_insn->detail->x86.op_count].imm = imm; MI->flat_insn->detail->x86.op_count++; } diff --git a/arch/X86/X86Disassembler.c b/arch/X86/X86Disassembler.c index c2cb82f9..734af268 100644 --- a/arch/X86/X86Disassembler.c +++ b/arch/X86/X86Disassembler.c @@ -775,9 +775,9 @@ bool X86_getInstruction(csh ud, const uint8_t *code, size_t code_len, result = (!translateInstruction(instr, &insn)) ? true : false; if (result) { + instr->imm_size = insn.immSize; if (handle->detail) { update_pub_insn(instr->flat_insn, &insn, instr->x86_prefix); - instr->imm_size = insn.immSize; } else { // still copy all prefixes instr->x86_prefix[0] = insn.prefix0; diff --git a/arch/X86/X86IntelInstPrinter.c b/arch/X86/X86IntelInstPrinter.c index d7cb1a3b..de5f9a77 100644 --- a/arch/X86/X86IntelInstPrinter.c +++ b/arch/X86/X86IntelInstPrinter.c @@ -433,6 +433,9 @@ static void printMemOffset(MCInst *MI, unsigned Op, SStream *O) if (MI->csh->detail) MI->flat_insn->detail->x86.op_count++; + + if (MI->op1_size == 0) + MI->op1_size = MI->x86opsize; } static void printMemOffs8(MCInst *MI, unsigned OpNo, SStream *O) @@ -477,10 +480,10 @@ void X86_Intel_printInst(MCInst *MI, SStream *O, void *Info) else printInstruction(MI, O, Info); + reg = X86_insn_reg_intel(MCInst_getOpcode(MI)); if (MI->csh->detail) { // first op can be embedded in the asm by llvm. // so we have to add the missing register as the first operand - reg = X86_insn_reg_intel(MCInst_getOpcode(MI)); if (reg) { // shift all the ops right to leave 1st slot for this new register op memmove(&(MI->flat_insn->detail->x86.operands[1]), &(MI->flat_insn->detail->x86.operands[0]), @@ -491,6 +494,9 @@ void X86_Intel_printInst(MCInst *MI, SStream *O, void *Info) MI->flat_insn->detail->x86.op_count++; } } + + if (MI->op1_size == 0 && reg) + MI->op1_size = MI->csh->regsize_map[reg]; } /// printPCRelImm - This is used to print an immediate value that ends up @@ -522,6 +528,9 @@ static void printPCRelImm(MCInst *MI, unsigned OpNo, SStream *O) MI->flat_insn->detail->x86.operands[MI->flat_insn->detail->x86.op_count].imm = imm; MI->flat_insn->detail->x86.op_count++; } + + if (MI->op1_size == 0) + MI->op1_size = MI->imm_size; } } @@ -542,18 +551,37 @@ static void printOperand(MCInst *MI, unsigned OpNo, SStream *O) MI->flat_insn->detail->x86.op_count++; } } + + if (MI->op1_size == 0) + MI->op1_size = MI->csh->regsize_map[reg]; } else if (MCOperand_isImm(Op)) { int64_t imm = MCOperand_getImm(Op); - if (imm >= 0) { - if (imm > HEX_THRESHOLD) - SStream_concat(O, "0x%"PRIx64, imm); - else - SStream_concat(O, "%"PRIu64, imm); - } else { - if (imm < -HEX_THRESHOLD) - SStream_concat(O, "-0x%"PRIx64, -imm); - else - SStream_concat(O, "-%"PRIu64, -imm); + + switch(MI->flat_insn->id) { + default: + if (imm >= 0) { + if (imm > HEX_THRESHOLD) + SStream_concat(O, "0x%"PRIx64, imm); + else + SStream_concat(O, "%"PRIu64, imm); + } else { + if (imm < -HEX_THRESHOLD) + SStream_concat(O, "-0x%"PRIx64, -imm); + else + SStream_concat(O, "-%"PRIu64, -imm); + } + + break; + + case X86_INS_AND: + case X86_INS_OR: + case X86_INS_XOR: + // do not print number in negative form + if (imm == 0) + SStream_concat0(O, "0"); + else + SStream_concat(O, "0x%"PRIx64, arch_masks[MI->op1_size? MI->op1_size : MI->imm_size] & imm); + break; } if (MI->csh->detail) { @@ -569,6 +597,9 @@ static void printOperand(MCInst *MI, unsigned OpNo, SStream *O) MI->flat_insn->detail->x86.op_count++; } } + + //if (MI->op1_size == 0) + // MI->op1_size = MI->imm_size; } } @@ -657,6 +688,9 @@ static void printMemReference(MCInst *MI, unsigned Op, SStream *O) if (MI->csh->detail) MI->flat_insn->detail->x86.op_count++; + + if (MI->op1_size == 0) + MI->op1_size = MI->x86opsize; } #define GET_REGINFO_ENUM diff --git a/arch/X86/X86Mapping.c b/arch/X86/X86Mapping.c index 1c75db36..94fd0588 100644 --- a/arch/X86/X86Mapping.c +++ b/arch/X86/X86Mapping.c @@ -47600,6 +47600,9 @@ void op_addReg(MCInst *MI, int reg) MI->flat_insn->detail->x86.operands[MI->flat_insn->detail->x86.op_count].size = MI->csh->regsize_map[reg]; MI->flat_insn->detail->x86.op_count++; } + + if (MI->op1_size == 0) + MI->op1_size = MI->csh->regsize_map[reg]; } void op_addImm(MCInst *MI, int v) @@ -47614,9 +47617,12 @@ void op_addImm(MCInst *MI, int v) else MI->flat_insn->detail->x86.operands[MI->flat_insn->detail->x86.op_count].size = MI->imm_size; } else - MI->has_imm = 1; + MI->has_imm = true; MI->flat_insn->detail->x86.op_count++; } + + if (MI->op1_size == 0) + MI->op1_size = MI->imm_size; } void op_addSseCC(MCInst *MI, int v) diff --git a/cs.c b/cs.c index 587c1b83..04bc615b 100644 --- a/cs.c +++ b/cs.c @@ -264,9 +264,6 @@ static void fill_insn(struct cs_struct *handle, cs_insn *insn, char *buffer, MCI memcpy(insn->bytes, code + insn->size - copy_size, copy_size); insn->size = copy_size; - // map internal instruction opcode to public insn ID - handle->insn_id(handle, insn, MCInst_getOpcode(mci)); - // alias instruction might have ID saved in OpcodePub if (MCInst_getOpcodePub(mci)) insn->id = MCInst_getOpcodePub(mci); @@ -481,7 +478,12 @@ size_t cs_disasm(csh ud, const uint8_t *buffer, size_t size, uint64_t offset, si SStream_Init(&ss); mci.flat_insn->size = insn_size; + + // map internal instruction opcode to public insn ID + handle->insn_id(handle, insn_cache, mci.Opcode); + handle->printer(&mci, &ss, handle->printer_info); + fill_insn(handle, insn_cache, ss.buffer, &mci, handle->post_printer, buffer); next_offset = insn_size; @@ -680,8 +682,14 @@ bool cs_disasm_iter(csh ud, const uint8_t **code, size_t *size, SStream_Init(&ss); mci.flat_insn->size = insn_size; + + // map internal instruction opcode to public insn ID + handle->insn_id(handle, insn, mci.Opcode); + handle->printer(&mci, &ss, handle->printer_info); + fill_insn(handle, insn, ss.buffer, &mci, handle->post_printer, *code); + *code += insn_size; *size -= insn_size; *address += insn_size; diff --git a/suite/x86odd.py b/suite/x86odd.py index fa19d050..4575d00f 100755 --- a/suite/x86odd.py +++ b/suite/x86odd.py @@ -43,14 +43,28 @@ CODE32_MEMREF += b"\xa1\xdd\xfe\xff\xff" CODE32_MEMREF += b"\x8b\x04\x91" +CODE32_ARITH = b"\x83\xe0\xf7" +CODE32_ARITH += b"\x83\xe0\x10" +CODE32_ARITH += b"\x83\xe0\x00" +CODE32_ARITH += b"\x80\x23\x10" + +CODE64_ARITH = b"\x41\x83\xe0\xfa" +CODE64_ARITH += b"\x48\x83\xe4\xf0" + + + _python3 = sys.version_info.major == 3 all_tests = ( (CS_ARCH_X86, CS_MODE_32, CODE32, "X86 32 (Intel syntax)", 0), (CS_ARCH_X86, CS_MODE_32, CODE32, "X86 32 (ATT syntax)", CS_OPT_SYNTAX_ATT), + (CS_ARCH_X86, CS_MODE_32, CODE32_MEMREF, "X86 32 MemRef (Intel syntax)", 0), (CS_ARCH_X86, CS_MODE_32, CODE32_MEMREF, "X86 32 MemRef (ATT syntax)", CS_OPT_SYNTAX_ATT), - #(CS_ARCH_X86, CS_MODE_64, X86_CODE64, "X86 64 (Intel syntax)", 0), + + (CS_ARCH_X86, CS_MODE_32, CODE32_ARITH, "X86 32 (Intel syntax)", 0), + + (CS_ARCH_X86, CS_MODE_64, CODE64_ARITH, "X86 64 (Intel syntax)", 0), )