From ff7bba3d6d1398ace7765c74bb56d01f2a3927f5 Mon Sep 17 00:00:00 2001
From: Nguyen Anh Quynh <aquynh@gmail.com>
Date: Mon, 3 Nov 2014 16:32:06 +0800
Subject: [PATCH] x86: print immediates as positive numbers for bitwise logic
 operations: AND, OR, XOR. Only works for x86 Intel syntax so far.
 Issue reported by Pancake.

---
 MCInst.c                       |  3 +-
 MCInst.h                       |  5 +--
 arch/X86/X86ATTInstPrinter.c   |  4 +--
 arch/X86/X86Disassembler.c     |  2 +-
 arch/X86/X86IntelInstPrinter.c | 56 +++++++++++++++++++++++++++-------
 arch/X86/X86Mapping.c          |  8 ++++-
 cs.c                           | 14 +++++++--
 suite/x86odd.py                | 16 +++++++++-
 8 files changed, 86 insertions(+), 22 deletions(-)

diff --git a/MCInst.c b/MCInst.c
index cdd7a0da..ab9ded93 100644
--- a/MCInst.c
+++ b/MCInst.c
@@ -14,7 +14,8 @@ void MCInst_Init(MCInst *inst)
 {
 	inst->OpcodePub = 0;
 	inst->size = 0;
-	inst->has_imm = 0;
+	inst->has_imm = false;
+	inst->op1_size = 0;
 }
 
 void MCInst_clear(MCInst *inst)
diff --git a/MCInst.h b/MCInst.h
index 0cea40bc..97fe80be 100644
--- a/MCInst.h
+++ b/MCInst.h
@@ -91,8 +91,9 @@ MCOperand *MCOperand_CreateImm1(MCInst *inst, int64_t Val);
 /// instruction.
 struct MCInst {
 	unsigned OpcodePub;
-	unsigned size;	// number of operands
-	int has_imm;	// indicate this instruction has an X86_OP_IMM operand - used for ATT syntax
+	uint8_t size;	// number of operands
+	bool has_imm;	// indicate this instruction has an X86_OP_IMM operand - used for ATT syntax
+	uint8_t op1_size; // size of 1st operand - for X86 Intel syntax
 	unsigned Opcode;
 	MCOperand Operands[48];
 	cs_insn *flat_insn;	// insn to be exposed to public
diff --git a/arch/X86/X86ATTInstPrinter.c b/arch/X86/X86ATTInstPrinter.c
index 445b7edd..9a2f252b 100644
--- a/arch/X86/X86ATTInstPrinter.c
+++ b/arch/X86/X86ATTInstPrinter.c
@@ -454,7 +454,7 @@ static void printPCRelImm(MCInst *MI, unsigned OpNo, SStream *O)
 		}
 		if (MI->csh->detail) {
 			MI->flat_insn->detail->x86.operands[MI->flat_insn->detail->x86.op_count].type = X86_OP_IMM;
-			MI->has_imm = 1;
+			MI->has_imm = true;
 			MI->flat_insn->detail->x86.operands[MI->flat_insn->detail->x86.op_count].imm = imm;
 			MI->flat_insn->detail->x86.op_count++;
 		}
@@ -497,7 +497,7 @@ static void printOperand(MCInst *MI, unsigned OpNo, SStream *O)
 				MI->flat_insn->detail->x86.operands[MI->flat_insn->detail->x86.op_count].mem.disp = imm;
 			} else {
 				MI->flat_insn->detail->x86.operands[MI->flat_insn->detail->x86.op_count].type = X86_OP_IMM;
-				MI->has_imm = 1;
+				MI->has_imm = true;
 				MI->flat_insn->detail->x86.operands[MI->flat_insn->detail->x86.op_count].imm = imm;
 				MI->flat_insn->detail->x86.op_count++;
 			}
diff --git a/arch/X86/X86Disassembler.c b/arch/X86/X86Disassembler.c
index c2cb82f9..734af268 100644
--- a/arch/X86/X86Disassembler.c
+++ b/arch/X86/X86Disassembler.c
@@ -775,9 +775,9 @@ bool X86_getInstruction(csh ud, const uint8_t *code, size_t code_len,
 
 		result = (!translateInstruction(instr, &insn)) ?  true : false;
 		if (result) {
+			instr->imm_size = insn.immSize;
 			if (handle->detail) {
 				update_pub_insn(instr->flat_insn, &insn, instr->x86_prefix);
-				instr->imm_size = insn.immSize;
 			} else {
 				// still copy all prefixes
 				instr->x86_prefix[0] = insn.prefix0;
diff --git a/arch/X86/X86IntelInstPrinter.c b/arch/X86/X86IntelInstPrinter.c
index d7cb1a3b..de5f9a77 100644
--- a/arch/X86/X86IntelInstPrinter.c
+++ b/arch/X86/X86IntelInstPrinter.c
@@ -433,6 +433,9 @@ static void printMemOffset(MCInst *MI, unsigned Op, SStream *O)
 
 	if (MI->csh->detail)
 		MI->flat_insn->detail->x86.op_count++;
+
+	if (MI->op1_size == 0)
+		MI->op1_size = MI->x86opsize;
 }
 
 static void printMemOffs8(MCInst *MI, unsigned OpNo, SStream *O)
@@ -477,10 +480,10 @@ void X86_Intel_printInst(MCInst *MI, SStream *O, void *Info)
 	else
 		printInstruction(MI, O, Info);
 
+	reg = X86_insn_reg_intel(MCInst_getOpcode(MI));
 	if (MI->csh->detail) {
 		// first op can be embedded in the asm by llvm.
 		// so we have to add the missing register as the first operand
-		reg = X86_insn_reg_intel(MCInst_getOpcode(MI));
 		if (reg) {
 			// shift all the ops right to leave 1st slot for this new register op
 			memmove(&(MI->flat_insn->detail->x86.operands[1]), &(MI->flat_insn->detail->x86.operands[0]),
@@ -491,6 +494,9 @@ void X86_Intel_printInst(MCInst *MI, SStream *O, void *Info)
 			MI->flat_insn->detail->x86.op_count++;
 		}
 	}
+
+	if (MI->op1_size == 0 && reg)
+		MI->op1_size = MI->csh->regsize_map[reg];
 }
 
 /// printPCRelImm - This is used to print an immediate value that ends up
@@ -522,6 +528,9 @@ static void printPCRelImm(MCInst *MI, unsigned OpNo, SStream *O)
 			MI->flat_insn->detail->x86.operands[MI->flat_insn->detail->x86.op_count].imm = imm;
 			MI->flat_insn->detail->x86.op_count++;
 		}
+
+		if (MI->op1_size == 0)
+			MI->op1_size = MI->imm_size;
 	}
 }
 
@@ -542,18 +551,37 @@ static void printOperand(MCInst *MI, unsigned OpNo, SStream *O)
 				MI->flat_insn->detail->x86.op_count++;
 			}
 		}
+
+		if (MI->op1_size == 0)
+			MI->op1_size = MI->csh->regsize_map[reg];
 	} else if (MCOperand_isImm(Op)) {
 		int64_t imm = MCOperand_getImm(Op);
-		if (imm >= 0) {
-			if (imm > HEX_THRESHOLD)
-				SStream_concat(O, "0x%"PRIx64, imm);
-			else
-				SStream_concat(O, "%"PRIu64, imm);
-		} else {
-			if (imm < -HEX_THRESHOLD)
-				SStream_concat(O, "-0x%"PRIx64, -imm);
-			else
-				SStream_concat(O, "-%"PRIu64, -imm);
+
+		switch(MI->flat_insn->id) {
+			default:
+				if (imm >= 0) {
+					if (imm > HEX_THRESHOLD)
+						SStream_concat(O, "0x%"PRIx64, imm);
+					else
+						SStream_concat(O, "%"PRIu64, imm);
+				} else {
+					if (imm < -HEX_THRESHOLD)
+						SStream_concat(O, "-0x%"PRIx64, -imm);
+					else
+						SStream_concat(O, "-%"PRIu64, -imm);
+				}
+
+				break;
+
+			case X86_INS_AND:
+			case X86_INS_OR:
+			case X86_INS_XOR:
+				// do not print number in negative form
+				if (imm == 0)
+					SStream_concat0(O, "0");
+				else
+					SStream_concat(O, "0x%"PRIx64, arch_masks[MI->op1_size? MI->op1_size : MI->imm_size] & imm);
+				break;
 		}
 
 		if (MI->csh->detail) {
@@ -569,6 +597,9 @@ static void printOperand(MCInst *MI, unsigned OpNo, SStream *O)
 				MI->flat_insn->detail->x86.op_count++;
 			}
 		}
+
+		//if (MI->op1_size == 0)
+		//	MI->op1_size = MI->imm_size;
 	}
 }
 
@@ -657,6 +688,9 @@ static void printMemReference(MCInst *MI, unsigned Op, SStream *O)
 
 	if (MI->csh->detail)
 		MI->flat_insn->detail->x86.op_count++;
+
+	if (MI->op1_size == 0)
+		MI->op1_size = MI->x86opsize;
 }
 
 #define GET_REGINFO_ENUM
diff --git a/arch/X86/X86Mapping.c b/arch/X86/X86Mapping.c
index 1c75db36..94fd0588 100644
--- a/arch/X86/X86Mapping.c
+++ b/arch/X86/X86Mapping.c
@@ -47600,6 +47600,9 @@ void op_addReg(MCInst *MI, int reg)
 		MI->flat_insn->detail->x86.operands[MI->flat_insn->detail->x86.op_count].size = MI->csh->regsize_map[reg];
 		MI->flat_insn->detail->x86.op_count++;
 	}
+
+	if (MI->op1_size == 0)
+		MI->op1_size = MI->csh->regsize_map[reg];
 }
 
 void op_addImm(MCInst *MI, int v)
@@ -47614,9 +47617,12 @@ void op_addImm(MCInst *MI, int v)
 			else
 				MI->flat_insn->detail->x86.operands[MI->flat_insn->detail->x86.op_count].size = MI->imm_size;
 		} else
-			MI->has_imm = 1;
+			MI->has_imm = true;
 		MI->flat_insn->detail->x86.op_count++;
 	}
+
+	if (MI->op1_size == 0)
+		MI->op1_size = MI->imm_size;
 }
 
 void op_addSseCC(MCInst *MI, int v)
diff --git a/cs.c b/cs.c
index 587c1b83..04bc615b 100644
--- a/cs.c
+++ b/cs.c
@@ -264,9 +264,6 @@ static void fill_insn(struct cs_struct *handle, cs_insn *insn, char *buffer, MCI
 	memcpy(insn->bytes, code + insn->size - copy_size, copy_size);
 	insn->size = copy_size;
 
-	// map internal instruction opcode to public insn ID
-	handle->insn_id(handle, insn, MCInst_getOpcode(mci));
-
 	// alias instruction might have ID saved in OpcodePub
 	if (MCInst_getOpcodePub(mci))
 		insn->id = MCInst_getOpcodePub(mci);
@@ -481,7 +478,12 @@ size_t cs_disasm(csh ud, const uint8_t *buffer, size_t size, uint64_t offset, si
 			SStream_Init(&ss);
 
 			mci.flat_insn->size = insn_size;
+
+			// map internal instruction opcode to public insn ID
+			handle->insn_id(handle, insn_cache, mci.Opcode);
+
 			handle->printer(&mci, &ss, handle->printer_info);
+
 			fill_insn(handle, insn_cache, ss.buffer, &mci, handle->post_printer, buffer);
 
 			next_offset = insn_size;
@@ -680,8 +682,14 @@ bool cs_disasm_iter(csh ud, const uint8_t **code, size_t *size,
 		SStream_Init(&ss);
 
 		mci.flat_insn->size = insn_size;
+
+		// map internal instruction opcode to public insn ID
+		handle->insn_id(handle, insn, mci.Opcode);
+
 		handle->printer(&mci, &ss, handle->printer_info);
+
 		fill_insn(handle, insn, ss.buffer, &mci, handle->post_printer, *code);
+
 		*code += insn_size;
 		*size -= insn_size;
 		*address += insn_size;
diff --git a/suite/x86odd.py b/suite/x86odd.py
index fa19d050..4575d00f 100755
--- a/suite/x86odd.py
+++ b/suite/x86odd.py
@@ -43,14 +43,28 @@ CODE32_MEMREF += b"\xa1\xdd\xfe\xff\xff"
 CODE32_MEMREF += b"\x8b\x04\x91"
 
 
+CODE32_ARITH  = b"\x83\xe0\xf7"
+CODE32_ARITH += b"\x83\xe0\x10"
+CODE32_ARITH += b"\x83\xe0\x00"
+CODE32_ARITH += b"\x80\x23\x10"
+
+CODE64_ARITH  = b"\x41\x83\xe0\xfa"
+CODE64_ARITH += b"\x48\x83\xe4\xf0"
+
+
+
 _python3 = sys.version_info.major == 3
 
 all_tests = (
         (CS_ARCH_X86, CS_MODE_32, CODE32, "X86 32 (Intel syntax)", 0),
         (CS_ARCH_X86, CS_MODE_32, CODE32, "X86 32 (ATT syntax)", CS_OPT_SYNTAX_ATT),
+
         (CS_ARCH_X86, CS_MODE_32, CODE32_MEMREF, "X86 32 MemRef (Intel syntax)", 0),
         (CS_ARCH_X86, CS_MODE_32, CODE32_MEMREF, "X86 32 MemRef (ATT syntax)", CS_OPT_SYNTAX_ATT),
-        #(CS_ARCH_X86, CS_MODE_64, X86_CODE64, "X86 64 (Intel syntax)", 0),
+
+        (CS_ARCH_X86, CS_MODE_32, CODE32_ARITH, "X86 32 (Intel syntax)", 0),
+
+        (CS_ARCH_X86, CS_MODE_64, CODE64_ARITH, "X86 64 (Intel syntax)", 0),
 )