diff --git a/.gitignore b/.gitignore index 1a6d320d..6cb02234 100644 --- a/.gitignore +++ b/.gitignore @@ -70,6 +70,7 @@ tests/test_m680x tests/test_evm tests/test_wasm tests/test_mos65xx +tests/test_bpf # regress binaries suite/regress/invalid_read_in_print_operand diff --git a/CMakeLists.txt b/CMakeLists.txt index fb6883f8..49a1cf99 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,8 +28,8 @@ option(CAPSTONE_BUILD_TESTS "Build tests" ON) option(CAPSTONE_BUILD_CSTOOL "Build cstool" ON) option(CAPSTONE_USE_DEFAULT_ALLOC "Use default memory allocation functions" ON) -set(SUPPORTED_ARCHITECTURES ARM ARM64 M68K MIPS PPC SPARC SYSZ XCORE X86 TMS320C64X M680X EVM MOS65XX WASM) -set(SUPPORTED_ARCHITECTURE_LABELS ARM ARM64 M68K MIPS PowerPC Sparc SystemZ XCore x86 TMS320C64x M680x EVM MOS65XX WASM) +set(SUPPORTED_ARCHITECTURES ARM ARM64 M68K MIPS PPC SPARC SYSZ XCORE X86 TMS320C64X M680X EVM MOS65XX WASM BPF) +set(SUPPORTED_ARCHITECTURE_LABELS ARM ARM64 M68K MIPS PowerPC Sparc SystemZ XCore x86 TMS320C64x M680x EVM MOS65XX WASM BPF) list(LENGTH SUPPORTED_ARCHITECTURES count) math(EXPR count "${count}-1") @@ -115,6 +115,7 @@ set(HEADERS_COMMON include/capstone/tms320c64x.h include/capstone/m680x.h include/capstone/mos65xx.h + include/capstone/bpf.h include/capstone/platform.h ) @@ -475,6 +476,24 @@ if (NOT CAPSTONE_X86_ONLY AND CAPSTONE_MOS65XX_SUPPORT) set(TEST_SOURCES ${TEST_SOURCES} test_mos65xx.c) endif () +if (NOT CAPSTONE_X86_ONLY AND CAPSTONE_BPF_SUPPORT) + add_definitions(-DCAPSTONE_HAS_BPF) + set(SOURCES_BPF + arch/BPF/BPFDisassembler.c + arch/BPF/BPFInstPrinter.c + arch/BPF/BPFMapping.c + arch/BPF/BPFModule.c + ) + set(HEADERS_BPF + arch/BPF/BPFConstants.h + arch/BPF/BPFDisassembler.h + arch/BPF/BPFInstPrinter.h + arch/BPF/BPFMapping.h + arch/BPF/BPFModule.h + ) + set(TEST_SOURCES ${TEST_SOURCES} test_bpf.c) +endif () + if (CAPSTONE_OSXKERNEL_SUPPORT) add_definitions(-DCAPSTONE_HAS_OSXKERNEL) endif () @@ -495,6 +514,7 @@ set(ALL_SOURCES ${SOURCES_EVM} 
${SOURCES_WASM} ${SOURCES_MOS65XX} + ${SOURCES_BPF} ) set(ALL_HEADERS @@ -514,6 +534,7 @@ set(ALL_HEADERS ${HEADERS_EVM} ${HEADERS_WASM} ${HEADERS_MOS65XX} + ${HEADERS_BPF} ) include_directories("${PROJECT_SOURCE_DIR}/include") @@ -596,6 +617,7 @@ source_group("Source\\M680X" FILES ${SOURCES_M680X}) source_group("Source\\EVM" FILES ${SOURCES_EVM}) source_group("Source\\WASM" FILES ${SOURCES_WASM}) source_group("Source\\MOS65XX" FILES ${SOURCES_MOS65XX}) +source_group("Source\\BPF" FILES ${SOURCES_BPF}) source_group("Include\\Common" FILES ${HEADERS_COMMON}) source_group("Include\\Engine" FILES ${HEADERS_ENGINE}) @@ -613,6 +635,7 @@ source_group("Include\\M680X" FILES ${HEADERS_MC680X}) source_group("Include\\EVM" FILES ${HEADERS_EVM}) source_group("Include\\WASM" FILES ${HEADERS_WASM}) source_group("Include\\MOS65XX" FILES ${HEADERS_MOS65XX}) +source_group("Include\\BPF" FILES ${HEADERS_BPF}) ### test library 64bit routine: get_property(LIB64 GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS) diff --git a/COMPILE.TXT b/COMPILE.TXT index 9a1edd0b..07f0411d 100644 --- a/COMPILE.TXT +++ b/COMPILE.TXT @@ -96,6 +96,7 @@ Capstone requires no prerequisite packages, so it is easy to compile & install. /usr/include/capstone/systemz.h /usr/include/capstone/tms320c64x.h /usr/include/capstone/xcore.h + /usr/include/capstone/bpf.h /usr/include/capstone/platform.h /usr/lib/libcapstone.so (for Linux/*nix), or /usr/lib/libcapstone.dylib (OSX) /usr/lib/libcapstone.a diff --git a/COMPILE_CMAKE.TXT b/COMPILE_CMAKE.TXT index b1d441c8..e14ea081 100644 --- a/COMPILE_CMAKE.TXT +++ b/COMPILE_CMAKE.TXT @@ -34,6 +34,7 @@ Get CMake for free from http://www.cmake.org. - CAPSTONE_X86_M680X: support M680X. Run cmake with -DCAPSTONE_M680X_SUPPORT=0 to remove M680X. - CAPSTONE_X86_EVM: support EVM. Run cmake with -DCAPSTONE_EVM_SUPPORT=0 to remove EVM. - CAPSTONE_X86_WASM: support Web Assembly. Run cmake with -DCAPSTONE_WASM_SUPPORT=0 to remove WASM. + - CAPSTONE_BPF_SUPPORT: support BPF. 
Run cmake with -DCAPSTONE_BPF_SUPPORT=0 to remove BPF. By default, all architectures are compiled in. diff --git a/CREDITS.TXT b/CREDITS.TXT index a2ffc6ab..f0e8efc2 100644 --- a/CREDITS.TXT +++ b/CREDITS.TXT @@ -81,3 +81,4 @@ Tong Yu(Spike) & Kai Jern, Lau (xwings): WASM architecture. Sebastian Macke: MOS65XX architecture Ilya Leoshkevich: SystemZ architecture improvements. Do Minh Tuan: Regression testing tool (cstest) +david942j: BPF (both classic and extended) architecture. diff --git a/HACK.TXT b/HACK.TXT index a928621b..1183b4eb 100644 --- a/HACK.TXT +++ b/HACK.TXT @@ -7,6 +7,7 @@ Capstone source is organized as followings. ├── arch <- code handling disasm engine for each arch │   ├── AArch64 <- ARM64 (aka ARMv8) engine │   ├── ARM <- ARM engine +│   ├── BPF <- Berkeley Packet Filter engine │   ├── EVM <- Ethereum engine │   ├── M680X <- M680X engine │   ├── M68K <- M68K engine diff --git a/Makefile b/Makefile index c31c7e1c..401c33b3 100644 --- a/Makefile +++ b/Makefile @@ -271,10 +271,21 @@ ifneq (,$(findstring mos65xx,$(CAPSTONE_ARCHS))) endif +DEP_BPF = +DEP_BPF += $(wildcard arch/BPF/BPF*.inc) + +LIBOBJ_BPF = +ifneq (,$(findstring bpf,$(CAPSTONE_ARCHS))) + CFLAGS += -DCAPSTONE_HAS_BPF + LIBSRC_BPF += $(wildcard arch/BPF/BPF*.c) + LIBOBJ_BPF += $(LIBSRC_BPF:%.c=$(OBJDIR)/%.o) +endif + + LIBOBJ = LIBOBJ += $(OBJDIR)/cs.o $(OBJDIR)/utils.o $(OBJDIR)/SStream.o $(OBJDIR)/MCInstrDesc.o $(OBJDIR)/MCRegisterInfo.o LIBOBJ += $(LIBOBJ_ARM) $(LIBOBJ_ARM64) $(LIBOBJ_M68K) $(LIBOBJ_MIPS) $(LIBOBJ_PPC) $(LIBOBJ_SPARC) $(LIBOBJ_SYSZ) -LIBOBJ += $(LIBOBJ_X86) $(LIBOBJ_XCORE) $(LIBOBJ_TMS320C64X) $(LIBOBJ_M680X) $(LIBOBJ_EVM) $(LIBOBJ_MOS65XX) $(LIBOBJ_WASM) +LIBOBJ += $(LIBOBJ_X86) $(LIBOBJ_XCORE) $(LIBOBJ_TMS320C64X) $(LIBOBJ_M680X) $(LIBOBJ_EVM) $(LIBOBJ_MOS65XX) $(LIBOBJ_WASM) $(LIBOBJ_BPF) LIBOBJ += $(OBJDIR)/MCInst.o @@ -405,6 +416,7 @@ $(LIBOBJ_M680X): $(DEP_M680X) $(LIBOBJ_EVM): $(DEP_EVM) $(LIBOBJ_WASM): $(DEP_WASM) $(LIBOBJ_MOS65XX): $(DEP_MOS65XX) 
+$(LIBOBJ_BPF): $(DEP_BPF) ifeq ($(CAPSTONE_STATIC),yes) $(ARCHIVE): $(LIBOBJ) @@ -480,12 +492,12 @@ dist: TESTS = test_basic test_detail test_arm test_arm64 test_m68k test_mips test_ppc test_sparc -TESTS += test_systemz test_x86 test_xcore test_iter test_evm test_mos65xx test_wasm +TESTS += test_systemz test_x86 test_xcore test_iter test_evm test_mos65xx test_wasm test_bpf TESTS += test_basic.static test_detail.static test_arm.static test_arm64.static TESTS += test_m68k.static test_mips.static test_ppc.static test_sparc.static TESTS += test_systemz.static test_x86.static test_xcore.static test_m680x.static TESTS += test_skipdata test_skipdata.static test_iter.static test_evm.static -TESTS += test_mos65xx.static test_wasm.static +TESTS += test_mos65xx.static test_wasm.static test_bpf.static check: $(TESTS) fuzztest fuzzallcorp test_%: ./tests/$@ > /dev/null && echo OK || echo FAILED diff --git a/README.md b/README.md index 4d00fc57..d1895523 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ disasm engine for binary analysis and reversing in the security community. Created by Nguyen Anh Quynh, then developed and maintained by a small community, Capstone offers some unparalleled features: -- Support multiple hardware architectures: ARM, ARM64 (ARMv8), Ethereum VM, Webassembly, M68K, +- Support multiple hardware architectures: ARM, ARM64 (ARMv8), BPF, Ethereum VM, Webassembly, M68K, Mips, MOS65XX, PPC, Sparc, SystemZ, TMS320C64X, M680X, XCore and X86 (including X86_64). - Having clean/simple/lightweight/intuitive architecture-neutral API. 
diff --git a/arch/BPF/BPFConstants.h b/arch/BPF/BPFConstants.h new file mode 100644 index 00000000..d1259046 --- /dev/null +++ b/arch/BPF/BPFConstants.h @@ -0,0 +1,88 @@ +/* Capstone Disassembly Engine */ +/* BPF Backend by david942j , 2019 */ + +/* This file defines constants and macros used for parsing a BPF instruction */ + +#ifndef CS_BPF_CONSTANTS_H +#define CS_BPF_CONSTANTS_H + +#define BPF_CLASS(code) ((code) & 0x7) + +///< Instruction classes +#define BPF_CLASS_LD 0x00 +#define BPF_CLASS_LDX 0x01 +#define BPF_CLASS_ST 0x02 +#define BPF_CLASS_STX 0x03 +#define BPF_CLASS_ALU 0x04 +#define BPF_CLASS_JMP 0x05 +#define BPF_CLASS_RET 0x06 ///< cBPF only +#define BPF_CLASS_MISC 0x07 ///< cBPF only +#define BPF_CLASS_ALU64 0x07 ///< eBPF only + +#define BPF_OP(code) ((code) & 0xf0) + +///< Types of ALU instruction +#define BPF_ALU_ADD 0x00 +#define BPF_ALU_SUB 0x10 +#define BPF_ALU_MUL 0x20 +#define BPF_ALU_DIV 0x30 +#define BPF_ALU_OR 0x40 +#define BPF_ALU_AND 0x50 +#define BPF_ALU_LSH 0x60 +#define BPF_ALU_RSH 0x70 +#define BPF_ALU_NEG 0x80 +#define BPF_ALU_MOD 0x90 +#define BPF_ALU_XOR 0xa0 +#define BPF_ALU_MOV 0xb0 ///< eBPF only: mov reg to reg +#define BPF_ALU_ARSH 0xc0 ///< eBPF only: sign extending shift right +#define BPF_ALU_END 0xd0 ///< eBPF only: endianness conversion + +///< Types of jmp instruction +#define BPF_JUMP_JA 0x00 ///< goto +#define BPF_JUMP_JEQ 0x10 ///< '==' +#define BPF_JUMP_JGT 0x20 ///< unsigned '>' +#define BPF_JUMP_JGE 0x30 ///< unsigned '>=' +#define BPF_JUMP_JSET 0x40 ///< '&' +#define BPF_JUMP_JNE 0x50 ///< eBPF only: '!=' */ +#define BPF_JUMP_JSGT 0x60 ///< eBPF only: signed '>' +#define BPF_JUMP_JSGE 0x70 ///< eBPF only: signed '>=' +#define BPF_JUMP_CALL 0x80 ///< eBPF only: function call +#define BPF_JUMP_EXIT 0x90 ///< eBPF only: exit +#define BPF_JUMP_JLT 0xa0 ///< eBPF only: unsigned '<' +#define BPF_JUMP_JLE 0xb0 ///< eBPF only: unsigned '<=' +#define BPF_JUMP_JSLT 0xc0 ///< eBPF only: signed '<' +#define BPF_JUMP_JSLE 
0xd0 ///< eBPF only: signed '<=' + +#define BPF_SRC(code) ((code) & 0x08) +#define BPF_RVAL(code) ((code) & 0x18) /* cBPF only: for return types */ +///< Source operand +#define BPF_SRC_K 0x00 +#define BPF_SRC_X 0x08 +#define BPF_SRC_A 0x10 /* cBPF only */ + +#define BPF_SRC_LITTLE BPF_SRC_K +#define BPF_SRC_BIG BPF_SRC_X + +#define BPF_SIZE(code) ((code) & 0x18) +///< Size modifier +#define BPF_SIZE_W 0x00 ///< word +#define BPF_SIZE_H 0x08 ///< half word +#define BPF_SIZE_B 0x10 ///< byte +#define BPF_SIZE_DW 0x18 ///< eBPF only: double word + +#define BPF_MODE(code) ((code) & 0xe0) +///< Mode modifier +#define BPF_MODE_IMM 0x00 ///< used for 32-bit mov in cBPF and 64-bit in eBPF +#define BPF_MODE_ABS 0x20 +#define BPF_MODE_IND 0x40 +#define BPF_MODE_MEM 0x60 +#define BPF_MODE_LEN 0x80 ///< cBPF only, reserved in eBPF +#define BPF_MODE_MSH 0xa0 ///< cBPF only, reserved in eBPF +#define BPF_MODE_XADD 0xc0 ///< eBPF only: exclusive add + +#define BPF_MISCOP(code) ((code) & 0x80) +///< Operation of misc +#define BPF_MISCOP_TAX 0x00 +#define BPF_MISCOP_TXA 0x80 + +#endif diff --git a/arch/BPF/BPFDisassembler.c b/arch/BPF/BPFDisassembler.c new file mode 100644 index 00000000..cea47523 --- /dev/null +++ b/arch/BPF/BPFDisassembler.c @@ -0,0 +1,458 @@ +/* Capstone Disassembly Engine */ +/* BPF Backend by david942j , 2019 */ + +#ifdef CAPSTONE_HAS_BPF + +#include +#include // offsetof macro + +#include "BPFConstants.h" +#include "BPFDisassembler.h" +#include "BPFMapping.h" +#include "../../cs_priv.h" + +static uint16_t read_u16(cs_struct *ud, const uint8_t *code) +{ + if (MODE_IS_BIG_ENDIAN(ud->mode)) + return (((uint16_t)code[0] << 8) | code[1]); + else + return (((uint16_t)code[1] << 8) | code[0]); +} + +static uint32_t read_u32(cs_struct *ud, const uint8_t *code) +{ + if (MODE_IS_BIG_ENDIAN(ud->mode)) + return ((uint32_t)read_u16(ud, code) << 16) | read_u16(ud, code + 2); + else + return ((uint32_t)read_u16(ud, code + 2) << 16) | read_u16(ud, code); +} + +///< Malloc 
bpf_internal, also checks if code_len is large enough. +static bpf_internal *alloc_bpf_internal(size_t code_len) +{ + bpf_internal *bpf; + + if (code_len < 8) + return NULL; + bpf = cs_mem_malloc(sizeof(bpf_internal)); + if (bpf == NULL) + return NULL; + /* default value */ + bpf->insn_size = 8; + return bpf; +} + +///< Fetch a cBPF structure from code +static bpf_internal* fetch_cbpf(cs_struct *ud, const uint8_t *code, + size_t code_len) +{ + bpf_internal *bpf; + + bpf = alloc_bpf_internal(code_len); + if (bpf == NULL) + return NULL; + + bpf->op = read_u16(ud, code); + bpf->jt = code[2]; + bpf->jf = code[3]; + bpf->k = read_u32(ud, code + 4); + return bpf; +} + +///< Fetch an eBPF structure from code +static bpf_internal* fetch_ebpf(cs_struct *ud, const uint8_t *code, + size_t code_len) +{ + bpf_internal *bpf; + + bpf = alloc_bpf_internal(code_len); + if (bpf == NULL) + return NULL; + + bpf->op = (uint16_t)code[0]; + + // eBPF has one 16-byte instruction: BPF_LD | BPF_DW | BPF_IMM, + // in this case imm is combined with the next block's imm. + if (bpf->op == (BPF_CLASS_LD | BPF_SIZE_DW | BPF_MODE_IMM)) { + if (code_len < 16) { + cs_mem_free(bpf); + return NULL; + } + bpf->k = read_u32(ud, code + 4) | (((uint64_t)read_u32(ud, code + 12)) << 32); + bpf->insn_size = 16; + } + else { + bpf->dst = code[1] & 0xf; + bpf->src = (code[1] & 0xf0) >> 4; + bpf->offset = read_u16(ud, code + 2); + bpf->k = read_u32(ud, code + 4); + } + return bpf; +} + +#define CHECK_READABLE_REG(ud, reg) do { \ + if (! ((reg) >= BPF_REG_R0 && (reg) <= BPF_REG_R10)) \ + return false; \ + } while (0) + +#define CHECK_WRITABLE_REG(ud, reg) do { \ + if (! 
((reg) >= BPF_REG_R0 && (reg) < BPF_REG_R10)) \ + return false; \ + } while (0) + +#define CHECK_READABLE_AND_PUSH(ud, MI, r) do { \ + CHECK_READABLE_REG(ud, r + BPF_REG_R0); \ + MCOperand_CreateReg0(MI, r + BPF_REG_R0); \ + } while (0) + +#define CHECK_WRITABLE_AND_PUSH(ud, MI, r) do { \ + CHECK_WRITABLE_REG(ud, r + BPF_REG_R0); \ + MCOperand_CreateReg0(MI, r + BPF_REG_R0); \ + } while (0) + +static bool decodeLoad(cs_struct *ud, MCInst *MI, bpf_internal *bpf) +{ + if (!EBPF_MODE(ud)) { + /* + * +-----+-----------+--------------------+ + * | ldb | [k] | [x+k] | + * | ldh | [k] | [x+k] | + * +-----+-----------+--------------------+ + */ + if (BPF_SIZE(bpf->op) == BPF_SIZE_DW) + return false; + if (BPF_SIZE(bpf->op) == BPF_SIZE_B || BPF_SIZE(bpf->op) == BPF_SIZE_H) { + /* no ldx */ + if (BPF_CLASS(bpf->op) != BPF_CLASS_LD) + return false; + /* can only be BPF_ABS and BPF_IND */ + if (BPF_MODE(bpf->op) == BPF_MODE_ABS) { + MCOperand_CreateImm0(MI, bpf->k); + return true; + } + else if (BPF_MODE(bpf->op) == BPF_MODE_IND) { + MCOperand_CreateReg0(MI, BPF_REG_X); + MCOperand_CreateImm0(MI, bpf->k); + return true; + } + return false; + } + /* + * +-----+----+------+------+-----+-------+ + * | ld | #k | #len | M[k] | [k] | [x+k] | + * +-----+----+------+------+-----+-------+ + * | ldx | #k | #len | M[k] | 4*([k]&0xf) | + * +-----+----+------+------+-------------+ + */ + switch (BPF_MODE(bpf->op)) { + default: + break; + case BPF_MODE_IMM: + MCOperand_CreateImm0(MI, bpf->k); + return true; + case BPF_MODE_LEN: + return true; + case BPF_MODE_MEM: + MCOperand_CreateImm0(MI, bpf->k); + return true; + } + if (BPF_CLASS(bpf->op) == BPF_CLASS_LD) { + if (BPF_MODE(bpf->op) == BPF_MODE_ABS) { + MCOperand_CreateImm0(MI, bpf->k); + return true; + } + else if (BPF_MODE(bpf->op) == BPF_MODE_IND) { + MCOperand_CreateReg0(MI, BPF_REG_X); + MCOperand_CreateImm0(MI, bpf->k); + return true; + } + } + else { /* LDX */ + if (BPF_MODE(bpf->op) == BPF_MODE_MSH) { + MCOperand_CreateImm0(MI, 
bpf->k); + return true; + } + } + return false; + } + + /* eBPF mode */ + /* + * - IMM: lddw imm64 + * - ABS: ld{w,h,b,dw} [k] + * - IND: ld{w,h,b,dw} [src+k] + * - MEM: ldx{w,h,b,dw} dst, [src+off] + */ + if (BPF_CLASS(bpf->op) == BPF_CLASS_LD) { + switch (BPF_MODE(bpf->op)) { + case BPF_MODE_IMM: + if (bpf->op != (BPF_CLASS_LD | BPF_SIZE_DW | BPF_MODE_IMM)) + return false; + MCOperand_CreateImm0(MI, bpf->k); + return true; + case BPF_MODE_ABS: + MCOperand_CreateImm0(MI, bpf->k); + return true; + case BPF_MODE_IND: + CHECK_READABLE_AND_PUSH(ud, MI, bpf->src); + MCOperand_CreateImm0(MI, bpf->k); + return true; + } + return false; + + } + /* LDX */ + if (BPF_MODE(bpf->op) == BPF_MODE_MEM) { + CHECK_WRITABLE_AND_PUSH(ud, MI, bpf->dst); + CHECK_READABLE_AND_PUSH(ud, MI, bpf->src); + MCOperand_CreateImm0(MI, bpf->offset); + return true; + } + return false; +} + +static bool decodeStore(cs_struct *ud, MCInst *MI, bpf_internal *bpf) +{ + /* in cBPF, only BPF_ST* | BPF_MEM | BPF_W is valid + * while in eBPF: + * - BPF_STX | BPF_XADD | BPF_{W,DW} + * - BPF_ST* | BPF_MEM | BPF_{W,H,B,DW} + * are valid + */ + if (!EBPF_MODE(ud)) { + /* can only store to M[] */ + if (bpf->op != (BPF_CLASS(bpf->op) | BPF_MODE_MEM | BPF_SIZE_W)) + return false; + MCOperand_CreateImm0(MI, bpf->k); + return true; + } + + /* eBPF */ + + if (BPF_MODE(bpf->op) == BPF_MODE_XADD) { + if (BPF_CLASS(bpf->op) != BPF_CLASS_STX) + return false; + if (BPF_SIZE(bpf->op) != BPF_SIZE_W && BPF_SIZE(bpf->op) != BPF_SIZE_DW) + return false; + /* xadd [dst + off], src */ + CHECK_READABLE_AND_PUSH(ud, MI, bpf->dst); + MCOperand_CreateImm0(MI, bpf->offset); + CHECK_READABLE_AND_PUSH(ud, MI, bpf->src); + return true; + } + + if (BPF_MODE(bpf->op) != BPF_MODE_MEM) + return false; + + /* st [dst + off], src */ + CHECK_READABLE_AND_PUSH(ud, MI, bpf->dst); + MCOperand_CreateImm0(MI, bpf->offset); + if (BPF_CLASS(bpf->op) == BPF_CLASS_ST) + MCOperand_CreateImm0(MI, bpf->k); + else + CHECK_READABLE_AND_PUSH(ud, MI, 
bpf->src); + return true; +} + +static bool decodeALU(cs_struct *ud, MCInst *MI, bpf_internal *bpf) +{ + /* Set MI->Operands */ + + /* cBPF */ + if (!EBPF_MODE(ud)) { + if (BPF_OP(bpf->op) > BPF_ALU_XOR) + return false; + /* cBPF's NEG has no operands */ + if (BPF_OP(bpf->op) == BPF_ALU_NEG) + return true; + if (BPF_SRC(bpf->op) == BPF_SRC_K) + MCOperand_CreateImm0(MI, bpf->k); + else /* BPF_SRC_X */ + MCOperand_CreateReg0(MI, BPF_REG_X); + return true; + } + + /* eBPF */ + + if (BPF_OP(bpf->op) > BPF_ALU_END) + return false; + /* ALU64 class doesn't have ENDian */ + /* ENDian's imm must be one of 16, 32, 64 */ + if (BPF_OP(bpf->op) == BPF_ALU_END) { + if (BPF_CLASS(bpf->op) == BPF_CLASS_ALU64) + return false; + if (bpf->k != 16 && bpf->k != 32 && bpf->k != 64) + return false; + } + + /* - op dst, imm + * - op dst, src + * - neg dst + * - le dst + */ + /* every ALU instructions have dst op */ + CHECK_WRITABLE_AND_PUSH(ud, MI, bpf->dst); + + /* special cases */ + if (BPF_OP(bpf->op) == BPF_ALU_NEG) + return true; + if (BPF_OP(bpf->op) == BPF_ALU_END) { + /* bpf->k must be one of 16, 32, 64 */ + MCInst_setOpcode(MI, MCInst_getOpcode(MI) | ((uint32_t)bpf->k << 4)); + return true; + } + + /* normal cases */ + if (BPF_SRC(bpf->op) == BPF_SRC_K) { + MCOperand_CreateImm0(MI, bpf->k); + } + else { /* BPF_SRC_X */ + CHECK_READABLE_AND_PUSH(ud, MI, bpf->src); + } + return true; +} + +static bool decodeJump(cs_struct *ud, MCInst *MI, bpf_internal *bpf) +{ + /* cBPF and eBPF are very different in class jump */ + if (!EBPF_MODE(ud)) { + if (BPF_OP(bpf->op) > BPF_JUMP_JSET) + return false; + + /* ja is a special case of jumps */ + if (BPF_OP(bpf->op) == BPF_JUMP_JA) { + MCOperand_CreateImm0(MI, bpf->k); + return true; + } + + if (BPF_SRC(bpf->op) == BPF_SRC_K) + MCOperand_CreateImm0(MI, bpf->k); + else /* BPF_SRC_X */ + MCOperand_CreateReg0(MI, BPF_REG_X); + MCOperand_CreateImm0(MI, bpf->jt); + MCOperand_CreateImm0(MI, bpf->jf); + } + else { + if (BPF_OP(bpf->op) > 
BPF_JUMP_JSLE) + return false; + + /* No operands for exit */ + if (BPF_OP(bpf->op) == BPF_JUMP_EXIT) + return bpf->op == (BPF_CLASS_JMP | BPF_JUMP_EXIT); + if (BPF_OP(bpf->op) == BPF_JUMP_CALL) { + if (bpf->op != (BPF_CLASS_JMP | BPF_JUMP_CALL)) + return false; + MCOperand_CreateImm0(MI, bpf->k); + return true; + } + + /* ja is a special case of jumps */ + if (BPF_OP(bpf->op) == BPF_JUMP_JA) { + if (BPF_SRC(bpf->op) != BPF_SRC_K) + return false; + MCOperand_CreateImm0(MI, bpf->offset); + return true; + } + + /* dst, src, +off */ + CHECK_READABLE_AND_PUSH(ud, MI, bpf->dst); + if (BPF_SRC(bpf->op) == BPF_SRC_K) + MCOperand_CreateImm0(MI, bpf->k); + else + CHECK_READABLE_AND_PUSH(ud, MI, bpf->src); + MCOperand_CreateImm0(MI, bpf->offset); + } + return true; +} + +static bool decodeReturn(cs_struct *ud, MCInst *MI, bpf_internal *bpf) +{ + /* Here only handles the BPF_RET class in cBPF */ + switch (BPF_RVAL(bpf->op)) { + case BPF_SRC_K: + MCOperand_CreateImm0(MI, bpf->k); + return true; + case BPF_SRC_X: + MCOperand_CreateReg0(MI, BPF_REG_X); + return true; + case BPF_SRC_A: + MCOperand_CreateReg0(MI, BPF_REG_A); + return true; + } + return false; +} + +static bool decodeMISC(cs_struct *ud, MCInst *MI, bpf_internal *bpf) +{ + uint16_t op = bpf->op ^ BPF_CLASS_MISC; + return op == BPF_MISCOP_TAX || op == BPF_MISCOP_TXA; +} + +///< 1. Check if the instruction is valid +///< 2. Set MI->opcode +///< 3. 
Set MI->Operands +static bool getInstruction(cs_struct *ud, MCInst *MI, bpf_internal *bpf) +{ + cs_detail *detail; + + detail = MI->flat_insn->detail; + // initialize detail + if (detail) { + memset(detail, 0, offsetof(cs_detail, bpf) + sizeof(cs_bpf)); + } + + MCInst_clear(MI); + MCInst_setOpcode(MI, bpf->op); + + switch (BPF_CLASS(bpf->op)) { + default: /* should never happen */ + return false; + case BPF_CLASS_LD: + case BPF_CLASS_LDX: + return decodeLoad(ud, MI, bpf); + case BPF_CLASS_ST: + case BPF_CLASS_STX: + return decodeStore(ud, MI, bpf); + case BPF_CLASS_ALU: + return decodeALU(ud, MI, bpf); + case BPF_CLASS_JMP: + return decodeJump(ud, MI, bpf); + case BPF_CLASS_RET: + /* eBPF doesn't have this class */ + if (EBPF_MODE(ud)) + return false; + return decodeReturn(ud, MI, bpf); + case BPF_CLASS_MISC: + /* case BPF_CLASS_ALU64: */ + if (EBPF_MODE(ud)) + return decodeALU(ud, MI, bpf); + else + return decodeMISC(ud, MI, bpf); + } +} + +bool BPF_getInstruction(csh ud, const uint8_t *code, size_t code_len, + MCInst *instr, uint16_t *size, uint64_t address, void *info) +{ + cs_struct *cs; + bpf_internal *bpf; + + cs = (cs_struct*)ud; + if (EBPF_MODE(cs)) + bpf = fetch_ebpf(cs, code, code_len); + else + bpf = fetch_cbpf(cs, code, code_len); + if (bpf == NULL) + return false; + if (!getInstruction(cs, instr, bpf)) { + cs_mem_free(bpf); + return false; + } + + *size = bpf->insn_size; + cs_mem_free(bpf); + + return true; +} + +#endif diff --git a/arch/BPF/BPFDisassembler.h b/arch/BPF/BPFDisassembler.h new file mode 100644 index 00000000..9616b081 --- /dev/null +++ b/arch/BPF/BPFDisassembler.h @@ -0,0 +1,27 @@ +/* Capstone Disassembly Engine */ +/* BPF Backend by david942j , 2019 */ + +#ifndef CS_BPF_DISASSEMBLER_H +#define CS_BPF_DISASSEMBLER_H + +#include "../../MCInst.h" + +typedef struct bpf_internal { + uint16_t op; + uint64_t k; + /* for cBPF */ + uint8_t jt; + uint8_t jf; + /* for eBPF */ + uint8_t dst; + uint8_t src; + uint16_t offset; + + /* length of this 
bpf instruction */ + uint8_t insn_size; +} bpf_internal; + +bool BPF_getInstruction(csh ud, const uint8_t *code, size_t code_len, + MCInst *instr, uint16_t *size, uint64_t address, void *info); + +#endif diff --git a/arch/BPF/BPFInstPrinter.c b/arch/BPF/BPFInstPrinter.c new file mode 100644 index 00000000..782d8cbc --- /dev/null +++ b/arch/BPF/BPFInstPrinter.c @@ -0,0 +1,280 @@ +/* Capstone Disassembly Engine */ +/* BPF Backend by david942j , 2019 */ + +#include + +#include "BPFConstants.h" +#include "BPFInstPrinter.h" +#include "BPFMapping.h" + +static cs_bpf_op *expand_bpf_operands(cs_bpf *bpf) +{ + /* assert(bpf->op_count < 3); */ + return &bpf->operands[bpf->op_count++]; +} + +static void push_op_reg(cs_bpf *bpf, bpf_op_type val, uint8_t ac_mode) +{ + cs_bpf_op *op = expand_bpf_operands(bpf); + + op->type = BPF_OP_REG; + op->reg = val; + op->access = ac_mode; +} + +static void push_op_imm(cs_bpf *bpf, uint64_t val) +{ + cs_bpf_op *op = expand_bpf_operands(bpf); + + op->type = BPF_OP_IMM; + op->imm = val; +} + +static void push_op_off(cs_bpf *bpf, uint32_t val) +{ + cs_bpf_op *op = expand_bpf_operands(bpf); + + op->type = BPF_OP_OFF; + op->off = val; +} + +static void push_op_mem(cs_bpf *bpf, bpf_reg reg, uint32_t val) +{ + cs_bpf_op *op = expand_bpf_operands(bpf); + + op->type = BPF_OP_MEM; + op->mem.base = reg; + op->mem.disp = val; +} + +static void push_op_mmem(cs_bpf *bpf, uint32_t val) +{ + cs_bpf_op *op = expand_bpf_operands(bpf); + + op->type = BPF_OP_MMEM; + op->mmem = val; +} + +static void push_op_msh(cs_bpf *bpf, uint32_t val) +{ + cs_bpf_op *op = expand_bpf_operands(bpf); + + op->type = BPF_OP_MSH; + op->msh = val; +} + +static void push_op_ext(cs_bpf *bpf, bpf_ext_type val) +{ + cs_bpf_op *op = expand_bpf_operands(bpf); + + op->type = BPF_OP_EXT; + op->ext = val; +} + +static void convert_operands(MCInst *MI, cs_bpf *bpf) +{ + unsigned opcode = MCInst_getOpcode(MI); + unsigned mc_op_count = MCInst_getNumOperands(MI); + MCOperand *op; + MCOperand 
*op2; + unsigned i; + + bpf->op_count = 0; + if (BPF_CLASS(opcode) == BPF_CLASS_LD || BPF_CLASS(opcode) == BPF_CLASS_LDX) { + switch (BPF_MODE(opcode)) { + case BPF_MODE_IMM: + push_op_imm(bpf, MCOperand_getImm(MCInst_getOperand(MI, 0))); + break; + case BPF_MODE_ABS: + op = MCInst_getOperand(MI, 0); + push_op_mem(bpf, BPF_REG_INVALID, (uint32_t)MCOperand_getImm(op)); + break; + case BPF_MODE_IND: + op = MCInst_getOperand(MI, 0); + op2 = MCInst_getOperand(MI, 1); + push_op_mem(bpf, MCOperand_getReg(op), (uint32_t)MCOperand_getImm(op2)); + break; + case BPF_MODE_MEM: + if (EBPF_MODE(MI->csh)) { + /* ldx{w,h,b,dw} dst, [src+off] */ + push_op_reg(bpf, MCOperand_getReg(MCInst_getOperand(MI, 0)), CS_AC_WRITE); + op = MCInst_getOperand(MI, 1); + op2 = MCInst_getOperand(MI, 2); + push_op_mem(bpf, MCOperand_getReg(op), (uint32_t)MCOperand_getImm(op2)); + } + else { + push_op_mmem(bpf, (uint32_t)MCOperand_getImm(MCInst_getOperand(MI, 0))); + } + break; + case BPF_MODE_LEN: + push_op_ext(bpf, BPF_EXT_LEN); + break; + case BPF_MODE_MSH: + op = MCInst_getOperand(MI, 0); + push_op_msh(bpf, (uint32_t)MCOperand_getImm(op)); + break; + /* case BPF_MODE_XADD: // not exists */ + } + return; + } + if (BPF_CLASS(opcode) == BPF_CLASS_ST || BPF_CLASS(opcode) == BPF_CLASS_STX) { + if (!EBPF_MODE(MI->csh)) { + // cBPF has only one case - st* M[k] + push_op_mmem(bpf, (uint32_t)MCOperand_getImm(MCInst_getOperand(MI, 0))); + return; + } + /* eBPF has two cases: + * - st [dst + off], src + * - xadd [dst + off], src + * they have same form of operands. 
+ */ + op = MCInst_getOperand(MI, 0); + op2 = MCInst_getOperand(MI, 1); + push_op_mem(bpf, MCOperand_getReg(op), (uint32_t)MCOperand_getImm(op2)); + op = MCInst_getOperand(MI, 2); + if (MCOperand_isImm(op)) + push_op_imm(bpf, MCOperand_getImm(op)); + else if (MCOperand_isReg(op)) + push_op_reg(bpf, MCOperand_getReg(op), CS_AC_READ); + return; + } + + if (BPF_CLASS(opcode) == BPF_CLASS_JMP) { + for (i = 0; i < mc_op_count; i++) { + op = MCInst_getOperand(MI, i); + if (MCOperand_isImm(op)) { + /* decide the imm is BPF_OP_IMM or BPF_OP_OFF type here */ + /* + * 1. ja +off + * 2. j {x,k}, +jt, +jf // cBPF + * 3. j dst_reg, {src_reg, k}, +off // eBPF + */ + if (BPF_OP(opcode) == BPF_JUMP_JA || + (!EBPF_MODE(MI->csh) && i >= 1) || + (EBPF_MODE(MI->csh) && i == 2)) + push_op_off(bpf, (uint32_t)MCOperand_getImm(op)); + else + push_op_imm(bpf, MCOperand_getImm(op)); + } + else if (MCOperand_isReg(op)) { + push_op_reg(bpf, MCOperand_getReg(op), CS_AC_READ); + } + } + return; + } + + if (!EBPF_MODE(MI->csh)) { + /* In cBPF mode, all registers in operands are accessed as read */ + for (i = 0; i < mc_op_count; i++) { + op = MCInst_getOperand(MI, i); + if (MCOperand_isImm(op)) + push_op_imm(bpf, MCOperand_getImm(op)); + else if (MCOperand_isReg(op)) + push_op_reg(bpf, MCOperand_getReg(op), CS_AC_READ); + } + return; + } + + /* remain cases are: eBPF mode && ALU */ + /* if (BPF_CLASS(opcode) == BPF_CLASS_ALU || BPF_CLASS(opcode) == BPF_CLASS_ALU64) */ + + /* We have three types: + * 1. {l,b}e dst // dst = byteswap(dst) + * 2. neg dst // dst = -dst + * 3. dst, {src_reg, imm} // dst = dst src + * so we can simply check the number of operands, + * exactly one operand means we are in case 1. and 2., + * otherwise in case 3. 
+ */ + if (mc_op_count == 1) { + op = MCInst_getOperand(MI, 0); + push_op_reg(bpf, MCOperand_getReg(op), CS_AC_READ | CS_AC_WRITE); + } + else { // if (mc_op_count == 2) + op = MCInst_getOperand(MI, 0); + push_op_reg(bpf, MCOperand_getReg(op), CS_AC_READ | CS_AC_WRITE); + + op = MCInst_getOperand(MI, 1); + if (MCOperand_isImm(op)) + push_op_imm(bpf, MCOperand_getImm(op)); + else if (MCOperand_isReg(op)) + push_op_reg(bpf, MCOperand_getReg(op), CS_AC_READ); + } +} + +static void print_operand(MCInst *MI, struct SStream *O, const cs_bpf_op *op) +{ + switch (op->type) { + case BPF_OP_INVALID: + SStream_concat(O, "invalid"); + break; + case BPF_OP_REG: + SStream_concat(O, BPF_reg_name((csh)MI->csh, op->reg)); + break; + case BPF_OP_IMM: + SStream_concat(O, "0x%" PRIx64, op->imm); + break; + case BPF_OP_OFF: + SStream_concat(O, "+0x%x", op->off); + break; + case BPF_OP_MEM: + SStream_concat(O, "["); + if (op->mem.base != BPF_REG_INVALID) + SStream_concat(O, BPF_reg_name((csh)MI->csh, op->mem.base)); + if (op->mem.disp != 0) { + if (op->mem.base != BPF_REG_INVALID) + SStream_concat(O, "+"); + SStream_concat(O, "0x%x", op->mem.disp); + } + if (op->mem.base == BPF_REG_INVALID && op->mem.disp == 0) // special case + SStream_concat(O, "0x0"); + SStream_concat(O, "]"); + break; + case BPF_OP_MMEM: + SStream_concat(O, "m[0x%x]", op->mmem); + break; + case BPF_OP_MSH: + SStream_concat(O, "4*([0x%x]&0xf)", op->msh); + break; + case BPF_OP_EXT: + switch (op->ext) { + case BPF_EXT_LEN: + SStream_concat(O, "#len"); + break; + } + break; + } +} + +/* + * 1. human readable mnemonic + * 2. set pubOpcode (BPF_INSN_*) + * 3. 
set detail->bpf.operands + * */ +void BPF_printInst(MCInst *MI, struct SStream *O, void *PrinterInfo) +{ + int i; + cs_insn insn; + cs_bpf bpf; + + insn.detail = NULL; + /* set pubOpcode as instruction id */ + BPF_get_insn_id((cs_struct*)MI->csh, &insn, MCInst_getOpcode(MI)); + MCInst_setOpcodePub(MI, insn.id); + + SStream_concat(O, BPF_insn_name((csh)MI->csh, insn.id)); + convert_operands(MI, &bpf); + for (i = 0; i < bpf.op_count; i++) { + if (i == 0) + SStream_concat(O, "\t"); + else + SStream_concat(O, ", "); + print_operand(MI, O, &bpf.operands[i]); + } + +#ifndef CAPSTONE_DIET + if (MI->flat_insn->detail) { + MI->flat_insn->detail->bpf = bpf; + } +#endif +} diff --git a/arch/BPF/BPFInstPrinter.h b/arch/BPF/BPFInstPrinter.h new file mode 100644 index 00000000..685a8d65 --- /dev/null +++ b/arch/BPF/BPFInstPrinter.h @@ -0,0 +1,16 @@ +/* Capstone Disassembly Engine */ +/* BPF Backend by david942j , 2019 */ + +#ifndef CS_BPFINSTPRINTER_H +#define CS_BPFINSTPRINTER_H + +#include + +#include "../../MCInst.h" +#include "../../SStream.h" + +struct SStream; + +void BPF_printInst(MCInst *MI, struct SStream *O, void *Info); + +#endif diff --git a/arch/BPF/BPFMapping.c b/arch/BPF/BPFMapping.c new file mode 100644 index 00000000..33fae2c6 --- /dev/null +++ b/arch/BPF/BPFMapping.c @@ -0,0 +1,506 @@ +/* Capstone Disassembly Engine */ +/* BPF Backend by david942j , 2019 */ + +#include + +#include "BPFConstants.h" +#include "BPFMapping.h" +#include "../../utils.h" + +#ifndef CAPSTONE_DIET +static const name_map group_name_maps[] = { + { BPF_GRP_INVALID, NULL }, + + { BPF_GRP_LOAD, "load" }, + { BPF_GRP_STORE, "store" }, + { BPF_GRP_ALU, "alu" }, + { BPF_GRP_JUMP, "jump" }, + { BPF_GRP_CALL, "call" }, + { BPF_GRP_RETURN, "return" }, + { BPF_GRP_MISC, "misc" }, +}; +#endif + +const char *BPF_group_name(csh handle, unsigned int id) +{ +#ifndef CAPSTONE_DIET + return id2name(group_name_maps, ARR_SIZE(group_name_maps), id); +#else + return NULL; +#endif +} + +#ifndef CAPSTONE_DIET 
+static const name_map insn_name_maps[BPF_INS_ENDING] = { + { BPF_INS_INVALID, NULL }, + + { BPF_INS_ADD, "add" }, + { BPF_INS_SUB, "sub" }, + { BPF_INS_MUL, "mul" }, + { BPF_INS_DIV, "div" }, + { BPF_INS_OR, "or" }, + { BPF_INS_AND, "and" }, + { BPF_INS_LSH, "lsh" }, + { BPF_INS_RSH, "rsh" }, + { BPF_INS_NEG, "neg" }, + { BPF_INS_MOD, "mod" }, + { BPF_INS_XOR, "xor" }, + { BPF_INS_MOV, "mov" }, + { BPF_INS_ARSH, "arsh" }, + + { BPF_INS_ADD64, "add64" }, + { BPF_INS_SUB64, "sub64" }, + { BPF_INS_MUL64, "mul64" }, + { BPF_INS_DIV64, "div64" }, + { BPF_INS_OR64, "or64" }, + { BPF_INS_AND64, "and64" }, + { BPF_INS_LSH64, "lsh64" }, + { BPF_INS_RSH64, "rsh64" }, + { BPF_INS_NEG64, "neg64" }, + { BPF_INS_MOD64, "mod64" }, + { BPF_INS_XOR64, "xor64" }, + { BPF_INS_MOV64, "mov64" }, + { BPF_INS_ARSH64, "arsh64" }, + + { BPF_INS_LE16, "le16" }, + { BPF_INS_LE32, "le32" }, + { BPF_INS_LE64, "le64" }, + { BPF_INS_BE16, "be16" }, + { BPF_INS_BE32, "be32" }, + { BPF_INS_BE64, "be64" }, + + { BPF_INS_LDW, "ldw" }, + { BPF_INS_LDH, "ldh" }, + { BPF_INS_LDB, "ldb" }, + { BPF_INS_LDDW, "lddw" }, + { BPF_INS_LDXW, "ldxw" }, + { BPF_INS_LDXH, "ldxh" }, + { BPF_INS_LDXB, "ldxb" }, + { BPF_INS_LDXDW, "ldxdw" }, + + { BPF_INS_STW, "stw" }, + { BPF_INS_STH, "sth" }, + { BPF_INS_STB, "stb" }, + { BPF_INS_STDW, "stdw" }, + { BPF_INS_STXW, "stxw" }, + { BPF_INS_STXH, "stxh" }, + { BPF_INS_STXB, "stxb" }, + { BPF_INS_STXDW, "stxdw" }, + { BPF_INS_XADDW, "xaddw" }, + { BPF_INS_XADDDW, "xadddw" }, + + { BPF_INS_JMP, "jmp" }, + { BPF_INS_JEQ, "jeq" }, + { BPF_INS_JGT, "jgt" }, + { BPF_INS_JGE, "jge" }, + { BPF_INS_JSET, "jset" }, + { BPF_INS_JNE, "jne" }, + { BPF_INS_JSGT, "jsgt" }, + { BPF_INS_JSGE, "jsge" }, + { BPF_INS_CALL, "call" }, + { BPF_INS_EXIT, "exit" }, + { BPF_INS_JLT, "jlt" }, + { BPF_INS_JLE, "jle" }, + { BPF_INS_JSLT, "jslt" }, + { BPF_INS_JSLE, "jsle" }, + + { BPF_INS_RET, "ret" }, + + { BPF_INS_TAX, "tax" }, + { BPF_INS_TXA, "txa" }, +}; +#endif + +const char 
*BPF_insn_name(csh handle, unsigned int id) +{ +#ifndef CAPSTONE_DIET + /* We have some special cases because 'ld' in cBPF is equivalent to 'ldw' + * in eBPF, and we don't want to see 'ldw' appears in cBPF mode. + */ + if (!EBPF_MODE(handle)) { + switch (id) { + case BPF_INS_LD: return "ld"; + case BPF_INS_LDX: return "ldx"; + case BPF_INS_ST: return "st"; + case BPF_INS_STX: return "stx"; + } + } + return id2name(insn_name_maps, ARR_SIZE(insn_name_maps), id); +#else + return NULL; +#endif +} + +const char *BPF_reg_name(csh handle, unsigned int reg) +{ +#ifndef CAPSTONE_DIET + if (EBPF_MODE(handle)) { + if (reg < BPF_REG_R0 || reg > BPF_REG_R10) + return NULL; + static const char* reg_names[11] = { + "r0", "r1", "r2", "r3", "r4", + "r5", "r6", "r7", "r8", "r9", + "r10" + }; + return reg_names[reg - BPF_REG_R0]; + } + + /* cBPF mode */ + if (reg == BPF_REG_A) + return "a"; + else if (reg == BPF_REG_X) + return "x"; + else + return NULL; +#else + return NULL; +#endif +} + +static bpf_insn op2insn_ld(unsigned opcode) +{ +#define CASE(c) case BPF_SIZE_##c: \ + if (BPF_CLASS(opcode) == BPF_CLASS_LD) \ + return BPF_INS_LD##c; \ + else \ + return BPF_INS_LDX##c; + + switch (BPF_SIZE(opcode)) { + CASE(W); + CASE(H); + CASE(B); + CASE(DW); + } +#undef CASE + + return BPF_INS_INVALID; +} + +static bpf_insn op2insn_st(unsigned opcode) +{ + /* + * - BPF_STX | BPF_XADD | BPF_{W,DW} + * - BPF_ST* | BPF_MEM | BPF_{W,H,B,DW} + */ + + if (opcode == (BPF_CLASS_STX | BPF_MODE_XADD | BPF_SIZE_W)) + return BPF_INS_XADDW; + if (opcode == (BPF_CLASS_STX | BPF_MODE_XADD | BPF_SIZE_DW)) + return BPF_INS_XADDDW; + + /* should be BPF_MEM */ +#define CASE(c) case BPF_SIZE_##c: \ + if (BPF_CLASS(opcode) == BPF_CLASS_ST) \ + return BPF_INS_ST##c; \ + else \ + return BPF_INS_STX##c; + switch (BPF_SIZE(opcode)) { + CASE(W); + CASE(H); + CASE(B); + CASE(DW); + } +#undef CASE + + return BPF_INS_INVALID; +} + +static bpf_insn op2insn_alu(unsigned opcode) +{ + /* Endian is a special case */ + if 
(BPF_OP(opcode) == BPF_ALU_END) { + switch (opcode ^ BPF_CLASS_ALU ^ BPF_ALU_END) { + case BPF_SRC_LITTLE | (16 << 4): + return BPF_INS_LE16; + case BPF_SRC_LITTLE | (32 << 4): + return BPF_INS_LE32; + case BPF_SRC_LITTLE | (64 << 4): + return BPF_INS_LE64; + case BPF_SRC_BIG | (16 << 4): + return BPF_INS_BE16; + case BPF_SRC_BIG | (32 << 4): + return BPF_INS_BE32; + case BPF_SRC_BIG | (64 << 4): + return BPF_INS_BE64; + } + return BPF_INS_INVALID; + } + +#define CASE(c) case BPF_ALU_##c: \ + if (BPF_CLASS(opcode) == BPF_CLASS_ALU) \ + return BPF_INS_##c; \ + else \ + return BPF_INS_##c##64; + + switch (BPF_OP(opcode)) { + CASE(ADD); + CASE(SUB); + CASE(MUL); + CASE(DIV); + CASE(OR); + CASE(AND); + CASE(LSH); + CASE(RSH); + CASE(NEG); + CASE(MOD); + CASE(XOR); + CASE(MOV); + CASE(ARSH); + } +#undef CASE + + return BPF_INS_INVALID; +} + +static bpf_insn op2insn_jmp(unsigned opcode) +{ +#define CASE(c) case BPF_JUMP_##c: return BPF_INS_##c + switch (BPF_OP(opcode)) { + case BPF_JUMP_JA: + return BPF_INS_JMP; + CASE(JEQ); + CASE(JGT); + CASE(JGE); + CASE(JSET); + CASE(JNE); + CASE(JSGT); + CASE(JSGE); + CASE(CALL); + CASE(EXIT); + CASE(JLT); + CASE(JLE); + CASE(JSLT); + CASE(JSLE); + } +#undef CASE + + return BPF_INS_INVALID; +} + +static void update_regs_access(cs_struct *ud, cs_detail *detail, + bpf_insn insn_id, unsigned int opcode) +{ + if (insn_id == BPF_INS_INVALID) + return; +#define PUSH_READ(r) do { \ + detail->regs_read[detail->regs_read_count] = r; \ + detail->regs_read_count++; \ + } while (0) +#define PUSH_WRITE(r) do { \ + detail->regs_write[detail->regs_write_count] = r; \ + detail->regs_write_count++; \ + } while (0) + /* + * In eBPF mode, only these instructions have implicit registers access: + * - ld{w,h,b,dw} * // w: r0 + * - exit // r: r0 + */ + if (EBPF_MODE(ud)) { + switch (insn_id) { + default: + break; + case BPF_INS_LDW: + case BPF_INS_LDH: + case BPF_INS_LDB: + case BPF_INS_LDDW: + PUSH_WRITE(BPF_REG_R0); + break; + case BPF_INS_EXIT: + 
PUSH_READ(BPF_REG_R0); + break; + } + return; + } + + /* cBPF mode */ + switch (BPF_CLASS(opcode)) { + default: + break; + case BPF_CLASS_LD: + PUSH_WRITE(BPF_REG_A); + break; + case BPF_CLASS_LDX: + PUSH_WRITE(BPF_REG_X); + break; + case BPF_CLASS_ST: + PUSH_READ(BPF_REG_A); + break; + case BPF_CLASS_STX: + PUSH_READ(BPF_REG_X); + break; + case BPF_CLASS_ALU: + PUSH_READ(BPF_REG_A); + PUSH_WRITE(BPF_REG_A); + break; + case BPF_CLASS_JMP: + if (insn_id != BPF_INS_JMP) // except the unconditional jump + PUSH_READ(BPF_REG_A); + break; + /* case BPF_CLASS_RET: */ + case BPF_CLASS_MISC: + if (insn_id == BPF_INS_TAX) { + PUSH_READ(BPF_REG_A); + PUSH_WRITE(BPF_REG_X); + } + else { + PUSH_READ(BPF_REG_X); + PUSH_WRITE(BPF_REG_A); + } + break; + } +} + +/* + * 1. Convert opcode(id) to BPF_INS_* + * 2. Set regs_read/regs_write/groups + */ +void BPF_get_insn_id(cs_struct *ud, cs_insn *insn, unsigned int opcode) +{ + // No need to care the mode (cBPF or eBPF) since all checks has be done in + // BPF_getInstruction, we can simply map opcode to BPF_INS_*. 
+ cs_detail *detail; + bpf_insn id = BPF_INS_INVALID; + bpf_insn_group grp; + + detail = insn->detail; +#ifndef CAPSTONE_DIET + #define PUSH_GROUP(grp) do { \ + if (detail) { \ + detail->groups[detail->groups_count] = grp; \ + detail->groups_count++; \ + } \ + } while(0) +#else + #define PUSH_GROUP +#endif + + switch (BPF_CLASS(opcode)) { + default: // will never happen + break; + case BPF_CLASS_LD: + case BPF_CLASS_LDX: + id = op2insn_ld(opcode); + PUSH_GROUP(BPF_GRP_LOAD); + break; + case BPF_CLASS_ST: + case BPF_CLASS_STX: + id = op2insn_st(opcode); + PUSH_GROUP(BPF_GRP_STORE); + break; + case BPF_CLASS_ALU: + id = op2insn_alu(opcode); + PUSH_GROUP(BPF_GRP_ALU); + break; + case BPF_CLASS_JMP: + grp = BPF_GRP_JUMP; + id = op2insn_jmp(opcode); + if (id == BPF_INS_CALL) + grp = BPF_GRP_CALL; + else if (id == BPF_INS_EXIT) + grp = BPF_GRP_RETURN; + PUSH_GROUP(grp); + break; + case BPF_CLASS_RET: + id = BPF_INS_RET; + PUSH_GROUP(BPF_GRP_RETURN); + break; + // BPF_CLASS_MISC and BPF_CLASS_ALU64 have exactly same value + case BPF_CLASS_MISC: + /* case BPF_CLASS_ALU64: */ + if (EBPF_MODE(ud)) { + // ALU64 in eBPF + id = op2insn_alu(opcode); + PUSH_GROUP(BPF_GRP_ALU); + } + else { + if (BPF_MISCOP(opcode) == BPF_MISCOP_TXA) + id = BPF_INS_TXA; + else + id = BPF_INS_TAX; + PUSH_GROUP(BPF_GRP_MISC); + } + break; + } + + insn->id = id; +#undef PUSH_GROUP + +#ifndef CAPSTONE_DIET + if (detail) { + update_regs_access(ud, detail, id, opcode); + } +#endif +} + +static void sort_and_uniq(cs_regs arr, uint8_t n, uint8_t *new_n) +{ + /* arr is always a tiny (usually n < 3) array, + * a simple O(n^2) sort is efficient enough. */ + int i; + int j; + int iMin; + int tmp; + + /* a modified selection sort for sorting and making unique */ + for (j = 0; j < n; j++) { + /* arr[iMin] will be min(arr[j .. 
n-1]) */ + iMin = j; + for (i = j + 1; i < n; i++) { + if (arr[i] < arr[iMin]) + iMin = i; + } + if (j != 0 && arr[iMin] == arr[j - 1]) { // duplicate ele found + arr[iMin] = arr[n - 1]; + --n; + } + else { + tmp = arr[iMin]; + arr[iMin] = arr[j]; + arr[j] = tmp; + } + } + + *new_n = n; +} +void BPF_reg_access(const cs_insn *insn, + cs_regs regs_read, uint8_t *regs_read_count, + cs_regs regs_write, uint8_t *regs_write_count) +{ + unsigned i; + uint8_t read_count, write_count; + const cs_bpf *bpf = &(insn->detail->bpf); + + read_count = insn->detail->regs_read_count; + write_count = insn->detail->regs_write_count; + + // implicit registers + memcpy(regs_read, insn->detail->regs_read, read_count * sizeof(insn->detail->regs_read[0])); + memcpy(regs_write, insn->detail->regs_write, write_count * sizeof(insn->detail->regs_write[0])); + + for (i = 0; i < bpf->op_count; i++) { + const cs_bpf_op *op = &(bpf->operands[i]); + switch (op->type) { + default: + break; + case BPF_OP_REG: + if (op->access & CS_AC_READ) { + regs_read[read_count] = op->reg; + read_count++; + } + if (op->access & CS_AC_WRITE) { + regs_write[write_count] = op->reg; + write_count++; + } + break; + case BPF_OP_MEM: + if (op->mem.base != BPF_REG_INVALID) { + regs_read[read_count] = op->mem.base; + read_count++; + } + break; + } + } + + sort_and_uniq(regs_read, read_count, regs_read_count); + sort_and_uniq(regs_write, write_count, regs_write_count); +} diff --git a/arch/BPF/BPFMapping.h b/arch/BPF/BPFMapping.h new file mode 100644 index 00000000..1401ee86 --- /dev/null +++ b/arch/BPF/BPFMapping.h @@ -0,0 +1,21 @@ +/* Capstone Disassembly Engine */ +/* BPF Backend by david942j , 2019 */ + +#ifndef CS_BPFMAPPING_H +#define CS_BPFMAPPING_H + +#include + +#include "../../cs_priv.h" + +#define EBPF_MODE(ud) (((cs_struct*)ud)->mode & CS_MODE_BPF_EXTENDED) + +const char *BPF_group_name(csh handle, unsigned int id); +const char *BPF_insn_name(csh handle, unsigned int id); +const char *BPF_reg_name(csh handle, 
unsigned int reg); +void BPF_get_insn_id(cs_struct *h, cs_insn *insn, unsigned int id); +void BPF_reg_access(const cs_insn *insn, + cs_regs regs_read, uint8_t *regs_read_count, + cs_regs regs_write, uint8_t *regs_write_count); + +#endif diff --git a/arch/BPF/BPFModule.c b/arch/BPF/BPFModule.c new file mode 100644 index 00000000..d744b827 --- /dev/null +++ b/arch/BPF/BPFModule.c @@ -0,0 +1,34 @@ +/* Capstone Disassembly Engine */ +/* BPF Backend by david942j , 2019 */ + +#ifdef CAPSTONE_HAS_BPF + +#include "BPFDisassembler.h" +#include "BPFInstPrinter.h" +#include "BPFMapping.h" +#include "BPFModule.h" + +cs_err BPF_global_init(cs_struct *ud) +{ + ud->printer = BPF_printInst; + ud->reg_name = BPF_reg_name; + ud->insn_id = BPF_get_insn_id; + ud->insn_name = BPF_insn_name; + ud->group_name = BPF_group_name; +#ifndef CAPSTONE_DIET + ud->reg_access = BPF_reg_access; +#endif + ud->disasm = BPF_getInstruction; + + return CS_ERR_OK; +} + +cs_err BPF_option(cs_struct *handle, cs_opt_type type, size_t value) +{ + if (type == CS_OPT_MODE) + handle->mode = (cs_mode)value; + + return CS_ERR_OK; +} + +#endif diff --git a/arch/BPF/BPFModule.h b/arch/BPF/BPFModule.h new file mode 100644 index 00000000..0ff00338 --- /dev/null +++ b/arch/BPF/BPFModule.h @@ -0,0 +1,12 @@ +/* Capstone Disassembly Engine */ +/* BPF Backend by david942j , 2019 */ + +#ifndef CS_BPF_MODULE_H +#define CS_BPF_MODULE_H + +#include "../../utils.h" + +cs_err BPF_global_init(cs_struct *ud); +cs_err BPF_option(cs_struct *handle, cs_opt_type type, size_t value); + +#endif diff --git a/bindings/Makefile b/bindings/Makefile index 1eb94d10..d22bb21f 100644 --- a/bindings/Makefile +++ b/bindings/Makefile @@ -13,6 +13,7 @@ TEST_SPARC = $(TMPDIR)/test_sparc TEST_SYSZ = $(TMPDIR)/test_systemz TEST_X86 = $(TMPDIR)/test_x86 TEST_XCORE = $(TMPDIR)/test_xcore +TEST_BPF = $(TMPDIR)/test_bpf PYTHON2 = python @@ -42,6 +43,7 @@ expected: ../tests/test_systemz > $(TEST_SYSZ)_e ../tests/test_x86 > $(TEST_X86)_e 
../tests/test_xcore > $(TEST_XCORE)_e + ../tests/test_bpf > $(TEST_BPF)_e python: FORCE cd python && $(MAKE) @@ -56,6 +58,7 @@ python: FORCE $(PYTHON2) python/test_systemz.py > $(TEST_SYSZ)_o $(PYTHON2) python/test_x86.py > $(TEST_X86)_o $(PYTHON2) python/test_xcore.py > $(TEST_XCORE)_o + $(PYTHON2) python/test_bpf.py > $(TEST_BPF)_o $(MAKE) test_diff java: FORCE @@ -85,6 +88,7 @@ test_diff: FORCE $(DIFF) $(TEST_SYSZ)_e $(TEST_SYSZ)_o $(DIFF) $(TEST_X86)_e $(TEST_X86)_o $(DIFF) $(TEST_XCORE)_e $(TEST_XCORE)_o + $(DIFF) $(TEST_BPF)_e $(TEST_BPF)_o clean: rm -rf $(TMPDIR) diff --git a/bindings/const_generator.py b/bindings/const_generator.py index d6a6fbf7..32baf929 100644 --- a/bindings/const_generator.py +++ b/bindings/const_generator.py @@ -5,7 +5,7 @@ import sys, re INCL_DIR = '../include/capstone/' -include = [ 'arm.h', 'arm64.h', 'm68k.h', 'mips.h', 'x86.h', 'ppc.h', 'sparc.h', 'systemz.h', 'xcore.h', 'tms320c64x.h', 'm680x.h', 'evm.h', 'mos65xx.h', 'wasm.h' ] +include = [ 'arm.h', 'arm64.h', 'm68k.h', 'mips.h', 'x86.h', 'ppc.h', 'sparc.h', 'systemz.h', 'xcore.h', 'tms320c64x.h', 'm680x.h', 'evm.h', 'mos65xx.h', 'wasm.h', 'bpf.h' ] template = { 'java': { @@ -50,6 +50,7 @@ template = { 'evm.h': 'evm', 'wasm.h': 'wasm', 'mos65xx.h': 'mos65xx', + 'bpf.h': 'bpf', 'comment_open': '#', 'comment_close': '', }, diff --git a/bindings/python/Makefile b/bindings/python/Makefile index be85d1c2..7ae9abf4 100644 --- a/bindings/python/Makefile +++ b/bindings/python/Makefile @@ -70,7 +70,7 @@ clean: TESTS = test_basic.py test_detail.py test_arm.py test_arm64.py test_m68k.py test_mips.py TESTS += test_ppc.py test_sparc.py test_systemz.py test_x86.py test_xcore.py test_tms320c64x.py -TESTS += test_m680x.py test_skipdata.py test_mos65xx.py +TESTS += test_m680x.py test_skipdata.py test_mos65xx.py test_bpf.py TESTS += test_evm.py check: diff --git a/bindings/python/capstone/__init__.py b/bindings/python/capstone/__init__.py index d6fe4649..a0d981ba 100644 --- 
a/bindings/python/capstone/__init__.py +++ b/bindings/python/capstone/__init__.py @@ -36,6 +36,7 @@ __all__ = [ 'CS_ARCH_TMS320C64X', 'CS_ARCH_M680X', 'CS_ARCH_EVM', + 'CS_ARCH_BPF', 'CS_ARCH_ALL', 'CS_MODE_LITTLE_ENDIAN', @@ -71,6 +72,8 @@ __all__ = [ 'CS_MODE_M680X_6811', 'CS_MODE_M680X_CPU12', 'CS_MODE_M680X_HCS08', + 'CS_MODE_BPF_CLASSIC', + 'CS_MODE_BPF_EXTENDED', 'CS_OPT_SYNTAX', 'CS_OPT_SYNTAX_DEFAULT', @@ -153,7 +156,9 @@ CS_ARCH_TMS320C64X = 9 CS_ARCH_M680X = 10 CS_ARCH_EVM = 11 CS_ARCH_MOS65XX = 12 -CS_ARCH_MAX = 13 +CS_ARCH_WASM = 13 +CS_ARCH_BPF = 14 +CS_ARCH_MAX = 15 CS_ARCH_ALL = 0xFFFF # disasm mode @@ -190,6 +195,8 @@ CS_MODE_M680X_6809 = (1 << 7) # M680X M6809 mode CS_MODE_M680X_6811 = (1 << 8) # M680X M68HC11 mode CS_MODE_M680X_CPU12 = (1 << 9) # M680X CPU12 mode CS_MODE_M680X_HCS08 = (1 << 10) # M680X HCS08 mode +CS_MODE_BPF_CLASSIC = 0 # Classic BPF mode (default) +CS_MODE_BPF_EXTENDED = 1 << 0 # Extended BPF mode # Capstone option type CS_OPT_SYNTAX = 1 # Intel X86 asm syntax (CS_ARCH_X86 arch) @@ -329,7 +336,7 @@ def copy_ctypes_list(src): return [copy_ctypes(n) for n in src] # Weird import placement because these modules are needed by the below code but need the above functions -from . import arm, arm64, m68k, mips, ppc, sparc, systemz, x86, xcore, tms320c64x, m680x, evm, mos65xx +from . 
import arm, arm64, m68k, mips, ppc, sparc, systemz, x86, xcore, tms320c64x, m680x, evm, mos65xx, bpf class _cs_arch(ctypes.Union): _fields_ = ( @@ -346,6 +353,7 @@ class _cs_arch(ctypes.Union): ('m680x', m680x.CsM680x), ('evm', evm.CsEvm), ('mos65xx', mos65xx.CsMOS65xx), + ('bpf', bpf.CsBPF), ) class _cs_detail(ctypes.Structure): @@ -662,6 +670,8 @@ class CsInsn(object): (self.pop, self.push, self.fee) = evm.get_arch_info(self._raw.detail.contents.arch.evm) elif arch == CS_ARCH_MOS65XX: (self.am, self.modifies_flags, self.operands) = mos65xx.get_arch_info(self._raw.detail.contents.arch.mos65xx) + elif arch == CS_ARCH_BPF: + (self.operands) = bpf.get_arch_info(self._raw.detail.contents.arch.bpf) def __getattr__(self, name): @@ -1116,10 +1126,13 @@ def debug(): else: diet = "standard" - archs = { "arm": CS_ARCH_ARM, "arm64": CS_ARCH_ARM64, "m68k": CS_ARCH_M68K, \ - "mips": CS_ARCH_MIPS, "ppc": CS_ARCH_PPC, "sparc": CS_ARCH_SPARC, \ - "sysz": CS_ARCH_SYSZ, 'xcore': CS_ARCH_XCORE, "tms320c64x": CS_ARCH_TMS320C64X, \ - "m680x": CS_ARCH_M680X, 'evm': CS_ARCH_EVM, 'mos65xx': CS_ARCH_MOS65XX } + archs = { + "arm": CS_ARCH_ARM, "arm64": CS_ARCH_ARM64, "m68k": CS_ARCH_M68K, + "mips": CS_ARCH_MIPS, "ppc": CS_ARCH_PPC, "sparc": CS_ARCH_SPARC, + "sysz": CS_ARCH_SYSZ, 'xcore': CS_ARCH_XCORE, "tms320c64x": CS_ARCH_TMS320C64X, + "m680x": CS_ARCH_M680X, 'evm': CS_ARCH_EVM, 'mos65xx': CS_ARCH_MOS65XX, + 'bpf': CS_ARCH_BPF, + } all_archs = "" keys = archs.keys() diff --git a/bindings/python/capstone/bpf.py b/bindings/python/capstone/bpf.py new file mode 100644 index 00000000..d6263bd3 --- /dev/null +++ b/bindings/python/capstone/bpf.py @@ -0,0 +1,69 @@ +# Capstone Python bindings +# BPF by david942j , 2019 + +import ctypes +from . 
import copy_ctypes_list +from .bpf_const import * + +class BPFOpMem(ctypes.Structure): + _fields_ = ( + ('base', ctypes.c_uint8), + ('disp', ctypes.c_int32), + ) + +class BPFOpValue(ctypes.Union): + _fields_ = ( + ('reg', ctypes.c_uint8), + ('imm', ctypes.c_uint64), + ('off', ctypes.c_uint32), + ('mem', BPFOpMem), + ('mmem', ctypes.c_uint32), + ('msh', ctypes.c_uint32), + ('ext', ctypes.c_uint32), + ) + +class BPFOp(ctypes.Structure): + _fields_ = ( + ('type', ctypes.c_uint), + ('value', BPFOpValue), + ('access', ctypes.c_uint8), + ) + + @property + def reg(self): + return self.value.reg + + @property + def imm(self): + return self.value.imm + + @property + def off(self): + return self.value.off + + @property + def mem(self): + return self.value.mem + + @property + def mmem(self): + return self.value.mmem + + @property + def msh(self): + return self.value.msh + + @property + def ext(self): + return self.value.ext + + +class CsBPF(ctypes.Structure): + _fields_ = ( + ('op_count', ctypes.c_uint8), + ('operands', BPFOp * 4), + ) + +def get_arch_info(a): + return (copy_ctypes_list(a.operands[:a.op_count])) + diff --git a/bindings/python/capstone/bpf_const.py b/bindings/python/capstone/bpf_const.py new file mode 100644 index 00000000..51dadb42 --- /dev/null +++ b/bindings/python/capstone/bpf_const.py @@ -0,0 +1,113 @@ +# For Capstone Engine. 
AUTO-GENERATED FILE, DO NOT EDIT [bpf_const.py] + +BPF_OP_INVALID = 0 +BPF_OP_REG = 1 +BPF_OP_IMM = 2 +BPF_OP_OFF = 3 +BPF_OP_MEM = 4 +BPF_OP_MMEM = 5 +BPF_OP_MSH = 6 +BPF_OP_EXT = 7 + +BPF_REG_INVALID = 0 +BPF_REG_A = 1 +BPF_REG_X = 2 +BPF_REG_R0 = 3 +BPF_REG_R1 = 4 +BPF_REG_R2 = 5 +BPF_REG_R3 = 6 +BPF_REG_R4 = 7 +BPF_REG_R5 = 8 +BPF_REG_R6 = 9 +BPF_REG_R7 = 10 +BPF_REG_R8 = 11 +BPF_REG_R9 = 12 +BPF_REG_R10 = 13 +BPF_REG_ENDING = 14 + +BPF_EXT_INVALID = 0 +BPF_EXT_LEN = 1 + +BPF_INS_INVALID = 0 +BPF_INS_ADD = 1 +BPF_INS_SUB = 2 +BPF_INS_MUL = 3 +BPF_INS_DIV = 4 +BPF_INS_OR = 5 +BPF_INS_AND = 6 +BPF_INS_LSH = 7 +BPF_INS_RSH = 8 +BPF_INS_NEG = 9 +BPF_INS_MOD = 10 +BPF_INS_XOR = 11 +BPF_INS_MOV = 12 +BPF_INS_ARSH = 13 +BPF_INS_ADD64 = 14 +BPF_INS_SUB64 = 15 +BPF_INS_MUL64 = 16 +BPF_INS_DIV64 = 17 +BPF_INS_OR64 = 18 +BPF_INS_AND64 = 19 +BPF_INS_LSH64 = 20 +BPF_INS_RSH64 = 21 +BPF_INS_NEG64 = 22 +BPF_INS_MOD64 = 23 +BPF_INS_XOR64 = 24 +BPF_INS_MOV64 = 25 +BPF_INS_ARSH64 = 26 +BPF_INS_LE16 = 27 +BPF_INS_LE32 = 28 +BPF_INS_LE64 = 29 +BPF_INS_BE16 = 30 +BPF_INS_BE32 = 31 +BPF_INS_BE64 = 32 +BPF_INS_LDW = 33 +BPF_INS_LDH = 34 +BPF_INS_LDB = 35 +BPF_INS_LDDW = 36 +BPF_INS_LDXW = 37 +BPF_INS_LDXH = 38 +BPF_INS_LDXB = 39 +BPF_INS_LDXDW = 40 +BPF_INS_STW = 41 +BPF_INS_STH = 42 +BPF_INS_STB = 43 +BPF_INS_STDW = 44 +BPF_INS_STXW = 45 +BPF_INS_STXH = 46 +BPF_INS_STXB = 47 +BPF_INS_STXDW = 48 +BPF_INS_XADDW = 49 +BPF_INS_XADDDW = 50 +BPF_INS_JMP = 51 +BPF_INS_JEQ = 52 +BPF_INS_JGT = 53 +BPF_INS_JGE = 54 +BPF_INS_JSET = 55 +BPF_INS_JNE = 56 +BPF_INS_JSGT = 57 +BPF_INS_JSGE = 58 +BPF_INS_CALL = 59 +BPF_INS_EXIT = 60 +BPF_INS_JLT = 61 +BPF_INS_JLE = 62 +BPF_INS_JSLT = 63 +BPF_INS_JSLE = 64 +BPF_INS_RET = 65 +BPF_INS_TAX = 66 +BPF_INS_TXA = 67 +BPF_INS_ENDING = 68 +BPF_INS_LD = BPF_INS_LDW +BPF_INS_LDX = BPF_INS_LDXW +BPF_INS_ST = BPF_INS_STW +BPF_INS_STX = BPF_INS_STXW + +BPF_GRP_INVALID = 0 +BPF_GRP_LOAD = 1 +BPF_GRP_STORE = 2 +BPF_GRP_ALU = 3 +BPF_GRP_JUMP = 4 +BPF_GRP_CALL = 5 
+BPF_GRP_RETURN = 6 +BPF_GRP_MISC = 7 +BPF_GRP_ENDING = 8 diff --git a/bindings/python/test_bpf.py b/bindings/python/test_bpf.py new file mode 100755 index 00000000..4df7f359 --- /dev/null +++ b/bindings/python/test_bpf.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python + +# Capstone Python bindings +# BPF tests by david942j , 2019 + +from __future__ import print_function +from capstone import * +from capstone.bpf import * +from xprint import to_hex, to_x_32 + + +CBPF_CODE = b"\x94\x09\x00\x00\x37\x13\x03\x00\x87\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x16\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00" +EBPF_CODE = b"\x97\x09\x00\x00\x37\x13\x03\x00\xdc\x02\x00\x00\x20\x00\x00\x00\x30\x00\x00\x00\x00\x00\x00\x00\xdb\x3a\x00\x01\x00\x00\x00\x00\x84\x02\x00\x00\x00\x00\x00\x00\x6d\x33\x17\x02\x00\x00\x00\x00" + +all_tests = ( + (CS_ARCH_BPF, CS_MODE_LITTLE_ENDIAN | CS_MODE_BPF_CLASSIC, CBPF_CODE, "cBPF Le", None), + (CS_ARCH_BPF, CS_MODE_LITTLE_ENDIAN | CS_MODE_BPF_EXTENDED, EBPF_CODE, "eBPF Le", None), + ) + +ext_name = {} +ext_name[BPF_EXT_LEN] = '#len' + +def print_insn_detail(insn): + # print address, mnemonic and operands + print("0x%x:\t%s\t%s" % (insn.address, insn.mnemonic, insn.op_str)) + + # "data" instruction generated by SKIPDATA option has no detail + if insn.id == 0: + return + + if len(insn.groups) > 0: + print('\tGroups: ' + ' '.join(map(lambda g: insn.group_name(g), insn.groups))) + + print("\tOperand count: %u" % len(insn.operands)) + for c, op in enumerate(insn.operands): + print("\t\toperands[%u].type: " % c, end='') + if op.type == BPF_OP_REG: + print("REG = " + insn.reg_name(op.reg)) + elif op.type == BPF_OP_IMM: + print("IMM = " + hex(op.imm)[:-1]) + elif op.type == BPF_OP_OFF: + print("OFF = +0x" + to_x_32(op.off)) + elif op.type == BPF_OP_MEM: + print("MEM") + if op.mem.base != 0: + print("\t\t\toperands[%u].mem.base: REG = %s" \ + % (c, insn.reg_name(op.mem.base))) + print("\t\t\toperands[%u].mem.disp: 0x%s" \ + % 
(c, to_x_32(op.mem.disp))) + elif op.type == BPF_OP_MMEM: + print("MMEM = 0x" + to_x_32(op.mmem)) + elif op.type == BPF_OP_MSH: + print("MSH = 4*([0x%s]&0xf)" % to_x_32(op.msh)) + elif op.type == BPF_OP_EXT: + print("EXT = " + ext_name[op.ext]) + + (regs_read, regs_write) = insn.regs_access() + + if len(regs_read) > 0: + print("\tRegisters read:", end="") + for r in regs_read: + print(" %s" % insn.reg_name(r), end="") + print("") + + if len(regs_write) > 0: + print("\tRegisters modified:", end="") + for r in regs_write: + print(" %s" % insn.reg_name(r), end="") + print("") + +def test_class(): + + for (arch, mode, code, comment, syntax) in all_tests: + print("*" * 16) + print("Platform: %s" % comment) + print("Code: %s" % to_hex(code)) + print("Disasm:") + + try: + md = Cs(arch, mode) + if syntax is not None: + md.syntax = syntax + md.detail = True + for insn in md.disasm(code, 0x0): + print_insn_detail(insn) + print () + except CsError as e: + print("ERROR: %s" % e) + + +if __name__ == '__main__': + test_class() diff --git a/config.mk b/config.mk index 6a935604..052fc781 100644 --- a/config.mk +++ b/config.mk @@ -4,7 +4,7 @@ ################################################################################ # Specify which archs you want to compile in. By default, we build all archs. 
-CAPSTONE_ARCHS ?= arm aarch64 m68k mips powerpc sparc systemz x86 xcore tms320c64x m680x evm mos65xx wasm +CAPSTONE_ARCHS ?= arm aarch64 m68k mips powerpc sparc systemz x86 xcore tms320c64x m680x evm mos65xx wasm bpf ################################################################################ diff --git a/cs.c b/cs.c index abae2a6c..e9e681d8 100644 --- a/cs.c +++ b/cs.c @@ -66,6 +66,7 @@ #include "arch/X86/X86Module.h" #include "arch/XCore/XCoreModule.h" #include "arch/MOS65XX/MOS65XXModule.h" +#include "arch/BPF/BPFModule.h" // constructor initialization for all archs static cs_err (*cs_arch_init[MAX_ARCH])(cs_struct *) = { @@ -139,6 +140,11 @@ static cs_err (*cs_arch_init[MAX_ARCH])(cs_struct *) = { #else NULL, #endif +#ifdef CAPSTONE_HAS_BPF + BPF_global_init, +#else + NULL, +#endif }; // support cs_option() for all archs @@ -213,7 +219,11 @@ static cs_err (*cs_arch_option[MAX_ARCH]) (cs_struct *, cs_opt_type, size_t valu #else NULL, #endif - +#ifdef CAPSTONE_HAS_BPF + BPF_option, +#else + NULL, +#endif }; // bitmask for finding disallowed modes for an arch: @@ -296,6 +306,12 @@ static cs_mode cs_arch_disallowed_mode_mask[MAX_ARCH] = { #else 0, #endif +#ifdef CAPSTONE_HAS_BPF + ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_BPF_CLASSIC | CS_MODE_BPF_EXTENDED + | CS_MODE_BIG_ENDIAN), +#else + 0, +#endif }; // bitmask of enabled architectures @@ -342,6 +358,9 @@ static uint32_t all_arch = 0 #ifdef CAPSTONE_HAS_MOS65XX | (1 << CS_ARCH_MOS65XX) #endif +#ifdef CAPSTONE_HAS_BPF + | (1 << CS_ARCH_BPF) +#endif ; @@ -413,7 +432,8 @@ bool CAPSTONE_API cs_support(int query) (1 << CS_ARCH_SYSZ) | (1 << CS_ARCH_XCORE) | (1 << CS_ARCH_M68K) | (1 << CS_ARCH_TMS320C64X) | (1 << CS_ARCH_M680X) | (1 << CS_ARCH_EVM) | - (1 << CS_ARCH_MOS65XX) | (1 << CS_ARCH_WASM)); + (1 << CS_ARCH_MOS65XX) | (1 << CS_ARCH_WASM) | + (1 << CS_ARCH_BPF)); if ((unsigned int)query < CS_ARCH_MAX) return all_arch & (1 << query); @@ -685,6 +705,9 @@ static uint8_t skipdata_size(cs_struct *handle) case 
CS_ARCH_MOS65XX: // MOS65XX alignment is 1. return 1; + case CS_ARCH_BPF: + // both classic and extended BPF have alignment 8. + return 8; } } @@ -1409,6 +1432,11 @@ int CAPSTONE_API cs_op_count(csh ud, const cs_insn *insn, unsigned int op_type) if (insn->detail->wasm.operands[i].type == (wasm_op_type)op_type) count++; break; + case CS_ARCH_BPF: + for (i = 0; i < insn->detail->bpf.op_count; i++) + if (insn->detail->bpf.operands[i].type == (bpf_op_type)op_type) + count++; + break; } return count; @@ -1560,7 +1588,14 @@ int CAPSTONE_API cs_op_index(csh ud, const cs_insn *insn, unsigned int op_type, return i; } break; - + case CS_ARCH_BPF: + for (i = 0; i < insn->detail->bpf.op_count; i++) { + if (insn->detail->bpf.operands[i].type == (bpf_op_type)op_type) + count++; + if (count == post) + return i; + } + break; } return -1; diff --git a/cstool/cstool.c b/cstool/cstool.c index 43bb761c..07b7192f 100644 --- a/cstool/cstool.c +++ b/cstool/cstool.c @@ -60,6 +60,10 @@ static struct { { "evm", CS_ARCH_EVM, 0 }, { "wasm", CS_ARCH_WASM, 0 }, { "mos65xx", CS_ARCH_MOS65XX, 0 }, + { "bpf", CS_ARCH_BPF, CS_MODE_LITTLE_ENDIAN | CS_MODE_BPF_CLASSIC }, + { "bpfbe", CS_ARCH_BPF, CS_MODE_BIG_ENDIAN | CS_MODE_BPF_CLASSIC }, + { "ebpf", CS_ARCH_BPF, CS_MODE_LITTLE_ENDIAN | CS_MODE_BPF_EXTENDED }, + { "ebpfbe", CS_ARCH_BPF, CS_MODE_BIG_ENDIAN | CS_MODE_BPF_EXTENDED }, { NULL } }; @@ -77,6 +81,7 @@ void print_insn_detail_m680x(csh handle, cs_insn *ins); void print_insn_detail_evm(csh handle, cs_insn *ins); void print_insn_detail_wasm(csh handle, cs_insn *ins); void print_insn_detail_mos65xx(csh handle, cs_insn *ins); +void print_insn_detail_bpf(csh handle, cs_insn *ins); static void print_details(csh handle, cs_arch arch, cs_mode md, cs_insn *ins); @@ -222,6 +227,13 @@ static void usage(char *prog) printf(" wasm: Web Assembly\n"); } + if (cs_support(CS_ARCH_BPF)) { + printf(" bpf Classic BPF\n"); + printf(" bpfbe Classic BPF + big endian\n"); + printf(" ebpf Extended BPF\n"); + printf(" 
ebpfbe Extended BPF + big endian\n"); + } + printf("\nExtra options:\n"); printf(" -d show detailed information of the instructions\n"); printf(" -s decode in SKIPDATA mode\n"); @@ -274,6 +286,9 @@ static void print_details(csh handle, cs_arch arch, cs_mode md, cs_insn *ins) case CS_ARCH_MOS65XX: print_insn_detail_mos65xx(handle, ins); break; + case CS_ARCH_BPF: + print_insn_detail_bpf(handle, ins); + break; default: break; } diff --git a/cstool/cstool_bpf.c b/cstool/cstool_bpf.c new file mode 100644 index 00000000..879f45fe --- /dev/null +++ b/cstool/cstool_bpf.c @@ -0,0 +1,80 @@ +#include + +#include +#include + +static const char * ext_name[] = { + [BPF_EXT_LEN] = "#len", +}; + +void print_insn_detail_bpf(csh handle, cs_insn *ins); + +void print_insn_detail_bpf(csh handle, cs_insn *ins) +{ + unsigned i; + cs_bpf *bpf; + cs_regs regs_read, regs_write; + uint8_t regs_read_count, regs_write_count; + + // detail can be NULL on "data" instruction if SKIPDATA option is turned ON + if (ins->detail == NULL) + return; + + bpf = &(ins->detail->bpf); + + printf("\tOperand count: %u\n", bpf->op_count); + + for (i = 0; i < bpf->op_count; i++) { + cs_bpf_op *op = &(bpf->operands[i]); + printf("\t\toperands[%u].type: ", i); + switch (op->type) { + case BPF_OP_INVALID: + printf("INVALID\n"); + break; + case BPF_OP_REG: + printf("REG = %s\n", cs_reg_name(handle, op->reg)); + break; + case BPF_OP_IMM: + printf("IMM = 0x%" PRIx64 "\n", op->imm); + break; + case BPF_OP_OFF: + printf("OFF = +0x%x\n", op->off); + break; + case BPF_OP_MEM: + printf("MEM\n"); + if (op->mem.base != BPF_REG_INVALID) + printf("\t\t\toperands[%u].mem.base: REG = %s\n", + i, cs_reg_name(handle, op->mem.base)); + printf("\t\t\toperands[%u].mem.disp: 0x%x\n", i, op->mem.disp); + break; + case BPF_OP_MMEM: + printf("MMEM = M[0x%x]\n", op->mmem); + break; + case BPF_OP_MSH: + printf("MSH = 4*([0x%x]&0xf)\n", op->msh); + break; + case BPF_OP_EXT: + printf("EXT = %s\n", ext_name[op->ext]); + break; + } + } + + /* 
print all registers that are involved in this instruction */ + if (!cs_regs_access(handle, ins, + regs_read, &regs_read_count, + regs_write, &regs_write_count)) { + if (regs_read_count) { + printf("\tRegisters read:"); + for(i = 0; i < regs_read_count; i++) + printf(" %s", cs_reg_name(handle, regs_read[i])); + printf("\n"); + } + + if (regs_write_count) { + printf("\tRegisters modified:"); + for(i = 0; i < regs_write_count; i++) + printf(" %s", cs_reg_name(handle, regs_write[i])); + printf("\n"); + } + } +} diff --git a/include/capstone/bpf.h b/include/capstone/bpf.h new file mode 100644 index 00000000..c2ccff6d --- /dev/null +++ b/include/capstone/bpf.h @@ -0,0 +1,208 @@ +/* Capstone Disassembly Engine */ +/* BPF Backend by david942j , 2019 */ + +#ifndef CAPSTONE_BPF_H +#define CAPSTONE_BPF_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "platform.h" + +#ifdef _MSC_VER +#pragma warning(disable:4201) +#endif + +/// Operand type for instruction's operands +typedef enum bpf_op_type { + BPF_OP_INVALID = 0, + + BPF_OP_REG, + BPF_OP_IMM, + BPF_OP_OFF, + BPF_OP_MEM, + BPF_OP_MMEM, ///< M[k] in cBPF + BPF_OP_MSH, ///< corresponds to cBPF's BPF_MSH mode + BPF_OP_EXT, ///< cBPF's extension (not eBPF) +} bpf_op_type; + +/// BPF registers +typedef enum bpf_reg { + BPF_REG_INVALID = 0, + + ///< cBPF + BPF_REG_A, + BPF_REG_X, + + ///< eBPF + BPF_REG_R0, + BPF_REG_R1, + BPF_REG_R2, + BPF_REG_R3, + BPF_REG_R4, + BPF_REG_R5, + BPF_REG_R6, + BPF_REG_R7, + BPF_REG_R8, + BPF_REG_R9, + BPF_REG_R10, + + BPF_REG_ENDING, +} bpf_reg; + +/// Instruction's operand referring to memory +/// This is associated with BPF_OP_MEM operand type above +typedef struct bpf_op_mem { + bpf_reg base; ///< base register + uint32_t disp; ///< offset value +} bpf_op_mem; + +typedef enum bpf_ext_type { + BPF_EXT_INVALID = 0, + + BPF_EXT_LEN, +} bpf_ext_type; + +/// Instruction operand +typedef struct cs_bpf_op { + bpf_op_type type; + union { + uint8_t reg; ///< register value for REG operand + uint64_t
imm; ///< immediate value IMM operand + uint32_t off; ///< offset value, used in jump & call + bpf_op_mem mem; ///< base/disp value for MEM operand + /* cBPF only */ + uint32_t mmem; ///< M[k] in cBPF + uint32_t msh; ///< corresponds to cBPF's BPF_MSH mode + uint32_t ext; ///< cBPF's extension (not eBPF) + }; + + /// How is this operand accessed? (READ, WRITE or READ|WRITE) + /// This field is combined of cs_ac_type. + /// NOTE: this field is irrelevant if engine is compiled in DIET mode. + uint8_t access; +} cs_bpf_op; + +/// Instruction structure +typedef struct cs_bpf { + uint8_t op_count; + cs_bpf_op operands[4]; +} cs_bpf; + +/// BPF instruction +typedef enum bpf_insn { + BPF_INS_INVALID = 0, + + ///< ALU + BPF_INS_ADD, + BPF_INS_SUB, + BPF_INS_MUL, + BPF_INS_DIV, + BPF_INS_OR, + BPF_INS_AND, + BPF_INS_LSH, + BPF_INS_RSH, + BPF_INS_NEG, + BPF_INS_MOD, + BPF_INS_XOR, + BPF_INS_MOV, ///< eBPF only + BPF_INS_ARSH, ///< eBPF only + + ///< ALU64, eBPF only + BPF_INS_ADD64, + BPF_INS_SUB64, + BPF_INS_MUL64, + BPF_INS_DIV64, + BPF_INS_OR64, + BPF_INS_AND64, + BPF_INS_LSH64, + BPF_INS_RSH64, + BPF_INS_NEG64, + BPF_INS_MOD64, + BPF_INS_XOR64, + BPF_INS_MOV64, + BPF_INS_ARSH64, + + ///< Byteswap, eBPF only + BPF_INS_LE16, + BPF_INS_LE32, + BPF_INS_LE64, + BPF_INS_BE16, + BPF_INS_BE32, + BPF_INS_BE64, + + ///< Load + BPF_INS_LDW, ///< eBPF only + BPF_INS_LDH, + BPF_INS_LDB, + BPF_INS_LDDW, ///< eBPF only: load 64-bit imm + BPF_INS_LDXW, ///< eBPF only + BPF_INS_LDXH, ///< eBPF only + BPF_INS_LDXB, ///< eBPF only + BPF_INS_LDXDW, ///< eBPF only + + ///< Store + BPF_INS_STW, ///< eBPF only + BPF_INS_STH, ///< eBPF only + BPF_INS_STB, ///< eBPF only + BPF_INS_STDW, ///< eBPF only + BPF_INS_STXW, ///< eBPF only + BPF_INS_STXH, ///< eBPF only + BPF_INS_STXB, ///< eBPF only + BPF_INS_STXDW, ///< eBPF only + BPF_INS_XADDW, ///< eBPF only + BPF_INS_XADDDW, ///< eBPF only + + ///< Jump + BPF_INS_JMP, + BPF_INS_JEQ, + BPF_INS_JGT, + BPF_INS_JGE, + BPF_INS_JSET, + BPF_INS_JNE, ///< 
eBPF only + BPF_INS_JSGT, ///< eBPF only + BPF_INS_JSGE, ///< eBPF only + BPF_INS_CALL, ///< eBPF only + BPF_INS_EXIT, ///< eBPF only + BPF_INS_JLT, ///< eBPF only + BPF_INS_JLE, ///< eBPF only + BPF_INS_JSLT, ///< eBPF only + BPF_INS_JSLE, ///< eBPF only + + ///< Return, cBPF only + BPF_INS_RET, + + ///< Misc, cBPF only + BPF_INS_TAX, + BPF_INS_TXA, + + BPF_INS_ENDING, + + // alias instructions + BPF_INS_LD = BPF_INS_LDW, ///< cBPF only + BPF_INS_LDX = BPF_INS_LDXW, ///< cBPF only + BPF_INS_ST = BPF_INS_STW, ///< cBPF only + BPF_INS_STX = BPF_INS_STXW, ///< cBPF only +} bpf_insn; + +/// Group of BPF instructions +typedef enum bpf_insn_group { + BPF_GRP_INVALID = 0, ///< = CS_GRP_INVALID + + BPF_GRP_LOAD, + BPF_GRP_STORE, + BPF_GRP_ALU, + BPF_GRP_JUMP, + BPF_GRP_CALL, ///< eBPF only + BPF_GRP_RETURN, + BPF_GRP_MISC, ///< cBPF only + + BPF_GRP_ENDING, +} bpf_insn_group; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/capstone/capstone.h b/include/capstone/capstone.h index e030d7af..3defb0e4 100644 --- a/include/capstone/capstone.h +++ b/include/capstone/capstone.h @@ -86,6 +86,7 @@ typedef enum cs_arch { CS_ARCH_EVM, ///< Ethereum architecture CS_ARCH_MOS65XX, ///< MOS65XX architecture (including MOS6502) CS_ARCH_WASM, ///< WebAssembly architecture + CS_ARCH_BPF, ///< Berkeley Packet Filter architecture (including eBPF) CS_ARCH_MAX, CS_ARCH_ALL = 0xFFFF, // All architectures - for cs_support() } cs_arch; @@ -136,6 +137,8 @@ typedef enum cs_mode { CS_MODE_M680X_CPU12 = 1 << 9, ///< M680X Motorola/Freescale/NXP CPU12 ///< used on M68HC12/HCS12 CS_MODE_M680X_HCS08 = 1 << 10, ///< M680X Freescale/NXP HCS08 mode + CS_MODE_BPF_CLASSIC = 0, ///< Classic BPF mode (default) + CS_MODE_BPF_EXTENDED = 1 << 0, ///< Extended BPF mode } cs_mode; typedef void* (CAPSTONE_API *cs_malloc_t)(size_t size); @@ -261,6 +264,7 @@ typedef struct cs_opt_skipdata { /// EVM: 1 bytes. /// WASM: 1 bytes. /// MOS65XX: 1 bytes. + /// BPF: 8 bytes. 
cs_skipdata_cb_t callback; // default value is NULL /// User-defined data to be passed to @callback function pointer. @@ -282,6 +286,7 @@ typedef struct cs_opt_skipdata { #include "evm.h" #include "wasm.h" #include "mos65xx.h" +#include "bpf.h" /// NOTE: All information in cs_detail is only available when CS_OPT_DETAIL = CS_OPT_ON /// Initialized as memset(., 0, offsetof(cs_detail, ARCH)+sizeof(cs_ARCH)) @@ -314,6 +319,7 @@ typedef struct cs_detail { cs_evm evm; ///< Ethereum architecture cs_mos65xx mos65xx; ///< MOS65XX architecture (including MOS6502) cs_wasm wasm; ///< Web Assembly architecture + cs_bpf bpf; ///< Berkeley Packet Filter architecture (including eBPF) }; } cs_detail; diff --git a/suite/MC/BPF/classic-all.cs b/suite/MC/BPF/classic-all.cs new file mode 100644 index 00000000..92d4c096 --- /dev/null +++ b/suite/MC/BPF/classic-all.cs @@ -0,0 +1,51 @@ +# CS_ARCH_BPF, CS_MODE_LITTLE_ENDIAN+CS_MODE_BPF_CLASSIC, None +0x00,0x00,0x98,0xab,0x08,0x02,0x0e,0x45 = ld 0x450e0208 +0x01,0x00,0x44,0x49,0x1f,0xfe,0xd3,0x93 = ldx 0x93d3fe1f +0x04,0x00,0xda,0x23,0x71,0xc5,0x51,0x42 = add 0x4251c571 +0x05,0x00,0xd4,0xbd,0x37,0xc8,0x2c,0xd5 = jmp +0xd52cc837 +0x06,0x00,0xa7,0x84,0x25,0x40,0x28,0x1c = ret 0x1c284025 +0x07,0x00,0xe8,0xe8,0x48,0xe2,0x84,0x2a = tax +0x0c,0x00,0x55,0x8c,0x32,0xd8,0x21,0xe8 = add x +0x0e,0x00,0xd4,0x24,0x96,0xf7,0xa1,0x49 = ret x +0x14,0x00,0x6a,0xc8,0x14,0x50,0x2d,0x69 = sub 0x692d5014 +0x15,0x00,0xc3,0x39,0x6e,0x4f,0x37,0x18 = jeq 0x18374f6e, +0xc3, +0x39 +0x16,0x00,0x57,0xd2,0xc4,0xd4,0x8a,0x51 = ret a +0x1c,0x00,0xd1,0x51,0x90,0x8a,0x8d,0xea = sub x +0x1d,0x00,0x2e,0xa8,0xbc,0xa7,0xd5,0x3a = jeq x, +0x2e, +0xa8 +0x20,0x00,0x9a,0x43,0x93,0x27,0xec,0xf7 = ld [0xf7ec2793] +0x24,0x00,0x0f,0x46,0xbe,0xe5,0xd2,0x4a = mul 0x4ad2e5be +0x25,0x00,0x8c,0x80,0xc1,0x03,0x38,0x61 = jgt 0x613803c1, +0x8c, +0x80 +0x28,0x00,0xc3,0x05,0x73,0x01,0x39,0xbd = ldh [0xbd390173] +0x2c,0x00,0x7a,0x3d,0xad,0x19,0xe7,0xcc = mul x 
+0x2d,0x00,0xd9,0xc6,0xf7,0x72,0x9a,0x9d = jgt x, +0xd9, +0xc6 +0x30,0x00,0x22,0x29,0x29,0x5b,0xb5,0x87 = ldb [0x87b55b29] +0x34,0x00,0xa8,0xfa,0x6a,0x92,0xa2,0xa8 = div 0xa8a2926a +0x35,0x00,0x24,0xdb,0x58,0x41,0xa8,0x58 = jge 0x58a84158, +0x24, +0xdb +0x3c,0x00,0x41,0xa6,0xd5,0x66,0x8a,0xdd = div x +0x3d,0x00,0xe4,0xbc,0x40,0xb3,0x4d,0x84 = jge x, +0xe4, +0xbc +0x40,0x00,0xf1,0xa0,0xd9,0x89,0x72,0x25 = ld [x+0x257289d9] +0x44,0x00,0x8d,0xf8,0x49,0xdb,0x10,0x82 = or 0x8210db49 +0x45,0x00,0x43,0xfc,0x7d,0xa1,0x34,0xed = jset 0xed34a17d, +0x43, +0xfc +0x48,0x00,0x6b,0x89,0x0b,0xca,0xfb,0x1b = ldh [x+0x1bfbca0b] +0x4c,0x00,0xc9,0xff,0x36,0xe9,0x2a,0xe7 = or x +0x4d,0x00,0x0d,0xaa,0xc3,0x50,0xea,0x40 = jset x, +0xd, +0xaa +0x50,0x00,0xd9,0xf3,0xda,0xa7,0xd9,0xb1 = ldb [x+0xb1d9a7da] +0x54,0x00,0x14,0x82,0x29,0x82,0x6c,0x06 = and 0x66c8229 +0x5c,0x00,0x80,0x37,0x5f,0x52,0xc0,0x84 = and x +0x60,0x00,0xba,0x4e,0xb5,0x3f,0xdc,0xd8 = ld m[0xd8dc3fb5] +0x61,0x00,0x06,0xd9,0xcd,0x84,0x58,0x94 = ldx m[0x945884cd] +0x62,0x00,0x2c,0x44,0xdf,0x71,0x48,0x1b = st m[0x1b4871df] +0x63,0x00,0xc9,0x53,0x7f,0x80,0x89,0x2d = stx m[0x2d89807f] +0x64,0x00,0x8a,0xe5,0xf0,0x0c,0xca,0xfd = lsh 0xfdca0cf0 +0x6c,0x00,0xd3,0x85,0xc1,0x96,0xb1,0x48 = lsh x +0x74,0x00,0xfa,0x6f,0xe9,0xbe,0xde,0x7e = rsh 0x7edebee9 +0x7c,0x00,0x0d,0x89,0xed,0x17,0x7d,0xcd = rsh x +0x80,0x00,0x70,0x62,0x0e,0x61,0x1b,0x94 = ld #len +0x81,0x00,0xa0,0x03,0xa2,0x5c,0x1f,0x2a = ldx #len +0x84,0x00,0x4f,0x0f,0xc9,0x4a,0x72,0xff = neg +0x87,0x00,0x17,0x2a,0x9a,0xd6,0xb6,0x8f = txa +0x94,0x00,0x85,0x0c,0x29,0xb2,0xbe,0x83 = mod 0x83beb229 +0x9c,0x00,0x30,0x3f,0x9d,0x33,0x89,0x50 = mod x +0xa1,0x00,0x53,0x03,0xdd,0xdf,0xd4,0xe3 = ldx 4*([0xe3d4dfdd]&0xf) +0xa4,0x00,0x66,0x8f,0x3c,0xde,0xe2,0x4d = xor 0x4de2de3c +0xac,0x00,0x02,0x2f,0x1e,0xe3,0x2e,0x84 = xor x diff --git a/suite/MC/BPF/classic-be.cs b/suite/MC/BPF/classic-be.cs new file mode 100644 index 00000000..b7578ca8 --- /dev/null +++ b/suite/MC/BPF/classic-be.cs @@ 
-0,0 +1,8 @@ +# CS_ARCH_BPF, CS_MODE_BIG_ENDIAN+CS_MODE_BPF_CLASSIC, None +0x00,0x01,0x00,0x00,0x33,0x00,0x0c,0x11 = ldx 0x33000c11 +0x00,0x80,0x00,0x00,0x00,0x00,0x00,0x00 = ld #len +0x00,0xa1,0x00,0x00,0x10,0x00,0x00,0x00 = ldx 4*([0x10000000]&0xf) +0x00,0x60,0x00,0x00,0x09,0x00,0x00,0x00 = ld m[0x9000000] +0x00,0x30,0x00,0x00,0x37,0x13,0x03,0x00 = ldb [0x37130300] +0x00,0x63,0x00,0x00,0x0f,0x00,0x30,0x00 = stx m[0xf003000] +0x00,0x84,0x00,0x00,0x00,0x00,0x00,0x00 = neg diff --git a/suite/MC/BPF/extended-all.cs b/suite/MC/BPF/extended-all.cs new file mode 100644 index 00000000..6558521d --- /dev/null +++ b/suite/MC/BPF/extended-all.cs @@ -0,0 +1,97 @@ +# CS_ARCH_BPF, CS_MODE_LITTLE_ENDIAN+CS_MODE_BPF_EXTENDED, None +0x04,0xb4,0x97,0xa8,0xe8,0x60,0x56,0xe1 = add r4, 0xe15660e8 +0x05,0xc7,0x71,0xb0,0x43,0x1f,0xb9,0xf5 = jmp +0xb071 +0x07,0x76,0x01,0x28,0xc4,0x09,0xfe,0x8b = add64 r6, 0x8bfe09c4 +0x0c,0x42,0x0a,0x48,0x58,0xc4,0xef,0x37 = add r2, r4 +0x0f,0x09,0x40,0x54,0x67,0x24,0x2f,0x88 = add64 r9, r0 +0x14,0xd9,0xba,0xb8,0x6f,0x07,0x93,0x2a = sub r9, 0x2a93076f +0x15,0x6a,0x9f,0x38,0x1a,0x9d,0xb7,0x4d = jeq r10, 0x4db79d1a, +0x389f +0x17,0xc5,0x60,0xed,0x0b,0xdc,0xe6,0x22 = sub64 r5, 0x22e6dc0b +0x18,0xa3,0x5c,0x14,0xde,0xf0,0xa5,0xff,0x9a,0x7e,0x10,0xee,0xd8,0xa4,0x2b,0x2f = lddw 0x2f2ba4d8ffa5f0de +0x1c,0x73,0x68,0xa4,0x8b,0x5b,0x93,0x1f = sub r3, r7 +0x1d,0x21,0x20,0x4d,0xe3,0x47,0xaf,0x1b = jeq r1, r2, +0x4d20 +0x1f,0x06,0x51,0x5a,0x39,0xb2,0x10,0x10 = sub64 r6, r0 +0x20,0xc7,0x0c,0x70,0xda,0x41,0x1a,0xca = ldw [0xca1a41da] +0x24,0xb6,0x69,0x66,0xe3,0xef,0xec,0x25 = mul r6, 0x25ecefe3 +0x25,0x89,0xda,0x53,0x19,0x73,0x8a,0xc0 = jgt r9, 0xc08a7319, +0x53da +0x27,0xb1,0x96,0x1d,0xd4,0xab,0x2c,0x8c = mul64 r1, 0x8c2cabd4 +0x28,0x4e,0xb0,0x62,0xe8,0x48,0x0b,0x0d = ldh [0xd0b48e8] +0x2c,0x78,0x03,0xf6,0x29,0x29,0x15,0xfc = mul r8, r7 +0x2d,0x18,0x5b,0xfd,0x8f,0x53,0x3b,0xf0 = jgt r8, r1, +0xfd5b +0x2f,0x77,0xc7,0xa4,0x4c,0x32,0x73,0x2a = mul64 r7, r7 
+0x30,0x5f,0xfe,0xfc,0x85,0x66,0x7c,0x4b = ldb [0x4b7c6685] +0x34,0x46,0x49,0x33,0xe1,0x72,0xd4,0xcb = div r6, 0xcbd472e1 +0x35,0xa5,0x42,0xb9,0x5b,0x37,0xa1,0x3d = jge r5, 0x3da1375b, +0xb942 +0x37,0x84,0xd8,0xba,0x3b,0x84,0x55,0x1f = div64 r4, 0x1f55843b +0x38,0x8e,0x3f,0xd7,0x1c,0x3e,0x3a,0x7b = lddw [0x7b3a3e1c] +0x3d,0x1a,0xc3,0x9b,0x88,0xa2,0x3f,0x65 = jge r10, r1, +0x9bc3 +0x3f,0x36,0x99,0x32,0x7e,0x07,0x59,0x7a = div64 r6, r3 +0x40,0x95,0xc2,0x39,0x6b,0xe7,0xd7,0xc4 = ldw [r9+0xc4d7e76b] +0x44,0x16,0xf7,0x98,0xf7,0x02,0x92,0x94 = or r6, 0x949202f7 +0x45,0x12,0xa2,0xf2,0x14,0xe7,0x2d,0x1e = jset r2, 0x1e2de714, +0xf2a2 +0x47,0x36,0xf4,0xd5,0xbe,0x04,0x58,0x4d = or64 r6, 0x4d5804be +0x48,0x7e,0xfb,0x77,0xeb,0x0e,0x5a,0x0d = ldh [r7+0xd5a0eeb] +0x4c,0x81,0x0a,0x66,0xfc,0x32,0x61,0xc4 = or r1, r8 +0x4d,0x10,0x67,0x44,0x4d,0x3f,0x4d,0x8b = jset r0, r1, +0x4467 +0x4f,0x81,0xeb,0x6b,0xde,0x98,0x87,0x64 = or64 r1, r8 +0x50,0x38,0x80,0xf8,0x04,0x70,0xd1,0x6c = ldb [r3+0x6cd17004] +0x54,0x40,0x0a,0x6a,0x4a,0xe8,0xab,0xfb = and r0, 0xfbabe84a +0x55,0xb9,0xa3,0x80,0x90,0xbc,0xc8,0x96 = jne r9, 0x96c8bc90, +0x80a3 +0x57,0x30,0x12,0xe9,0x7c,0x06,0x82,0x27 = and64 r0, 0x2782067c +0x58,0x6d,0xf1,0x05,0xd3,0x50,0x4b,0xc0 = lddw [r6+0xc04b50d3] +0x5c,0x02,0x95,0xb2,0xbd,0x3f,0x38,0x37 = and r2, r0 +0x5d,0x56,0xa3,0x4c,0x2a,0xc8,0x4a,0xc5 = jne r6, r5, +0x4ca3 +0x5f,0x59,0xf6,0xaa,0x5d,0xeb,0x27,0xdd = and64 r9, r5 +0x61,0x28,0xb2,0xed,0xb8,0xcf,0xb5,0xe4 = ldxw r8, [r2+0xedb2] +0x62,0xa5,0xdf,0xe0,0x14,0x7d,0x95,0x78 = stw [r5+0xe0df], 0x78957d14 +0x63,0x77,0x2f,0xcf,0x76,0xb7,0xd3,0xfa = stxw [r7+0xcf2f], r7 +0x64,0x68,0xc1,0xf4,0x88,0x92,0xd2,0xeb = lsh r8, 0xebd29288 +0x65,0xe8,0x97,0xe1,0x87,0xbe,0x8f,0xf8 = jsgt r8, 0xf88fbe87, +0xe197 +0x67,0x00,0xd7,0xc0,0x05,0xb0,0xf6,0x74 = lsh64 r0, 0x74f6b005 +0x69,0x14,0xc7,0x8e,0x0b,0xc1,0xad,0x69 = ldxh r4, [r1+0x8ec7] +0x6a,0xb5,0xbc,0x8c,0x4f,0x5c,0x94,0x01 = sth [r5+0x8cbc], 0x1945c4f +0x6b,0x34,0x58,0xf5,0xc8,0x27,0x9e,0x14 
= stxh [r4+0xf558], r3 +0x6c,0x21,0x10,0x48,0x01,0x3e,0x6e,0xf8 = lsh r1, r2 +0x6d,0x38,0x69,0xe3,0xc9,0xac,0x3c,0xdb = jsgt r8, r3, +0xe369 +0x6f,0x64,0x49,0xd6,0x07,0xa9,0x93,0x13 = lsh64 r4, r6 +0x71,0xa0,0xeb,0xfb,0x3d,0x6b,0x58,0x45 = ldxb r0, [r10+0xfbeb] +0x72,0xe2,0xc1,0x1b,0x25,0x2f,0x4a,0xdc = stb [r2+0x1bc1], 0xdc4a2f25 +0x73,0x44,0x09,0x0f,0xc1,0x07,0xa8,0xf4 = stxb [r4+0xf09], r4 +0x74,0xe0,0x23,0x23,0x2f,0x04,0x15,0x35 = rsh r0, 0x3515042f +0x75,0x04,0x8e,0x18,0x6a,0xcc,0x3c,0x09 = jsge r4, 0x93ccc6a, +0x188e +0x77,0x09,0x3a,0xa7,0x3c,0x6e,0xfa,0x23 = rsh64 r9, 0x23fa6e3c +0x79,0xa9,0x5c,0x7b,0x16,0x1f,0xfb,0x01 = ldxdw r9, [r10+0x7b5c] +0x7a,0xd8,0x6b,0x04,0x76,0xf0,0x51,0x75 = stdw [r8+0x46b], 0x7551f076 +0x7b,0x72,0x0f,0x30,0x51,0x78,0xd2,0x9a = stxdw [r2+0x300f], r7 +0x7c,0x13,0x12,0x73,0x5a,0x20,0x65,0xdb = rsh r3, r1 +0x7d,0x58,0x52,0x01,0x90,0xf9,0x30,0x9a = jsge r8, r5, +0x152 +0x7f,0x98,0xea,0xff,0xcf,0x5d,0x5f,0xa3 = rsh64 r8, r9 +0x84,0x14,0xd4,0xaf,0x60,0xe1,0x41,0x18 = neg r4 +0x85,0xd3,0xa5,0xe2,0x83,0x3d,0xbd,0x5d = call 0x5dbd3d83 +0x87,0xf5,0x2b,0xbe,0xa9,0xc7,0x31,0xa3 = neg64 r5 +0x94,0x39,0x0d,0xdc,0x0b,0xd2,0xd1,0xc9 = mod r9, 0xc9d1d20b +0x95,0xf2,0xd1,0x83,0x53,0xa9,0x09,0x9f = exit +0x97,0xc8,0xa6,0x75,0xd2,0x09,0x98,0x09 = mod64 r8, 0x99809d2 +0x9c,0x96,0xe7,0x16,0x0f,0x69,0x13,0x90 = mod r6, r9 +0x9f,0x35,0x5a,0x59,0xd6,0x70,0xd9,0x5e = mod64 r5, r3 +0xa4,0x89,0x6b,0x5f,0x0d,0xbf,0x90,0xf7 = xor r9, 0xf790bf0d +0xa5,0xd4,0xef,0x79,0xd3,0xbb,0xde,0xfd = jlt r4, 0xfddebbd3, +0x79ef +0xa7,0x80,0x8b,0x18,0xa9,0x34,0x74,0x45 = xor64 r0, 0x457434a9 +0xac,0x36,0x16,0xe0,0x0f,0x52,0x30,0x65 = xor r6, r3 +0xaf,0x41,0x04,0xc2,0x2e,0xc9,0xf7,0x84 = xor64 r1, r4 +0xb4,0xa1,0x9c,0x78,0xf9,0x3f,0x77,0x1f = mov r1, 0x1f773ff9 +0xb5,0x92,0x5d,0x5a,0x49,0x33,0xfc,0x33 = jle r2, 0x33fc3349, +0x5a5d +0xb7,0x70,0x59,0x4d,0x5b,0x52,0x2a,0x99 = mov64 r0, 0x992a525b +0xbc,0x72,0x3e,0x6c,0xc9,0x8a,0x56,0xd6 = mov r2, r7 
+0xbd,0x19,0x80,0xe8,0x29,0x85,0xcf,0x51 = jle r9, r1, +0xe880 +0xbf,0x86,0x55,0x58,0xb2,0x6d,0x14,0x03 = mov64 r6, r8 +0xc4,0xb6,0xe2,0xe0,0x7c,0x68,0xc5,0x2b = arsh r6, 0x2bc5687c +0xc5,0xf2,0xeb,0xe4,0xba,0xc0,0xce,0x4f = jslt r2, 0x4fcec0ba, +0xe4eb +0xc7,0xe8,0xba,0xff,0x1f,0xef,0xc0,0x88 = arsh64 r8, 0x88c0ef1f +0xcc,0x38,0xc5,0x37,0x13,0xc0,0xe7,0x27 = arsh r8, r3 +0xcd,0x90,0x67,0x88,0x6b,0xd0,0x27,0xf4 = jslt r0, r9, +0x8867 +0xcf,0x82,0xe1,0xcd,0xbe,0xc3,0x2d,0x7c = arsh64 r2, r8 +0xd4,0x53,0x3f,0x0c,0x40,0x00,0x00,0x00 = le64 r3 +0xd5,0xe9,0xf6,0xb2,0x50,0xfd,0xb0,0xe5 = jsle r9, 0xe5b0fd50, +0xb2f6 +0xdc,0xb2,0xa3,0x50,0x20,0x00,0x00,0x00 = be32 r2 +0xdd,0x95,0xbf,0xb1,0xf2,0x5f,0x7b,0xc4 = jsle r5, r9, +0xb1bf diff --git a/suite/MC/BPF/extended-be.cs b/suite/MC/BPF/extended-be.cs new file mode 100644 index 00000000..aa97d569 --- /dev/null +++ b/suite/MC/BPF/extended-be.cs @@ -0,0 +1,15 @@ +# CS_ARCH_BPF, CS_MODE_BIG_ENDIAN+CS_MODE_BPF_EXTENDED, None +0x30,0x00,0x00,0x00,0x00,0x00,0x00,0x00 = ldb [0x0] +0x28,0x00,0x00,0x00,0xfa,0x00,0x00,0xff = ldh [0xfa0000ff] +0x40,0x10,0x00,0x00,0xcc,0x00,0x00,0x00 = ldw [r1+0xcc000000] +0x18,0x00,0x00,0x00,0x0c,0xb0,0xce,0xfa,0x00,0x00,0x00,0x00,0xef,0xbe,0xad,0xde = lddw 0xefbeadde0cb0cefa +0x71,0x13,0x11,0x00,0x00,0x00,0x00,0x00 = ldxb r3, [r1+0x1100] +0x94,0x09,0x00,0x00,0x37,0x13,0x03,0x00 = mod r9, 0x37130300 +0x84,0x03,0x00,0x00,0x00,0x00,0x00,0x00 = neg r3 +0x87,0x00,0x00,0x00,0x00,0x00,0x00,0x00 = neg64 r0 +0xdc,0x02,0x00,0x00,0x00,0x00,0x00,0x20 = be32 r2 +0x05,0x00,0x08,0x00,0x00,0x00,0x00,0x00 = jmp +0x800 +0xdd,0x35,0x30,0x00,0x00,0x00,0x00,0x00 = jsle r5, r3, +0x3000 +0xa5,0x35,0x30,0x00,0x10,0x00,0x00,0x00 = jlt r5, 0x10000000, +0x3000 +0xc3,0x12,0x00,0x10,0x00,0x00,0x00,0x00 = xaddw [r2+0x10], r1 +0xdb,0xa9,0x00,0x01,0x00,0x00,0x00,0x00 = xadddw [r9+0x1], r10 diff --git a/suite/cstest/include/capstone_test.h b/suite/cstest/include/capstone_test.h index 1299190f..ae0eedce 100644 --- 
a/suite/cstest/include/capstone_test.h +++ b/suite/cstest/include/capstone_test.h @@ -35,8 +35,8 @@ } \ } while (0) -#define NUMARCH 9 -#define NUMMODE 33 +#define NUMARCH 10 +#define NUMMODE 35 #define NUMOPTION 41 #define MAXMEM 1024 diff --git a/suite/cstest/include/factory.h b/suite/cstest/include/factory.h index 588fd734..8ca63558 100644 --- a/suite/cstest/include/factory.h +++ b/suite/cstest/include/factory.h @@ -21,5 +21,6 @@ char *get_detail_xcore(csh *handle, cs_mode mode, cs_insn *ins); char *get_detail_m68k(csh *handle, cs_mode mode, cs_insn *ins); char *get_detail_mos65xx(csh *handle, cs_mode mode, cs_insn *ins); char *get_detail_tms320c64x(csh *handle, cs_mode mode, cs_insn *ins); +char *get_detail_bpf(csh *handle, cs_mode mode, cs_insn *ins); #endif /* FACTORY_H */ diff --git a/suite/cstest/src/bpf_detail.c b/suite/cstest/src/bpf_detail.c new file mode 100644 index 00000000..d72332f7 --- /dev/null +++ b/suite/cstest/src/bpf_detail.c @@ -0,0 +1,77 @@ +/* Capstone testing regression */ +/* By david942j , 2019 */ + +#include + +#include "factory.h" + +static char * ext_name[] = { + [BPF_EXT_LEN] = "#len", +}; + +char *get_detail_bpf(csh *handle, cs_mode mode, cs_insn *ins) +{ + cs_bpf *bpf; + unsigned int i; + cs_regs regs_read, regs_write; + uint8_t regs_read_count, regs_write_count; + char *result; + + result = (char *)malloc(sizeof(char)); + result[0] = '\0'; + if (ins->detail == NULL) + return result; + + bpf = &(ins->detail->bpf); + + if (bpf->op_count) + add_str(&result, " ; op_count: %u", bpf->op_count); + for (i = 0; i < bpf->op_count; i++) { + cs_bpf_op *op = &(bpf->operands[i]); + add_str(&result, " ; operands[%u].type: ", i); + switch (op->type) { + case BPF_OP_INVALID: + add_str(&result, "INVALID"); + break; + case BPF_OP_REG: + add_str(&result, "REG = %s", cs_reg_name(*handle, op->reg)); + break; + case BPF_OP_IMM: + add_str(&result, "IMM = 0x%" PRIx64, op->imm); + break; + case BPF_OP_OFF: + add_str(&result, "OFF = +0x%x", op->off); + 
break; + case BPF_OP_MEM: + add_str(&result, "MEM [base=%s, disp=0x%x]", + cs_reg_name(*handle, op->mem.base), op->mem.disp); + break; + case BPF_OP_MMEM: + add_str(&result, "MMEM = M[0x%x]", op->mmem); + break; + case BPF_OP_MSH: + add_str(&result, "MSH = 4*([0x%x]&0xf)", op->msh); + break; + case BPF_OP_EXT: + add_str(&result, "EXT = %s", ext_name[op->ext]); + break; + } + } + + if (!cs_regs_access(*handle, ins, + regs_read, &regs_read_count, + regs_write, &regs_write_count)) { + if (regs_read_count) { + add_str(&result, " ; Registers read:"); + for(i = 0; i < regs_read_count; i++) + add_str(&result, " %s", cs_reg_name(*handle, regs_read[i])); + } + + if (regs_write_count) { + add_str(&result, " ; Registers modified:"); + for(i = 0; i < regs_write_count; i++) + add_str(&result, " %s", cs_reg_name(*handle, regs_write[i])); + } + } + return result; +} diff --git a/suite/cstest/src/capstone_test.c b/suite/cstest/src/capstone_test.c index d966a12a..46738369 100644 --- a/suite/cstest/src/capstone_test.c +++ b/suite/cstest/src/capstone_test.c @@ -13,7 +13,8 @@ single_dict arches[] = { {"CS_ARCH_SYSZ", CS_ARCH_SYSZ}, {"CS_ARCH_X86", CS_ARCH_X86}, {"CS_ARCH_XCORE", CS_ARCH_XCORE}, - {"CS_ARCH_M68K", CS_ARCH_M68K} + {"CS_ARCH_M68K", CS_ARCH_M68K}, + {"CS_ARCH_BPF", CS_ARCH_BPF}, }; single_dict modes[] = { @@ -49,7 +50,9 @@ single_dict modes[] = { {"CS_MODE_M680X_6809", CS_MODE_M680X_6809}, {"CS_MODE_M680X_6811", CS_MODE_M680X_6811}, {"CS_MODE_M680X_CPU12", CS_MODE_M680X_CPU12}, - {"CS_MODE_M680X_HCS08", CS_MODE_M680X_HCS08} + {"CS_MODE_M680X_HCS08", CS_MODE_M680X_HCS08}, + {"CS_MODE_BPF_CLASSIC", CS_MODE_BPF_CLASSIC}, + {"CS_MODE_BPF_EXTENDED", CS_MODE_BPF_EXTENDED}, }; double_dict options[] = { @@ -93,7 +96,7 @@ double_dict options[] = { {"CS_MODE_M680X_6811", CS_OPT_MODE, CS_MODE_M680X_6811}, {"CS_MODE_M680X_CPU12", CS_OPT_MODE, CS_MODE_M680X_CPU12}, {"CS_MODE_M680X_HCS08", CS_OPT_MODE, CS_MODE_M680X_HCS08}, - {"CS_OPT_UNSIGNED", CS_OPT_UNSIGNED, CS_OPT_ON} +
{"CS_OPT_UNSIGNED", CS_OPT_UNSIGNED, CS_OPT_ON}, }; char *(*function)(csh *, cs_mode, cs_insn*) = NULL; @@ -265,6 +268,9 @@ int set_function(int arch) case CS_ARCH_TMS320C64X: function = get_detail_tms320c64x; break; + case CS_ARCH_BPF: + function = get_detail_bpf; + break; default: return -1; } diff --git a/suite/fuzz/drivermc.c b/suite/fuzz/drivermc.c index a6a0163e..70402076 100644 --- a/suite/fuzz/drivermc.c +++ b/suite/fuzz/drivermc.c @@ -91,6 +91,10 @@ int main(int argc, char** argv) Data[0] = 24; } else if (strcmp(arch, "CS_ARCH_EVM") == 0 && strcmp(mode, "0") == 0) { Data[0] = 25; + } else if (strcmp(arch, "CS_ARCH_BPF") == 0 && strstr(mode, "CS_MODE_BPF_CLASSIC") != NULL) { + Data[0] = 29; + } else if (strcmp(arch, "CS_ARCH_BPF") == 0 && strstr(mode, "CS_MODE_BPF_EXTENDED") != NULL) { + Data[0] = 30; } else { printf("Unknown mode\n"); //fail instead of continue diff --git a/suite/fuzz/fuzz_disasm.c b/suite/fuzz/fuzz_disasm.c index 6cf14eeb..bfee670c 100644 --- a/suite/fuzz/fuzz_disasm.c +++ b/suite/fuzz/fuzz_disasm.c @@ -189,14 +189,24 @@ static struct platform platforms[] = { CS_MODE_BIG_ENDIAN, "tms320c64x" }, -#if CS_NEXT_VERSION >= 5 { //item 28 CS_ARCH_WASM, (cs_mode)0, "WASM" }, -#endif + { + //item 29 + CS_ARCH_BPF, + CS_MODE_LITTLE_ENDIAN | CS_MODE_BPF_CLASSIC, + "cBPF" + }, + { + //item 30 + CS_ARCH_BPF, + CS_MODE_LITTLE_ENDIAN | CS_MODE_BPF_EXTENDED, + "eBPF" + }, }; int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { diff --git a/tests/Makefile b/tests/Makefile index f1d22e5e..eda7e3d0 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -121,6 +121,10 @@ ifneq (,$(findstring evm,$(CAPSTONE_ARCHS))) CFLAGS += -DCAPSTONE_HAS_MOS65XX SOURCES += test_mos65xx.c endif +ifneq (,$(findstring bpf,$(CAPSTONE_ARCHS))) +CFLAGS += -DCAPSTONE_HAS_BPF +SOURCES += test_bpf.c +endif OBJS = $(addprefix $(OBJDIR)/,$(SOURCES:.c=.o)) BINARY = $(addprefix $(TESTDIR)/,$(SOURCES:.c=$(BIN_EXT))) diff --git a/tests/test_basic.c b/tests/test_basic.c index 
d87e9ccc..8ee1536a 100644 --- a/tests/test_basic.c +++ b/tests/test_basic.c @@ -91,7 +91,7 @@ static void test() #ifdef CAPSTONE_HAS_MOS65XX #define MOS65XX_CODE "\x0d\x34\x12\x00\x81\x65\x87\x6c\x01\x00\x85\xFF\x10\x00\x19\x42\x42\x00\x49\x42" #endif - +#define EBPF_CODE "\x97\x09\x00\x00\x37\x13\x03\x00\xdc\x02\x00\x00\x20\x00\x00\x00\x30\x00\x00\x00\x00\x00\x00\x00\xdb\x3a\x00\x01\x00\x00\x00\x00\x84\x02\x00\x00\x00\x00\x00\x00\x6d\x33\x17\x02\x00\x00\x00\x00" struct platform { cs_arch arch; @@ -339,6 +339,15 @@ static void test() sizeof(MOS65XX_CODE) - 1, "MOS65XX" }, +#endif +#ifdef CAPSTONE_HAS_BPF + { + CS_ARCH_BPF, + CS_MODE_LITTLE_ENDIAN | CS_MODE_BPF_EXTENDED, + (unsigned char*) EBPF_CODE, + sizeof(EBPF_CODE) - 1, + "eBPF" + }, #endif }; diff --git a/tests/test_bpf.c b/tests/test_bpf.c new file mode 100644 index 00000000..b97e330c --- /dev/null +++ b/tests/test_bpf.c @@ -0,0 +1,187 @@ +/* Capstone Disassembly Engine */ +/* By david942j , 2019 */ + +#include +#include + +static csh handle; + +struct platform { + cs_arch arch; + cs_mode mode; + const unsigned char *code; + size_t size; + const char *comment; +}; + +static void print_string_hex(const char *comment, const unsigned char *str, size_t len) +{ + const unsigned char *c; + + printf("%s", comment); + for (c = str; c < str + len; c++) { + printf(" 0x%02x", *c & 0xff); + } + + printf("\n"); +} + +static const char * ext_name[] = { + [BPF_EXT_LEN] = "#len", +}; + +static void print_insn_detail(csh cs_handle, cs_insn *ins) +{ + cs_bpf *bpf; + cs_regs regs_read, regs_write; + uint8_t regs_read_count, regs_write_count; + unsigned i; + + // detail can be NULL on "data" instruction if SKIPDATA option is turned ON + if (ins->detail == NULL) + return; + + if (ins->detail->groups_count) { + int j; + + printf("\tGroups:"); + for(j = 0; j < ins->detail->groups_count; j++) + printf(" %s", cs_group_name(handle, ins->detail->groups[j])); + printf("\n"); + } + + bpf = &(ins->detail->bpf); + + printf("\tOperand 
count: %u\n", bpf->op_count); + for (i = 0; i < bpf->op_count; i++) { + cs_bpf_op *op = &(bpf->operands[i]); + printf("\t\toperands[%u].type: ", i); + switch (op->type) { + case BPF_OP_INVALID: + printf("INVALID\n"); + break; + case BPF_OP_REG: + printf("REG = %s\n", cs_reg_name(handle, op->reg)); + break; + case BPF_OP_IMM: + printf("IMM = 0x%" PRIx64 "\n", op->imm); + break; + case BPF_OP_OFF: + printf("OFF = +0x%x\n", op->off); + break; + case BPF_OP_MEM: + printf("MEM\n"); + if (op->mem.base != BPF_REG_INVALID) + printf("\t\t\toperands[%u].mem.base: REG = %s\n", + i, cs_reg_name(handle, op->mem.base)); + printf("\t\t\toperands[%u].mem.disp: 0x%x\n", i, op->mem.disp); + break; + case BPF_OP_MMEM: + printf("MMEM = M[0x%x]\n", op->mmem); + break; + case BPF_OP_MSH: + printf("MSH = 4*([0x%x]&0xf)\n", op->msh); + break; + case BPF_OP_EXT: + printf("EXT = %s\n", ext_name[op->ext]); + break; + } + } + + /* print all registers that are involved in this instruction */ + if (!cs_regs_access(cs_handle, ins, + regs_read, &regs_read_count, + regs_write, &regs_write_count)) { + if (regs_read_count) { + printf("\tRegisters read:"); + for(i = 0; i < regs_read_count; i++) + printf(" %s", cs_reg_name(cs_handle, regs_read[i])); + printf("\n"); + } + + if (regs_write_count) { + printf("\tRegisters modified:"); + for(i = 0; i < regs_write_count; i++) + printf(" %s", cs_reg_name(cs_handle, regs_write[i])); + printf("\n"); + } + } + puts(""); +} + +static void test() +{ +#define CBPF_CODE "\x94\x09\x00\x00\x37\x13\x03\x00" \ + "\x87\x00\x00\x00\x00\x00\x00\x00" \ + "\x07\x00\x00\x00\x00\x00\x00\x00" \ + "\x16\x00\x00\x00\x00\x00\x00\x00" \ + "\x80\x00\x00\x00\x00\x00\x00\x00" + +#define EBPF_CODE "\x97\x09\x00\x00\x37\x13\x03\x00" \ + "\xdc\x02\x00\x00\x20\x00\x00\x00" \ + "\x30\x00\x00\x00\x00\x00\x00\x00" \ + "\xdb\x3a\x00\x01\x00\x00\x00\x00" \ + "\x84\x02\x00\x00\x00\x00\x00\x00" \ + "\x6d\x33\x17\x02\x00\x00\x00\x00" + struct platform platforms[] = { + { + CS_ARCH_BPF, +
CS_MODE_LITTLE_ENDIAN | CS_MODE_BPF_CLASSIC, + (unsigned char *)CBPF_CODE, + sizeof(CBPF_CODE) - 1, + "cBPF Le" + }, + { + CS_ARCH_BPF, + CS_MODE_LITTLE_ENDIAN | CS_MODE_BPF_EXTENDED, + (unsigned char *)EBPF_CODE, + sizeof(EBPF_CODE) - 1, + "eBPF Le" + }, + }; + uint64_t address = 0x0; + cs_insn *insn; + int i; + size_t count; + + for (i = 0; i < sizeof(platforms)/sizeof(platforms[0]); i++) { + cs_err err = cs_open(platforms[i].arch, platforms[i].mode, &handle); + if (err) { + printf("Failed on cs_open() with error returned: %u\n", err); + abort(); + } + + cs_option(handle, CS_OPT_DETAIL, CS_OPT_ON); + + count = cs_disasm(handle, platforms[i].code, platforms[i].size, address, 0, &insn); + if (count) { + size_t j; + printf("****************\n"); + printf("Platform: %s\n", platforms[i].comment); + print_string_hex("Code:", platforms[i].code, platforms[i].size); + printf("Disasm:\n"); + + for (j = 0; j < count; j++) { + printf("0x%" PRIx64 ":\t%s\t%s\n", insn[j].address, insn[j].mnemonic, insn[j].op_str); + print_insn_detail(handle, &insn[j]); + } + + // free memory allocated by cs_disasm() + cs_free(insn, count); + } else { + printf("****************\n"); + printf("Platform: %s\n", platforms[i].comment); + print_string_hex("Code:", platforms[i].code, platforms[i].size); + printf("ERROR: Failed to disasm given code!\n"); + abort(); + } + + cs_close(&handle); + } +} + +int main() +{ + test(); + return 0; +} diff --git a/tests/test_detail.c b/tests/test_detail.c index 2c080a55..ba8d1579 100644 --- a/tests/test_detail.c +++ b/tests/test_detail.c @@ -89,7 +89,7 @@ static void test() #ifdef CAPSTONE_HAS_MOS65XX #define MOS65XX_CODE "\x0A\x00\xFE\x34\x12\xD0\xFF\xEA\x19\x56\x34\x46\x80" #endif - +#define EBPF_CODE "\x97\x09\x00\x00\x37\x13\x03\x00\xdc\x02\x00\x00\x20\x00\x00\x00\x30\x00\x00\x00\x00\x00\x00\x00\xdb\x3a\x00\x01\x00\x00\x00\x00\x84\x02\x00\x00\x00\x00\x00\x00\x6d\x33\x17\x02\x00\x00\x00\x00" struct platform platforms[] = { #ifdef CAPSTONE_HAS_X86 @@ -283,6 
+283,15 @@ static void test() sizeof(MOS65XX_CODE) - 1, "MOS65XX", }, +#endif +#ifdef CAPSTONE_HAS_BPF + { + CS_ARCH_BPF, + CS_MODE_LITTLE_ENDIAN | CS_MODE_BPF_EXTENDED, + (unsigned char*) EBPF_CODE, + sizeof(EBPF_CODE) - 1, + "eBPF" + }, #endif }; diff --git a/tests/test_iter.c b/tests/test_iter.c index f215ba9e..e2dd19fd 100644 --- a/tests/test_iter.c +++ b/tests/test_iter.c @@ -82,7 +82,7 @@ static void test() #ifdef CAPSTONE_HAS_MOS65XX #define MOS65XX_CODE "\x0d\x34\x12\x08\x09\xFF\x10\x80\x20\x00\x00\x98" #endif - +#define EBPF_CODE "\x97\x09\x00\x00\x37\x13\x03\x00\xdc\x02\x00\x00\x20\x00\x00\x00\x30\x00\x00\x00\x00\x00\x00\x00\xdb\x3a\x00\x01\x00\x00\x00\x00\x84\x02\x00\x00\x00\x00\x00\x00\x6d\x33\x17\x02\x00\x00\x00\x00" struct platform platforms[] = { #ifdef CAPSTONE_HAS_X86 @@ -232,6 +232,15 @@ static void test() sizeof(MOS65XX_CODE) - 1, "MOS65XX" }, +#endif +#ifdef CAPSTONE_HAS_BPF + { + CS_ARCH_BPF, + CS_MODE_LITTLE_ENDIAN | CS_MODE_BPF_EXTENDED, + (unsigned char*) EBPF_CODE, + sizeof(EBPF_CODE) - 1, + "eBPF" + }, #endif };