# Source code for envi.archs.i386.disasm

"""
The guts for the i386 envi opcode disassembler.
"""

import struct

import envi
import envi.bits as e_bits

import opcode86
all_tables = opcode86.tables86

# Grab our register enums etc...
from envi.archs.i386.regs import *

# Instruction prefix bit masks.  The disassembler ORs one of these into the
# opcode's prefix word for every prefix byte it consumes.
# NOTE: table 3-4 (section 3.6) of intel 1 shows how REX/OP_SIZE
# interact...
INSTR_PREFIX = 1 << 0
PREFIX_LOCK = 1 << 1
PREFIX_REPNZ = 1 << 2
PREFIX_REPZ = 1 << 3
PREFIX_REP = 1 << 4
PREFIX_REP_SIMD = 1 << 5
PREFIX_OP_SIZE = 1 << 6
PREFIX_ADDR_SIZE = 1 << 7
PREFIX_SIMD = 1 << 8
PREFIX_CS = 1 << 9
PREFIX_SS = 1 << 10
PREFIX_DS = 1 << 11
PREFIX_ES = 1 << 12
PREFIX_FS = 1 << 13
PREFIX_GS = 1 << 14
PREFIX_REG_MASK = 1 << 15

# envi.registers meta offsets, added to a register index to select a
# sub-register view (low byte, high byte, low word).
# NOTE(review): presumably encoded as (bit shift << 24) | (bit width << 16)
# per envi.registers -- confirm against that module.
RMETA_LOW8 = 8 << 16
RMETA_HIGH8 = (8 << 24) | (8 << 16)
RMETA_LOW16 = 16 << 16

# Prefix byte value -> prefix mask.  The lookup itself stays a flat list
# (indexed by byte value, None for non-prefix bytes) instead of a dict for
# speed (max 255 anyway).
_prefix_bytes = {
    0xF0: PREFIX_LOCK,
    0xF2: PREFIX_REPNZ,
    0xF3: PREFIX_REP,
    0x2E: PREFIX_CS,
    0x36: PREFIX_SS,
    0x3E: PREFIX_DS,
    0x26: PREFIX_ES,
    0x64: PREFIX_FS,
    0x65: PREFIX_GS,
    0x66: PREFIX_OP_SIZE,
    0x67: PREFIX_ADDR_SIZE,
}
i386_prefixes = [_prefix_bytes.get(i) for i in range(256)]

# The 2 bit scale field of the SIB byte indexes into this to get the
# index register multiplier (1, 2, 4, 8).
scale_lookup = tuple(1 << bits for bits in range(4))

# Mnemonics of instructions that are considered privileged (the
# disassembler marks these with IF_PRIV).
# FIXME this should be part of the opcode tables!
_priv_mnemonics = (
    # port I/O and software interrupts
    "int", "in", "out", "insb", "outsb", "insd", "outsd",
    # VMX
    "vmcall", "vmlaunch", "vmresume", "vmxoff", "vmread", "vmwrite",
    # system state
    "rsm", "lar", "lsl", "clts", "invd", "wbinvd", "wrmsr", "rdmsr",
    "sysexit", "lgdt", "lidt", "lmsw", "monitor", "mwait",
    # more VMX
    "vmclear", "vmptrld", "vmptrst", "vmxon",
)
priv_lookup = dict.fromkeys(_priv_mnemonics, True)

# Map of opcode86 instruction type codes to their respective envi
# instruction flags.  Types not listed here get no flags from this table.
iflag_lookup = {}
iflag_lookup[opcode86.INS_RET] = envi.IF_NOFALL | envi.IF_RET
iflag_lookup[opcode86.INS_CALL] = envi.IF_CALL
iflag_lookup[opcode86.INS_HALT] = envi.IF_NOFALL
iflag_lookup[opcode86.INS_CALLCC] = envi.IF_CALL
iflag_lookup[opcode86.INS_BRANCH] = envi.IF_NOFALL | envi.IF_BRANCH
iflag_lookup[opcode86.INS_BRANCHCC] = envi.IF_BRANCH

# Operand size in bytes -> size keyword used when printing memory operands.
# Sizes without a keyword map to the empty string.
_size_labels = {
    1: "byte",
    2: "word",
    4: "dword",
    8: "qword",
    16: "oword",
}
sizenames = [_size_labels.get(x, "") for x in range(17)]

def addrToName(mcanv, va):
    """
    Return a display name for the address va.

    If the canvas symbol resolver knows a symbol for va its repr() is
    returned, otherwise va is formatted as an 8 digit hex literal.
    """
    sym = mcanv.syms.getSymByAddr(va)
    if sym is not None:
        return repr(sym)
    return "0x%.8x" % va


###########################################################################
#
# Operand objects for the i386 architecture
#
# NOTE: the disassembler assigns each operand's _dis_regctx attribute
# after construction; repr()/render() methods which print register names
# rely on it being set.
#

class i386RegOper(envi.RegisterOper):
    """
    An operand which is a single register.
    """

    def __init__(self, reg, tsize):
        self.reg = reg
        self.tsize = tsize

    def repr(self, op):
        return self._dis_regctx.getRegisterName(self.reg)

    def getOperValue(self, op, emu=None):
        if emu is None:
            return None  # This operand type requires an emulator
        return emu.getRegister(self.reg)

    def setOperValue(self, op, emu, value):
        emu.setRegister(self.reg, value)

    def render(self, mcanv, op, idx):
        """
        Render the register to the canvas, preferring a symbol hint for
        this (va, operand index) if one exists.
        """
        hint = mcanv.syms.getSymHint(op.va, idx)
        if hint is not None:
            # Show the hint text in place of the register name.
            mcanv.addNameText(hint, typename="registers")
        else:
            name = self._dis_regctx.getRegisterName(self.reg)
            mcanv.addNameText(name, typename="registers")

    def __eq__(self, other):
        if not isinstance(other, i386RegOper):
            return False
        return other.reg == self.reg and other.tsize == self.tsize


class i386ImmOper(envi.ImmedOper):
    """
    An operand representing an immediate.
    """

    def __init__(self, imm, tsize):
        self.imm = imm
        self.tsize = tsize

    def repr(self, op):
        # NOTE(review): repr() switches to hex above 4096 while render()
        # switches at 4096, so exactly 4096 prints differently in each.
        ival = self.imm
        if ival > 4096:
            return "0x%.8x" % ival
        return str(ival)

    def getOperValue(self, op, emu=None):
        return self.imm

    def render(self, mcanv, op, idx):
        """
        Render the immediate.  Values which are valid pointers become
        address links (named by hint or symbol); other values are plain
        text, decimal below 4096 and hex otherwise.
        """
        value = self.imm
        hint = mcanv.syms.getSymHint(op.va, idx)
        if hint is not None:
            if mcanv.mem.isValidPointer(value):
                mcanv.addVaText(hint, value)
            else:
                mcanv.addNameText(hint)
        elif mcanv.mem.isValidPointer(value):
            name = addrToName(mcanv, value)
            mcanv.addVaText(name, value)
        else:
            if self.imm >= 4096:
                mcanv.addNameText('0x%.8x' % value)
            else:
                mcanv.addNameText(str(value))

    def __eq__(self, other):
        if not isinstance(other, i386ImmOper):
            return False
        return other.imm == self.imm and other.tsize == self.tsize


class i386PcRelOper(envi.Operand):
    """
    This is the operand used for EIP relative offsets
    for operands on instructions like jmp/call
    """

    def __init__(self, imm, tsize):
        self.imm = imm
        self.tsize = tsize

    def repr(self, op):
        return "0x%.8x" % (op.va + op.size + self.imm)

    def isImmed(self):
        return True  # FIXME trying this out....

    def getOperValue(self, op, emu=None):
        # The target is relative to the address of the *next* instruction.
        return op.va + op.size + self.imm

    def render(self, mcanv, op, idx):
        """
        Render the branch target as an address link, named by the symbol
        hint if one exists and by addrToName() otherwise.
        """
        # Compute the target up front; both branches below need it.
        value = op.va + op.size + self.imm
        hint = mcanv.syms.getSymHint(op.va, idx)
        if hint is not None:
            mcanv.addVaText(hint, value)
        else:
            name = addrToName(mcanv, value)
            mcanv.addVaText(name, value)

    def __eq__(self, other):
        if not isinstance(other, i386PcRelOper):
            return False
        return other.imm == self.imm and other.tsize == self.tsize


class i386RegMemOper(envi.DerefOper):
    """
    An operand which represents the result of reading/writing memory from the
    dereference (with possible displacement) from a given register.
    """

    def __init__(self, reg, tsize, disp=0):
        self.reg = reg
        self.tsize = tsize
        self.disp = disp
        self._is_deref = True

    def repr(self, op):
        r = self._dis_regctx.getRegisterName(self.reg)
        if self.disp > 0:
            return "%s [%s + %d]" % (sizenames[self.tsize], r, self.disp)
        elif self.disp < 0:
            return "%s [%s - %d]" % (sizenames[self.tsize], r, abs(self.disp))
        return "%s [%s]" % (sizenames[self.tsize], r)

    def getOperValue(self, op, emu=None):
        if emu is None:
            return None  # This operand type requires an emulator
        return emu.readMemValue(self.getOperAddr(op, emu), self.tsize)

    def setOperValue(self, op, emu, val):
        emu.writeMemValue(self.getOperAddr(op, emu), val, self.tsize)

    def getOperAddr(self, op, emu):
        """
        Return segment base + register value + displacement, or None
        when no emulator is available to supply the register value.
        """
        if emu is None:
            return None  # This operand type requires an emulator
        base, size = emu.getSegmentInfo(op)
        rval = emu.getRegister(self.reg)
        return base + rval + self.disp

    def isDeref(self):
        # The disassembler may reach in and set this (if lea...)
        return self._is_deref

    def render(self, mcanv, op, idx):
        mcanv.addNameText(sizenames[self.tsize])
        mcanv.addText(" [")
        mcanv.addNameText(self._dis_regctx.getRegisterName(self.reg), typename="registers")
        hint = mcanv.syms.getSymHint(op.va, idx)
        if hint is not None:
            # A hint replaces the numeric displacement.
            mcanv.addText(" + ")
            mcanv.addNameText(hint)
        else:
            if self.disp > 0:
                mcanv.addText(" + ")
                mcanv.addNameText(str(self.disp))
            elif self.disp < 0:
                mcanv.addText(" - ")
                mcanv.addNameText(str(abs(self.disp)))
        mcanv.addText("]")

    def __eq__(self, other):
        if not isinstance(other, i386RegMemOper):
            return False
        if other.reg != self.reg:
            return False
        if other.disp != self.disp:
            return False
        return other.tsize == self.tsize


class i386ImmMemOper(envi.DerefOper):
    """
    An operand which represents the dereference (memory read/write) of
    a memory location associated with an immediate.
    """

    def __init__(self, imm, tsize):
        self.imm = imm
        self.tsize = tsize
        self._is_deref = True

    def isDeref(self):
        # The disassembler may reach in and set this (if lea...)
        return self._is_deref

    def repr(self, op):
        return "%s [0x%.8x]" % (sizenames[self.tsize], self.imm)

    def getOperValue(self, op, emu=None):
        if emu is None:
            return None  # This operand type requires an emulator
        return emu.readMemValue(self.getOperAddr(op, emu), self.tsize)

    def setOperValue(self, op, emu, val):
        emu.writeMemValue(self.getOperAddr(op, emu), val, self.tsize)

    def getOperAddr(self, op, emu=None):
        """
        Return the immediate address, plus the segment base when an
        emulator is supplied.
        """
        ret = self.imm
        if emu is not None:
            base, size = emu.getSegmentInfo(op)
            ret += base
        return ret

    def render(self, mcanv, op, idx):
        mcanv.addNameText(sizenames[self.tsize])
        mcanv.addText(" [")
        value = self.imm
        hint = mcanv.syms.getSymHint(op.va, idx)
        if hint is not None:
            mcanv.addVaText(hint, value)
        else:
            name = addrToName(mcanv, value)
            mcanv.addVaText(name, value)
        mcanv.addText("]")

    def __eq__(self, other):
        if not isinstance(other, i386ImmMemOper):
            return False
        return other.imm == self.imm and other.tsize == self.tsize


class i386SibOper(envi.DerefOper):
    """
    An operand which represents the result of reading/writing memory from
    a scale/index/base (SIB) address:

        [ (reg or imm) + index * scale + disp ]

    where reg, imm and index may each be None.
    """

    def __init__(self, tsize, reg=None, imm=None, index=None, scale=1, disp=0):
        self.reg = reg
        self.imm = imm
        self.index = index
        self.scale = scale
        self.tsize = tsize
        self.disp = disp
        self._is_deref = True

    def __eq__(self, other):
        if not isinstance(other, i386SibOper):
            return False
        if other.imm != self.imm:
            return False
        if other.reg != self.reg:
            return False
        if other.index != self.index:
            return False
        if other.scale != self.scale:
            return False
        if other.disp != self.disp:
            return False
        return other.tsize == self.tsize

    def isDeref(self):
        return self._is_deref

    def repr(self, op):
        r = "%s [" % sizenames[self.tsize]

        if self.reg is not None:
            r += self._dis_regctx.getRegisterName(self.reg)

        if self.imm is not None:
            r += "0x%.8x" % self.imm

        if self.index is not None:
            r += " + %s" % self._dis_regctx.getRegisterName(self.index)
            # The scale only means something when there is an index.
            if self.scale != 1:
                r += " * %d" % self.scale

        if self.disp > 0:
            r += " + %d" % self.disp
        elif self.disp < 0:
            r += " - %d" % abs(self.disp)

        r += "]"
        return r

    def getOperValue(self, op, emu=None):
        if emu is None:
            return None  # This operand type requires an emulator
        return emu.readMemValue(self.getOperAddr(op, emu), self.tsize)

    def setOperValue(self, op, emu, val):
        emu.writeMemValue(self.getOperAddr(op, emu), val, self.tsize)

    def getOperAddr(self, op, emu=None):
        """
        Return the effective address (including the segment base), or
        None when no emulator is available to supply register values.
        """
        if emu is None:
            return None  # This operand type requires an emulator

        ret = 0

        if self.imm is not None:
            ret += self.imm

        if self.reg is not None:
            ret += emu.getRegister(self.reg)

        if self.index is not None:
            ret += (emu.getRegister(self.index) * self.scale)

        # Handle x86 segmentation
        base, size = emu.getSegmentInfo(op)
        ret += base

        return ret + self.disp

    def _getOperBase(self, emu=None):
        """
        Special SIB only method for getting the SIB base value: the
        immediate base if there is one, else the base register's value
        from emu, else None.
        """
        # Compare against None so an immediate base of 0 is not mistaken
        # for "no immediate".
        if self.imm is not None:
            return self.imm
        if emu is not None:
            return emu.getRegister(self.reg)
        return None

    def render(self, mcanv, op, idx):
        mcanv.addNameText(sizenames[self.tsize])
        mcanv.addText(" [")

        if self.imm is not None:
            name = addrToName(mcanv, self.imm)
            mcanv.addVaText(name, self.imm)

        if self.reg is not None:
            name = self._dis_regctx.getRegisterName(self.reg)
            mcanv.addNameText(name, typename="registers")

        # Does our SIB have an index (and possibly a scale)
        if self.index is not None:
            mcanv.addText(" + ")
            name = self._dis_regctx.getRegisterName(self.index)
            mcanv.addNameText(name, typename="registers")
            if self.scale != 1:
                mcanv.addText(" * ")
                mcanv.addNameText(str(self.scale))

        hint = mcanv.syms.getSymHint(op.va, idx)
        if hint is not None:
            # A hint replaces the numeric displacement.
            mcanv.addText(" + ")
            mcanv.addNameText(hint)
        else:
            # If we have a displacement, add it (signed the same way
            # repr() and i386RegMemOper.render() do).
            if self.disp > 0:
                mcanv.addText(" + ")
                mcanv.addNameText(str(self.disp))
            elif self.disp < 0:
                mcanv.addText(" - ")
                mcanv.addNameText(str(abs(self.disp)))

        mcanv.addText("]")


class i386Opcode(envi.Opcode):
    """
    A decoded i386 instruction.
    """

    # Printable prefix names
    prefix_names = [
        (PREFIX_LOCK, "lock"),
        (PREFIX_REPNZ, "repnz"),
        (PREFIX_REP, "rep"),
        (PREFIX_CS, "cs"),
        (PREFIX_SS, "ss"),
        (PREFIX_DS, "ds"),
        (PREFIX_ES, "es"),
        (PREFIX_FS, "fs"),
        (PREFIX_GS, "gs"),
    ]

    def getBranches(self, emu=None):
        """
        Return a list of (va, branch flags) tuples for every place
        execution may go after this instruction.  Target addresses which
        cannot be resolved without an emulator are returned as None.
        """
        ret = []

        # To start with we have no flags.
        flags = 0
        addb = False

        # If we are a conditional branch, even our fallthrough
        # case is conditional...
        if self.opcode == opcode86.INS_BRANCHCC:
            flags |= envi.BR_COND
            addb = True

        # If we can fall through, reflect that...
        if not self.iflags & envi.IF_NOFALL:
            ret.append((self.va + self.size, flags | envi.BR_FALL))

        # In intel, if we have no operands, it has no
        # further branches...
        if not self.opers:
            return ret

        # Check for a call...
        if self.opcode == opcode86.INS_CALL:
            flags |= envi.BR_PROC
            addb = True

        # A conditional call? really? what compiler did you use? ;)
        elif self.opcode == opcode86.INS_CALLCC:
            flags |= (envi.BR_PROC | envi.BR_COND)
            addb = True

        elif self.opcode == opcode86.INS_BRANCH:
            oper0 = self.opers[0]
            if isinstance(oper0, i386SibOper) and oper0.scale == 4:
                # In the case with no emulator, note that our deref is
                # from the base of a table. If we have one, parse out all the
                # valid pointers from our base
                base = oper0._getOperBase(emu)
                if emu is None:
                    ret.append((base, flags | envi.BR_DEREF | envi.BR_TABLE))
                else:
                    # Since we're parsing this out, lets just resolve the derefs
                    # for our caller...
                    dest = emu.readMemValue(base, oper0.tsize)
                    while emu.isValidPointer(dest):
                        ret.append((dest, envi.BR_COND))
                        base += oper0.tsize
                        dest = emu.readMemValue(base, oper0.tsize)
            else:
                addb = True

        if addb:
            oper0 = self.opers[0]
            if oper0.isDeref():
                flags |= envi.BR_DEREF
                tova = oper0.getOperAddr(self, emu=emu)
            else:
                tova = oper0.getOperValue(self, emu=emu)

            ret.append((tova, flags))

        return ret

    def render(self, mcanv):
        """
        Render this opcode to the specified memory canvas
        """
        if self.prefixes:
            pfx = self.getPrefixName()
            if pfx:
                mcanv.addNameText("%s: " % pfx, pfx)

        mcanv.addNameText(self.mnem, typename="mnemonic")
        mcanv.addText(" ")

        # Allow each of our operands to render, comma separated
        lasti = len(self.opers) - 1
        for i, oper in enumerate(self.opers):
            oper.render(mcanv, self, i)
            if i != lasti:
                mcanv.addText(",")


# Indexes of the (up to three) operand descriptions in an opcode table entry
operand_range = (2, 3, 4)

MODE_16 = 0
MODE_32 = 1
MODE_64 = 2


class i386Disasm:
    """
    Table driven disassembler for i386 machine code.
    """

    def __init__(self, mode=MODE_32):
        # NOTE: nothing in this class reads _dis_mode yet (operand sizing
        # in _dis_calc_tsize is fixed at 32 bit); it is recorded so the
        # caller's choice is not silently discarded.
        self._dis_mode = mode
        self._dis_prefixes = i386_prefixes
        self._dis_regctx = i386RegisterContext()

        # This will make function lookups nice and quick: the table is
        # indexed by (addressing method >> 16).
        self._dis_amethods = [None for x in range(22)]
        self._dis_amethods[opcode86.ADDRMETH_A >> 16] = self.ameth_a
        self._dis_amethods[opcode86.ADDRMETH_C >> 16] = self.ameth_c
        self._dis_amethods[opcode86.ADDRMETH_D >> 16] = self.ameth_d
        self._dis_amethods[opcode86.ADDRMETH_E >> 16] = self.ameth_e
        self._dis_amethods[opcode86.ADDRMETH_M >> 16] = self.ameth_e
        self._dis_amethods[opcode86.ADDRMETH_N >> 16] = self.ameth_n
        self._dis_amethods[opcode86.ADDRMETH_Q >> 16] = self.ameth_q
        self._dis_amethods[opcode86.ADDRMETH_R >> 16] = self.ameth_e
        self._dis_amethods[opcode86.ADDRMETH_W >> 16] = self.ameth_w
        self._dis_amethods[opcode86.ADDRMETH_I >> 16] = self.ameth_i
        self._dis_amethods[opcode86.ADDRMETH_J >> 16] = self.ameth_j
        self._dis_amethods[opcode86.ADDRMETH_O >> 16] = self.ameth_o
        self._dis_amethods[opcode86.ADDRMETH_G >> 16] = self.ameth_g
        self._dis_amethods[opcode86.ADDRMETH_P >> 16] = self.ameth_p
        self._dis_amethods[opcode86.ADDRMETH_S >> 16] = self.ameth_s
        self._dis_amethods[opcode86.ADDRMETH_U >> 16] = self.ameth_u
        self._dis_amethods[opcode86.ADDRMETH_V >> 16] = self.ameth_v
        self._dis_amethods[opcode86.ADDRMETH_X >> 16] = self.ameth_x
        self._dis_amethods[opcode86.ADDRMETH_Y >> 16] = self.ameth_y

        # Offsets used to add in addressing method parsers
        self.ROFFSETMMX = getRegOffset(i386regs, "mm0")
        self.ROFFSETSIMD = getRegOffset(i386regs, "xmm0")
        self.ROFFSETDEBUG = getRegOffset(i386regs, "debug0")
        self.ROFFSETCTRL = getRegOffset(i386regs, "ctrl0")
        self.ROFFSETTEST = getRegOffset(i386regs, "test0")
        self.ROFFSETSEG = getRegOffset(i386regs, "es")
        self.ROFFSETFPU = getRegOffset(i386regs, "st0")

    def parse_modrm(self, byte):
        """
        Split a ModRM byte (given as an integer) into (mod, reg, rm).
        """
        mod = (byte >> 6) & 0x3
        reg = (byte >> 3) & 0x7
        rm = byte & 0x7
        return (mod, reg, rm)

    def byteRegOffset(self, val):
        """
        Return the meta register index for 8 bit register number val:
        0-3 select the low byte of register val, 4-7 select the high byte
        of register val-4.
        """
        # NOTE: This is used for high byte metas in 32 bit mode only
        if val < 4:
            return val + RMETA_LOW8
        return (val - 4) + RMETA_HIGH8

    # Parse modrm as though addr mode might not be just a reg
    def extended_parse_modrm(self, bytes, offset, opersize, regbase=0):
        """
        Return a tuple of (size, Operand)

        size is the number of bytes consumed starting at offset (the
        ModRM byte plus any SIB byte and displacement).  regbase is added
        to every register index placed in the operand.
        """
        mod, reg, rm = self.parse_modrm(ord(bytes[offset]))

        size = 1

        if mod == 3:  # Easy one, just a reg
            # FIXME only use self.byteRegOffset in 32 bit mode, NOT 64 bit...
            if opersize == 1:
                rm = self.byteRegOffset(rm)
            elif opersize == 2:
                rm += RMETA_LOW16
            return (size, i386RegOper(rm + regbase, opersize))

        elif mod == 0:
            # means we are [reg] unless rm == 4 (SIB) or rm == 5 ([imm32])
            if rm == 5:
                imm = e_bits.parsebytes(bytes, offset + size, 4)
                size += 4
                # NOTE: in 64 bit mode, *this* is where we differ, (This case is RIP relative)
                return (size, i386ImmMemOper(imm, opersize))

            elif rm == 4:
                sibsize, scale, index, base, imm = self.parse_sib(bytes, offset + size, mod)
                size += sibsize
                if base is not None:
                    base += regbase  # Adjust for different register addressing modes
                if index is not None:
                    index += regbase  # Adjust for different register addressing modes
                oper = i386SibOper(opersize, reg=base, imm=imm, index=index, scale=scale_lookup[scale])
                return (size, oper)

            else:
                return (size, i386RegMemOper(regbase + rm, opersize))

        elif mod == 1:
            # mod 1 means we are [ reg + disp8 ] (unless rm == 4 which means sib + disp8)
            if rm == 4:
                sibsize, scale, index, base, imm = self.parse_sib(bytes, offset + size, mod)
                size += sibsize
                disp = e_bits.parsebytes(bytes, offset + size, 1, sign=True)
                size += 1
                if base is not None:
                    base += regbase  # Adjust for different register addressing modes
                if index is not None:
                    index += regbase  # Adjust for different register addressing modes
                oper = i386SibOper(opersize, reg=base, index=index, scale=scale_lookup[scale], disp=disp)
                return (size, oper)
            else:
                x = e_bits.signed(ord(bytes[offset + size]), 1)
                size += 1
                return (size, i386RegMemOper(regbase + rm, opersize, disp=x))

        elif mod == 2:
            # Means we are [ reg + disp32 ] (unless rm == 4 which means SIB + disp32)
            if rm == 4:
                sibsize, scale, index, base, imm = self.parse_sib(bytes, offset + size, mod)
                size += sibsize
                disp = e_bits.parsebytes(bytes, offset + size, 4, sign=True)
                size += 4
                if base is not None:
                    base += regbase  # Adjust for different register addressing modes
                if index is not None:
                    index += regbase  # Adjust for different register addressing modes
                oper = i386SibOper(opersize, reg=base, imm=imm, index=index, scale=scale_lookup[scale], disp=disp)
                return (size, oper)

            else:
                # NOTE: Immediate displacements in SIB are still 4 bytes in 64 bit mode
                disp = e_bits.parsebytes(bytes, offset + size, 4, sign=True)
                size += 4
                return (size, i386RegMemOper(regbase + rm, opersize, disp=disp))

        else:
            raise Exception("How does mod == %d" % mod)

    def parse_sib(self, bytes, offset, mod):
        """
        Return a tuple of (size, scale, index, base, imm)

        scale is the raw 2 bit field (an index into scale_lookup); index
        and base are register numbers or None; imm is the 32 bit
        immediate base or None; size counts the SIB byte plus any
        immediate consumed.
        """
        byte = ord(bytes[offset])
        scale = (byte >> 6) & 0x3
        index = (byte >> 3) & 0x7
        base = byte & 0x7
        imm = None

        size = 1

        # Special SIB case with no index reg
        if index == 4:
            index = None

        # Special SIB case with possible immediate
        # FIXME is there special stuff needed for mod 1 / mod 2 here?
        # (for now base 5 is left as a plain register in those modes)
        if base == 5 and mod == 0:  # [ imm32 + index * scale ]
            base = None
            imm = e_bits.parsebytes(bytes, offset + size, 4, sign=False)
            size += 4

        return (size, scale, index, base, imm)

    def _dis_calc_tsize(self, opertype, prefixes):
        """
        Use the oper type and prefixes to decide on the tsize for
        the operand.

        Raises Exception if opertype has no entry in opcode86.OPERSIZE.
        """
        sizelist = opcode86.OPERSIZE.get(opertype, None)
        if sizelist is None:
            # String exceptions are not valid; raise a real one.
            raise Exception("OPERSIZE FAIL: %.8x" % opertype)

        # The operand size override prefix flips us to the 16 bit column.
        mode = MODE_32
        if prefixes & PREFIX_OP_SIZE:
            mode = MODE_16

        return sizelist[mode]

    def disasm(self, bytes, offset, va):
        """
        Decode one instruction from bytes starting at offset and return
        it as an i386Opcode located at virtual address va.

        Raises envi.InvalidInstruction when the bytes select an opcode
        table entry of type 0, or when an operand parser raises
        struct.error (insufficient data).
        """
        # Stuff for opcode parsing
        tabdesc = all_tables[0]  # A tuple (optable, shiftbits, mask byte, sub, max, overflow table)
        startoff = offset  # Use startoff as a size knob if needed

        # Stuff we'll be putting in the opcode object
        optype = None  # This gets set if we successfully decode below
        mnem = None
        operands = []

        prefixes = 0

        # Eat prefix bytes, accumulating their masks
        while True:
            obyte = ord(bytes[offset])

            # This line changes in 64 bit mode
            p = self._dis_prefixes[obyte]
            if p is None:
                break
            # 66 0f is handled by the opcode tables, not as a prefix
            if obyte == 0x66 and ord(bytes[offset + 1]) == 0x0f:
                break
            prefixes |= p
            offset += 1

        # Walk the opcode tables until we land on a final entry
        while True:
            obyte = ord(bytes[offset])

            if obyte > tabdesc[4]:
                # Jump to this table's overflow table
                tabdesc = all_tables[tabdesc[5]]

            tabidx = ((obyte - tabdesc[3]) >> tabdesc[1]) & tabdesc[2]
            opdesc = tabdesc[0][tabidx]

            # Hunt down multi-byte opcodes
            nexttable = opdesc[0]
            if nexttable != 0:  # If we have a sub-table specified, use it.
                tabdesc = all_tables[nexttable]

                # In the case of 66 0f, the next table is *already* assuming we ate
                # the 66 *and* the 0f...  oblige them.
                if obyte == 0x66 and ord(bytes[offset + 1]) == 0x0f:
                    offset += 1

                # Account for the table jump we made
                offset += 1
                continue

            # We are now on the final table...
            mnem = opdesc[6]
            optype = opdesc[1]
            if tabdesc[2] == 0xff:
                offset += 1  # For our final opcode byte
            break

        if optype == 0:
            raise envi.InvalidInstruction(bytes=bytes[startoff:startoff + 16])

        operoffset = 0
        # Begin parsing operands based off address method
        for i in operand_range:

            oper = None  # Set this if we end up with an operand
            osize = 0

            # Pull out the operand description from the table
            operflags = opdesc[i]
            opertype = operflags & opcode86.OPTYPE_MASK
            addrmeth = operflags & opcode86.ADDRMETH_MASK

            # If there are no more operands, break out of the loop!
            if operflags == 0:
                break

            tsize = self._dis_calc_tsize(opertype, prefixes)

            # If addrmeth is zero, we have operands embedded in the opcode
            if addrmeth == 0:
                osize = 0
                oper = self.ameth_0(operflags, opdesc[5 + i], tsize, prefixes)

            else:
                ameth = self._dis_amethods[addrmeth >> 16]
                if ameth is None:
                    raise Exception("Implement Addressing Method 0x%.8x" % addrmeth)

                # NOTE: Depending on your addrmethod you may get beginning of operands, or offset
                try:
                    if addrmeth == opcode86.ADDRMETH_I or addrmeth == opcode86.ADDRMETH_J:
                        osize, oper = ameth(bytes, offset + operoffset, tsize, prefixes)

                        # If we are a sign extended immediate and not the same as the other operand,
                        # do the sign extension during disassembly so nothing else has to worry about it..
                        if operflags & opcode86.OP_SIGNED and operands and tsize != operands[-1].tsize:
                            otsize = operands[-1].tsize
                            oper.imm = e_bits.sign_extend(oper.imm, oper.tsize, otsize)
                            oper.tsize = otsize

                    else:
                        osize, oper = ameth(bytes, offset, tsize, prefixes)

                except struct.error:
                    # Catch struct unpack errors due to insufficient data length
                    raise envi.InvalidInstruction(bytes=bytes[startoff:startoff + 16])

            if oper is not None:
                # This is a filthy hack for now...
                oper._dis_regctx = self._dis_regctx
                operands.append(oper)
            operoffset += osize

        # Pull in the envi generic instruction flags
        iflags = iflag_lookup.get(optype, 0)

        if priv_lookup.get(mnem, False):
            iflags |= envi.IF_PRIV

        # Lea will have a reg-mem/sib operand with _is_deref True, but should be false
        if optype == opcode86.INS_LEA:
            operands[1]._is_deref = False

        ret = i386Opcode(va, optype, mnem, prefixes, (offset - startoff) + operoffset, operands, iflags)

        return ret

    # Declare all the address method parsers here!
    #
    # Except for ameth_0, each takes (bytes, offset, tsize, prefixes) and
    # returns (number of bytes consumed, operand).  Several return a size
    # of 0 for operands taken from the reg field of the ModRM byte.
    # NOTE(review): presumably the ModRM byte itself is accounted for by
    # the instruction's other (E/M style) operand -- confirm against the
    # opcode tables.

    def ameth_0(self, operflags, operval, tsize, prefixes):
        # Special address method for opcodes with embedded operands
        if operflags & opcode86.OP_REG:
            return i386RegOper(operval, tsize)
        elif operflags & opcode86.OP_IMM:
            return i386ImmOper(operval, tsize)
        raise Exception("Unknown ameth_0! operflags: 0x%.8x" % operflags)

    def ameth_a(self, bytes, offset, tsize, prefixes):
        imm = e_bits.parsebytes(bytes, offset, tsize)
        # The segment selector is parsed (so short input still fails) but
        # is otherwise dropped for now.
        seg = e_bits.parsebytes(bytes, offset + tsize, 2)
        # THIS BEING GHETTORIGGED ONLY EFFECTS callf jmpf
        return (tsize + 2, i386ImmOper(imm, tsize))

    def ameth_e(self, bytes, offset, tsize, prefixes):
        return self.extended_parse_modrm(bytes, offset, tsize)

    def ameth_n(self, bytes, offset, tsize, prefixes):
        mod, reg, rm = self.parse_modrm(ord(bytes[offset]))
        return (1, i386RegOper(rm + self.ROFFSETMMX, tsize))

    def ameth_q(self, bytes, offset, tsize, prefixes):
        mod, reg, rm = self.parse_modrm(ord(bytes[offset]))
        if mod == 3:
            return (1, i386RegOper(rm + self.ROFFSETMMX, tsize))
        return self.extended_parse_modrm(bytes, offset, tsize)

    def ameth_w(self, bytes, offset, tsize, prefixes):
        mod, reg, rm = self.parse_modrm(ord(bytes[offset]))
        if mod == 3:
            return (1, i386RegOper(rm + self.ROFFSETSIMD, tsize))
        return self.extended_parse_modrm(bytes, offset, tsize)

    def ameth_i(self, bytes, offset, tsize, prefixes):
        # FIXME sign extend here if opflags has OP_SIGNED
        imm = e_bits.parsebytes(bytes, offset, tsize)
        return (tsize, i386ImmOper(imm, tsize))

    def ameth_j(self, bytes, offset, tsize, prefixes):
        imm = e_bits.parsebytes(bytes, offset, tsize, sign=True)
        return (tsize, i386PcRelOper(imm, tsize))

    def ameth_o(self, bytes, offset, tsize, prefixes):
        # NOTE: displacement *stays* 32 bit even with REX
        # (but 16 bit should probably be supported)
        imm = e_bits.parsebytes(bytes, offset, 4, sign=False)
        return (4, i386ImmMemOper(imm, tsize))

    def ameth_g(self, bytes, offset, tsize, prefixes):
        mod, reg, rm = self.parse_modrm(ord(bytes[offset]))
        if tsize == 1:
            reg = self.byteRegOffset(reg)
        elif tsize == 2:
            reg += RMETA_LOW16
        return (0, i386RegOper(reg, tsize))

    def ameth_c(self, bytes, offset, tsize, prefixes):
        mod, reg, rm = self.parse_modrm(ord(bytes[offset]))
        return (0, i386RegOper(reg + self.ROFFSETCTRL, tsize))

    def ameth_d(self, bytes, offset, tsize, prefixes):
        mod, reg, rm = self.parse_modrm(ord(bytes[offset]))
        return (0, i386RegOper(reg + self.ROFFSETDEBUG, tsize))

    def ameth_p(self, bytes, offset, tsize, prefixes):
        mod, reg, rm = self.parse_modrm(ord(bytes[offset]))
        return (0, i386RegOper(reg + self.ROFFSETMMX, tsize))

    def ameth_s(self, bytes, offset, tsize, prefixes):
        mod, reg, rm = self.parse_modrm(ord(bytes[offset]))
        return (0, i386RegOper(reg + self.ROFFSETSEG, tsize))

    def ameth_u(self, bytes, offset, tsize, prefixes):
        mod, reg, rm = self.parse_modrm(ord(bytes[offset]))
        return (0, i386RegOper(reg + self.ROFFSETTEST, tsize))

    def ameth_v(self, bytes, offset, tsize, prefixes):
        mod, reg, rm = self.parse_modrm(ord(bytes[offset]))
        return (0, i386RegOper(reg + self.ROFFSETSIMD, tsize))

    def ameth_x(self, bytes, offset, tsize, prefixes):
        # Memory addressed by DS:ESI
        #FIXME this needs the DS over-ride, but is only for outsb which we don't support
        return (0, i386RegMemOper(REG_ESI, tsize))

    def ameth_y(self, bytes, offset, tsize, prefixes):
        # Memory addressed by ES:EDI (the string *destination*; ESI is
        # the source handled by ameth_x)
        #FIXME this needs the ES over-ride, but is only for insb which we don't support
        return (0, i386RegMemOper(REG_EDI, tsize))


if __name__ == '__main__':
    # A little helper to make testing easier: disassemble the whole file
    # named by argv[1] as though it were loaded at 0x41414141.
    import sys
    d = i386Disasm()
    with open(sys.argv[1], 'rb') as fd:
        b = fd.read()
    offset = 0
    va = 0x41414141
    while offset < len(b):
        op = d.disasm(b, offset, va + offset)
        print('0x%.8x %s %s' % (va + offset, b[offset:offset + len(op)].encode('hex').ljust(16), repr(op)))
        offset += len(op)