Source code for envi

"""
The Envi framework allows architecture abstraction through
the use of the ArchitectureModule, Opcode, Operand, and
Emulator objects.
"""

import types
import struct
import platform

# Instruction flags (The first 8 bits are reserved for arch independent use)
IF_NOFALL = 0x01 # Set if this instruction does *not* fall through
IF_PRIV   = 0x02 # Set if this is a "privileged mode" instruction
IF_CALL   = 0x04 # Set if this instruction branches to a procedure
IF_BRANCH = 0x08 # Set if this instruction branches
IF_RET    = 0x10 # Set if this instruction terminates a procedure

# Branch flags (flags returned by the getBranches() method on an opcode)
BR_PROC  = 1<<0 # The branch target is a procedure (call <foo>)
BR_COND  = 1<<1 # The branch target is conditional (jz <foo>)
BR_DEREF = 1<<2 # the branch target is *dereferenced* into PC (call [0x41414141])
BR_TABLE = 1<<3 # The branch target is the base of a pointer array of jmp/call slots
BR_FALL  = 1<<4 # The branch is a "fall through" to the next instruction

import envi.bits as e_bits
import envi.memory as e_mem
import envi.registers as e_reg
import envi.memcanvas as e_canvas

class ArchitectureModule:
    """
    An architecture module implements methods to deal
    with the creation of envi objects for the specified
    architecture.

    The base class only stores the architecture name and the maxinst
    value; every other method raises ArchNotImplemented until an
    architecture specific subclass overrides it.
    """

    def __init__(self, archname, maxinst=32):
        """
        archname - The name of the architecture (stored as _arch_name)
        maxinst  - Stored as _arch_maxinst (presumably the maximum
                   instruction size in bytes -- confirm with callers)
        """
        self._arch_name = archname
        self._arch_maxinst = maxinst

    def archGetBreakInstr(self):
        """
        Return a python string of the byte sequence which corresponds to
        a breakpoint (if present) for this architecture.
        """
        raise ArchNotImplemented("archGetBreakInstr")

    def archGetRegCtx(self):
        """
        Return an initialized register context object for the architecture.
        """
        raise ArchNotImplemented("archGetRegCtx")

    def makeOpcode(self, bytes, offset=0, va=0):
        """
        Create a new opcode from the specified bytes (beginning
        at the specified offset)
        """
        raise ArchNotImplemented("makeOpcode")

    def getEmulator(self):
        """
        Return a default instance of an emulator for the given arch.
        """
        raise ArchNotImplemented("getEmulator")

    def getPointerSize(self):
        """
        Get the size of a pointer in memory on this architecture.
        """
        raise ArchNotImplemented("getPointerSize")

    def pointerString(self, va):
        """
        Return a string representation for a pointer on this arch
        """
        raise ArchNotImplemented("pointerString")


def stealArchMethods(obj, archname):
    '''
    Used by objects which are expected to inherit from an
    architecture module but don't know which one until runtime!

    Every bound method found on a fresh architecture module instance
    (looked up via getArchModule(archname)) is assigned onto obj under
    the same name.  The methods stay bound to the arch module instance,
    not to obj.
    '''
    arch = getArchModule(archname)
    for name in dir(arch):
        o = getattr(arch, name, None)
        if isinstance(o, types.MethodType):
            setattr(obj, name, o)


class EnviException(Exception):
    """
    Base class for all envi exceptions.  str() of the exception is
    the same as its repr().
    """

    def __str__(self):
        return repr(self)


class InvalidInstruction(EnviException):
    """
    Raised by opcode parsers when the specified
    bytes do not represent a valid opcode
    """

    def __init__(self, bytes=None):
        """
        bytes - The offending byte string (optional).  When given, the
                exception message is its hex encoding.
        """
        msg = None
        if bytes is not None:
            # hexlify() gives the same text as the py2-only
            # str.encode('hex') while also working on python 3.
            import binascii
            msg = binascii.hexlify(bytes)
            if not isinstance(msg, str):
                msg = msg.decode('ascii')
        EnviException.__init__(self, msg)


class SegmentationViolation(EnviException):
    """
    Raised by an Emulator extension when you
    bad-touch memory. (Likely from memobj).
    """

    def __init__(self, va, msg=None):
        """
        va  - The address of the bad access (kept as self.va)
        msg - Optional message; a default naming va is built if omitted
        """
        if msg is None:
            msg = "Bad Memory Access: %s" % hex(va)
        EnviException.__init__(self, msg)
        self.va = va


class ArchNotImplemented(EnviException):
    """
    Raised by various Envi components when the architecture
    does not implement that envi component.
    """
    pass


class EmuException(EnviException):
    """
    A parent for all emulation exceptions so catching
    them can be easy.
    """

    def __init__(self, emu, msg=None):
        """
        emu - The emulator raising the exception.  Its current program
              counter is captured as self.va.
        msg - Optional message passed to the base exception.
        """
        EnviException.__init__(self, msg)
        self.va = emu.getProgramCounter()

    def __repr__(self):
        return "%s at %s" % (self.__class__.__name__, hex(self.va))


class UnsupportedInstruction(EmuException):
    """
    Raised by emulators when the given instruction
    is not implemented by the emulator.
    """

    def __init__(self, emu, op):
        """
        emu - The emulator raising the exception
        op  - The opcode object which could not be emulated (self.op)
        """
        EmuException.__init__(self, emu)
        self.op = op

    def __repr__(self):
        return "Unsupported Instruction: 0x%.8x %s" % (self.va, repr(self.op))


class DivideByZero(EmuException):
    """
    Raised by an Emulator when a divide/mod has
    a 0 divisor...
    """


class BreakpointHit(EmuException):
    """
    Raised by an emulator when you execute a breakpoint instruction
    """


class PDEUndefinedFlag(EmuException):
    """
    This exception is raised when a conditional operation is dependent on
    a flag state that is unknown.
    """


class PDEException(EmuException):
    """
    This exception is used in partially defined emulation to signal where
    execution flow becomes un-known due to undefined values.  This is considered
    un-recoverable.
    """


class UnknownCallingConvention(EmuException):
    """
    Raised when the getCallArgs() or setReturnValue() methods
    are given an unknown calling convention type.
    """


class MapOverlapException(EnviException):
    """
    Raised when adding a memory map to a MemoryObject which overlaps
    with another already existing map.
    """

    def __init__(self, map1, map2):
        """
        map1, map2 - The two overlapping maps.  Elements [0] and [1] of
                     each are formatted into the exception message.
        """
        self.map1 = map1
        self.map2 = map2
        margs = (map1[0], map1[1], map2[0], map2[1])
        EnviException.__init__(self, "Map At 0x%.8x (%d) overlaps map at 0x%.8x (%d)" % margs)


class Operand:
    """
    These are the expected methods needed by any implemented operand object
    attached to an envi Opcode.  This does *not* have a constructor of its
    own on purpose to cut down on memory use and constructor CPU cost.
    """

    def getOperValue(self, op, emu=None):
        """
        Get the current value for the operand.  If needed, use
        the given emulator/workspace/trace to resolve things like
        memory and registers.

        NOTE: This API may be passed a None emu and should return what it can
              (or None if it can't be resolved)
        """
        # print() call form (not the py2 statement) to match the other
        # "needs to implement" stubs in this class.
        print("%s needs to implement getOperValue!" % self.__class__.__name__)
        return None

    def setOperValue(self, op, emu, val):
        """
        Set the current value for the operand.  If needed, use
        the given emulator/workspace/trace to assign things like
        memory and registers.
        """
        print("%s needs to implement setOperValue! (0x%.8x: %s) " % (self.__class__.__name__, op.va, repr(op)))

    def isDeref(self):
        """
        If the given operand will dereference memory, this method must return True.
        """
        return False

    def isImmed(self):
        '''
        If the given operand represents an immediate value, this must return True.
        '''
        return False

    def isReg(self):
        '''
        If the given operand represents a register value, this must return True.
        '''
        return False

    def getOperAddr(self, op, emu):
        """
        If the operand is a "dereference" operand, this method should use the
        specified op/emu to resolve the address of the dereference.

        NOTE: This API may be passed a None emu and should return what it can
              (or None if it can't be resolved)
        """
        print("%s needs to implement getOperAddr!" % self.__class__.__name__)
        return None

    def repr(self, op):
        """
        Used by the Opcode class to get a human readable string for this operand.
        """
        return "unknown"

    def render(self, mcanv, op, idx):
        """
        Used by the opcode class when rendering to a memory canvas.
        """
        mcanv.addText(self.repr(op))

    def __ne__(self, op):
        return not op == self

    def __eq__(self, oper):
        # Base comparison only checks the operand type.
        if not isinstance(oper, self.__class__):
            return False
        #FIXME each one will need this...
        return True


class DerefOper(Operand):
    """An operand which dereferences memory (isDeref() is True)."""

    def isDeref(self):
        return True


class ImmedOper(Operand):
    """An operand which is an immediate value (isImmed() is True)."""

    def isImmed(self):
        return True


class RegisterOper(Operand):
    """An operand which is a register value (isReg() is True)."""

    def isReg(self):
        return True


class Opcode:
    """
    A universal representation for an opcode
    """
    prefix_names = [] # flag->human tuples

    def __init__(self, va, opcode, mnem, prefixes, size, operands, iflags=0):
        """
        constructor for the basic Envi Opcode object.  Arguments as follows:

        opcode   - An architecture specific numerical value for the opcode
        mnem     - A human readable mnemonic for the opcode
        prefixes - a bitmask of architecture specific instruction prefixes
        size     - The size of the opcode in bytes
        operands - A list of Operand objects for this opcode
        iflags   - Envi (architecture independent) instruction flags (see IF_FOO)
        va       - The virtual address the instruction lives at (used for PC relative immediates etc...)

        NOTE: If you want to create an architecture specific opcode, I'd *highly* recommend you
              just copy/paste in the following simple initial code rather than calling the parent
              constructor (presumably to avoid the cost of the extra call).
        """
        self.opcode = opcode
        self.mnem = mnem
        self.prefixes = prefixes
        self.size = size
        self.opers = operands
        self.repr = None
        self.iflags = iflags
        self.va = va

    def __ne__(self, op):
        return not op == self

    def __eq__(self, op):
        # Equal when opcode, mnemonic, size, iflags and every operand
        # match.  Note that va and prefixes are not compared.
        if not isinstance(op, Opcode):
            return False
        if self.opcode != op.opcode:
            return False
        if self.mnem != op.mnem:
            return False
        if self.size != op.size:
            return False
        if self.iflags != op.iflags:
            return False
        if len(self.opers) != len(op.opers):
            return False
        for mine, theirs in zip(self.opers, op.opers):
            if mine != theirs:
                return False
        return True

    def __hash__(self):
        return int(hash(self.mnem) ^ (self.size << 4))

    def __repr__(self):
        """
        Over-ride this if you want to make arch specific repr.
        """
        return self.mnem + " " + ",".join([o.repr(self) for o in self.opers])

    def __len__(self):
        return int(self.size)

    # NOTE: From here down is mostly things that architecture specific opcode
    #       extensions should override.
    def getBranches(self, emu=None):
        """
        Return a list of tuples.  Each tuple contains the target VA of the
        branch, and a possible set of flags showing what type of branch it is.

        See the BR_FOO types for all the supported envi branch flags....
        Example: for bva,bflags in op.getBranches():

        The base implementation reports no branches (an empty tuple).
        """
        return ()

    def render(self, mcanv):
        """
        Render this opcode to the memory canvas passed in.  This is used for both
        simple printing AND more complex representations.
        """
        mcanv.addText(repr(self))

    def getPrefixName(self):
        """
        Get the name of the prefixes associated with the specified
        architecture specific prefix bitmask.
        """
        ret = []
        for byte,name in self.prefix_names:
            if self.prefixes & byte:
                ret.append(name)
        return "".join(ret)

    def getOperValue(self, idx, emu=None):
        """
        Return the value of the operand at index idx (optionally
        resolved using the given emulator).
        """
        oper = self.opers[idx]
        return oper.getOperValue(self, emu=emu)

    def getOperands(self):
        """
        Return a new list containing this opcode's operand objects.
        """
        return list(self.opers)


class Emulator(e_reg.RegisterContext, e_mem.MemoryObject):
    """
    The Emulator class is mostly "Abstract" in the java
    Interface sense.  The emulator should be able to
    be extended for the architectures which are included
    in the envi framework.  You *must* mix in
    an instance of your architecture abstraction module.

    (NOTE: Most users will just use an arch mod and call getEmulator())

    The intention is for "light weight" emulation to be
    implemented mostly for user-space emulation of
    protected mode execution.
    """

    def __init__(self, archmod=None):

        e_mem.MemoryObject.__init__(self, archmod=archmod)
        e_reg.RegisterContext.__init__(self)

        self._emu_segments = [ (0, 0xffffffff), ]
        self._emu_call_convs = {}

        # Automagically setup an instruction mnemonic handler dict
        # by finding all methods starting with i_ and assume they
        # implement an instruction by mnemonic
        # FIXME THIS *MUST* GET FASTER FOR UTIL FUNCS!
        # POSSIBLY DECLARE IN ADVANCE?
        self.op_methods = {}
        for name in dir(self):
            if name.startswith("i_"):
                self.op_methods[name[2:]] = getattr(self, name)

    def getArchModule(self):
        raise Exception('Emulators *must* implement getArchModule()!')

    def getEmuSnap(self):
        """
        Return the data needed to "snapshot" this emulator.  For most
        archs, this method will be enough (it takes the memory object,
        and register values with it)
        """
        regs = self.getRegisterSnap()
        mem = self.getMemorySnap()
        return regs,mem

    def setEmuSnap(self, snap):
        """
        Restore emulator state from a (regs, mem) tuple as returned
        by getEmuSnap().
        """
        regs,mem = snap
        self.setRegisterSnap(regs)
        self.setMemorySnap(mem)

    def executeOpcode(self, opobj):
        """
        Execute the given opcode object.  The base implementation
        raises ArchNotImplemented; stepi() depends on an architecture
        specific emulator overriding this.
        """
        raise ArchNotImplemented()

    def run(self, stepcount=None):
        """
        Run the emulator until "something" happens.
        (breakpoint, segv, syscall, etc...)

        If stepcount is given, at most that many instructions are
        stepped; otherwise stepping continues until an exception
        propagates out of stepi().
        """
        if stepcount is not None:
            # Plain counter so this works with or without xrange()
            remaining = stepcount
            while remaining > 0:
                self.stepi()
                remaining -= 1
        else:
            while True:
                self.stepi()

    def stepi(self):
        """
        Parse the opcode at the current program counter and execute it.
        """
        pc = self.getProgramCounter()
        op = self.parseOpcode(pc)
        self.executeOpcode(op)

    def getSegmentInfo(self, op):
        """
        Return the (base, size) tuple of the segment selected by
        getSegmentIndex() for the given opcode.
        """
        idx = self.getSegmentIndex(op)
        return self._emu_segments[idx]

    def getSegmentIndex(self, op):
        """
        The *default* segmentation is none (most arch's will over-ride).
        This method may be implemented to return a segment index based on either
        emulator state or properties of the particular instruction in question.
        """
        return 0

    def setSegmentInfo(self, idx, base, size):
        '''
        Set a base and size for a given segment index.

        An idx exactly one past the current last segment appends a new
        entry; any other idx must already exist (IndexError otherwise).
        '''
        if idx == len(self._emu_segments):
            self._emu_segments.append( (base, size) )
            return

        self._emu_segments[idx] = (base,size)

    def getOperValue(self, op, idx):
        """
        Return the value for the operand at index idx for
        the given opcode reading memory and register states if necessary.

        In partially-defined emulation, this may return None
        """
        oper = op.opers[idx]
        return oper.getOperValue(op, self)

    def getOperAddr(self, op, idx):
        """
        Return the address that an operand which deref's memory
        would read from on getOperValue().
        """
        oper = op.opers[idx]
        return oper.getOperAddr(op, self)

    def setOperValue(self, op, idx, value):
        """
        Set the value of the target operand at index idx from
        opcode op.
        (obviously OM_IMMEDIATE *cannot* be set)
        """
        oper = op.opers[idx]
        return oper.setOperValue(op, self, value)

    def getCallArgs(self, count, cc):
        """
        Emulator implementors can implement this method to allow
        analysis modules a platform/architecture independent way
        to get stack/reg/whatever args.

        Usage: getCallArgs(3, "stdcall") -> (0, 32, 0xf00)

        Raises UnknownCallingConvention if cc has not been registered
        with addCallingConvention().
        """
        c = self._emu_call_convs.get(cc, None)
        if c is None:
            # EmuException wants the emulator first (it records the
            # program counter); the convention name is the message.
            raise UnknownCallingConvention(self, cc)

        return c.getCallArgs(self, count)

    def setReturnValue(self, value, cc, argc=0):
        """
        Emulator implementors can implement this method to allow
        analysis modules a platform/architecture independent way
        to set a function return value. (this should also take
        care of any argument cleanup or other return time tasks
        for the calling convention)

        Raises UnknownCallingConvention if cc has not been registered
        with addCallingConvention().
        """
        c = self._emu_call_convs.get(cc, None)
        if c is None:
            # See getCallArgs(): emulator first, name as the message.
            raise UnknownCallingConvention(self, cc)

        return c.setReturnValue(self, value, argc)

    def addCallingConvention(self, name, obj):
        """
        Register the calling convention object obj under the given name.
        """
        self._emu_call_convs[name] = obj

    def hasCallingConvention(self, name):
        """
        Return True if a (non-None) calling convention is registered
        under the given name.
        """
        return self._emu_call_convs.get(name) is not None

    def getCallingConvention(self, name):
        """
        Return the calling convention object registered under name
        (or None if there is none).
        """
        return self._emu_call_convs.get(name)

    def getCallingConventions(self):
        """
        Return the (name, object) pairs of all registered calling
        conventions.
        """
        return self._emu_call_convs.items()


class CallingConvention:
    """
    Implement calling conventions for your arch.
    """

    def setReturnValue(self, emu, value, ccinfo=None):
        pass

    def getCallArgs(self, emu, count):
        pass

    # If you want your arch to use symbolik emulation...
    def getSymbolikArgs(self, emu, argv):
        raise Exception('getSymbolikArgs() not in %s' % self.__class__.__name__)

    def setSymbolikReturn(self, emu, sym, argv):
        raise Exception('setSymbolikReturn() not in %s' % self.__class__.__name__)


# NOTE: This mapping is needed because of inconsistencies
# in how different compilers and versions of python embed
# the machine setting.
arch_xlate_32 = {
    'i386':'i386',
    'i486':'i386',
    'i586':'i386',
    'i686':'i386',
    'x86':'i386',
    'i86pc':'i386', # Solaris
    '':'i386', # Stupid windows...
    'AMD64':'i386', # ActiveState python can say AMD64 in 32 bit install?
}

arch_xlate_64 = {
    'x86_64':'amd64',
    'AMD64':'amd64',
    'amd64':'amd64',
    'i386':'amd64', # MAC ports builds are 64bit and say i386
    '':'amd64', # And again....
}


def getCurrentArch():
    """
    Return an envi normalized name for the current arch.

    The name is looked up from platform.machine() using the 32 or 64
    bit translation table selected by the native pointer width.
    Raises ArchNotImplemented (with the machine string) when there is
    no translation.
    """
    width = struct.calcsize("P")
    mach = platform.machine()   # 'i386','ppc', etc...

    # Stays None for an unexpected pointer width so that case also
    # ends in ArchNotImplemented rather than an unbound local.
    ret = None
    if width == 4:
        ret = arch_xlate_32.get(mach)

    elif width == 8:
        ret = arch_xlate_64.get(mach)

    if ret is None:
        raise ArchNotImplemented(mach)

    return ret


def getArchModule(name=None):
    """
    return an Envi architecture module instance for the following
    architecture name.

    If name is None, the result of getCurrentArch() is used.

    Current architectures include:

    i386 - Intel i386
    amd64 - The new 64bit AMD spec.
    arm - ARM

    Raises ArchNotImplemented for any other name.
    """
    if name is None:
        name = getCurrentArch()

    # Some builds have x86 (py2.6) and some have other stuff...
    if name in ("i386", "i486", "i586", "i686", "x86"):
        import envi.archs.i386 as e_i386
        return e_i386.i386Module()

    elif name == "amd64":
        import envi.archs.amd64 as e_amd64
        return e_amd64.Amd64Module()

    elif name == 'arm':
        import envi.archs.arm as e_arm
        return e_arm.ArmModule()

    else:
        raise ArchNotImplemented(name)