Package envi
[hide private]
[frames] | no frames]

Source Code for Package envi

  1   
  2  """ 
  3  The Envi framework allows architecture abstraction through 
  4  the use of the ArchitectureModule, Opcode, Operand, and 
  5  Emulator objects. 
  6  """ 
  7   
  8  import types 
  9  import struct 
 10  import platform 
 11   
 12  # Instruciton flags (The first 8 bits are reserved for arch independant use) 
 13  IF_NOFALL = 0x01 # Set if this instruction does *not* fall through 
 14  IF_PRIV   = 0x02 # Set if this is a "privileged mode" instruction 
 15  IF_CALL   = 0x04 # Set if this instruction branches to a procedure 
 16  IF_BRANCH = 0x08 # Set if this instruction branches 
 17  IF_RET    = 0x10 # Set if this instruction terminates a procedure 
 18   
 19  # Branch flags (flags returned by the getBranches() method on an opcode) 
 20  BR_PROC  = 1<<0 # The branch target is a procedure (call <foo>) 
 21  BR_COND  = 1<<1 # The branch target is conditional (jz <foo>) 
 22  BR_DEREF = 1<<2 # the branch target is *dereferenced* into PC (call [0x41414141]) 
 23  BR_TABLE = 1<<3 # The branch target is the base of a pointer array of jmp/call slots 
 24  BR_FALL  = 1<<4 # The branch is a "fall through" to the next instruction 
 25   
 26  import envi.bits as e_bits 
 27  import envi.memory as e_mem 
 28  import envi.registers as e_reg 
 29  import envi.memcanvas as e_canvas 
 30   
class ArchitectureModule:
    """
    Base class for architecture modules.

    An architecture module implements the methods that create envi
    objects (register contexts, opcodes, emulators...) for one specific
    architecture.  Apart from the constructor, every method here raises
    ArchNotImplemented and is expected to be overridden.
    """

    def __init__(self, archname, maxinst=32):
        """
        Store the architecture name and the maxinst value on the instance
        (as _arch_name and _arch_maxinst).
        """
        self._arch_maxinst = maxinst
        self._arch_name = archname

    def archGetBreakInstr(self):
        """
        Return a python string holding the byte sequence of a breakpoint
        instruction (if one exists) for this architecture.
        """
        raise ArchNotImplemented("archGetBreakInstr")

    def archGetRegCtx(self):
        """
        Return an initialized register context object for this architecture.
        """
        raise ArchNotImplemented("archGetRegCtx")

    def makeOpcode(self, bytes, offset=0, va=0):
        """
        Create a new opcode from the given bytes, starting at the
        given offset.
        """
        raise ArchNotImplemented("makeOpcode")

    def getEmulator(self):
        """
        Return a default emulator instance for this architecture.
        """
        raise ArchNotImplemented("getEmulator")

    def getPointerSize(self):
        """
        Return the size of an in-memory pointer on this architecture.
        """
        raise ArchNotImplemented("getPointerSize")

    def pointerString(self, va):
        """
        Return the string representation of a pointer on this architecture.
        """
        raise ArchNotImplemented("pointerString")
78
def stealArchMethods(obj, archname):
    '''
    Copy the methods of an architecture module onto obj.

    Used by objects which are expected to inherit from an architecture
    module but do not know which one until runtime.  The module is looked
    up with getArchModule(archname) and each of its attributes that is a
    method object is set on obj under the same name.
    '''
    archmod = getArchModule(archname)
    for attrname in dir(archmod):
        attr = getattr(archmod, attrname, None)
        # Only method objects are transplanted; plain data is left alone.
        if not isinstance(attr, types.MethodType):
            continue
        setattr(obj, attrname, attr)
89
class EnviException(Exception):
    """
    Common base class for envi exceptions.  str() of an instance is
    the same text as its repr().
    """

    def __str__(self):
        text = repr(self)
        return text
93
class InvalidInstruction(EnviException):
    """
    Raised by opcode parsers when the specified bytes do not
    represent a valid opcode.
    """

    def __init__(self, bytes=None):
        """
        Use the hex encoding of bytes as the exception message, or None
        when no bytes were supplied.
        """
        msg = bytes.encode('hex') if bytes is not None else None
        EnviException.__init__(self, msg)
104
class SegmentationViolation(EnviException):
    """
    Raised by an Emulator extension when you bad-touch memory.
    (Likely from memobj).
    """

    def __init__(self, va, msg=None):
        """
        Remember the offending address as self.va; when no message is
        given, a default one naming the address in hex is used.
        """
        self.va = va
        if msg is None:
            msg = "Bad Memory Access: %s" % hex(va)
        EnviException.__init__(self, msg)
115
class ArchNotImplemented(EnviException):
    """
    Raised by the various Envi components when the architecture does
    not implement the requested envi component.
    """
122
class EmuException(EnviException):
    """
    Parent class of all emulation exceptions, so that they can be
    caught together easily.
    """

    def __init__(self, emu, msg=None):
        """
        Record the emulator's current program counter as self.va.
        """
        pc = emu.getProgramCounter()
        EnviException.__init__(self, msg)
        self.va = pc

    def __repr__(self):
        clsname = self.__class__.__name__
        return "%s at %s" % (clsname, hex(self.va))
134
class UnsupportedInstruction(EmuException):
    """
    Raised by emulators when the given instruction is not
    implemented by the emulator.
    """

    def __init__(self, emu, op):
        """
        Keep the unsupported opcode object as self.op.
        """
        EmuException.__init__(self, emu)
        self.op = op

    def __repr__(self):
        optext = repr(self.op)
        return "Unsupported Instruction: 0x%.8x %s" % (self.va, optext)
146
class DivideByZero(EmuException):
    """
    Raised by an Emulator when a divide/mod operation has a
    divisor of 0.
    """
152
class BreakpointHit(EmuException):
    """
    Raised by an emulator when a breakpoint instruction is executed.
    """
157
class PDEUndefinedFlag(EmuException):
    """
    Raised when a conditional operation depends on a flag whose
    state is unknown.
    """
163
class PDEException(EmuException):
    """
    Used in partially defined emulation to signal the point where
    execution flow becomes unknown because of undefined values.  This is
    considered unrecoverable.
    """
170
class UnknownCallingConvention(EmuException):
    """
    Raised when the getCallArgs() or setReturnValue() methods are
    given an unknown calling convention type.
    """
176
class MapOverlapException(EnviException):
    """
    Raised when a memory map being added to a MemoryObject overlaps
    another map which already exists.
    """

    def __init__(self, map1, map2):
        """
        Keep both maps (self.map1 / self.map2) and build the message from
        the first two elements of each.
        """
        fmt = "Map At 0x%.8x (%d) overlaps map at 0x%.8x (%d)"
        msg = fmt % (map1[0], map1[1], map2[0], map2[1])
        EnviException.__init__(self, msg)
        self.map1 = map1
        self.map2 = map2
187
class Operand:

    """
    These are the expected methods needed by any implemented operand object
    attached to an envi Opcode.  This does *not* have a constructor of its
    own on purpose to cut down on memory use and constructor CPU cost.
    """

    def getOperValue(self, op, emu=None):
        """
        Get the current value for the operand.  If needed, use
        the given emulator/workspace/trace to resolve things like
        memory and registers.

        NOTE: This API may be passed a None emu and should return what it can
        (or None if it can't be resolved)

        This base implementation only prints a reminder and returns None.
        """
        # Call-form print with a single argument, matching setOperValue and
        # getOperAddr below; it emits the same text as the print statement.
        print("%s needs to implement getOperValue!" % self.__class__.__name__)
        return None

    def setOperValue(self, op, emu, val):
        """
        Set the current value for the operand.  If needed, use
        the given emulator/workspace/trace to assign things like
        memory and registers.

        This base implementation only prints a reminder (including op.va
        and repr(op)) and returns None.
        """
        print("%s needs to implement setOperValue! (0x%.8x: %s) " % (self.__class__.__name__, op.va, repr(op)))

    def isDeref(self):
        """
        If the given operand will dereference memory, this method must return True.
        """
        return False

    def isImmed(self):
        '''
        If the given operand represents an immediate value, this must return True.
        '''
        return False

    def isReg(self):
        '''
        If the given operand represents a register value, this must return True.
        '''
        return False

    def getOperAddr(self, op, emu):
        """
        If the operand is a "dereference" operand, this method should use the
        specified op/emu to resolve the address of the dereference.

        NOTE: This API may be passed a None emu and should return what it can
        (or None if it can't be resolved)

        This base implementation only prints a reminder and returns None.
        """
        print("%s needs to implement getOperAddr!" % self.__class__.__name__)
        return None

    def repr(self, op):
        """
        Used by the Opcode class to get a human readable string for this operand.
        """
        return "unknown"

    def render(self, mcanv, op, idx):
        """
        Used by the opcode class when rendering to a memory canvas.
        """
        mcanv.addText(self.repr(op))

    def __ne__(self, op):
        # Defined as the negation of the *other* object's equality test.
        return not op == self

    def __eq__(self, oper):
        # Only the type is compared here; any instance of this operand's
        # class (or a subclass of it) counts as equal.
        if not isinstance(oper, self.__class__):
            return False
        #FIXME each one will need this...
        return True
265
class DerefOper(Operand):
    """
    Operand base whose isDeref() reports True.
    """

    def isDeref(self):
        return True
270
class ImmedOper(Operand):
    """
    Operand base whose isImmed() reports True.
    """

    def isImmed(self):
        return True
275
class RegisterOper(Operand):
    """
    Operand base whose isReg() reports True.
    """

    def isReg(self):
        return True
280
class Opcode:
    """
    A universal representation for an opcode
    """
    prefix_names = []  # flag->human tuples

    def __init__(self, va, opcode, mnem, prefixes, size, operands, iflags=0):
        """
        Constructor for the basic Envi Opcode object.  Arguments:

        va       - The virtual address the instruction lives at (used for PC relative immediates etc...)
        opcode   - An architecture specific numerical value for the opcode
        mnem     - A human readable mnemonic for the opcode
        prefixes - A bitmask of architecture specific instruction prefixes
        size     - The size of the opcode in bytes
        operands - A list of Operand objects for this opcode
        iflags   - Envi (architecture independent) instruction flags (see IF_FOO)

        NOTE: If you want to create an architecture specific opcode, it is
              *highly* recommended to copy/paste the simple assignments
              below rather than calling this parent constructor.
        """
        self.va = va
        self.opcode = opcode
        self.mnem = mnem
        self.prefixes = prefixes
        self.size = size
        self.opers = operands
        self.iflags = iflags
        self.repr = None

    def __ne__(self, op):
        # Negation of the other object's equality test.
        return not op == self

    def __eq__(self, op):
        """
        Two opcodes are equal when opcode, mnem, size, iflags and every
        operand match (va and prefixes are not compared).
        """
        if not isinstance(op, Opcode):
            return False
        for attr in ("opcode", "mnem", "size", "iflags"):
            if getattr(self, attr) != getattr(op, attr):
                return False
        if len(self.opers) != len(op.opers):
            return False
        # Lengths are equal here, so zip() visits every operand pair.
        return not any(mine != theirs for mine, theirs in zip(self.opers, op.opers))

    def __hash__(self):
        shifted = self.size << 4
        return int(hash(self.mnem) ^ shifted)

    def __repr__(self):
        """
        Over-ride this if you want to make arch specific repr.
        """
        rendered = [oper.repr(self) for oper in self.opers]
        return self.mnem + " " + ",".join(rendered)

    def __len__(self):
        return int(self.size)

    # NOTE: From here down is mostly things that architecture specific opcode
    # extensions should override.
    def getBranches(self, emu=None):
        """
        Return a list of tuples.  Each tuple contains the target VA of the
        branch, and a possible set of flags showing what type of branch it is.

        See the BR_FOO types for all the supported envi branch flags....
        Example: for bva,bflags in op.getBranches():

        This base implementation returns an empty tuple.
        """
        return ()

    def render(self, mcanv):
        """
        Render this opcode to the memory canvas passed in.  This is used for
        both simple printing AND more complex representations.
        """
        text = repr(self)
        mcanv.addText(text)

    def getPrefixName(self):
        """
        Get the name of the prefixes associated with the specified
        architecture specific prefix bitmask.
        """
        return "".join(name for flag, name in self.prefix_names if self.prefixes & flag)

    def getOperValue(self, idx, emu=None):
        """
        Return the value of the operand at index idx (emu may be None).
        """
        return self.opers[idx].getOperValue(self, emu=emu)

    def getOperands(self):
        """
        Return a new list holding this opcode's operands.
        """
        return list(self.opers)
382
class Emulator(e_reg.RegisterContext, e_mem.MemoryObject):
    """
    The Emulator class is mostly "Abstract" in the java
    Interface sense.  The emulator should be able to
    be extended for the architectures which are included
    in the envi framework.  You *must* mix in
    an instance of your architecture abstraction module.

    (NOTE: Most users will just use an arch mod and call getEmulator())

    The intention is for "light weight" emulation to be
    implemented mostly for user-space emulation of
    protected mode execution.
    """
    def __init__(self, archmod=None):
        """
        Initialize the memory object and register context bases, the
        default segment list, the calling convention table and the
        mnemonic -> handler method table (op_methods).
        """
        e_mem.MemoryObject.__init__(self, archmod=archmod)
        e_reg.RegisterContext.__init__(self)

        self._emu_segments = [ (0, 0xffffffff), ]
        self._emu_call_convs = {}

        # Automagically setup an instruction mnemonic handler dict
        # by finding all methods starting with i_ and assume they
        # implement an instruction by mnemonic
        # FIXME THIS *MUST* GET FASTER FOR UTIL FUNCS!
        #       POSSIBLY DECLARE IN ADVANCE?
        self.op_methods = {}
        for name in dir(self):
            if name.startswith("i_"):
                self.op_methods[name[2:]] = getattr(self, name)

    def getArchModule(self):
        raise Exception('Emulators *must* implement getArchModule()!')

    def getEmuSnap(self):
        """
        Return the data needed to "snapshot" this emulator.  For most
        archs, this method will be enough (it takes the memory object,
        and register values with it)
        """
        regs = self.getRegisterSnap()
        mem = self.getMemorySnap()
        return regs,mem

    def setEmuSnap(self, snap):
        """
        Restore a (regs, mem) snapshot as returned by getEmuSnap().
        """
        regs,mem = snap
        self.setRegisterSnap(regs)
        self.setMemorySnap(mem)

    def executeOpcode(self, opobj):
        """
        Execute a single opcode object.  This base implementation
        always raises ArchNotImplemented.
        """
        raise ArchNotImplemented()

    def run(self, stepcount=None):
        """
        Run the emulator until "something" happens.
        (breakpoint, segv, syscall, etc...)

        With a stepcount, exactly that many stepi() calls are made;
        without one, stepi() is called until an exception propagates.
        """
        if stepcount is not None:
            for i in xrange(stepcount):
                self.stepi()
        else:
            while True:
                self.stepi()

    def stepi(self):
        """
        Parse the opcode at the current program counter and execute it.
        """
        pc = self.getProgramCounter()
        op = self.parseOpcode(pc)
        self.executeOpcode(op)

    def getSegmentInfo(self, op):
        """
        Return the (base, size) tuple of the segment selected for op.
        """
        idx = self.getSegmentIndex(op)
        return self._emu_segments[idx]

    def getSegmentIndex(self, op):
        """
        The *default* segmentation is none (most arch's will over-ride).
        This method may be implemented to return a segment index based on either
        emulator state or properties of the particular instruction in question.
        """
        return 0

    def setSegmentInfo(self, idx, base, size):
        '''
        Set a base and size for a given segment index.
        '''
        # An index one past the end grows the list; anything else must
        # already exist (a larger index raises IndexError).
        if len(self._emu_segments) - idx == 0:
            self._emu_segments.append( (base, size) )
            return

        self._emu_segments[idx] = (base,size)

    def getOperValue(self, op, idx):
        """
        Return the value for the operand at index idx for
        the given opcode reading memory and register states if necessary.

        In partially-defined emulation, this may return None
        """
        oper = op.opers[idx]
        return oper.getOperValue(op, self)

    def getOperAddr(self, op, idx):
        """
        Return the address that an operand which deref's memory
        would read from on getOperValue().
        """
        oper = op.opers[idx]
        return oper.getOperAddr(op, self)

    def setOperValue(self, op, idx, value):
        """
        Set the value of the target operand at index idx from
        opcode op.
        (obviously OM_IMMEDIATE *cannot* be set)
        """
        oper = op.opers[idx]
        return oper.setOperValue(op, self, value)

    def getCallArgs(self, count, cc):
        """
        Emulator implementors can implement this method to allow
        analysis modules a platform/architecture independent way
        to get stack/reg/whatever args.

        Usage: getCallArgs(3, "stdcall") -> (0, 32, 0xf00)

        Raises UnknownCallingConvention when cc has not been registered.
        """
        c = self._emu_call_convs.get(cc, None)
        if c is None:
            # EmuException's constructor takes the emulator first (it reads
            # the program counter from it); the convention name is the message.
            raise UnknownCallingConvention(self, cc)

        return c.getCallArgs(self, count)

    def setReturnValue(self, value, cc, argc=0):
        """
        Emulator implementors can implement this method to allow
        analysis modules a platform/architecture independent way
        to set a function return value. (this should also take
        care of any argument cleanup or other return time tasks
        for the calling convention)

        Raises UnknownCallingConvention when cc has not been registered.
        """
        c = self._emu_call_convs.get(cc, None)
        if c is None:
            # See getCallArgs(): the emulator must be the first argument.
            raise UnknownCallingConvention(self, cc)

        return c.setReturnValue(self, value, argc)

    def addCallingConvention(self, name, obj):
        """
        Register the calling convention object obj under name.
        """
        self._emu_call_convs[name] = obj

    def hasCallingConvention(self, name):
        """
        Return True if a (non-None) calling convention is registered as name.
        """
        return self._emu_call_convs.get(name) is not None

    def getCallingConvention(self, name):
        """
        Return the calling convention registered as name (None if absent).
        """
        return self._emu_call_convs.get(name)

    def getCallingConventions(self):
        """
        Return the (name, object) items of the calling convention table.
        """
        return self._emu_call_convs.items()
546
class CallingConvention:
    """
    Implement calling conventions for your arch.

    The two value methods below are no-ops returning None; the symbolik
    methods raise until a subclass provides them.
    """

    def setReturnValue(self, emu, value, ccinfo=None):
        return None

    def getCallArgs(self, emu, count):
        return None

    # If you want your arch to use symbolik emulation...
    def getSymbolikArgs(self, emu, argv):
        clsname = self.__class__.__name__
        raise Exception('getSymbolikArgs() not in %s' % clsname)

    def setSymbolikReturn(self, emu, sym, argv):
        clsname = self.__class__.__name__
        raise Exception('setSymbolikReturn() not in %s' % clsname)

# NOTE: This mapping is needed because of inconsistencies
# in how different compilers and versions of python embed
# the machine setting.
arch_xlate_32 = dict.fromkeys((
    'i386',
    'i486',
    'i586',
    'i686',
    'x86',
    'i86pc',  # Solaris
    '',       # Stupid windows...
    'AMD64',  # ActiveState python can say AMD64 in 32 bit install?
), 'i386')

arch_xlate_64 = dict.fromkeys((
    'x86_64',
    'AMD64',
    'amd64',
    'i386',   # MAC ports builds are 64bit and say i386
    '',       # And again....
), 'amd64')
def getCurrentArch():
    """
    Return an envi normalized name for the current arch.

    The native pointer width selects the 32 or 64 bit translation table,
    which is then indexed by platform.machine().

    Raises ArchNotImplemented (carrying the machine string) when the
    machine string is not in the selected table, or when the pointer
    width is neither 4 nor 8 bytes.
    """
    width = struct.calcsize("P")
    mach = platform.machine()   # 'i386','ppc', etc...

    # Start from None so an unexpected pointer width falls through to the
    # ArchNotImplemented below instead of hitting an unbound local.
    ret = None
    if width == 4:
        ret = arch_xlate_32.get(mach)

    elif width == 8:
        ret = arch_xlate_64.get(mach)

    if ret is None:
        raise ArchNotImplemented(mach)

    return ret
603
def getArchModule(name=None):
    """
    Return an Envi architecture module instance for the given
    architecture name (the current architecture when name is None).

    Names handled here:

    i386  - Intel i386 (also accepted as i486, i586, i686 and x86)
    amd64 - The new 64bit AMD spec.
    arm   - handled by envi.archs.arm

    Any other name raises ArchNotImplemented.
    """
    if name is None:
        name = getCurrentArch()

    # Some builds have x86 (py2.6) and some have other stuff...
    if name in ("i386", "i486", "i586", "i686", "x86"):
        import envi.archs.i386 as e_i386
        return e_i386.i386Module()

    if name == "amd64":
        import envi.archs.amd64 as e_amd64
        return e_amd64.Amd64Module()

    if name == 'arm':
        import envi.archs.arm as e_arm
        return e_arm.ArmModule()

    raise ArchNotImplemented(name)
632