Package envi :: Package archs :: Package i386 :: Module disasm
[hide private]
[frames] | no frames]

Source Code for Module envi.archs.i386.disasm

   1  
 
   2  """
 
   3  The guts for the i386 envi opcode disassembler.
 
   4  """ 
   5  
 
   6  import struct 
   7  
 
   8  import envi 
   9  import envi.bits as e_bits 
  10  
 
  11  import opcode86 
  12  all_tables = opcode86.tables86 
  13  
 
  14  # Grab our register enums etc...
 
  15  from envi.archs.i386.regs import * 
  16  
 
  17  # Our instruction prefix masks
 
  18  # NOTE: table 3-4 (section 3.6) of intel 1 shows how REX/OP_SIZE
 
  19  # interact...
 
  20  INSTR_PREFIX=      0x0001 
  21  PREFIX_LOCK =      0x0002 
  22  PREFIX_REPNZ=      0x0004 
  23  PREFIX_REPZ =      0x0008 
  24  PREFIX_REP  =      0x0010 
  25  PREFIX_REP_SIMD=   0x0020 
  26  PREFIX_OP_SIZE=    0x0040 
  27  PREFIX_ADDR_SIZE=  0x0080 
  28  PREFIX_SIMD=       0x0100 
  29  PREFIX_CS  =       0x0200 
  30  PREFIX_SS  =       0x0400 
  31  PREFIX_DS  =       0x0800 
  32  PREFIX_ES  =       0x1000 
  33  PREFIX_FS  =       0x2000 
  34  PREFIX_GS  =       0x4000 
  35  PREFIX_REG_MASK=   0x8000 
  36  
 
  37  # envi.registers meta offsets
 
  38  RMETA_LOW8  = 0x00080000 
  39  RMETA_HIGH8 = 0x08080000 
  40  RMETA_LOW16 = 0x00100000 
  41  
 
  42  # Use a list here instead of a dict for speed (max 255 anyway)
 
  43  i386_prefixes = [ None for i in range(256) ] 
  44  i386_prefixes[0xF0] = PREFIX_LOCK 
  45  i386_prefixes[0xF2] = PREFIX_REPNZ 
  46  i386_prefixes[0xF3] = PREFIX_REP 
  47  i386_prefixes[0x2E] = PREFIX_CS 
  48  i386_prefixes[0x36] = PREFIX_SS 
  49  i386_prefixes[0x3E] = PREFIX_DS 
  50  i386_prefixes[0x26] = PREFIX_ES 
  51  i386_prefixes[0x64] = PREFIX_FS 
  52  i386_prefixes[0x65] = PREFIX_GS 
  53  i386_prefixes[0x66] = PREFIX_OP_SIZE 
  54  i386_prefixes[0x67] = PREFIX_ADDR_SIZE 
  55  
 
  56  # The scale byte index into this for multiplier imm
 
  57  scale_lookup = (1, 2, 4, 8) 
  58  
 
  59  # A set of instructions that are considered privileged (mark with IF_PRIV)
 
  60  # FIXME this should be part of the opcdode tables!
 
  61  priv_lookup = {
 
  62      "int":True,
 
  63      "in":True,
 
  64      "out":True,
 
  65      "insb":True,
 
  66      "outsb":True,
 
  67      "insd":True,
 
  68      "outsd":True,
 
  69      "vmcall":True,
 
  70      "vmlaunch":True,
 
  71      "vmresume":True,
 
  72      "vmxoff":True,
 
  73      "vmread":True,
 
  74      "vmwrite":True,
 
  75      "rsm":True,
 
  76      "lar":True,
 
  77      "lsl":True,
 
  78      "clts":True,
 
  79      "invd":True,
 
  80      "wbinvd":True,
 
  81      "wrmsr":True,
 
  82      "rdmsr":True,
 
  83      "sysexit":True,
 
  84      "lgdt":True,
 
  85      "lidt":True,
 
  86      "lmsw":True,
 
  87      "monitor":True,
 
  88      "mwait":True,
 
  89      "vmclear":True,
 
  90      "vmptrld":True,
 
  91      "vmptrst":True,
 
  92      "vmxon":True,
 
  93  } 
  94  
 
  95  # Map of codes to their respective envi flags
 
  96  iflag_lookup = {
 
  97      opcode86.INS_RET: envi.IF_NOFALL|envi.IF_RET,
 
  98      opcode86.INS_CALL: envi.IF_CALL,
 
  99      opcode86.INS_HALT: envi.IF_NOFALL,
 
 100      opcode86.INS_CALLCC: envi.IF_CALL,
 
 101      opcode86.INS_BRANCH: envi.IF_NOFALL | envi.IF_BRANCH,
 
 102      opcode86.INS_BRANCHCC: envi.IF_BRANCH,
 
 103  } 
 104  
 
 105  sizenames = ["" for x in range(17)] 
 106  sizenames[1] = "byte" 
 107  sizenames[2] = "word" 
 108  sizenames[4] = "dword" 
 109  sizenames[8] = "qword" 
 110  sizenames[16] = "oword" 
 111  
 
def addrToName(mcanv, va):
    """
    Return a printable name for the address va.

    The canvas symbol resolver (mcanv.syms) is asked for a symbol at va;
    when it has one the repr() of that symbol is returned, otherwise the
    address formatted as 8 digit hex.
    """
    symbol = mcanv.syms.getSymByAddr(va)
    return repr(symbol) if symbol != None else "0x%.8x" % va
###########################################################################
#
# Operand objects for the i386 architecture
#
class i386RegOper(envi.RegisterOper):
    """
    An operand representing a single register.

    reg is the register index understood by the disassembler's register
    context and tsize is the operand size in bytes.
    """

    def __init__(self, reg, tsize):
        self.reg = reg
        self.tsize = tsize

    def repr(self, op):
        """Return the register name as a string."""
        return self._dis_regctx.getRegisterName(self.reg)

    def getOperValue(self, op, emu=None):
        """Return the register's value, or None when no emulator is given."""
        if emu is None: return None # This operand type requires an emulator
        return emu.getRegister(self.reg)

    def setOperValue(self, op, emu, value):
        """Store value into the register through the emulator."""
        emu.setRegister(self.reg, value)

    def render(self, mcanv, op, idx):
        """
        Render this operand to the memory canvas.

        A symbol hint registered for (op.va, idx) takes the place of the
        register name when there is one.
        """
        hint = mcanv.syms.getSymHint(op.va, idx)
        if hint is not None:
            # The hint itself is the text to show (this used to reference
            # the not yet assigned local "name" and died with a NameError).
            mcanv.addNameText(hint, typename="registers")
        else:
            name = self._dis_regctx.getRegisterName(self.reg)
            mcanv.addNameText(name, typename="registers")

    def __eq__(self, other):
        """Equal when other is an i386RegOper with the same reg and tsize."""
        if not isinstance(other, i386RegOper):
            return False
        if other.reg != self.reg:
            return False
        if other.tsize != self.tsize:
            return False
        return True
156
class i386ImmOper(envi.ImmedOper):
    """
    An operand representing an immediate.

    imm is the immediate value and tsize its size in bytes.
    """

    def __init__(self, imm, tsize):
        self.tsize = tsize
        self.imm = imm

    def repr(self, op):
        """Return the immediate as text: hex when above 4096, else decimal."""
        # NOTE(review): render() below switches to hex at >= 4096 while this
        # method uses > 4096, so exactly 4096 is displayed differently.
        return "0x%.8x" % self.imm if self.imm > 4096 else str(self.imm)

    def getOperValue(self, op, emu=None):
        """Return the immediate value (no emulator is needed)."""
        return self.imm

    def render(self, mcanv, op, idx):
        """
        Render the immediate to the memory canvas.

        A symbol hint for (op.va, idx) wins over everything else; values
        which are valid pointers are rendered as addresses, anything else
        as a plain number.
        """
        imm = self.imm
        hint = mcanv.syms.getSymHint(op.va, idx)
        is_pointer = mcanv.mem.isValidPointer(imm)

        if hint != None:
            if is_pointer:
                mcanv.addVaText(hint, imm)
            else:
                mcanv.addNameText(hint)
            return

        if is_pointer:
            mcanv.addVaText(addrToName(mcanv, imm), imm)
            return

        if imm >= 4096:
            text = '0x%.8x' % imm
        else:
            text = str(imm)
        mcanv.addNameText(text)

    def __eq__(self, other):
        """Equal when other is an i386ImmOper with the same imm and tsize."""
        if not isinstance(other, i386ImmOper):
            return False
        return not (other.imm != self.imm or other.tsize != self.tsize)
199
class i386PcRelOper(envi.Operand):
    """
    This is the operand used for EIP relative offsets
    for operands on instructions like jmp/call

    imm is the signed offset and tsize its size in bytes.  The target
    address is op.va + op.size + imm, i.e. it is relative to the end of
    the instruction that carries the operand.
    """

    def __init__(self, imm, tsize):
        self.imm = imm
        self.tsize = tsize

    def repr(self, op):
        """Return the absolute target address as 8 digit hex."""
        return "0x%.8x" % (op.va + op.size + self.imm)

    def isImmed(self):
        return True # FIXME trying this out....

    def getOperValue(self, op, emu=None):
        """Return the absolute target address (no emulator is needed)."""
        return op.va + op.size + self.imm

    def render(self, mcanv, op, idx):
        """
        Render the target address to the memory canvas, labelled with the
        symbol hint for (op.va, idx) when there is one and with
        addrToName() otherwise.
        """
        # Compute the target first: both paths need it (the hint path used
        # to reference "value" before assignment and raised a NameError).
        value = op.va + op.size + self.imm
        hint = mcanv.syms.getSymHint(op.va, idx)
        if hint is not None:
            mcanv.addVaText(hint, value)
        else:
            name = addrToName(mcanv, value)
            mcanv.addVaText(name, value)

    def __eq__(self, other):
        """Equal when other is an i386PcRelOper with the same imm and tsize."""
        if not isinstance(other, i386PcRelOper):
            return False
        if other.imm != self.imm:
            return False
        if other.tsize != self.tsize:
            return False
        return True
235
class i386RegMemOper(envi.DerefOper):
    """
    An operand which represents the result of reading/writing memory from the
    dereference (with possible displacement) from a given register.

    reg is the base register index, tsize the size in bytes of the memory
    access and disp the signed displacement added to the register.
    """

    def __init__(self, reg, tsize, disp=0):
        self._is_deref = True
        self.disp = disp
        self.tsize = tsize
        self.reg = reg

    def repr(self, op):
        """Return text such as "dword [reg + 8]" for this operand."""
        regname = self._dis_regctx.getRegisterName(self.reg)
        disp = self.disp
        if disp > 0:
            return "%s [%s + %d]" % (sizenames[self.tsize], regname, disp)
        if disp < 0:
            return "%s [%s - %d]" % (sizenames[self.tsize], regname, abs(disp))
        return "%s [%s]" % (sizenames[self.tsize], regname)

    def getOperValue(self, op, emu=None):
        """Read tsize bytes at the operand address; None without emulator."""
        if emu == None:
            # This operand type requires an emulator
            return None
        return emu.readMemValue(self.getOperAddr(op, emu), self.tsize)

    def setOperValue(self, op, emu, val):
        """Write val (tsize bytes) to the operand address."""
        emu.writeMemValue(self.getOperAddr(op, emu), val, self.tsize)

    def getOperAddr(self, op, emu):
        """
        Return segment base + register value + displacement, or None when
        no emulator is given.
        """
        if emu == None:
            # This operand type requires an emulator
            return None
        segbase, _seglen = emu.getSegmentInfo(op)
        regval = emu.getRegister(self.reg)
        return segbase + regval + self.disp

    def isDeref(self):
        # The disassembler may reach in and set this (if lea...)
        return self._is_deref

    def render(self, mcanv, op, idx):
        """
        Render the operand to the memory canvas.  A symbol hint for
        (op.va, idx) is shown in place of the numeric displacement.
        """
        mcanv.addNameText(sizenames[self.tsize])
        mcanv.addText(" [")
        mcanv.addNameText(self._dis_regctx.getRegisterName(self.reg), typename="registers")

        hint = mcanv.syms.getSymHint(op.va, idx)
        if hint != None:
            mcanv.addText(" + ")
            mcanv.addNameText(hint)
        elif self.disp > 0:
            mcanv.addText(" + ")
            mcanv.addNameText(str(self.disp))
        elif self.disp < 0:
            mcanv.addText(" - ")
            mcanv.addNameText(str(abs(self.disp)))

        mcanv.addText("]")

    def __eq__(self, other):
        """Equal when other is an i386RegMemOper with equal reg/disp/tsize."""
        if not isinstance(other, i386RegMemOper):
            return False
        return not (other.reg != self.reg
                    or other.disp != self.disp
                    or other.tsize != self.tsize)
300
class i386ImmMemOper(envi.DerefOper):
    """
    An operand which represents the dereference (memory read/write) of
    a memory location associated with an immediate.

    imm is the address and tsize the size in bytes of the memory access.
    """

    def __init__(self, imm, tsize):
        self._is_deref = True
        self.tsize = tsize
        self.imm = imm

    def isDeref(self):
        # The disassembler may reach in and set this (if lea...)
        return self._is_deref

    def repr(self, op):
        """Return text such as "dword [0x00401000]" for this operand."""
        return "%s [0x%.8x]" % (sizenames[self.tsize], self.imm)

    def getOperValue(self, op, emu=None):
        """Read tsize bytes at the operand address; None without emulator."""
        if emu == None:
            # This operand type requires an emulator
            return None
        return emu.readMemValue(self.getOperAddr(op, emu), self.tsize)

    def setOperValue(self, op, emu, val):
        """Write val (tsize bytes) to the operand address."""
        emu.writeMemValue(self.getOperAddr(op, emu), val, self.tsize)

    def getOperAddr(self, op, emu=None):
        """
        Return the immediate address; with an emulator the segment base
        reported for op is added to it.
        """
        addr = self.imm
        if emu != None:
            segbase, _seglen = emu.getSegmentInfo(op)
            addr = addr + segbase
        return addr

    def render(self, mcanv, op, idx):
        """
        Render the operand to the memory canvas.  The address is labelled
        with the symbol hint for (op.va, idx) when there is one and with
        addrToName() otherwise.
        """
        mcanv.addNameText(sizenames[self.tsize])
        mcanv.addText(" [")

        addr = self.imm
        label = mcanv.syms.getSymHint(op.va, idx)
        if not (label != None):
            label = addrToName(mcanv, addr)
        mcanv.addVaText(label, addr)

        mcanv.addText("]")

    def __eq__(self, other):
        """Equal when other is an i386ImmMemOper with equal imm and tsize."""
        if not isinstance(other, i386ImmMemOper):
            return False
        return not (other.imm != self.imm or other.tsize != self.tsize)
354
class i386SibOper(envi.DerefOper):
    """
    An operand which represents the result of reading/writing memory at an
    address built from a scale-index-base (SIB) expression:

        (reg or imm) + index * scale + disp

    tsize is the size in bytes of the memory access.  reg, imm and index
    are None when the corresponding component is absent.
    """

    def __init__(self, tsize, reg=None, imm=None, index=None, scale=1, disp=0):
        self.reg = reg
        self.imm = imm
        self.index = index
        self.scale = scale
        self.tsize = tsize
        self.disp = disp
        self._is_deref = True

    def __eq__(self, other):
        """Equal when other is an i386SibOper with all components equal."""
        if not isinstance(other, i386SibOper):
            return False
        if other.imm != self.imm:
            return False
        if other.reg != self.reg:
            return False
        if other.index != self.index:
            return False
        if other.scale != self.scale:
            return False
        if other.disp != self.disp:
            return False
        if other.tsize != self.tsize:
            return False
        return True

    def isDeref(self):
        # The disassembler may reach in and set this (if lea...)
        return self._is_deref

    def repr(self, op):
        """Return text such as "dword [reg + index * 4 + 8]"."""
        r = "%s [" % sizenames[self.tsize]

        if self.reg is not None:
            r += self._dis_regctx.getRegisterName(self.reg)

        if self.imm is not None:
            r += "0x%.8x" % self.imm

        if self.index is not None:
            r += " + %s" % self._dis_regctx.getRegisterName(self.index)
            if self.scale != 1:
                r += " * %d" % self.scale

        if self.disp > 0:
            r += " + %d" % self.disp
        elif self.disp < 0:
            r += " - %d" % abs(self.disp)

        r += "]"

        return r

    def getOperValue(self, op, emu=None):
        """Read tsize bytes at the operand address; None without emulator."""
        if emu is None: return None # This operand type requires an emulator
        return emu.readMemValue(self.getOperAddr(op, emu), self.tsize)

    def setOperValue(self, op, emu, val):
        """Write val (tsize bytes) to the operand address."""
        emu.writeMemValue(self.getOperAddr(op, emu), val, self.tsize)

    def getOperAddr(self, op, emu=None):
        """
        Return the effective address: imm + reg + index * scale + segment
        base + disp (absent components contribute nothing), or None when
        no emulator is given.
        """
        if emu is None: return None # This operand type requires an emulator

        ret = 0

        if self.imm is not None:
            ret += self.imm

        if self.reg is not None:
            ret += emu.getRegister(self.reg)

        if self.index is not None:
            ret += (emu.getRegister(self.index) * self.scale)

        # Handle x86 segmentation
        base, size = emu.getSegmentInfo(op)
        ret += base

        return ret + self.disp

    def _getOperBase(self, emu=None):
        """
        Special SIB only method for getting the SIB base value: the
        immediate when there is one, else the base register's value from
        the emulator, else None.
        """
        # Compare against None so that an immediate base of 0 is still
        # treated as present (a truth test would skip it and go on to ask
        # the emulator for register "None").
        if self.imm is not None:
            return self.imm
        if emu:
            return emu.getRegister(self.reg)
        return None

    def render(self, mcanv, op, idx):
        """
        Render the operand to the memory canvas.  A symbol hint for
        (op.va, idx) is shown in place of the numeric displacement.
        """
        mcanv.addNameText(sizenames[self.tsize])
        mcanv.addText(" [")
        if self.imm is not None:
            name = addrToName(mcanv, self.imm)
            mcanv.addVaText(name, self.imm)

        if self.reg is not None:
            name = self._dis_regctx.getRegisterName(self.reg)
            mcanv.addNameText(name, typename="registers")

        # Does our SIB have a scale
        if self.index is not None:
            mcanv.addText(" + ")
            name = self._dis_regctx.getRegisterName(self.index)
            mcanv.addNameText(name, typename="registers")
            if self.scale != 1:
                mcanv.addText(" * ")
                mcanv.addNameText(str(self.scale))

        hint = mcanv.syms.getSymHint(op.va, idx)
        if hint is not None:
            mcanv.addText(" + ")
            mcanv.addNameText(hint)

        else:
            # If we have a displacement, add it.  Negative displacements
            # are shown as " - N" (the same way repr() prints them)
            # instead of " + -N".
            if self.disp > 0:
                mcanv.addText(" + ")
                mcanv.addNameText(str(self.disp))
            elif self.disp < 0:
                mcanv.addText(" - ")
                mcanv.addNameText(str(abs(self.disp)))

        mcanv.addText("]")
481
class i386Opcode(envi.Opcode):
    """
    A decoded i386 instruction.
    """

    # Printable prefix names
    prefix_names = [
        (PREFIX_LOCK, "lock"),
        (PREFIX_REPNZ, "repnz"),
        (PREFIX_REP, "rep"),
        (PREFIX_CS, "cs"),
        (PREFIX_SS, "ss"),
        (PREFIX_DS, "ds"),
        (PREFIX_ES, "es"),
        (PREFIX_FS, "fs"),
        (PREFIX_GS, "gs"),
    ]

    def getBranches(self, emu=None):
        """
        Return a list of (address, branch flags) tuples describing where
        execution may continue after this instruction.

        The fall through address comes first (unless the instruction is
        flagged IF_NOFALL), followed by the target(s) of a call or branch.
        With an emulator, a jump through a scale 4 SIB table is resolved
        into the individual table entries.
        """
        branches = []

        # To start with we have no flags.
        brflags = 0
        want_target = False

        # If we are a conditional branch, even our fallthrough
        # case is conditional...
        if self.opcode == opcode86.INS_BRANCHCC:
            want_target = True
            brflags |= envi.BR_COND

        # If we can fall through, reflect that...
        if not self.iflags & envi.IF_NOFALL:
            branches.append((self.va + self.size, brflags | envi.BR_FALL))

        # In intel, if we have no operands, it has no
        # further branches...
        if len(self.opers) == 0:
            return branches

        if self.opcode == opcode86.INS_CALL:
            want_target = True
            brflags |= envi.BR_PROC

        # A conditional call? really? what compiler did you use? ;)
        elif self.opcode == opcode86.INS_CALLCC:
            want_target = True
            brflags |= (envi.BR_PROC | envi.BR_COND)

        elif self.opcode == opcode86.INS_BRANCH:
            first = self.opers[0]
            if not (isinstance(first, i386SibOper) and first.scale == 4):
                want_target = True
            else:
                # In the case with no emulator, note that our deref is
                # from the base of a table. If we have one, parse out all
                # the valid pointers from our base
                tablepos = first._getOperBase(emu)
                if emu == None:
                    branches.append((tablepos, brflags | envi.BR_DEREF | envi.BR_TABLE))
                else:
                    # Since we're parsing this out, lets just resolve the
                    # derefs for our caller...
                    while True:
                        target = emu.readMemValue(tablepos, first.tsize)
                        if not emu.isValidPointer(target):
                            break
                        branches.append((target, envi.BR_COND))
                        tablepos += first.tsize

        if not want_target:
            return branches

        first = self.opers[0]
        if first.isDeref():
            brflags |= envi.BR_DEREF
            target = first.getOperAddr(self, emu=emu)
        else:
            target = first.getOperValue(self, emu=emu)

        branches.append((target, brflags))
        return branches

    def render(self, mcanv):
        """
        Render this opcode to the specified memory canvas
        """
        if self.prefixes:
            pfxname = self.getPrefixName()
            if pfxname:
                mcanv.addNameText("%s: " % pfxname, pfxname)

        mcanv.addNameText(self.mnem, typename="mnemonic")
        mcanv.addText(" ")

        # Allow each of our operands to render, separated by commas
        for position, operand in enumerate(self.opers):
            if position:
                mcanv.addText(",")
            operand.render(mcanv, self, position)
# Indexes into an opcode table entry which hold the operand descriptions
operand_range = tuple(range(2, 5))

# Disassembler modes (also used to index an operand size list, see
# i386Disasm._dis_calc_tsize)
MODE_16, MODE_32, MODE_64 = range(3)
class i386Disasm:
    """
    The i386 instruction decoder.

    disasm() turns the bytes at an offset into an i386Opcode.  Operands are
    decoded by the ameth_* "addressing method" parsers, which are picked
    per operand from the opcode tables (opcode86).
    """

    def __init__(self, mode=MODE_32):
        # NOTE(review): the mode argument is accepted but not used; the
        # decoder always records MODE_32.  Left alone on purpose.
        self._dis_mode = MODE_32
        self._dis_prefixes = i386_prefixes
        self._dis_regctx = i386RegisterContext()

        # This will make function lookups nice and quick (indexed by the
        # addressing method constant shifted right by 16)
        self._dis_amethods = [ None for x in range(22) ]
        self._dis_amethods[opcode86.ADDRMETH_A>>16] = self.ameth_a
        self._dis_amethods[opcode86.ADDRMETH_C>>16] = self.ameth_c
        self._dis_amethods[opcode86.ADDRMETH_D>>16] = self.ameth_d
        self._dis_amethods[opcode86.ADDRMETH_E>>16] = self.ameth_e
        self._dis_amethods[opcode86.ADDRMETH_M>>16] = self.ameth_e
        self._dis_amethods[opcode86.ADDRMETH_N>>16] = self.ameth_n
        self._dis_amethods[opcode86.ADDRMETH_Q>>16] = self.ameth_q
        self._dis_amethods[opcode86.ADDRMETH_R>>16] = self.ameth_e
        self._dis_amethods[opcode86.ADDRMETH_W>>16] = self.ameth_w
        self._dis_amethods[opcode86.ADDRMETH_I>>16] = self.ameth_i
        self._dis_amethods[opcode86.ADDRMETH_J>>16] = self.ameth_j
        self._dis_amethods[opcode86.ADDRMETH_O>>16] = self.ameth_o
        self._dis_amethods[opcode86.ADDRMETH_G>>16] = self.ameth_g
        self._dis_amethods[opcode86.ADDRMETH_P>>16] = self.ameth_p
        self._dis_amethods[opcode86.ADDRMETH_S>>16] = self.ameth_s
        self._dis_amethods[opcode86.ADDRMETH_U>>16] = self.ameth_u
        self._dis_amethods[opcode86.ADDRMETH_V>>16] = self.ameth_v
        self._dis_amethods[opcode86.ADDRMETH_X>>16] = self.ameth_x
        self._dis_amethods[opcode86.ADDRMETH_Y>>16] = self.ameth_y

        # Offsets used to add in addressing method parsers
        self.ROFFSETMMX   = getRegOffset(i386regs, "mm0")
        self.ROFFSETSIMD  = getRegOffset(i386regs, "xmm0")
        self.ROFFSETDEBUG = getRegOffset(i386regs, "debug0")
        self.ROFFSETCTRL  = getRegOffset(i386regs, "ctrl0")
        self.ROFFSETTEST  = getRegOffset(i386regs, "test0")
        self.ROFFSETSEG   = getRegOffset(i386regs, "es")
        self.ROFFSETFPU   = getRegOffset(i386regs, "st0")

    def parse_modrm(self, byte):
        """
        Split a ModRM byte (given as an integer) into its fields.

        Returns the tuple (mod, reg, rm): bits 7-6, bits 5-3 and bits 2-0.
        """
        mod = (byte >> 6) & 0x3
        reg = (byte >> 3) & 0x7
        rm = byte & 0x7
        return (mod,reg,rm)

    def byteRegOffset(self, val):
        """
        Return the meta register index for the 8 bit register number val:
        0-3 select the low byte meta of registers 0-3, 4-7 the high byte
        meta of registers 0-3.
        """
        # NOTE: This is used for high byte metas in 32 bit mode only
        if val < 4:
            return val + RMETA_LOW8
        return (val-4) + RMETA_HIGH8

    # Parse modrm as though addr mode might not be just a reg
    def extended_parse_modrm(self, bytes, offset, opersize, regbase=0):
        """
        Decode the r/m half of the ModRM byte at bytes[offset], including
        any SIB byte and displacement that follow it.

        opersize is the operand size in bytes and regbase is added to every
        register index that is decoded (to select another register set).

        Return a tuple of (size, Operand) where size is the number of
        bytes consumed, counting the ModRM byte itself.
        """

        mod,reg,rm = self.parse_modrm(ord(bytes[offset]))

        size = 1

        if mod == 3: # Easy one, just a reg
            # FIXME only use self.byteRegOffset in 32 bit mode, NOT 64 bit...
            if opersize == 1: rm = self.byteRegOffset(rm)
            elif opersize == 2: rm += RMETA_LOW16
            return (size, i386RegOper(rm+regbase, opersize))

        elif mod == 0:
            # means we are [reg] unless rm == 4 (SIB) or rm == 5 ([imm32])
            if rm == 5:
                imm = e_bits.parsebytes(bytes, offset + size, 4)
                size += 4
                # NOTE: in 64 bit mode, *this* is where we differ, (This case is RIP relative)
                return(size, i386ImmMemOper(imm, opersize))

            elif rm == 4:
                sibsize, scale, index, base, imm = self.parse_sib(bytes, offset+size, mod)
                size += sibsize
                if base is not None: base += regbase   # Adjust for different register addressing modes
                if index is not None: index += regbase # Adjust for different register addressing modes
                oper = i386SibOper(opersize, reg=base, imm=imm, index=index, scale=scale_lookup[scale])
                return (size, oper)

            else:
                return(size, i386RegMemOper(regbase+rm, opersize))

        elif mod == 1:
            # mod 1 means we are [ reg + disp8 ] (unless rm == 4 which means sib + disp8)
            if rm == 4:
                sibsize, scale, index, base, imm = self.parse_sib(bytes, offset+size, mod)
                size += sibsize
                disp = e_bits.parsebytes(bytes, offset+size, 1, sign=True)
                size += 1
                if base is not None: base += regbase   # Adjust for different register addressing modes
                if index is not None: index += regbase # Adjust for different register addressing modes
                oper = i386SibOper(opersize, reg=base, index=index, scale=scale_lookup[scale], disp=disp)
                return (size,oper)
            else:
                x = e_bits.signed(ord(bytes[offset+size]), 1)
                size += 1
                return(size, i386RegMemOper(regbase+rm, opersize, disp=x))

        elif mod == 2:
            # Means we are [ reg + disp32 ] (unless rm == 4 which means SIB + disp32)
            if rm == 4:
                sibsize, scale, index, base, imm = self.parse_sib(bytes,offset+size,mod)
                size += sibsize
                disp = e_bits.parsebytes(bytes, offset + size, 4, sign=True)
                size += 4
                if base is not None: base += regbase   # Adjust for different register addressing modes
                if index is not None: index += regbase # Adjust for different register addressing modes
                oper = i386SibOper(opersize, reg=base, imm=imm, index=index, scale=scale_lookup[scale], disp=disp)
                return (size, oper)

            else:
                # NOTE: Immediate displacements in SIB are still 4 bytes in 64 bit mode
                disp = e_bits.parsebytes(bytes, offset+size, 4, sign=True)
                size += 4
                return(size, i386RegMemOper(regbase+rm, opersize, disp=disp))

        else:
            raise Exception("How does mod == %d" % mod)

    def parse_sib(self, bytes, offset, mod):
        """
        Decode the SIB byte at bytes[offset]; mod is the mod field of the
        ModRM byte that came before it.

        Return a tuple of (size, scale, index, base, imm).  scale is the
        raw 2 bit field (an index into scale_lookup), index is None when
        there is no index register, and for base == 5 with mod == 0 the
        base is None and imm holds the 32 bit immediate that follows.
        size counts the SIB byte plus that immediate.
        """
        byte = ord(bytes[offset])
        scale = (byte >> 6) & 0x3
        index = (byte >> 3) & 0x7
        base  = byte & 0x7
        imm = None

        size = 1

        # Special SIB case with no index reg
        if index == 4:
            index = None

        # Special SIB case with possible immediate
        if base == 5:
            if mod == 0: # [ imm32 + index * scale ]
                base = None
                imm = e_bits.parsebytes(bytes, offset+size, 4, sign=False)
                size += 4
            # FIXME is there special stuff needed here?
            elif mod == 1:
                pass
            elif mod == 2:
                pass

        return (size, scale, index, base, imm)

    def _dis_calc_tsize(self, opertype, prefixes):
        """
        Use the oper type and prefixes to decide on the tsize for
        the operand.

        The size list for opertype comes from opcode86.OPERSIZE; the 32 bit
        entry is used unless the operand size prefix is present, which
        selects the 16 bit entry.  Raises Exception for an operand type
        without a size list.
        """
        mode = MODE_32

        sizelist = opcode86.OPERSIZE.get(opertype, None)
        if sizelist is None:
            # Raising a plain string is a TypeError on Python 2.6 and
            # later, so raise a real exception (as the rest of the file does).
            raise Exception("OPERSIZE FAIL: %.8x" % opertype)

        if prefixes & PREFIX_OP_SIZE:
            mode = MODE_16

        return sizelist[mode]

    def disasm(self, bytes, offset, va):
        """
        Decode one instruction from bytes starting at offset and return
        it as an i386Opcode located at the address va.

        Raises envi.InvalidInstruction when the opcode tables have no
        instruction for the bytes, or when an operand parser runs out of
        data (struct.error).
        """

        # Stuff for opcode parsing
        tabdesc = all_tables[0] # A tuple (optable, shiftbits, mask byte, sub, max)
        startoff = offset # Use startoff as a size knob if needed

        # Stuff we'll be putting in the opcode object
        optype = None # This gets set if we successfully decode below
        mnem = None
        operands = []

        prefixes = 0

        # Eat the prefix bytes
        while True:

            obyte = ord(bytes[offset])

            # This line changes in 64 bit mode
            p = self._dis_prefixes[obyte]
            if p is None:
                break
            # 66 0f is handled through the opcode tables, not as a prefix
            if obyte == 0x66 and ord(bytes[offset+1]) == 0x0f:
                break
            prefixes |= p
            offset += 1
            continue

        # Walk the opcode tables until we reach a final entry
        while True:

            obyte = ord(bytes[offset])

            if (obyte > tabdesc[4]):
                # Jump to the overflow table
                tabdesc = all_tables[tabdesc[5]]

            tabidx = ((obyte - tabdesc[3]) >> tabdesc[1]) & tabdesc[2]
            opdesc = tabdesc[0][tabidx]

            # Hunt down multi-byte opcodes
            nexttable = opdesc[0]
            if nexttable != 0: # If we have a sub-table specified, use it.
                tabdesc = all_tables[nexttable]

                # In the case of 66 0f, the next table is *already* assuming we ate
                # the 66 *and* the 0f...  oblige them.
                if obyte == 0x66 and ord(bytes[offset+1]) == 0x0f:
                    offset += 1

                # Account for the table jump we made
                offset += 1

                continue

            # We are now on the final table...
            mnem = opdesc[6]
            optype = opdesc[1]
            if tabdesc[2] == 0xff:
                offset += 1 # For our final opcode byte
            break

        if optype == 0:
            raise envi.InvalidInstruction(bytes=bytes[startoff:startoff+16])

        operoffset = 0
        # Begin parsing operands based off address method
        for i in operand_range:

            oper = None # Set this if we end up with an operand
            osize = 0

            # Pull out the operand description from the table
            operflags = opdesc[i]
            opertype = operflags & opcode86.OPTYPE_MASK
            addrmeth = operflags & opcode86.ADDRMETH_MASK

            # If there are no more operands, break out of the loop!
            if operflags == 0:
                break

            tsize = self._dis_calc_tsize(opertype, prefixes)

            # If addrmeth is zero, we have operands embedded in the opcode
            if addrmeth == 0:
                osize = 0
                oper = self.ameth_0(operflags, opdesc[5+i], tsize, prefixes)

            else:
                ameth = self._dis_amethods[addrmeth >> 16]
                if ameth is None:
                    raise Exception("Implement Addressing Method 0x%.8x" % addrmeth)

                # NOTE: Depending on your addrmethod you may get beginning of operands, or offset
                try:
                    if addrmeth == opcode86.ADDRMETH_I or addrmeth == opcode86.ADDRMETH_J:
                        osize, oper = ameth(bytes, offset+operoffset, tsize, prefixes)

                        # If we are a sign extended immediate and not the same as the other operand,
                        # do the sign extension during disassembly so nothing else has to worry about it..
                        if operflags & opcode86.OP_SIGNED and len(operands) and tsize != operands[-1].tsize:
                            otsize = operands[-1].tsize
                            oper.imm = e_bits.sign_extend(oper.imm, oper.tsize, otsize)
                            oper.tsize = otsize

                    else:
                        osize, oper = ameth(bytes, offset, tsize, prefixes)

                except struct.error:
                    # Catch struct unpack errors due to insufficient data length
                    raise envi.InvalidInstruction(bytes=bytes[startoff:startoff+16])

            if oper is not None:
                # This is a filthy hack for now...
                oper._dis_regctx = self._dis_regctx
                operands.append(oper)
            operoffset += osize

        # Pull in the envi generic instruction flags
        iflags = iflag_lookup.get(optype, 0)

        if priv_lookup.get(mnem, False):
            iflags |= envi.IF_PRIV

        # Lea will have a reg-mem/sib operand with _is_deref True, but should be false
        if optype == opcode86.INS_LEA:
            operands[1]._is_deref = False

        ret = i386Opcode(va, optype, mnem, prefixes, (offset-startoff)+operoffset, operands, iflags)

        return ret

    # Declare all the address method parsers here!
    #
    # Apart from ameth_0 they all take (bytes, offset, tsize, prefixes) and
    # return a tuple of (size, Operand) where size is the number of bytes
    # the parser accounts for in the instruction length.

    def ameth_0(self, operflags, operval, tsize, prefixes):
        """
        Build the operand for an opcode with an embedded operand: a
        register (OP_REG) or an immediate (OP_IMM) whose value operval
        comes from the opcode table.  Returns just the operand.
        """
        # Special address method for opcodes with embedded operands
        if operflags & opcode86.OP_REG:
            return i386RegOper(operval, tsize)
        elif operflags & opcode86.OP_IMM:
            return i386ImmOper(operval, tsize)
        raise Exception("Unknown ameth_0! operflags: 0x%.8x" % operflags)

    def ameth_a(self, bytes, offset, tsize, prefixes):
        """Direct far address: tsize byte offset followed by a 2 byte segment."""
        imm = e_bits.parsebytes(bytes, offset, tsize)
        # The segment is parsed (so short input still fails here) but is
        # not part of the operand.
        # THIS BEING GHETTORIGGED ONLY EFFECTS callf jmpf
        seg = e_bits.parsebytes(bytes, offset+tsize, 2)
        return (tsize+2, i386ImmOper(imm, tsize))

    def ameth_e(self, bytes, offset, tsize, prefixes):
        """ModRM r/m operand: a general register or a memory reference."""
        return self.extended_parse_modrm(bytes, offset, tsize)

    def ameth_n(self, bytes, offset, tsize, prefixes):
        """ModRM r/m field selects an MMX register."""
        mod,reg,rm = self.parse_modrm(ord(bytes[offset]))
        return (1, i386RegOper(rm + self.ROFFSETMMX, tsize))

    def ameth_q(self, bytes, offset, tsize, prefixes):
        """ModRM r/m operand: an MMX register (mod 3) or a memory reference."""
        mod,reg,rm = self.parse_modrm(ord(bytes[offset]))
        if mod == 3:
            return (1, i386RegOper(rm + self.ROFFSETMMX, tsize))
        return self.extended_parse_modrm(bytes, offset, tsize)

    def ameth_w(self, bytes, offset, tsize, prefixes):
        """ModRM r/m operand: an XMM register (mod 3) or a memory reference."""
        mod,reg,rm = self.parse_modrm(ord(bytes[offset]))
        if mod == 3:
            return (1, i386RegOper(rm + self.ROFFSETSIMD, tsize))
        return self.extended_parse_modrm(bytes, offset, tsize)

    def ameth_i(self, bytes, offset, tsize, prefixes):
        """Immediate of tsize bytes."""
        # FIXME sign extend here if opflags has OP_SIGNED
        imm = e_bits.parsebytes(bytes, offset, tsize)
        return (tsize, i386ImmOper(imm, tsize))

    def ameth_j(self, bytes, offset, tsize, prefixes):
        """Signed, instruction relative offset of tsize bytes."""
        imm = e_bits.parsebytes(bytes, offset, tsize, sign=True)
        return (tsize, i386PcRelOper(imm, tsize))

    def ameth_o(self, bytes, offset, tsize, prefixes):
        """Memory operand addressed by a 4 byte offset in the instruction."""
        # NOTE: displacement *stays* 32 bit even with REX
        # (but 16 bit should probably be supported)
        imm = e_bits.parsebytes(bytes, offset, 4, sign=False)
        return (4, i386ImmMemOper(imm, tsize))

    def ameth_g(self, bytes, offset, tsize, prefixes):
        """ModRM reg field selects a general register (size 0 is reported)."""
        mod,reg,rm = self.parse_modrm(ord(bytes[offset]))
        if tsize == 1: reg = self.byteRegOffset(reg)
        elif tsize == 2: reg += RMETA_LOW16
        return (0, i386RegOper(reg, tsize))

    def ameth_c(self, bytes, offset, tsize, prefixes):
        """ModRM reg field selects a control register."""
        mod,reg,rm = self.parse_modrm(ord(bytes[offset]))
        return (0, i386RegOper(reg+self.ROFFSETCTRL, tsize))

    def ameth_d(self, bytes, offset, tsize, prefixes):
        """ModRM reg field selects a debug register."""
        mod,reg,rm = self.parse_modrm(ord(bytes[offset]))
        return (0, i386RegOper(reg+self.ROFFSETDEBUG, tsize))

    def ameth_p(self, bytes, offset, tsize, prefixes):
        """ModRM reg field selects an MMX register."""
        mod,reg,rm = self.parse_modrm(ord(bytes[offset]))
        return (0, i386RegOper(reg+self.ROFFSETMMX, tsize))

    def ameth_s(self, bytes, offset, tsize, prefixes):
        """ModRM reg field selects a segment register."""
        mod,reg,rm = self.parse_modrm(ord(bytes[offset]))
        return (0, i386RegOper(reg+self.ROFFSETSEG, tsize))

    def ameth_u(self, bytes, offset, tsize, prefixes):
        """ModRM reg field selects a test register."""
        mod,reg,rm = self.parse_modrm(ord(bytes[offset]))
        return (0, i386RegOper(reg+self.ROFFSETTEST, tsize))

    def ameth_v(self, bytes, offset, tsize, prefixes):
        """ModRM reg field selects an XMM register."""
        mod,reg,rm = self.parse_modrm(ord(bytes[offset]))
        return (0, i386RegOper(reg+self.ROFFSETSIMD, tsize))

    def ameth_x(self, bytes, offset, tsize, prefixes):
        """Memory addressed by DS:(E)SI."""
        #FIXME this needs the DS over-ride, but is only for outsb which we don't support
        return (0, i386RegMemOper(REG_ESI, tsize))

    def ameth_y(self, bytes, offset, tsize, prefixes):
        """Memory addressed by ES:(E)DI."""
        #FIXME this needs the ES over-ride, but is only for insb which we don't support
        # The Y addressing method is the destination index register (this
        # used to hand back ESI, the same as ameth_x).
        # NOTE(review): REG_EDI is assumed to come from the
        # envi.archs.i386.regs star import, like REG_ESI -- confirm.
        return (0, i386RegMemOper(REG_EDI, tsize))


if __name__ == '__main__':

    # A little helper to make testing easier: disassemble the whole file
    # named by the first command line argument, one instruction per line
    # (address, instruction bytes in hex, disassembly).

    import sys
    d = i386Disasm()

    # Read everything up front and make sure the handle gets closed
    fd = open(sys.argv[1], 'rb')
    try:
        b = fd.read()
    finally:
        fd.close()

    offset = 0
    va = 0x41414141
    while offset < len(b):
        op = d.disasm(b, offset, va+offset)
        print('0x%.8x %s %s' % (va+offset, b[offset:offset+len(op)].encode('hex').ljust(16), repr(op)))
        offset += len(op)