1
2 """
3 The guts for the i386 envi opcode disassembler.
4 """
5
6 import struct
7
8 import envi
9 import envi.bits as e_bits
10
11 import opcode86
12 all_tables = opcode86.tables86
13
14
15 from envi.archs.i386.regs import *
16
17
18
19
# Prefix flag bits.  Each flag occupies its own bit so that several may be
# OR'd together into a single prefixes value for an instruction.
INSTR_PREFIX     = 1 << 0
PREFIX_LOCK      = 1 << 1
PREFIX_REPNZ     = 1 << 2
PREFIX_REPZ      = 1 << 3
PREFIX_REP       = 1 << 4
PREFIX_REP_SIMD  = 1 << 5
PREFIX_OP_SIZE   = 1 << 6
PREFIX_ADDR_SIZE = 1 << 7
PREFIX_SIMD      = 1 << 8
PREFIX_CS        = 1 << 9
PREFIX_SS        = 1 << 10
PREFIX_DS        = 1 << 11
PREFIX_ES        = 1 << 12
PREFIX_FS        = 1 << 13
PREFIX_GS        = 1 << 14
PREFIX_REG_MASK  = 1 << 15

# Values added to a register index to select a sub-register view.
# NOTE(review): presumably (bit offset << 24) | (bit width << 16) -- confirm
# against the envi register meta encoding.
RMETA_LOW8  = 8 << 16
RMETA_HIGH8 = (8 << 24) | (8 << 16)
RMETA_LOW16 = 16 << 16

# Lookup table indexed by byte value: the prefix flag for bytes which are
# instruction prefixes, None for every other byte.
i386_prefixes = [ None for i in range(256) ]
for _pfx_byte, _pfx_flag in (
    (0xF0, PREFIX_LOCK),
    (0xF2, PREFIX_REPNZ),
    (0xF3, PREFIX_REP),
    (0x2E, PREFIX_CS),
    (0x36, PREFIX_SS),
    (0x3E, PREFIX_DS),
    (0x26, PREFIX_ES),
    (0x64, PREFIX_FS),
    (0x65, PREFIX_GS),
    (0x66, PREFIX_OP_SIZE),
    (0x67, PREFIX_ADDR_SIZE),
):
    i386_prefixes[_pfx_byte] = _pfx_flag
del _pfx_byte, _pfx_flag
55
56
# Multiplier for each of the four possible 2-bit scale values (1, 2, 4, 8).
scale_lookup = tuple(1 << shift for shift in range(4))
58
59
60
# Mnemonics which get envi.IF_PRIV added to their instruction flags.
# Every listed mnemonic maps to True; lookups use .get(mnem, False).
priv_lookup = dict.fromkeys(
    (
        "int", "in", "out", "insb", "outsb", "insd", "outsd",
        "vmcall", "vmlaunch", "vmresume", "vmxoff", "vmread", "vmwrite",
        "rsm", "lar", "lsl", "clts", "invd", "wbinvd",
        "wrmsr", "rdmsr", "sysexit",
        "lgdt", "lidt", "lmsw", "monitor", "mwait",
        "vmclear", "vmptrld", "vmptrst", "vmxon",
    ),
    True,
)
94
95
# Maps an opcode86 instruction type to the envi instruction flags applied to
# opcodes of that type.  Types which are absent get no flags (callers use
# .get(optype, 0)).
iflag_lookup = {}
iflag_lookup[opcode86.INS_RET]      = envi.IF_NOFALL | envi.IF_RET
iflag_lookup[opcode86.INS_CALL]     = envi.IF_CALL
iflag_lookup[opcode86.INS_HALT]     = envi.IF_NOFALL
iflag_lookup[opcode86.INS_CALLCC]   = envi.IF_CALL
iflag_lookup[opcode86.INS_BRANCH]   = envi.IF_NOFALL | envi.IF_BRANCH
iflag_lookup[opcode86.INS_BRANCHCC] = envi.IF_BRANCH
104
# sizenames[n] is the display name for an n-byte operand; sizes without a
# name are left as the empty string.
_SIZE_LABELS = {1: "byte", 2: "word", 4: "dword", 8: "qword", 16: "oword"}
sizenames = [_SIZE_LABELS.get(x, "") for x in range(17)]
111
117
118
119
120
121
122
123
125
127 self.reg = reg
128 self.tsize = tsize
129
130 - def repr(self, op):
132
136
139
140 - def render(self, mcanv, op, idx):
147
149 if not isinstance(other, i386RegOper):
150 return False
151 if other.reg != self.reg:
152 return False
153 if other.tsize != self.tsize:
154 return False
155 return True
156
158 """
159 An operand representing an immediate.
160 """
162 self.imm = imm
163 self.tsize = tsize
164
def repr(self, op):
    """
    Return the text form of this immediate.

    Values above 4096 are shown as zero padded hex ("0x%.8x"), anything
    else is shown in plain decimal.  The op argument is not used.
    """
    value = self.imm
    return str(value) if value <= 4096 else "0x%.8x" % value
170
173
174 - def render(self, mcanv, op, idx):
190
192 if not isinstance(other, i386ImmOper):
193 return False
194 if other.imm != self.imm:
195 return False
196 if other.tsize != self.tsize:
197 return False
198 return True
199
201 """
202 This is the operand used for EIP relative offsets
203 for operands on instructions like jmp/call
204 """
206 self.imm = imm
207 self.tsize = tsize
208
def repr(self, op):
    """
    Return the branch target as zero padded hex.

    The target is the address following the instruction (op.va + op.size)
    plus this operand's relative immediate.
    """
    target = op.va + op.size + self.imm
    return "0x%.8x" % (target,)
211
214
216 return op.va + op.size + self.imm
217
218 - def render(self, mcanv, op, idx):
226
228 if not isinstance(other, i386PcRelOper):
229 return False
230 if other.imm != self.imm:
231 return False
232 if other.tsize != self.tsize:
233 return False
234 return True
235
237 """
    An operand which represents the result of reading/writing memory from the
239 dereference (with possible displacement) from a given register.
240 """
def __init__(self, reg, tsize, disp=0):
    """
    Record the register to dereference, the size of the access and an
    optional displacement (defaults to 0).
    """
    self.reg, self.tsize, self.disp = reg, tsize, disp
    # Marked as a dereference by default; disasm() clears this flag on
    # the second operand of INS_LEA instructions.
    self._is_deref = True
246
def repr(self, op):
    """
    Return text of the form "<size> [<reg>]", "<size> [<reg> + N]" or
    "<size> [<reg> - N]" depending on the sign of the displacement.

    The register name comes from self._dis_regctx and the size name from
    the module level sizenames table.  The op argument is not used.
    """
    regname = self._dis_regctx.getRegisterName(self.reg)

    if self.disp > 0:
        tail = " + %d" % self.disp
    elif self.disp < 0:
        tail = " - %d" % abs(self.disp)
    else:
        tail = ""

    return "%s [%s%s]" % (sizenames[self.tsize], regname, tail)
254
258
261
267
269
270 return self._is_deref
271
272 - def render(self, mcanv, op, idx):
289
291 if not isinstance(other, i386RegMemOper):
292 return False
293 if other.reg != self.reg:
294 return False
295 if other.disp != self.disp:
296 return False
297 if other.tsize != self.tsize:
298 return False
299 return True
300
302 """
303 An operand which represents the dereference (memory read/write) of
304 a memory location associated with an immediate.
305 """
307 self.imm = imm
308 self.tsize = tsize
309 self._is_deref = True
310
312
313 return self._is_deref
314
def repr(self, op):
    """
    Return text of the form "<size> [0x<address>]" where the address is
    this operand's immediate as zero padded hex.  The op argument is not
    used.
    """
    szname = sizenames[self.tsize]
    return "%s [0x%.8x]" % (szname, self.imm)
317
321
324
331
332 - def render(self, mcanv, op, idx):
345
347 if not isinstance(other, i386ImmMemOper):
348 return False
349 if other.imm != self.imm:
350 return False
351 if other.tsize != self.tsize:
352 return False
353 return True
354
356 """
    An operand which represents the result of reading/writing memory from the
358 dereference (with possible displacement) from a given register.
359 """
def __init__(self, tsize, reg=None, imm=None, index=None, scale=1, disp=0):
    """
    Record the pieces of the address expression for this operand.

    tsize  - size of the memory access
    reg    - base register, or None
    imm    - immediate base, or None
    index  - index register, or None
    scale  - multiplier applied to the index register
    disp   - displacement added to the address
    """
    self.tsize = tsize
    self.reg, self.imm = reg, imm
    self.index, self.scale = index, scale
    self.disp = disp
    # Marked as a dereference by default; disasm() clears this flag on
    # the second operand of INS_LEA instructions.
    self._is_deref = True
368
370 if not isinstance(other, i386SibOper):
371 return False
372 if other.imm != self.imm:
373 return False
374 if other.reg != self.reg:
375 return False
376 if other.index != self.index:
377 return False
378 if other.scale != self.scale:
379 return False
380 if other.disp != self.disp:
381 return False
382 if other.tsize != self.tsize:
383 return False
384 return True
385
387 return self._is_deref
388
def repr(self, op):
    """
    Return the text form of this operand, for example
    "dword [eax + ecx * 4 + 8]".

    Pieces are emitted in order: the size name, the base register (if
    any), the immediate base (if any), the index register with its scale
    when the scale is not 1, and finally a signed displacement when it is
    non-zero.  The op argument is not used.
    """
    parts = ["%s [" % sizenames[self.tsize]]

    # Compare against None by identity: register index 0 is a valid value
    # and must not be treated as "absent".
    if self.reg is not None:
        parts.append(self._dis_regctx.getRegisterName(self.reg))

    if self.imm is not None:
        parts.append("0x%.8x" % self.imm)

    if self.index is not None:
        parts.append(" + %s" % self._dis_regctx.getRegisterName(self.index))
        if self.scale != 1:
            parts.append(" * %d" % self.scale)

    if self.disp > 0:
        parts.append(" + %d" % self.disp)
    elif self.disp < 0:
        parts.append(" - %d" % abs(self.disp))

    parts.append("]")

    return "".join(parts)
412
416
419
421 if emu == None: return None
422
423 ret = 0
424
425 if self.imm != None:
426 ret += self.imm
427
428 if self.reg != None:
429 ret += emu.getRegister(self.reg)
430
431 if self.index != None:
432 ret += (emu.getRegister(self.index) * self.scale)
433
434
435 base, size = emu.getSegmentInfo(op)
436 ret += base
437
438 return ret + self.disp
439
441
442 if self.imm:
443 return self.imm
444 if emu:
445 return emu.getRegister(self.reg)
446 return None
447
448 - def render(self, mcanv, op, idx):
481
483
484
# (prefix flag, display name) pairs, in the order they are listed here.
prefix_names = list(zip(
    (PREFIX_LOCK, PREFIX_REPNZ, PREFIX_REP,
     PREFIX_CS, PREFIX_SS, PREFIX_DS, PREFIX_ES, PREFIX_FS, PREFIX_GS),
    ("lock", "repnz", "rep",
     "cs", "ss", "ds", "es", "fs", "gs"),
))
496
497
562
564 """
565 Render this opcode to the specified memory canvas
566 """
567 if self.prefixes:
568 pfx = self.getPrefixName()
569 if pfx:
570 mcanv.addNameText("%s: " % pfx, pfx)
571
572 mcanv.addNameText(self.mnem, typename="mnemonic")
573 mcanv.addText(" ")
574
575
576 imax = len(self.opers)
577 lasti = imax - 1
578 for i in xrange(imax):
579 oper = self.opers[i]
580 oper.render(mcanv, self, i)
581 if i != lasti:
582 mcanv.addText(",")
583
# Indexes into an opcode table entry which hold the operand descriptions
# (see the operand loop in disasm()).
operand_range = tuple(range(2, 5))

# Indexes used to choose an entry from an opcode86.OPERSIZE size list.
MODE_16, MODE_32, MODE_64 = range(3)
589
591
627
629
630 mod = (byte >> 6) & 0x3
631 reg = (byte >> 3) & 0x7
632 rm = byte & 0x7
633
634 return (mod,reg,rm)
635
641
642
644 """
645 Return a tuple of (size, Operand)
646 """
647
648 mod,reg,rm = self.parse_modrm(ord(bytes[offset]))
649
650 size = 1
651
652
653
654 if mod == 3:
655
656 if opersize == 1: rm = self.byteRegOffset(rm)
657 elif opersize == 2: rm += RMETA_LOW16
658
659 return (size, i386RegOper(rm+regbase, opersize))
660
661 elif mod == 0:
662
663 if rm == 5:
664 imm = e_bits.parsebytes(bytes, offset + size, 4)
665 size += 4
666
667 return(size, i386ImmMemOper(imm, opersize))
668
669 elif rm == 4:
670 sibsize, scale, index, base, imm = self.parse_sib(bytes, offset+size, mod)
671 size += sibsize
672 if base != None: base += regbase
673 if index != None: index += regbase
674 oper = i386SibOper(opersize, reg=base, imm=imm, index=index, scale=scale_lookup[scale])
675 return (size, oper)
676
677 else:
678 return(size, i386RegMemOper(regbase+rm, opersize))
679
680 elif mod == 1:
681
682 if rm == 4:
683 sibsize, scale, index, base, imm = self.parse_sib(bytes, offset+size, mod)
684 size += sibsize
685 disp = e_bits.parsebytes(bytes, offset+size, 1, sign=True)
686 size += 1
687 if base != None: base += regbase
688 if index != None: index += regbase
689 oper = i386SibOper(opersize, reg=base, index=index, scale=scale_lookup[scale], disp=disp)
690 return (size,oper)
691 else:
692 x = e_bits.signed(ord(bytes[offset+size]), 1)
693 size += 1
694 return(size, i386RegMemOper(regbase+rm, opersize, disp=x))
695
696 elif mod == 2:
697
698 if rm == 4:
699 sibsize, scale, index, base, imm = self.parse_sib(bytes,offset+size,mod)
700 size += sibsize
701 disp = e_bits.parsebytes(bytes, offset + size, 4, sign=True)
702 size += 4
703 if base != None: base += regbase
704 if index != None: index += regbase
705 oper = i386SibOper(opersize, reg=base, imm=imm, index=index, scale=scale_lookup[scale], disp=disp)
706 return (size, oper)
707
708 else:
709
710 disp = e_bits.parsebytes(bytes, offset+size, 4, sign=True)
711 size += 4
712 return(size, i386RegMemOper(regbase+rm, opersize, disp=disp))
713
714 else:
715 raise Exception("How does mod == %d" % mod)
716
718 """
719 Return a tuple of (size, scale, index, base, imm)
720 """
721 byte = ord(bytes[offset])
722 scale = (byte >> 6) & 0x3
723 index = (byte >> 3) & 0x7
724 base = byte & 0x7
725 imm = None
726
727 size = 1
728
729
730 if index == 4:
731 index = None
732
733
734 if base == 5:
735 if mod == 0:
736 base = None
737 imm = e_bits.parsebytes(bytes, offset+size, 4, sign=False)
738 size += 4
739
740 elif mod == 1:
741 pass
742
743 elif mod == 2:
744 pass
745
746
747 return (size, scale, index, base, imm)
748
749
751 """
752 Use the oper type and prefixes to decide on the tsize for
753 the operand.
754 """
755 mode = MODE_32
756
757
758 sizelist = opcode86.OPERSIZE.get(opertype, None)
759 if sizelist == None:
760 raise "OPERSIZE FAIL: %.8x" % opertype
761
762 if prefixes & PREFIX_OP_SIZE:
763
764 mode = MODE_16
765
766
767
768 return sizelist[mode]
769
def disasm(self, bytes, offset, va):
    """
    Decode a single instruction from bytes beginning at offset.

    bytes   - buffer holding the instruction bytes (read one at a time
              with ord(bytes[n]))
    offset  - index into bytes where decoding starts
    va      - address handed to the resulting i386Opcode

    Returns an i386Opcode whose size is the number of opcode/prefix bytes
    consumed plus the sizes reported by the operand parsers.

    Raises envi.InvalidInstruction when the selected opcode table entry has
    an instruction type of 0, or when an addressing method raises
    struct.error while parsing.  Raises Exception when the addressing
    method has no handler in self._dis_amethods.

    NOTE(review): reads of bytes[...] past the end of the buffer are not
    translated and presumably surface as IndexError -- confirm with callers.
    """

    # tabdesc is indexed as: [0] the table itself, [1] shift, [2] mask,
    # [3] value subtracted from the opcode byte, [4] largest byte handled
    # by this table, [5] index of the table to use for larger bytes.
    tabdesc = all_tables[0]
    startoff = offset

    optype = None
    mnem = None
    operands = []

    prefixes = 0

    # Consume prefix bytes, OR-ing their flags together.
    while True:

        obyte = ord(bytes[offset])

        p = self._dis_prefixes[obyte]
        if p is None:
            break
        # 66 0f is left for the opcode table walk below rather than being
        # treated as an operand size prefix.
        if obyte == 0x66 and ord(bytes[offset+1]) == 0x0f:
            break
        prefixes |= p
        offset += 1

    # Walk the opcode tables until an entry without a sub-table is found.
    while True:

        obyte = ord(bytes[offset])

        if obyte > tabdesc[4]:
            tabdesc = all_tables[tabdesc[5]]

        tabidx = ((obyte - tabdesc[3]) >> tabdesc[1]) & tabdesc[2]

        opdesc = tabdesc[0][tabidx]

        nexttable = opdesc[0]

        if nexttable != 0:

            tabdesc = all_tables[nexttable]

            # For 66 0f both bytes are skipped before continuing in the
            # sub-table.
            if obyte == 0x66 and ord(bytes[offset+1]) == 0x0f:
                offset += 1

            offset += 1

            continue

        mnem = opdesc[6]
        optype = opdesc[1]
        # The final opcode byte is only consumed here when the table uses
        # a full byte mask.
        if tabdesc[2] == 0xff:
            offset += 1
        break

    if optype == 0:
        raise envi.InvalidInstruction(bytes=bytes[startoff:startoff+16])

    operoffset = 0

    for i in operand_range:

        oper = None
        osize = 0

        operflags = opdesc[i]
        opertype = operflags & opcode86.OPTYPE_MASK
        addrmeth = operflags & opcode86.ADDRMETH_MASK

        # A zero description means there are no further operands.
        if operflags == 0:
            break

        tsize = self._dis_calc_tsize(opertype, prefixes)

        if addrmeth == 0:
            # No addressing method: the operand comes from the table entry
            # itself and consumes no instruction bytes.
            osize = 0
            oper = self.ameth_0(operflags, opdesc[5+i], tsize, prefixes)

        else:

            ameth = self._dis_amethods[addrmeth >> 16]

            if ameth is None:
                raise Exception("Implement Addressing Method 0x%.8x" % addrmeth)

            try:
                if addrmeth == opcode86.ADDRMETH_I or addrmeth == opcode86.ADDRMETH_J:
                    # These methods are handed the position after the bytes
                    # already consumed by earlier operands.
                    osize, oper = ameth(bytes, offset+operoffset, tsize, prefixes)

                    # A signed immediate whose size differs from the
                    # previous operand is sign extended to that operand's
                    # size here.
                    if operflags & opcode86.OP_SIGNED and len(operands) and tsize != operands[-1].tsize:
                        otsize = operands[-1].tsize
                        oper.imm = e_bits.sign_extend(oper.imm, oper.tsize, otsize)
                        oper.tsize = otsize

                else:
                    # All other methods are handed the start of the operand
                    # bytes.
                    osize, oper = ameth(bytes, offset, tsize, prefixes)

            except struct.error:
                # The parser ran out of data while unpacking.
                raise envi.InvalidInstruction(bytes=bytes[startoff:startoff+16])

        if oper is not None:
            # Operands need the register context to render register names.
            oper._dis_regctx = self._dis_regctx
            operands.append(oper)
        operoffset += osize

    iflags = iflag_lookup.get(optype, 0)

    if priv_lookup.get(mnem, False):
        iflags |= envi.IF_PRIV

    # For INS_LEA the second operand is flagged as not being a dereference.
    if optype == opcode86.INS_LEA:
        operands[1]._is_deref = False

    ret = i386Opcode(va, optype, mnem, prefixes, (offset-startoff)+operoffset, operands, iflags)

    return ret
916
917
918
919 - def ameth_0(self, operflags, operval, tsize, prefixes):
926
def ameth_a(self, bytes, offset, tsize, prefixes):
    """
    Parse a tsize byte immediate at offset followed by a 2 byte value.

    Returns (tsize + 2, i386ImmOper).  Only the leading immediate is kept
    in the operand; the trailing 2 byte value is parsed but discarded.
    NOTE(review): presumably that value is the segment selector of a far
    pointer -- confirm.
    """
    target = e_bits.parsebytes(bytes, offset, tsize)
    # Still parsed so that truncated input fails the same way it always
    # has, even though the value itself is unused.
    e_bits.parsebytes(bytes, offset + tsize, 2)

    return (tsize + 2, i386ImmOper(target, tsize))
933
934 - def ameth_e(self, bytes, offset, tsize, prefixes):
936
def ameth_n(self, bytes, offset, tsize, prefixes):
    """
    Build a register operand from the rm field of the byte at offset,
    offset by self.ROFFSETMMX.

    Returns (1, i386RegOper): one byte is reported as consumed.
    """
    rmfield = self.parse_modrm(ord(bytes[offset]))[2]
    return (1, i386RegOper(rmfield + self.ROFFSETMMX, tsize))
940
941 - def ameth_q(self, bytes, offset, tsize, prefixes):
946
947 - def ameth_w(self, bytes, offset, tsize, prefixes):
952
953 - def ameth_i(self, bytes, offset, tsize, prefixes):
957
958 - def ameth_j(self, bytes, offset, tsize, prefixes):
961
962 - def ameth_o(self, bytes, offset, tsize, prefixes):
967
968 - def ameth_g(self, bytes, offset, tsize, prefixes):
973
def ameth_c(self, bytes, offset, tsize, prefixes):
    """
    Build a register operand from the reg field of the byte at offset,
    offset by self.ROFFSETCTRL.

    Returns (0, i386RegOper): no bytes are reported as consumed.
    """
    regfield = self.parse_modrm(ord(bytes[offset]))[1]
    return (0, i386RegOper(regfield + self.ROFFSETCTRL, tsize))
977
def ameth_d(self, bytes, offset, tsize, prefixes):
    """
    Build a register operand from the reg field of the byte at offset,
    offset by self.ROFFSETDEBUG.

    Returns (0, i386RegOper): no bytes are reported as consumed.
    """
    regfield = self.parse_modrm(ord(bytes[offset]))[1]
    return (0, i386RegOper(regfield + self.ROFFSETDEBUG, tsize))
981
982 - def ameth_p(self, bytes, offset, tsize, prefixes):
985
986 - def ameth_s(self, bytes, offset, tsize, prefixes):
989
def ameth_u(self, bytes, offset, tsize, prefixes):
    """
    Build a register operand from the reg field of the byte at offset,
    offset by self.ROFFSETTEST.

    Returns (0, i386RegOper): no bytes are reported as consumed.
    """
    regfield = self.parse_modrm(ord(bytes[offset]))[1]
    return (0, i386RegOper(regfield + self.ROFFSETTEST, tsize))
993
def ameth_v(self, bytes, offset, tsize, prefixes):
    """
    Build a register operand from the reg field of the byte at offset,
    offset by self.ROFFSETSIMD.

    Returns (0, i386RegOper): no bytes are reported as consumed.
    """
    regfield = self.parse_modrm(ord(bytes[offset]))[1]
    return (0, i386RegOper(regfield + self.ROFFSETSIMD, tsize))
997
998 - def ameth_x(self, bytes, offset, tsize, prefixes):
1001
1002 - def ameth_y(self, bytes, offset, tsize, prefixes):
1005
1006
if __name__ == '__main__':

    # Command line driver: disassemble the file named by the first argument
    # from start to end, printing the address, the instruction bytes in hex
    # and the repr of each decoded opcode.
    import sys
    d = i386Disasm()
    # Read through a context manager so the file handle is closed promptly
    # instead of being left for garbage collection.
    with open(sys.argv[1], 'rb') as fd:
        b = fd.read()
    offset = 0
    va = 0x41414141
    while offset < len(b):
        op = d.disasm(b, offset, va+offset)
        print('0x%.8x %s %s' % (va+offset, b[offset:offset+len(op)].encode('hex').ljust(16), repr(op)))
        offset += len(op)
1020