Package Elf
[hide private]
[frames | no frames]

Source Code for Package Elf

  1  """ 
  2  Kenshoto's Elf parser 
  3   
  4  This package will let you use programmatic ninja-fu 
  5  when trying to parse Elf binaries.  The API is based 
  6  around several objects representing constructs in the 
  7  Elf binary format.  The Elf object itself contains 
  8  parsed metadata and lists of things like section headers 
  9  and relocation entries.  Additionally, most of the 
 10  objects implement repr() in some form or another which 
 11  allows you a bunch of readelf-like functionality. 
 12   
 13  *Eventually* this API will allow you to modify Elf binaries 
 14  and spit them back out in working order (not complete, you 
 15  may notice some of the initial code). 
 16   
 17  Send bug reports to Invisigoth or Metr0. 
 18   
 19  """ 
 20  # Copyright (C) 2007 Invisigoth - See LICENSE file for details 
 21  import os 
 22  import sys 
 23  import struct 
 24  import traceback 
 25  import zlib 
 26   
 27  from stat import * 
 28  from Elf.elf_lookup import * 
 29  import vstruct 
 30  import vstruct.defs.elf as vs_elf 
 31   
 32  verbose = False  # module-level verbosity flag; no code visible in this file reads it
 33   
class ElfReloc:
    """
    Shared helpers for Elf relocation records.

    Relocation entries consist mostly of "fixup" addresses which are
    taken care of by the loader at runtime.  Things like GOT entries,
    PLT jmp codes etc all have an Elf relocation entry.

    The r_offset and r_info attributes read below are not created by
    this class; whoever mixes it in has to supply them.
    """

    def __init__(self, r_types):
        # r_types: mapping of relocation type number -> display name
        self.r_types = r_types
        self.name = ""

    def __repr__(self):
        parts = (hex(self.r_offset), self.getName(), self.getTypeName())
        return "%s %s <%s>" % parts

    def setName(self, name):
        """Attach a name to this relocation."""
        self.name = name

    def getName(self):
        """Return the attached name ("" until setName is called)."""
        return self.name

    def getType(self):
        """Return the low 8 bits of r_info (the relocation type)."""
        type_mask = 0xff
        return self.r_info & type_mask

    def getTypeName(self):
        """Return the r_types entry for getType(), or "" if absent."""
        type_id = self.getType()
        return self.r_types.get(type_id, "")
60
class Elf32Reloc(ElfReloc, vs_elf.Elf32Reloc):
    """32 bit relocation record: vs_elf.Elf32Reloc plus the ElfReloc helpers."""

    def __init__(self, r_types):
        # The vstruct side is initialised before the ElfReloc bookkeeping.
        vs_elf.Elf32Reloc.__init__(self)
        ElfReloc.__init__(self, r_types)

    def getSymTabIndex(self):
        """Return r_info with its low 8 (type) bits shifted away."""
        info = self.r_info
        return info >> 8
68
class Elf32Reloca(ElfReloc, vs_elf.Elf32Reloca):
    """32 bit "rela" relocation record: vs_elf.Elf32Reloca plus the ElfReloc helpers."""

    def __init__(self, r_types):
        # The vstruct side is initialised before the ElfReloc bookkeeping.
        vs_elf.Elf32Reloca.__init__(self)
        ElfReloc.__init__(self, r_types)

    def getSymTabIndex(self):
        """Return r_info with its low 8 (type) bits shifted away."""
        info = self.r_info
        return info >> 8
76
class Elf64Reloc(ElfReloc, vs_elf.Elf64Reloc):
    """
    64 bit relocation record: vs_elf.Elf64Reloc plus the ElfReloc helpers.

    In the 64 bit format r_info is split 32/32: the upper half is the
    symbol table index and the lower half is the relocation type.
    """

    def __init__(self, r_types):
        # The vstruct side is initialised before the ElfReloc bookkeeping.
        vs_elf.Elf64Reloc.__init__(self)
        ElfReloc.__init__(self, r_types)

    def getType(self):
        """
        Return the relocation type (low 32 bits of r_info).

        Overrides ElfReloc.getType, whose 8 bit mask is only right for
        the 32 bit layout.
        """
        return self.r_info & 0xffffffff

    def getSymTabIndex(self):
        """Return the symbol table index (upper 32 bits of r_info)."""
        return self.r_info >> 32
84
class Elf64Reloca(ElfReloc, vs_elf.Elf64Reloca):
    """
    64 bit "rela" relocation record: vs_elf.Elf64Reloca plus the
    ElfReloc helpers.

    In the 64 bit format r_info is split 32/32: the upper half is the
    symbol table index and the lower half is the relocation type.
    """

    def __init__(self, r_types):
        # The vstruct side is initialised before the ElfReloc bookkeeping.
        vs_elf.Elf64Reloca.__init__(self)
        ElfReloc.__init__(self, r_types)

    def getType(self):
        """
        Return the relocation type (low 32 bits of r_info).

        Overrides ElfReloc.getType, whose 8 bit mask is only right for
        the 32 bit layout.
        """
        return self.r_info & 0xffffffff

    def getSymTabIndex(self):
        """Return the symbol table index (upper 32 bits of r_info)."""
        return self.r_info >> 32
92
class ElfDynamic:
    """
    An object to represent an Elf dynamic entry.
    (linker/loader directives)

    The d_tag and d_value attributes read below are not created by
    this class; whoever mixes it in has to supply them.
    """

    # d_tag values whose d_value gets resolved to a string by the parser
    has_string = [DT_NEEDED, DT_SONAME]

    def __init__(self, bytes=None):
        # "bytes" is accepted but never used; it stays in the signature
        # so existing callers keep working.
        self.name = ""

    def __repr__(self):
        # Fall back to the raw value when no name has been attached.
        name = self.getName()
        if not name:
            name = hex(self.d_value)
        return "%s %s" % (name, self.getTypeName())

    def getName(self):
        """Return the attached name ("" until setName is called)."""
        return self.name

    def setName(self, name):
        """Attach a name to this dynamic entry."""
        self.name = name

    def getTypeName(self):
        """Return the dt_types name for d_tag, or "Unknown: <hex tag>"."""
        return dt_types.get(self.d_tag, "Unknown: %s" % hex(self.d_tag))
117
class Elf32Dynamic(ElfDynamic, vs_elf.Elf32Dynamic):
    """32 bit dynamic entry: vs_elf.Elf32Dynamic plus the ElfDynamic helpers."""

    def __init__(self):
        # vstruct initialiser first, then the ElfDynamic one.
        for base in (vs_elf.Elf32Dynamic, ElfDynamic):
            base.__init__(self)
122
class Elf64Dynamic(ElfDynamic, vs_elf.Elf64Dynamic):
    """64 bit dynamic entry: vs_elf.Elf64Dynamic plus the ElfDynamic helpers."""

    def __init__(self):
        # vstruct initialiser first, then the ElfDynamic one.
        for base in (vs_elf.Elf64Dynamic, ElfDynamic):
            base.__init__(self)
127
class ElfSymbol:
    """
    Shared helpers for Elf symbol table entries.

    The st_info, st_value and st_size attributes read below are not
    created by this class; whoever mixes it in has to supply them.
    """

    def __init__(self):
        self.name = ""

    def getInfoType(self):
        """Return the low nibble of st_info."""
        return self.st_info & 0xf

    def getInfoBind(self):
        """Return st_info shifted right by four bits."""
        return self.st_info >> 4

    def __cmp__(self, other):
        """
        Order symbols by st_value.

        Returns -1, 0 or 1.  Equal values must yield 0, otherwise
        a < b and b < a would both hold and sorting is ill-defined.
        """
        mine = self.st_value
        theirs = other.st_value
        return (mine > theirs) - (mine < theirs)

    def setName(self, name):
        """Attach a name to this symbol."""
        self.name = name

    def getName(self):
        """Return the attached name ("" until setName is called)."""
        return self.name

    def __repr__(self):
        return "0x%.8x %d %s" % (self.st_value, self.st_size, self.name)
151
class Elf32Symbol(ElfSymbol, vs_elf.Elf32Symbol):
    """32 bit symbol entry: vs_elf.Elf32Symbol plus the ElfSymbol helpers."""

    def __init__(self):
        # vstruct initialiser first, then the ElfSymbol one.
        for base in (vs_elf.Elf32Symbol, ElfSymbol):
            base.__init__(self)
156
class Elf64Symbol(ElfSymbol, vs_elf.Elf64Symbol):
    """64 bit symbol entry: vs_elf.Elf64Symbol plus the ElfSymbol helpers."""

    def __init__(self):
        # vstruct initialiser first, then the ElfSymbol one.
        for base in (vs_elf.Elf64Symbol, ElfSymbol):
            base.__init__(self)
161
class ElfPheader:
    """
    Display helpers for Elf program headers.

    The p_* attributes read below are not created by this class;
    whoever mixes it in has to supply them.
    """

    def __init__(self):
        # Nothing to set up; this class keeps no state of its own.
        pass

    def getTypeName(self):
        """Return the ph_types name for p_type, or "Unknown"."""
        type_id = self.p_type
        return ph_types.get(type_id, "Unknown")

    def __repr__(self):
        layout = '[%35s] VMA: 0x%.8x offset: %8d memsize: %8d align: %8d (filesz: %8d) flags: %x'
        values = (
            self.getTypeName(),
            self.p_vaddr,
            self.p_offset,
            self.p_memsz,
            self.p_align,
            self.p_filesz,
            self.p_flags,
        )
        return layout % values
179
class Elf32Pheader(ElfPheader, vs_elf.Elf32Pheader):
    """32 bit program header: vs_elf.Elf32Pheader plus the ElfPheader helpers."""

    def __init__(self):
        # vstruct initialiser first, then the ElfPheader one.
        for base in (vs_elf.Elf32Pheader, ElfPheader):
            base.__init__(self)
184
class Elf64Pheader(ElfPheader, vs_elf.Elf64Pheader):
    """64 bit program header: vs_elf.Elf64Pheader plus the ElfPheader helpers."""

    def __init__(self):
        # vstruct initialiser first, then the ElfPheader one.
        for base in (vs_elf.Elf64Pheader, ElfPheader):
            base.__init__(self)
189
class ElfSection:
    """
    Name storage and display helper for Elf section headers.

    The sh_* attributes read by __repr__ are not created by this
    class; whoever mixes it in has to supply them.
    """

    def __init__(self):
        self.name = ''

    def setName(self, name):
        """Attach a name to this section."""
        self.name = name

    def getName(self):
        """Return the attached name ('' until setName is called)."""
        return self.name

    def __repr__(self):
        layout = 'Elf Section: [%20s] VMA: 0x%.8x offset: %8d ent/size: %8d/%8d align: %8d'
        values = (
            self.name,
            self.sh_addr,
            self.sh_offset,
            self.sh_entsize,
            self.sh_size,
            self.sh_addralign,
        )
        return layout % values
208
class Elf32Section(ElfSection, vs_elf.Elf32Section):
    """32 bit section header: vs_elf.Elf32Section plus the ElfSection helpers."""

    def __init__(self):
        # vstruct initialiser first, then the ElfSection one.
        for base in (vs_elf.Elf32Section, ElfSection):
            base.__init__(self)
213
class Elf64Section(ElfSection, vs_elf.Elf64Section):
    """64 bit section header: vs_elf.Elf64Section plus the ElfSection helpers."""

    def __init__(self):
        # vstruct initialiser first, then the ElfSection one.
        for base in (vs_elf.Elf64Section, ElfSection):
            base.__init__(self)
218
class Elf(vs_elf.Elf32, vs_elf.Elf64):
    """
    A parsed Elf binary read from a seekable file object.

    Construction parses the Elf header and then, in order, the program
    headers, section headers, symbol table, dynamic symbols/entries
    and relocations.  The results are kept in the pheaders, sections,
    symbols, dynamic_symbols, dynamics and relocs lists.
    """

    def __init__(self, fd, inmem=False):
        """
        Parse the Elf binary available through fd.

        fd must support seek() and read(); it is kept on the instance
        and read again by later lookups.  inmem is accepted for
        interface compatibility but is not used by this class.

        Raises Exception when e_machine is found in neither
        e_machine_32 nor e_machine_64.
        """
        # Grab a 32bit header to use to check for other
        # machine types...
        probe = vs_elf.Elf32()
        fd.seek(0)
        probe.vsParse(fd.read(len(probe)))
        if probe.e_machine in e_machine_32:
            vs_elf.Elf32.__init__(self)
            self.bits = 32
            self.psize = 4
        elif probe.e_machine in e_machine_64:
            vs_elf.Elf64.__init__(self)
            self.bits = 64
            self.psize = 8
        else:
            raise Exception('Unrecognized e_machine: %d' % probe.e_machine)

        self.fd = fd

        # Now that the width is known, parse the real header.
        self.vsParse(self.readAtOffset(0, len(self)))

        if self.e_machine == EM_386:
            self.r_types = r_types_386
        elif self.e_machine == EM_X86_64:
            self.r_types = r_types_amd64
        else:
            self.r_types = {}

        self.pheaders = []
        self.sections = []
        self.secnames = {}
        self.symbols = []
        self.relocs = []
        self.symbols_by_name = {}
        self.symbols_by_addr = {}
        self.dynamics = []
        self.dynamic_symbols = []

        # Order matters: symbols/dynamics look sections up by name and
        # relocations look names up in dynamic_symbols.
        self._parsePheaders()
        self._parseSections()
        self._parseSymbols()
        self._parseDynamic()
        self._parseRelocs()

    def _newStruct(self, cls32, cls64, *args):
        """Instantiate cls32 or cls64 (with args) according to self.bits."""
        if self.bits == 32:
            return cls32(*args)
        if self.bits == 64:
            return cls64(*args)
        raise Exception('Platform not supported: %d' % (self.bits))

    def getRelocTypeName(self, rtype):
        '''
        Because relocation type names are decided based on the
        arch, only the Elf knows for sure...

        Returns None when rtype is not in this binary's r_types map.
        '''
        return self.r_types.get(rtype)

    def _parsePheaders(self):
        """Load up any program headers we find."""
        if not self.e_phoff:
            return
        offset = self.e_phoff
        entsize = self.e_phentsize
        for _ in range(self.e_phnum):
            pgm = self._newStruct(Elf32Pheader, Elf64Pheader)
            pgm.vsParse(self.readAtOffset(offset, entsize))
            self.pheaders.append(pgm)
            offset += entsize

    def _parseSections(self):
        """Load up all the section headers and resolve their names."""
        if self.e_shoff:
            offset = self.e_shoff
            # FIXME this assumes static sized section headers
            entsize = self.e_shentsize
            for _ in range(self.e_shnum):
                sec = self._newStruct(Elf32Section, Elf64Section)
                sec.vsParse(self.readAtOffset(offset, entsize))
                self.sections.append(sec)
                offset += entsize

        # A binary without section headers (or with a bogus e_shstrndx)
        # has no name table to read; leave the sections unnamed.
        if self.e_shstrndx >= len(self.sections):
            return

        # Populate the section names
        strsec = self.sections[self.e_shstrndx]
        names = self.readAtOffset(strsec.sh_offset, strsec.sh_size)
        for sec in self.sections:
            name = names[sec.sh_name:].split("\x00", 1)[0]
            if len(name) > 0:
                sec.setName(name)
                self.secnames[name] = sec

    def _parseSymbols(self):
        """
        Parse out the symbols that this elf binary has for us.
        """
        for sec in self.sections:
            if sec.sh_type != SHT_SYMTAB:
                continue
            symtab = self.readAtOffset(sec.sh_offset, sec.sh_size)
            while symtab:
                newsym = self._newStruct(Elf32Symbol, Elf64Symbol)
                newsym.vsParse(symtab)
                if newsym.st_name:
                    name = self.getStrtabString(newsym.st_name, ".strtab")
                    newsym.setName(name)
                self.addSymbol(newsym)
                symtab = symtab[len(newsym):]

    def _parseDynamic(self):
        """
        Parse the .dynsym symbols and the .dynamic entries.

        Nothing is parsed (not even .dynamic) when .dynsym is missing.
        """
        symtab = self.getSectionBytes('.dynsym')
        if symtab is None:
            return

        while symtab:
            newsym = self._newStruct(Elf32Symbol, Elf64Symbol)
            newsym.vsParse(symtab)
            if newsym.st_name:
                name = self.getStrtabString(newsym.st_name, ".dynstr")
                newsym.setName(name)
            # Unnamed entries are kept too: relocations index this list.
            self.dynamic_symbols.append(newsym)
            symtab = symtab[len(newsym):]

        dynbytes = self.getSectionBytes('.dynamic')
        while dynbytes:
            dyn = self._newStruct(Elf32Dynamic, Elf64Dynamic)
            dyn.vsParse(dynbytes)
            if dyn.d_tag in ElfDynamic.has_string:
                name = self.getStrtabString(dyn.d_value, ".dynstr")
                dyn.setName(name)

            self.dynamics.append(dyn)
            if dyn.d_tag == DT_NULL:  # Represents the end
                break
            dynbytes = dynbytes[len(dyn):]

    def _parseRelocs(self):
        """
        Parse all the relocation entries out of any sections with
        sh_type == SHT_REL or sh_type == SHT_RELA
        """
        for sec in self.sections:
            if sec.sh_type == SHT_REL:
                cls32, cls64 = Elf32Reloc, Elf64Reloc
            elif sec.sh_type == SHT_RELA:
                cls32, cls64 = Elf32Reloca, Elf64Reloca
            else:
                continue

            data = self.readAtOffset(sec.sh_offset, sec.sh_size)
            while data:
                reloc = self._newStruct(cls32, cls64, self.r_types)
                reloc.vsParse(data)
                index = reloc.getSymTabIndex()
                # Naming is best effort: report the problem and keep
                # the (unnamed) relocation rather than abort the parse.
                try:
                    sym = self.dynamic_symbols[index]
                    reloc.setName(sym.getName())
                except Exception:
                    traceback.print_exc()
                self.relocs.append(reloc)
                data = data[len(reloc):]

    def getBaseAddress(self):
        """
        For prelinked and main-exe elf binaries, return the
        value for the loaded base address...

        Shared objects that are not prelinked get a fixed 0x02000000.
        Otherwise the lowest non-zero p_vaddr is returned with its low
        12 bits cleared, or None when no program header has a non-zero
        p_vaddr.
        """
        # If it's a shared object and *not* prelinked,
        # we need to select a base address for it
        # FIXME find non-coliding addr in workspace
        if self.isSharedObject() and not self.isPreLinked():
            return 0x02000000

        # Find the best base address from the list of
        # program header addresses...
        vaddrs = [pgm.p_vaddr for pgm in self.getPheaders() if pgm.p_vaddr != 0]
        if not vaddrs:
            return None

        # ~0xfff keeps the high bits of 64 bit addresses intact.
        return min(vaddrs) & ~0xfff

    def readAtRva(self, rva, size):
        '''
        Calculate the file offset for the given RVA and
        read from it...
        '''
        return self.readAtOffset(self.rvaToOffset(rva), size)

    def rvaToOffset(self, rva):
        '''
        Convert an RVA for this ELF binary to a file offset.

        The RVA is compared directly against each PT_LOAD program
        header's p_vaddr (no base address is subtracted).  Raises
        Exception when no PT_LOAD header covers it.
        '''
        for pgm in self.pheaders:
            if pgm.p_type != PT_LOAD:
                continue
            start = pgm.p_vaddr
            if start <= rva < start + pgm.p_memsz:
                # We are inside this program header!
                return pgm.p_offset + (rva - start)

        raise Exception('No PT_LOAD program header contains RVA %s' % hex(rva))

    def readAtOffset(self, off, size):
        '''
        Read from the given file offset.
        '''
        self.fd.seek(off)
        return self.fd.read(size)

    def getSection(self, secname):
        """Return the section object named secname, or None."""
        return self.secnames.get(secname, None)

    def getSections(self):
        """
        Return the array of sections for this Elf
        """
        return list(self.sections)

    def getSectionBytes(self, secname):
        """Return the file contents of the named section, or None if absent."""
        sec = self.getSection(secname)
        if sec is None:
            return None
        return self.readAtOffset(sec.sh_offset, sec.sh_size)

    def getStrtabString(self, offset, section=".strtab"):
        """
        Return the NUL terminated string at offset inside the named
        string table section.

        The section is read from the file on every call.
        """
        sec = self.getSection(section)
        table = self.readAtOffset(sec.sh_offset, sec.sh_size)
        end = table.find("\x00", offset)
        if end == -1:
            # No terminator: take the rest rather than lose the last byte.
            return table[offset:]
        return table[offset:end]

    def getDynamics(self):
        '''
        Return a list of the dynamics.
        '''
        return list(self.dynamics)

    def getDynSyms(self):
        '''
        Return a list of dynamic symbol objects.

        This is the internal list itself, not a copy.
        '''
        return self.dynamic_symbols

    def getRelocs(self):
        '''
        Get the list of relocations.
        '''
        return list(self.relocs)

    def isPreLinked(self):
        '''
        Returns True if the Elf binary is prelinked.
        '''
        # FIXME check for exe vs .so (applies to DT_GNU_CONFLICTSZ)
        prelink_tags = (DT_GNU_PRELINKED, DT_GNU_CONFLICTSZ)
        for dyn in self.dynamics:
            if dyn.d_tag in prelink_tags:
                return True
        return False

    def isSharedObject(self):
        '''
        Returns true if the given Elf binary is a dynamically shared
        object.
        '''
        if self.e_type == ET_DYN:
            return True
        return False

    def __repr__(self, verbose=False):
        """
        Returns a string summary of this ELF: header fields, sections
        and program headers.

        verbose is accepted but ignored here; use verbrepr() for the
        Symbols, Relocs, Dynamics and Dynamic Symbol tables.
        """
        lines = [
            'Elf Binary:',
            "= Intimate Details:",
            "==Magic:\t\t\t\t" + self.e_ident,
            # Unknown codes fall back to a label instead of crashing
            # on str + None.
            "==Type:\t\t\t\t\t" + e_types.get(self.e_type, "Unknown"),
            "==Machine Arch:\t\t\t\t" + e_machine_types.get(self.e_machine, "Unknown"),
            "==Version:\t\t\t\t%d" % (self.e_version),
            "==Entry:\t\t\t\t0x%.8x" % (self.e_entry),
            "==Program Headers(offset):\t\t%d (0x%x) bytes" % (self.e_phoff, self.e_phoff),
            "==Section Headers(offset):\t\t%d (0x%x) bytes" % (self.e_shoff, self.e_shoff),
            "==Flags:\t\t\t\t" + repr(self.e_flags) + " ",
            "==Elf Header Size:\t\t\t" + repr(self.e_ehsize) + " (" + hex(self.e_ehsize) + " bytes)",
            "==Program Header Size:\t\t\t" + repr(self.e_phentsize) + " (" + hex(self.e_phentsize) + " bytes)",
            "==Program Header Count:\t\t\t" + repr(self.e_phnum) + " (" + hex(self.e_phnum) + ")",
            "==Section Header Size:\t\t\t" + repr(self.e_shentsize) + " (" + hex(self.e_shentsize) + " bytes)",
            "==Section Header Count:\t\t\t" + repr(self.e_shnum) + " (" + hex(self.e_shnum) + ")",
            "==Section Header String Index\t\t" + repr(self.e_shstrndx) + " (" + hex(self.e_shstrndx) + " bytes)",
        ]

        lines.append("")
        lines.append("= Sections:")
        lines.extend([repr(sec) for sec in self.sections])

        lines.append("")
        lines.append("= Program Headers:")
        lines.extend([repr(ph) for ph in self.pheaders])

        return "\n".join(lines)

    def verbrepr(self):
        """
        Return repr(self) followed by the symbol, relocation, dynamics
        and dynamic symbol tables.
        """
        lines = [repr(self)]
        tables = (
            ("= Symbols table:", self.symbols),
            ("= Relocation table:", self.relocs),
            ("= Dynamics table:", self.dynamics),
            ("= Dynamic Symbols table:", self.dynamic_symbols),
        )
        for title, entries in tables:
            lines.append("")
            lines.append(title)
            lines.extend([repr(entry) for entry in entries])
        return "\n".join(lines)

    def lookupSymbolName(self, name):
        """
        Lookup symbol entries in this elf binary by name.  The result is
        the symbol object registered under that name, or None if it's
        not found.
        """
        return self.symbols_by_name.get(name, None)

    def lookupSymbolAddr(self, address):
        """
        lookup symbols from this elf binary by address.
        This returns the symbol object registered at that st_value,
        or None for not found
        """
        return self.symbols_by_addr.get(address, None)

    def getPheaders(self):
        """
        Return a list of the program headers for this elf
        """
        return list(self.pheaders)

    def addSymbol(self, symbol):
        """
        Record symbol in the symbols list and in the by-name and
        by-address indexes (a later symbol with the same key replaces
        the earlier index entry).
        """
        self.symbols.append(symbol)
        self.symbols_by_name[symbol.getName()] = symbol
        self.symbols_by_addr[symbol.st_value] = symbol

    def getSymbols(self):
        """Return the internal list of symbols (not a copy)."""
        return self.symbols
639
def getRelocType(val):
    """Return the low 8 bits of a relocation info value."""
    type_mask = 0xff
    return val & type_mask
642
def getRelocSymTabIndex(val):
    """Return a relocation info value with its low 8 bits shifted away."""
    type_bits = 8
    return val >> type_bits
645