1 """
2 Kenshoto's Elf parser
3
4 This package will let you use programmatic ninja-fu
5 when trying to parse Elf binaries. The API is based
6 around several objects representing constructs in the
7 Elf binary format. The Elf object itself contains
8 parsed metadata and lists of things like section headers
9 and relocation entries. Additionally, most of the
10 objects implement repr() in some form or another which
11 allows you a bunch of readelf-like functionality.
12
13 *Eventually* this API will allow you to modify Elf binaries
14 and spit them back out in working order (not complete, you
15 may notice some of the initial code).
16
17 Send bug reports to Invisigoth or Metr0.
18
19 """
20
21 import os
22 import sys
23 import struct
24 import traceback
25 import zlib
26
27 from stat import *
28 from Elf.elf_lookup import *
29 import vstruct
30 import vstruct.defs.elf as vs_elf
31
32 verbose = False
33
35 """
36 Elf relocation entries consist mostly of "fixup" addresses which
37 are taken care of by the loader at runtime. Things like
38 GOT entries, PLT jmp codes etc all have an Elf relocation
39 entry.
40 """
41
43 self.name = ""
44 self.r_types = r_types
45
48
51
54
def getType(self):
    """
    Return the relocation type number, taken here as the low
    8 bits of r_info.
    """
    type_mask = 0xff
    return self.r_info & type_mask
57
59 return self.r_types.get(self.getType(),"")
60
65
67 return self.r_info >> 8
68
73
75 return self.r_info >> 8
76
81
83 return self.r_info >> 32
84
89
91 return self.r_info >> 32
92
94 has_string = [DT_NEEDED,DT_SONAME]
95 """
96 An object to represent an Elf dynamic entry.
97 (linker/loader directives)
98 """
99
102
104 name = self.getName()
105 if not name:
106 name = hex(self.d_value)
107 return "%s %s" % (name,self.getTypeName())
108
111
114
116 return dt_types.get(self.d_tag,"Unknown: %s"%hex(self.d_tag))
117
122
127
131
133 return self.st_info & 0xf
134
136 return self.st_info >> 4
137
139 if self.st_value > other.st_value:
140 return 1
141 return -1
142
145
148
150 return "0x%.8x %d %s" % (self.st_value, self.st_size, self.name)
151
156
161
163
166
168 return ph_types.get(self.p_type, "Unknown")
169
171 return '[%35s] VMA: 0x%.8x offset: %8d memsize: %8d align: %8d (filesz: %8d) flags: %x' % (
172 self.getTypeName(),
173 self.p_vaddr,
174 self.p_offset,
175 self.p_memsz,
176 self.p_align,
177 self.p_filesz,
178 self.p_flags)
179
184
189
193
196
199
201 return 'Elf Section: [%20s] VMA: 0x%.8x offset: %8d ent/size: %8d/%8d align: %8d' % (
202 self.name,
203 self.sh_addr,
204 self.sh_offset,
205 self.sh_entsize,
206 self.sh_size,
207 self.sh_addralign)
208
213
218
219 -class Elf(vs_elf.Elf32, vs_elf.Elf64):
220
222
223
224
225 e = vs_elf.Elf32()
226 fd.seek(0)
227 bytes = fd.read(len(e))
228 e.vsParse(bytes)
229 if e.e_machine in e_machine_32:
230 vs_elf.Elf32.__init__(self)
231 self.bits = 32
232 self.psize = 4
233 elif e.e_machine in e_machine_64:
234 vs_elf.Elf64.__init__(self)
235 self.bits = 64
236 self.psize = 8
237 else:
238 raise Exception('Unrecognized e_machine: %d' % e.e_machine)
239
240 self.fd = fd
241
242 bytes = self.readAtOffset(0, len(self))
243 self.vsParse(bytes)
244
245 if self.e_machine == EM_386:
246 self.r_types = r_types_386
247 elif self.e_machine == EM_X86_64:
248 self.r_types = r_types_amd64
249 else:
250 self.r_types = {}
251
252 self.pheaders = []
253 self.sections = []
254 self.secnames = {}
255 self.symbols = []
256 self.relocs = []
257 self.symbols_by_name = {}
258 self.symbols_by_addr = {}
259 self.dynamics = []
260 self.dynamic_symbols = []
261
262 self._parsePheaders()
263 self._parseSections()
264 self._parseSymbols()
265 self._parseDynamic()
266 self._parseRelocs()
267
269 '''
270 Because relocation type names are decided based on the
271 arch, only the Elf knows for sure...
272 '''
273 return self.r_types.get(rtype)
274
276
def _parsePheaders(self):
    """
    Read the program header table into self.pheaders.

    Does nothing when e_phoff is zero.  Otherwise e_phnum entries of
    e_phentsize bytes each are read starting at e_phoff, parsed into
    Elf32Pheader / Elf64Pheader objects (chosen by self.bits) and
    appended in file order.

    Raises Exception if self.bits is neither 32 nor 64.
    """
    if not self.e_phoff:
        return

    header_types = {32: Elf32Pheader, 64: Elf64Pheader}
    entry_offset = self.e_phoff
    entry_size = self.e_phentsize
    for _ in range(self.e_phnum):
        header_cls = header_types.get(self.bits)
        if header_cls is None:
            raise Exception('Platform not supported: %d' % (self.bits))

        header = header_cls()
        header.vsParse(self.readAtOffset(entry_offset, entry_size))
        self.pheaders.append(header)

        # Entries are laid out back to back, e_phentsize apart.
        entry_offset += entry_size
293
295
def _parseSections(self):
    """
    Read the section header table into self.sections and index the
    named sections in self.secnames.

    When e_shoff is non-zero, e_shnum entries of e_shentsize bytes
    each are read starting at e_shoff and parsed into Elf32Section /
    Elf64Section objects (chosen by self.bits).  Section names are
    then resolved from the section selected by e_shstrndx; sections
    whose name is empty are left unnamed and are not added to
    self.secnames.

    If e_shstrndx does not refer to one of the parsed sections (for
    example because there is no section header table at all) the
    name resolution step is skipped instead of raising IndexError.

    Raises Exception if self.bits is neither 32 nor 64.
    """
    if self.e_shoff:
        entry_offset = self.e_shoff
        # The entry size comes from the header rather than being assumed.
        entry_size = self.e_shentsize
        for _ in range(self.e_shnum):
            if self.bits == 32:
                sec = Elf32Section()
            elif self.bits == 64:
                sec = Elf64Section()
            else:
                raise Exception('Platform not supported: %d' % (self.bits))
            sec.vsParse(self.readAtOffset(entry_offset, entry_size))
            self.sections.append(sec)
            entry_offset += entry_size

    # Without a usable string-table section there is nothing to name.
    if self.e_shstrndx >= len(self.sections):
        return

    strsec = self.sections[self.e_shstrndx]
    names = self.readAtOffset(strsec.sh_offset, strsec.sh_size)
    for sec in self.sections:
        start = sec.sh_name
        # Locate the terminator in place rather than copying the
        # remainder of the table for every section.
        end = names.find("\x00", start)
        if end == -1:
            end = len(names)
        name = names[start:end]
        if name:
            sec.setName(name)
            self.secnames[name] = sec
321
323 """
324 Parse out the symbols that this elf binary has for us.
325 """
326 for sec in self.sections:
327 if sec.sh_type == SHT_SYMTAB:
328 symtab = self.readAtOffset(sec.sh_offset, sec.sh_size)
329 while symtab:
330 if self.bits == 32:
331 newsym = Elf32Symbol()
332 elif self.bits == 64:
333 newsym = Elf64Symbol()
334 else:
335 raise Exception('Platform not supported: %d' % (self.bits))
336
337 newsym.vsParse(symtab)
338
339 if newsym.st_name:
340 name = self.getStrtabString(newsym.st_name, ".strtab")
341 newsym.setName(name)
342 self.addSymbol(newsym)
343 symtab = symtab[len(newsym):]
344
381
383 """
384 Parse all the relocation entries out of any sections with
385 sh_type == SHT_REL
386 """
387 for sec in self.sections:
388 if sec.sh_type == SHT_REL:
389 bytes = self.readAtOffset(sec.sh_offset, sec.sh_size)
390 while bytes:
391 if self.bits == 32:
392 reloc = Elf32Reloc(self.r_types)
393 elif self.bits == 64:
394 reloc = Elf64Reloc(self.r_types)
395 else:
396 raise Exception('Platform not supported: %d' % (self.bits))
397 reloc.vsParse(bytes)
398 index = reloc.getSymTabIndex()
399 try:
400 sym = self.dynamic_symbols[index]
401 reloc.setName(sym.getName())
402 except:
403 traceback.print_exc()
404 self.relocs.append(reloc)
405 bytes = bytes[len(reloc):]
406
407 elif sec.sh_type == SHT_RELA:
408 bytes = self.readAtOffset(sec.sh_offset, sec.sh_size)
409 while bytes:
410 if self.bits == 32:
411 reloc = Elf32Reloca(self.r_types)
412 elif self.bits == 64:
413 reloc = Elf64Reloca(self.r_types)
414 else:
415 raise Exception('Platform not supported: %d' % (self.bits))
416 reloc.vsParse(bytes)
417 index = reloc.getSymTabIndex()
418 try:
419 sym = self.dynamic_symbols[index]
420 reloc.setName(sym.getName())
421 except:
422 traceback.print_exc()
423 self.relocs.append(reloc)
424 bytes = bytes[len(reloc):]
425
427 """
428 For prelinked and main-exe elf binaries, return the
429 value for the loaded base address...
430 """
431 shrd = self.isSharedObject()
432 plnk = self.isPreLinked()
433
434
435
436
437 if shrd and not plnk:
438 return 0x02000000
439
440
441
442 base = None
443 for pgm in self.getPheaders():
444
445 if pgm.p_vaddr == 0:
446 continue
447
448 if base == None:
449 base = pgm.p_vaddr
450 continue
451
452 if pgm.p_vaddr < base:
453 base = pgm.p_vaddr
454
455 base &= 0xfffff000
456
457 return base
458
460 '''
461 Calculate the file offset for the given RVA and
462 read from it...
463 '''
464 return self.readAtOffset(self.rvaToOffset(rva), size)
465
def rvaToOffset(self, rva):
    '''
    Convert an RVA for this ELF binary to a file offset.

    The PT_LOAD program headers are searched, in order, for the
    first one whose [p_vaddr, p_vaddr + p_memsz) range contains rva;
    the result is that header's p_offset plus the distance of rva
    from the start of the range.

    Raises Exception if no PT_LOAD program header contains rva.
    '''
    # RVAs are taken relative to a base of 0, so they compare
    # directly against p_vaddr.
    baseaddr = 0

    for pgm in self.pheaders:
        if pgm.p_type != PT_LOAD:
            continue

        phrva = pgm.p_vaddr - baseaddr
        # NOTE(review): the range uses p_memsz, so an rva past p_filesz
        # maps to an offset beyond the segment's file data - confirm
        # that this is what callers expect.
        if phrva <= rva < phrva + pgm.p_memsz:
            return pgm.p_offset + (rva - phrva)

    raise Exception('RVA %s is not inside any PT_LOAD segment' % hex(rva))
491
def readAtOffset(self, off, size):
    '''
    Seek the underlying file object to off and return whatever
    a read of size bytes from there yields.
    '''
    handle = self.fd
    handle.seek(off)
    data = handle.read(size)
    return data
498
def getSection(self, secname):
    """
    Look up a section by name in self.secnames.  Returns None
    when no section was recorded under that name.
    """
    return self.secnames.get(secname)
501
503 """
504 Return the array of sections for this Elf
505 """
506 return list(self.sections)
507
509 sec = self.getSection(secname)
510 if sec == None:
511 return None
512 return self.readAtOffset(sec.sh_offset, sec.sh_size)
513
def getStrtabString(self, offset, section=".strtab"):
    """
    Return the NUL-terminated string that starts at offset inside
    the named string-table section.

    The terminating NUL is not included.  If no NUL follows offset,
    everything from offset to the end of the section is returned
    (the last character is no longer dropped in that case).
    """
    sec = self.getSection(section)
    strtab = self.readAtOffset(sec.sh_offset, sec.sh_size)
    end = strtab.find("\x00", offset)
    if end == -1:
        # find() reports "not found" as -1, which used as a slice end
        # would silently cut off the final character.
        end = len(strtab)
    return strtab[offset:end]
519
520
522 '''
523 Return a list of the dynamics.
524 '''
525 return list(self.dynamics)
526
528 '''
529 Return a list of dynamic symbol objects.
530 '''
531 return self.dynamic_symbols
532
534 '''
535 Get the list of relocations.
536 '''
537 return list(self.relocs)
538
def isPreLinked(self):
    '''
    Report whether this Elf looks prelinked: True as soon as any
    entry in self.dynamics carries a DT_GNU_PRELINKED or
    DT_GNU_CONFLICTSZ tag, False otherwise.
    '''
    prelink_tags = (DT_GNU_PRELINKED, DT_GNU_CONFLICTSZ)
    return any(dyn.d_tag in prelink_tags for dyn in self.dynamics)
551
def isSharedObject(self):
    '''
    Report whether this Elf is a dynamically shared object,
    i.e. whether its e_type is ET_DYN.
    '''
    return bool(self.e_type == ET_DYN)
560
def __repr__(self):
    """
    Return a multi-line string summary of this ELF: the header
    fields, then one line per section and one line per program
    header.

    An e_type or e_machine value that is missing from the e_types /
    e_machine_types tables is shown as "Unknown: <hex value>"
    instead of raising TypeError.
    """
    type_name = e_types.get(self.e_type, "Unknown: %s" % hex(self.e_type))
    machine_name = e_machine_types.get(self.e_machine, "Unknown: %s" % hex(self.e_machine))

    # Collect the pieces and join once instead of growing a string.
    parts = [
        'Elf Binary:',
        "\n= Intimate Details:",
        "\n==Magic:\t\t\t\t" + self.e_ident,
        "\n==Type:\t\t\t\t\t" + type_name,
        "\n==Machine Arch:\t\t\t\t" + machine_name,
        "\n==Version:\t\t\t\t%d" % (self.e_version),
        "\n==Entry:\t\t\t\t0x%.8x" % (self.e_entry),
        "\n==Program Headers(offset):\t\t%d (0x%x) bytes" % (self.e_phoff, self.e_phoff),
        "\n==Section Headers(offset):\t\t%d (0x%x) bytes" % (self.e_shoff, self.e_shoff),
        "\n==Flags:\t\t\t\t" + repr(self.e_flags) + " ",
        "\n==Elf Header Size:\t\t\t" + repr(self.e_ehsize) + " (" + hex(self.e_ehsize) + " bytes)",
        "\n==Program Header Size:\t\t\t" + repr(self.e_phentsize) + " (" + hex(self.e_phentsize) + " bytes)",
        "\n==Program Header Count:\t\t\t" + repr(self.e_phnum) + " (" + hex(self.e_phnum) + ")",
        "\n==Section Header Size:\t\t\t" + repr(self.e_shentsize) + " (" + hex(self.e_shentsize) + " bytes)",
        "\n==Section Header Count:\t\t\t" + repr(self.e_shnum) + " (" + hex(self.e_shnum) + ")",
        "\n==Section Header String Index\t\t" + repr(self.e_shstrndx) + " (" + hex(self.e_shstrndx) + " bytes)",
    ]

    parts.append("\n\n= Sections:")
    for sec in self.sections:
        parts.append("\n" + repr(sec))

    parts.append("\n\n= Program Headers:")
    for ph in self.pheaders:
        parts.append("\n" + repr(ph))

    return "".join(parts)
589
591 mystr = repr(self)
592
593 mystr+= "\n\n= Symbols table:"
594 for sym in self.symbols:
595 mystr+= "\n"+repr(sym)
596
597 mystr+= "\n\n= Relocation table:"
598 for reloc in self.relocs:
599 mystr+= "\n"+repr(reloc)
600
601 mystr+= "\n\n= Dynamics table:"
602 for dyn in self.dynamics:
603 mystr+= "\n"+repr(dyn)
604
605 mystr+= "\n\n= Dynamic Symbols table:"
606 for dyn in self.dynamic_symbols:
607 mystr+= "\n"+repr(dyn)
608
609 return mystr
610
612 """
613 Lookup symbol entries in this elf binary by name. The result is
614 a long representing the address for the given symbol. Or None if
615 it's not found.
616 """
617 return self.symbols_by_name.get(name, None)
618
620 """
621 lookup symbols from this elf binary by address.
622 This returns the name for the given symbol or None for not found
623 """
624 return self.symbols_by_addr.get(address, None)
625
627 """
628 Return a list of the program headers for this elf
629 """
630 return list(self.pheaders)
631
def addSymbol(self, symbol):
    """
    Record a symbol: append it to self.symbols and index it in
    self.symbols_by_name (under symbol.getName()) and in
    self.symbols_by_addr (under symbol.st_value).  An existing
    entry under the same key is overwritten.
    """
    self.symbols.append(symbol)

    name = symbol.getName()
    self.symbols_by_name[name] = symbol

    address = symbol.st_value
    self.symbols_by_addr[address] = symbol
636
639
642
645