]>
Commit | Line | Data |
---|---|---|
53e6db90 DC |
1 | """ |
2 | ELF file parser. | |
3 | ||
4 | This provides a class ``ELFFile`` that parses an ELF executable through an | |
5 | interface similar to ``ZipFile``. Only the read interface is implemented. | |
6 | ||
7 | Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca | |
8 | ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html | |
9 | """ | |
10 | ||
11 | import enum | |
12 | import os | |
13 | import struct | |
14 | from typing import IO, Optional, Tuple | |
15 | ||
16 | ||
class ELFInvalid(ValueError):
    """Raised when the data being parsed is not a well-formed ELF file."""
19 | ||
20 | ||
class EIClass(enum.IntEnum):
    """
    File class (bitness) codes, as stored in the ELF identification bytes.
    """

    C32 = 1  # 32-bit objects.
    C64 = 2  # 64-bit objects.
24 | ||
25 | ||
class EIData(enum.IntEnum):
    """
    Data encoding (byte order) codes, as stored in the ELF identification
    bytes.
    """

    Lsb = 1  # Least significant byte first (little-endian).
    Msb = 2  # Most significant byte first (big-endian).
29 | ||
30 | ||
class EMachine(enum.IntEnum):
    """
    Architecture codes found in the ``e_machine`` field of the ELF header.

    Only a selection of architectures is listed here.
    """

    I386 = 3
    S390 = 22
    Arm = 40
    X8664 = 62
    # ``AArc64`` is a misspelling of AArch64. It is defined first so that it
    # remains the canonical member (``EMachine(183)`` still returns it) and
    # existing callers keep working.
    AArc64 = 183
    # Correctly spelled alias; being a duplicate value, it resolves to the
    # same member as ``AArc64`` and does not appear when iterating the enum.
    AArch64 = 183
37 | ||
38 | ||
class ELFFile:
    """
    Representation of an ELF executable.

    The constructor reads the ELF header from the current position of ``f``
    and validates it. The ``interpreter`` property later seeks to absolute
    offsets within ``f``, so the file object must stay open and be seekable
    for as long as that property is used.

    Public attributes set by the constructor:

    * ``capacity``: the class byte of the identification (1 for 32-bit,
      2 for 64-bit).
    * ``encoding``: the data-encoding byte of the identification (1 for LSB,
      2 for MSB).
    * ``machine``: the architecture type from the ELF header.
    * ``flags``: the processor-specific flags from the ELF header.
    """

    def __init__(self, f: IO[bytes]) -> None:
        """
        Parse the ELF header from ``f``.

        :param f: Binary file object positioned at the start of the ELF data.
        :raises ELFInvalid: If the identification bytes cannot be read, the
            magic number is wrong, the class/encoding pair is not recognized,
            or the remaining header fields cannot be read.
        """
        self._f = f

        try:
            ident = self._read("16B")
        except struct.error as e:
            raise ELFInvalid("unable to parse identification") from e
        magic = bytes(ident[:4])
        if magic != b"\x7fELF":
            raise ELFInvalid(f"invalid magic: {magic!r}")

        self.capacity = ident[4]  # Format for program header (bitness).
        self.encoding = ident[5]  # Data structure encoding (endianness).

        try:
            # e_fmt: Format for the ELF header fields following the 16
            #        identification bytes.
            # p_fmt: Format for a program header entry.
            # p_idx: Indexes to find p_type, p_offset, and p_filesz within an
            #        unpacked program header entry.
            e_fmt, self._p_fmt, self._p_idx = {
                (1, 1): ("<HHIIIIIHHH", "<IIIIIIII", (0, 1, 4)),  # 32-bit LSB.
                (1, 2): (">HHIIIIIHHH", ">IIIIIIII", (0, 1, 4)),  # 32-bit MSB.
                (2, 1): ("<HHIQQQIHHH", "<IIQQQQQQ", (0, 2, 5)),  # 64-bit LSB.
                (2, 2): (">HHIQQQIHHH", ">IIQQQQQQ", (0, 2, 5)),  # 64-bit MSB.
            }[(self.capacity, self.encoding)]
        except KeyError as e:
            raise ELFInvalid(
                f"unrecognized capacity ({self.capacity}) or "
                f"encoding ({self.encoding})"
            ) from e

        try:
            (
                _,
                self.machine,  # Architecture type.
                _,
                _,
                self._e_phoff,  # Offset of program header table.
                _,
                self.flags,  # Processor-specific flags.
                _,
                self._e_phentsize,  # Size of one program header entry.
                self._e_phnum,  # Number of program header entries.
            ) = self._read(e_fmt)
        except struct.error as e:
            raise ELFInvalid("unable to parse machine and section information") from e

    def _read(self, fmt: str) -> Tuple[int, ...]:
        """
        Read and unpack ``struct.calcsize(fmt)`` bytes from the file.

        A short read makes ``struct.unpack`` raise ``struct.error``, which
        callers use to detect truncated data.
        """
        return struct.unpack(fmt, self._f.read(struct.calcsize(fmt)))

    @property
    def interpreter(self) -> Optional[str]:
        """
        The path recorded in the ``PT_INTERP`` program header.

        Program header entries are scanned in order and the first entry of
        type ``PT_INTERP`` wins. Entries that cannot be read in full are
        skipped. Returns ``None`` if no such entry is found.
        """
        type_idx, offset_idx, filesz_idx = self._p_idx
        for index in range(self._e_phnum):
            self._f.seek(self._e_phoff + self._e_phentsize * index)
            try:
                data = self._read(self._p_fmt)
            except struct.error:
                continue
            if data[type_idx] != 3:  # Not PT_INTERP.
                continue
            self._f.seek(data[offset_idx])
            # The stored path is NUL-terminated; drop the terminator.
            return os.fsdecode(self._f.read(data[filesz_idx])).strip("\0")
        return None