Coverage for src/debputy/elf_util.py: 76%
101 statements
« prev ^ index » next coverage.py v7.8.2, created at 2025-10-12 15:06 +0000
« prev ^ index » next coverage.py v7.8.2, created at 2025-10-12 15:06 +0000
1import io
2import os
3import struct
4from typing import List, Optional, Tuple
5from collections.abc import Callable, Iterable
7from debputy.filesystem_scan import FSPath
8from debputy.plugin.api import VirtualPath
# Minimum number of bytes that must be available from a file before it is
# considered as a 32-bit / 64-bit ELF candidate by the checks in this module.
# NOTE(review): both values are larger than the fixed-size ELF header struct
# described below - confirm what the additional bytes are meant to cover.
ELF_HEADER_SIZE32 = 136
ELF_HEADER_SIZE64 = 232
# The four bytes every accepted file must start with.
ELF_MAGIC = b"\x7fELF"
# The only `e_version` value accepted by this module.
ELF_VERSION = 0x00000001
# Values of the byte at offset 5 of `e_ident` (byte order of the file).
ELF_ENDIAN_LE = 0x01
ELF_ENDIAN_BE = 0x02
# The `e_type` values accepted by this module.
ELF_TYPE_EXECUTABLE = 0x0002
ELF_TYPE_SHARED_OBJECT = 0x0003

# Tri-state used by callers to request a given linking type; `None` means
# "do not check the linking type at all".
ELF_LINKING_TYPE_ANY = None
ELF_LINKING_TYPE_DYNAMIC = True
ELF_LINKING_TYPE_STATIC = False

# Values of the byte at offset 4 of `e_ident` (32-bit vs. 64-bit layout).
ELF_EI_ELFCLASS32 = 1
ELF_EI_ELFCLASS64 = 2

# `p_type` value of the program header whose presence marks the file as
# dynamically linked.
ELF_PT_DYNAMIC = 2

# Size of `e_ident`, which is also the offset of `e_type` in the ELF header.
ELF_EI_NIDENT = 0x10
30# ELF header format:
31# typedef struct {
32# unsigned char e_ident[EI_NIDENT]; # <-- 16 / 0x10 bytes
33# uint16_t e_type;
34# uint16_t e_machine;
35# uint32_t e_version;
36# ElfN_Addr e_entry;
37# ElfN_Off e_phoff;
38# ElfN_Off e_shoff;
39# uint32_t e_flags;
40# uint16_t e_ehsize;
41# uint16_t e_phentsize;
42# uint16_t e_phnum;
43# uint16_t e_shentsize;
44# uint16_t e_shnum;
45# uint16_t e_shstrndx;
46# } ElfN_Ehdr;
class IncompleteFileError(RuntimeError):
    """Signal that a file ended before all of the expected bytes were read."""
def is_so_or_exec_elf_file(
    path: VirtualPath,
    *,
    assert_linking_type: bool | None = ELF_LINKING_TYPE_ANY,
) -> bool:
    """Tell whether *path* is an ELF executable or shared object.

    :param path: The file to inspect.
    :param assert_linking_type: With `ELF_LINKING_TYPE_ANY` (the default),
      the linking type is not examined. With `ELF_LINKING_TYPE_DYNAMIC` or
      `ELF_LINKING_TYPE_STATIC`, the file must additionally have exactly
      that linking type.
    :return: `True` if the file was recognised and satisfies the linking
      requirement, `False` otherwise.
    """
    # Only pay for the program header scan when the caller cares about it.
    linking_type_matters = assert_linking_type is not None
    looks_like_elf, detected_linking_type = _read_elf_file(
        path,
        determine_linking_type=linking_type_matters,
    )
    if not looks_like_elf:
        return False
    if assert_linking_type is ELF_LINKING_TYPE_ANY:
        return True
    return assert_linking_type == detected_linking_type
def _read_elf_file(
    path: VirtualPath,
    *,
    determine_linking_type: bool = False,
) -> tuple[bool, bool | None]:
    """Read the start of *path* and decide whether it is an accepted ELF file.

    A file is accepted when it starts with the ELF magic, declares a known
    byte order and ELF class, has the expected `e_version` and its `e_type`
    is either executable or shared object.

    :param path: The file to inspect.
    :param determine_linking_type: When true, the program headers are
      examined as well to classify the file as dynamically or statically
      linked. A file for which that cannot be determined is rejected.
    :return: A tuple `(is_elf, linking_type)`. The `linking_type` is `None`
      unless the file was accepted and `determine_linking_type` was true.
    """
    not_elf: tuple[bool, bool | None] = (False, None)
    header = bytearray(4096)
    fd: io.BufferedReader
    with path.open(byte_io=True, buffering=io.DEFAULT_BUFFER_SIZE) as fd:
        bytes_read = fd.readinto(header)
        if bytes_read < ELF_HEADER_SIZE32 or not header.startswith(ELF_MAGIC):
            return not_elf

        # e_ident[5] holds the byte order; it decides how every multi-byte
        # field after e_ident is decoded.
        endian = {ELF_ENDIAN_LE: "<", ELF_ENDIAN_BE: ">"}.get(header[5])
        if endian is None:
            return not_elf

        # e_ident[4] holds the ELF class, which decides the width of the
        # address/offset fields in the header.
        ei_class = header[4]
        if ei_class == ELF_EI_ELFCLASS64:
            # A 64-bit file must provide enough data for the larger header.
            if bytes_read < ELF_HEADER_SIZE64:
                return not_elf
            offset_size = "Q"
        elif ei_class == ELF_EI_ELFCLASS32:
            offset_size = "L"
        else:
            return not_elf

        elf_type, _machine, elf_version = struct.unpack_from(
            f"{endian}HHL", header, offset=ELF_EI_NIDENT
        )
        if elf_version != ELF_VERSION or elf_type not in (
            ELF_TYPE_EXECUTABLE,
            ELF_TYPE_SHARED_OBJECT,
        ):
            return not_elf

        if not determine_linking_type:
            return True, None

        linking_type = _determine_elf_linking_type(fd, header, endian, offset_size)
        if linking_type is None:
            return not_elf
        return True, linking_type
def _determine_elf_linking_type(fd, fd_buffer, endian, offset_size) -> bool | None:
    """Classify an ELF file as dynamically or statically linked.

    The file counts as dynamically linked when one of its program headers
    has the type `ELF_PT_DYNAMIC`; otherwise static linking is assumed.

    :param fd: The open file. Its position is moved to the program header
      table and advanced while the table is read.
    :param fd_buffer: Buffer holding the beginning of the file, including
      the ELF header.
    :param endian: The `struct` byte order prefix for the file.
    :param offset_size: The `struct` format character for the
      address/offset fields of the ELF header.
    :return: `ELF_LINKING_TYPE_DYNAMIC`, `ELF_LINKING_TYPE_STATIC`, or `None`
      when the program header table is implausible or truncated.
    """
    # Decode the header fields that follow e_version (offset 0x18):
    #   e_entry, e_phoff, e_shoff, e_flags, e_ehsize, e_phentsize, e_phnum
    # Only e_phoff, e_phentsize and e_phnum are of interest here.
    header_tail = struct.unpack_from(
        f"{endian}{offset_size}{offset_size}{offset_size}LHHH",
        fd_buffer,
        offset=ELF_EI_NIDENT + 8,
    )
    e_phoff = header_tail[1]
    e_phentsize = header_tail[5]
    e_phnum = header_tail[6]

    # man 5 elf suggests that program headers can be absent, in which case
    # e_phnum is zero. Without program headers there is no dynamic segment.
    if e_phnum == 0:
        return ELF_LINKING_TYPE_STATIC

    # The p_type field alone takes 4 bytes; an entry smaller than that means
    # that the file is corrupt at best.
    if e_phentsize < 4:
        return None

    fd.seek(e_phoff, os.SEEK_SET)
    read_p_type = struct.Struct(f"{endian}L").unpack_from
    try:
        has_dynamic_segment = any(
            read_p_type(raw_program_header)[0] == ELF_PT_DYNAMIC
            for raw_program_header in _read_bytes_iteratively(
                fd, e_phentsize, e_phnum
            )
        )
    except IncompleteFileError:
        # The file ended before the announced program header table did.
        return None

    if has_dynamic_segment:
        return ELF_LINKING_TYPE_DYNAMIC
    return ELF_LINKING_TYPE_STATIC
def _read_bytes_iteratively(
    fd: io.BufferedReader,
    object_size: int,
    object_count: int,
) -> Iterable[bytes]:
    """Yield `object_count` consecutive records of `object_size` bytes from *fd*.

    Reading starts at the current position of *fd*. Every record is yielded
    as its own immutable `bytes` object, so records stay valid when the
    caller keeps them around while iterating further (for example via
    `list(...)`).

    :param fd: The binary stream to read from.
    :param object_size: The size of each record in bytes.
    :param object_count: The number of records to read.
    :return: An iterable over the records in file order. Nothing is yielded
      when the total amount of bytes to read is not positive.
    :raises IncompleteFileError: If the stream runs out of data before all
      records were read. Records before the short read are still yielded.
    """
    bytes_remaining = object_size * object_count
    # FIXME: improve this to read larger chunks and yield them one-by-one
    while bytes_remaining > 0:
        record = fd.read(object_size)
        # A short (or missing) read means the file is truncated; never hand
        # out a partial record.
        if record is None or len(record) != object_size:
            raise IncompleteFileError()
        bytes_remaining -= object_size
        yield record
def find_all_elf_files(
    fs_root: VirtualPath,
    *,
    walk_filter: Callable[[VirtualPath, list[VirtualPath]], bool] | None = None,
    with_linking_type: bool | None = ELF_LINKING_TYPE_ANY,
) -> list[VirtualPath]:
    """Collect every ELF executable or shared object found below *fs_root*.

    :param fs_root: The root of the tree to walk. It must be an `FSPath`.
    :param walk_filter: Optional callback invoked with each visited path and
      its children. A path is only considered when the callback returns a
      true value for it.
    :param with_linking_type: Passed on as the `assert_linking_type` of
      `is_so_or_exec_elf_file`; use it to restrict the result to
      dynamically or statically linked files.
    :return: The matching paths in the order in which the walk visited them.
    """
    # FIXME: Implementation detail that fs_root is always `FSPath` and has `.walk()`
    assert isinstance(fs_root, FSPath)

    def _is_wanted(candidate: VirtualPath, children: list[VirtualPath]) -> bool:
        if walk_filter is not None and not walk_filter(candidate, children):
            return False
        # Files smaller than the minimum header size are rejected without
        # opening them.
        if not candidate.is_file or candidate.size < ELF_HEADER_SIZE32:
            return False
        return is_so_or_exec_elf_file(
            candidate,
            assert_linking_type=with_linking_type,
        )

    return [
        path for path, children in fs_root.walk() if _is_wanted(path, children)
    ]