Coverage for src/debputy/elf_util.py: 76%

101 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2025-10-12 15:06 +0000

1import io 

2import os 

3import struct 

4from typing import List, Optional, Tuple 

5from collections.abc import Callable, Iterable 

6 

7from debputy.filesystem_scan import FSPath 

8from debputy.plugin.api import VirtualPath 

9 

# Minimum number of bytes a file must provide before it is considered at all
# (32-bit class), respectively before a 64-bit class file is accepted.
ELF_HEADER_SIZE32 = 136
ELF_HEADER_SIZE64 = 232

# The first four bytes of every ELF file.
ELF_MAGIC = b"\x7fELF"

# Offsets 4 and 5 of e_ident are compared against these values.
ELF_EI_ELFCLASS32 = 1
ELF_EI_ELFCLASS64 = 2
ELF_ENDIAN_LE = 0x01
ELF_ENDIAN_BE = 0x02

# Length of the e_ident array; the fixed-width header fields start right after.
ELF_EI_NIDENT = 16

# Accepted values for e_version and e_type.
ELF_VERSION = 1
ELF_TYPE_EXECUTABLE = 0x0002
ELF_TYPE_SHARED_OBJECT = 0x0003

# Program header type (p_type) that marks a dynamically linked file.
ELF_PT_DYNAMIC = 2

# Tri-state used by callers to select files by linking type.
ELF_LINKING_TYPE_ANY = None
ELF_LINKING_TYPE_DYNAMIC = True
ELF_LINKING_TYPE_STATIC = False

29 

# ELF header format:
# typedef struct {
#     unsigned char e_ident[EI_NIDENT];  # <-- 16 / 0x10 bytes
#     uint16_t      e_type;
#     uint16_t      e_machine;
#     uint32_t      e_version;
#     ElfN_Addr     e_entry;
#     ElfN_Off      e_phoff;
#     ElfN_Off      e_shoff;
#     uint32_t      e_flags;
#     uint16_t      e_ehsize;
#     uint16_t      e_phentsize;
#     uint16_t      e_phnum;
#     uint16_t      e_shentsize;
#     uint16_t      e_shnum;
#     uint16_t      e_shstrndx;
# } ElfN_Ehdr;

47 

48 

class IncompleteFileError(RuntimeError):
    """Raised when a file ends before all expected bytes could be read."""

51 

52 

def is_so_or_exec_elf_file(
    path: VirtualPath,
    *,
    assert_linking_type: bool | None = ELF_LINKING_TYPE_ANY,
) -> bool:
    """Tell whether ``path`` is an ELF executable or shared object.

    :param path: The file to inspect.
    :param assert_linking_type: When not ``ELF_LINKING_TYPE_ANY``, the file
      must additionally have this linking type (``ELF_LINKING_TYPE_DYNAMIC``
      or ``ELF_LINKING_TYPE_STATIC``) to be accepted.
    :return: ``True`` if the file is accepted, ``False`` otherwise.
    """
    # The program headers are only examined when the caller cares about the
    # linking type.
    must_check_linking = assert_linking_type is not None
    is_elf, linking_type = _read_elf_file(
        path,
        determine_linking_type=must_check_linking,
    )
    if not is_elf:
        return False
    if assert_linking_type is ELF_LINKING_TYPE_ANY:
        return True
    return assert_linking_type == linking_type

66 

67 

def _read_elf_file(
    path: VirtualPath,
    *,
    determine_linking_type: bool = False,
) -> tuple[bool, bool | None]:
    """Classify ``path`` by reading the start of the file.

    At most 4096 bytes are read up front; the ELF identification bytes and
    the ``e_type``/``e_version`` fields are validated from that buffer.

    :param path: The file to inspect.
    :param determine_linking_type: If true, also examine the program headers
      to decide whether the file is dynamically or statically linked.
    :return: A pair ``(is_elf, linking_type)``. ``is_elf`` is true only for
      ELF executables and shared objects. ``linking_type`` is ``None`` unless
      ``determine_linking_type`` was requested. If it was requested but could
      not be determined, the result is ``(False, None)``.
    """
    not_elf = (False, None)
    byte_order_by_ei_data = {ELF_ENDIAN_LE: "<", ELF_ENDIAN_BE: ">"}
    header = bytearray(4096)
    linking_type = None
    fd: io.BufferedReader
    with path.open(byte_io=True, buffering=io.DEFAULT_BUFFER_SIZE) as fd:
        bytes_read = fd.readinto(header)
        if not header or bytes_read < ELF_HEADER_SIZE32:
            return not_elf
        if not header.startswith(ELF_MAGIC):
            return not_elf

        # Byte 4 of e_ident selects the class, byte 5 the byte order.
        ei_class, ei_data = header[4], header[5]

        endian = byte_order_by_ei_data.get(ei_data)
        if endian is None:
            return not_elf

        if ei_class == ELF_EI_ELFCLASS32:
            offset_size = "L"
        elif ei_class == ELF_EI_ELFCLASS64:
            # A 64-bit ELF must provide enough bytes for the larger header.
            if bytes_read < ELF_HEADER_SIZE64:
                return not_elf
            offset_size = "Q"
        else:
            return not_elf

        # e_type, e_machine and e_version follow directly after e_ident.
        elf_type, _elf_machine, elf_version = struct.unpack_from(
            f"{endian}HHL", header, offset=ELF_EI_NIDENT
        )
        if elf_version != ELF_VERSION:
            return not_elf
        if elf_type not in (ELF_TYPE_EXECUTABLE, ELF_TYPE_SHARED_OBJECT):
            return not_elf

        if determine_linking_type:
            # Needs the still-open file to seek to the program header table.
            linking_type = _determine_elf_linking_type(
                fd, header, endian, offset_size
            )
            if linking_type is None:
                return not_elf

    return True, linking_type

122 

123 

def _determine_elf_linking_type(fd, fd_buffer, endian, offset_size) -> bool | None:
    """Decide whether an ELF file is dynamically or statically linked.

    The file counts as dynamically linked when one of its program headers has
    type ``ELF_PT_DYNAMIC``; otherwise static linking is assumed.

    :param fd: The open binary file; it is repositioned to the program header
      table.
    :param fd_buffer: Buffer holding the already-read start of the file.
    :param endian: ``struct`` byte order prefix (``"<"`` or ``">"``).
    :param offset_size: ``struct`` format character used for the address and
      offset fields (``"L"`` or ``"Q"``).
    :return: ``ELF_LINKING_TYPE_DYNAMIC``, ``ELF_LINKING_TYPE_STATIC``, or
      ``None`` when the program headers are unusable or the file is truncated.
    """
    # Fields unpacked from offset 0x18, in order:
    #   e_entry, e_phoff, e_shoff, e_flags, e_ehsize, e_phentsize, e_phnum
    # Only e_phoff, e_phentsize and e_phnum are needed here.
    header_fields = struct.unpack_from(
        f"{endian}{offset_size * 3}LHHH",
        fd_buffer,
        offset=ELF_EI_NIDENT + 8,
    )
    e_phoff = header_fields[1]
    e_phentsize = header_fields[5]
    e_phnum = header_fields[6]

    # man 5 elf suggests that program headers can be absent, in which case
    # e_phnum is zero. Without any program header there is no PT_DYNAMIC.
    if e_phnum == 0:
        return ELF_LINKING_TYPE_STATIC

    # At least 4 bytes per entry are required to read p_type; anything
    # smaller is at best a corrupted ELF file.
    if e_phentsize < 4:
        return None

    fd.seek(e_phoff, os.SEEK_SET)
    p_type_reader = struct.Struct(f"{endian}L")
    try:
        for entry in _read_bytes_iteratively(fd, e_phentsize, e_phnum):
            (p_type,) = p_type_reader.unpack_from(entry)
            if p_type == ELF_PT_DYNAMIC:
                return ELF_LINKING_TYPE_DYNAMIC
    except IncompleteFileError:
        return None

    return ELF_LINKING_TYPE_STATIC

169 

170 

def _read_bytes_iteratively(
    fd: io.BufferedReader,
    object_size: int,
    object_count: int,
) -> Iterable[bytes]:
    """Yield ``object_count`` records of ``object_size`` bytes read from ``fd``.

    The same buffer object is reused for every record, so each yielded value
    is only valid until the next one is requested.

    :raises IncompleteFileError: If a record could not be read in full, or if
      the number of outstanding bytes is non-zero once reading has stopped.
    """
    outstanding = object_size * object_count
    # FIXME: improve this to read larger chunks and yield them one-by-one
    record = bytearray(object_size)

    while outstanding > 0:
        if fd.readinto(record) != object_size:
            # Short read: the file ended in the middle of the table.
            raise IncompleteFileError()
        outstanding -= object_size
        yield record

    if outstanding:
        raise IncompleteFileError()

190 

191 

def find_all_elf_files(
    fs_root: VirtualPath,
    *,
    walk_filter: Callable[[VirtualPath, list[VirtualPath]], bool] | None = None,
    with_linking_type: bool | None = ELF_LINKING_TYPE_ANY,
) -> list[VirtualPath]:
    """Collect every ELF executable or shared object beneath ``fs_root``.

    :param fs_root: Root of the tree to walk.
    :param walk_filter: Optional callable invoked with each path and its
      children; paths for which it returns a false value are skipped.
    :param with_linking_type: Restrict the result to files with this linking
      type; ``ELF_LINKING_TYPE_ANY`` accepts both.
    :return: The matching paths, in walk order.
    """
    # FIXME: Implementation detail that fs_root is always `FSPath` and has `.walk()`
    assert isinstance(fs_root, FSPath)

    def _is_match(path: VirtualPath, children: list[VirtualPath]) -> bool:
        # The filter is consulted first, before the path itself is examined.
        if walk_filter is not None and not walk_filter(path, children):
            return False
        # Files too small to be considered are rejected without opening them.
        if not path.is_file or path.size < ELF_HEADER_SIZE32:
            return False
        return is_so_or_exec_elf_file(path, assert_linking_type=with_linking_type)

    return [path for path, children in fs_root.walk() if _is_match(path, children)]