Coverage for src/debputy/packager_provided_files.py: 86%

204 statements  

« prev     ^ index     » next       coverage.py v7.6.0, created at 2025-01-27 13:59 +0000

1import collections 

2import dataclasses 

3from typing import Mapping, Iterable, Dict, List, Optional, Tuple, Sequence, Container 

4 

5from debputy.packages import BinaryPackage 

6from debputy.plugin.api import VirtualPath 

7from debputy.plugin.api.impl_types import PackagerProvidedFileClassSpec 

8from debputy.util import _error, CAN_DETECT_TYPOS, detect_possible_typo 

9 

10 

11_KNOWN_NON_PPFS = frozenset( 

12 { 

13 # Some of these overlap with the _KNOWN_NON_TYPO_EXTENSIONS below 

14 # This one is a quicker check. The _KNOWN_NON_TYPO_EXTENSIONS is a general (but more 

15 # expensive check). 

16 "gbp.conf", # Typo matches with `gbp.config` (dh_installdebconf) in two edits steps 

17 "salsa-ci.yml", # Typo matches with `salsa-ci.wm` (dh_installwm) in two edits steps 

18 # No reason to check any of these as they are never PPFs 

19 "clean", 

20 "control", 

21 "compat", 

22 "debputy.manifest", 

23 "rules", 

24 # NB: changelog and copyright are (de facto) ppfs, so they are deliberately omitted 

25 } 

26) 

27 

28_KNOWN_NON_TYPO_EXTENSIONS = frozenset( 

29 { 

30 "conf", 

31 "sh", 

32 "yml", 

33 "yaml", 

34 "json", 

35 "bash", 

36 "pl", 

37 "py", 

38 "md", 

39 # Fairly common image format in older packages 

40 "xpm", 

41 # Cruft of various kind (for unclean work directories). Just to avoid stupid false-positives. 

42 "bak", 

43 "tmp", 

44 "temp", 

45 "orig", 

46 "rej", 

47 } 

48) 

49 

50 

@dataclasses.dataclass(frozen=True, slots=True)
class PackagerProvidedFile:
    """One file matched against a packager provided file definition."""

    # The file that was matched.
    path: VirtualPath
    # Name of the binary package the file was attributed to.
    package_name: str
    # Basename handed to the definition when computing the destination.
    installed_as_basename: str
    # Combined with the definition's stem to detect competing matches.
    provided_key: str
    # The definition (file type) the file matched.
    definition: PackagerProvidedFileClassSpec
    # Among competing matches for the same key, the higher priority wins.
    match_priority: int = 0
    # True when the match relied on stripping a trailing ".in".
    fuzzy_match: bool = False
    # True when the file name started with the name of a binary package.
    uses_explicit_package_name: bool = False
    # The custom name part of the file name, if it had one.
    name_segment: Optional[str] = None
    # The architecture suffix of the file name, if it had one.
    architecture_restriction: Optional[str] = None
    # Where the file was expected to be when the stem only matched via typo
    # detection; None otherwise.
    expected_path: Optional[str] = None

    def compute_dest(self) -> Tuple[str, str]:
        """Delegate to the definition to compute the destination of this file."""
        spec = self.definition
        return spec.compute_dest(
            self.installed_as_basename,
            owning_package=self.package_name,
            path=self.path,
        )

71 

72 

@dataclasses.dataclass(frozen=True, slots=True)
class PerPackagePackagerProvidedResult:
    """Packager provided files detected for a single binary package."""

    # Matches whose definition is not flagged as reservation only.
    auto_installable: List[PackagerProvidedFile]
    # Matches whose definition is reservation only, grouped by definition stem.
    reserved_only: Dict[str, List[PackagerProvidedFile]]

77 

78 

def _find_package_name_prefix(
    binary_packages: Mapping[str, BinaryPackage],
    main_binary_package: str,
    max_periods_in_package_name: int,
    path: VirtualPath,
    *,
    allow_fuzzy_matches: bool = False,
) -> Iterable[Tuple[str, str, bool, bool]]:
    """Yield the possible ways of attributing the file to a binary package.

    Each item is a tuple of ``(package_name, remaining_basename,
    explicit_package, bug_950723)``:

    * ``explicit_package`` is True when a leading part of the file name is the
      name of a known binary package; ``remaining_basename`` is then what
      follows that prefix.
    * ``bug_950723`` is True when the prefix only matched after removing a
      trailing "@" (only attempted when ``allow_fuzzy_matches`` is set).

    When no package in the source has a period in its name, the package match
    is yielded if there is one and the main package otherwise. In all other
    cases the main package (with the full file name) is yielded last, after
    any package prefix matches.

    The file name must contain a period when ``max_periods_in_package_name``
    is below 1, as the name is unpacked into exactly two parts.
    """
    file_name = path.name

    def _strip_template_marker(candidate: str) -> Tuple[str, bool]:
        # Returns the candidate without its trailing "@" and whether one was removed.
        if allow_fuzzy_matches and candidate.endswith("@"):
            return candidate[:-1], True
        return candidate, False

    if max_periods_in_package_name < 1:
        # No package name contains a period, so only the first segment can be one.
        first_segment, rest = file_name.split(".", 1)
        candidate, bug_950723 = _strip_template_marker(first_segment)
        if candidate in binary_packages:
            yield candidate, rest, True, bug_950723
        else:
            yield main_binary_package, file_name, False, False
        return

    segments = file_name.split(".", max_periods_in_package_name + 1)
    # Try the longest candidate prefix first and shrink it one segment at a time.
    for cut in reversed(range(1, len(segments))):
        candidate, bug_950723 = _strip_template_marker(".".join(segments[:cut]))
        if candidate in binary_packages:
            yield candidate, ".".join(segments[cut:]), True, bug_950723

    # Main package case.
    yield main_binary_package, file_name, False, False

113 

114 

115def _iterate_stem_splits(basename: str) -> Tuple[str, str, int]: 

116 stem = basename 

117 period_count = stem.count(".") 

118 yield stem, None, period_count 

119 install_as_name = "" 

120 while period_count > 0: 

121 period_count -= 1 

122 install_as_name_part, stem = stem.split(".", 1) 

123 install_as_name = ( 

124 install_as_name + "." + install_as_name_part 

125 if install_as_name != "" 

126 else install_as_name_part 

127 ) 

128 yield stem, install_as_name, period_count 

129 

130 

def _find_definition(
    packager_provided_files: Mapping[str, PackagerProvidedFileClassSpec],
    basename: str,
    *,
    period2stems: Optional[Mapping[int, Sequence[str]]] = None,
    had_arch: bool = False,
) -> Tuple[Optional[str], Optional[PackagerProvidedFileClassSpec], Optional[str]]:
    """Find the definition matching ``basename``, optionally tolerating typos.

    Returns ``(install_as_name, definition, typoed_stem)``. On an exact stem
    match ``typoed_stem`` is ``None``. On a typo match it is the stem as it was
    written in the file name. When nothing matches, all three are ``None``.

    Typo detection only happens when ``period2stems`` is non-empty, and only
    against the known stems that have the same number of periods as the
    candidate stem.
    """
    for stem, install_as_name, period_count in _iterate_stem_splits(basename):
        exact_match = packager_provided_files.get(stem)
        if exact_match is not None:
            return install_as_name, exact_match, None

        typo_candidates = period2stems.get(period_count) if period2stems else None
        if not typo_candidates:
            continue

        # If the stem is also the extension and a known one at that, then it is
        # not considered a typo match (to avoid false positives). "foo.1" is
        # ignored as well since manpages are kind of common.
        looks_like_plain_extension = (
            stem in _KNOWN_NON_TYPO_EXTENSIONS or stem.isdigit()
        )
        if looks_like_plain_extension and not had_arch:
            continue

        # Short stems get a tighter edit distance budget.
        allowed_edits = 2 if len(stem) > 3 else 1
        matches = detect_possible_typo(
            stem,
            typo_candidates,
            max_edit_distance=allowed_edits,
        )
        # Only an unambiguous match counts.
        if matches is not None and len(matches) == 1:
            return install_as_name, packager_provided_files[matches[0]], stem

    return None, None, None

160 

161 

def _check_mismatches(
    path: VirtualPath,
    definition: PackagerProvidedFileClassSpec,
    owning_package: BinaryPackage,
    install_as_name: Optional[str],
    had_arch: bool,
) -> None:
    """Report file names that use a segment the definition or package forbids.

    Three situations are reported via ``_error``:

    * the file has a custom name but the definition disallows name segments;
    * the file has an architecture suffix but the package is arch:all;
    * the file has an architecture suffix but the definition disallows it.
    """
    name_segment_rejected = (
        install_as_name is not None and not definition.allow_name_segment
    )
    if name_segment_rejected:
        _error(
            f'The file "{path.fs_path}" looks like a packager provided file for'
            f' {owning_package.name} of type {definition.stem} with the custom name "{install_as_name}".'
            " However, this file type does not allow custom naming. The file type was registered"
            f" by {definition.debputy_plugin_metadata.plugin_name} in case you disagree and want"
            " to file a bug/feature request."
        )

    # The remaining checks only concern architecture specific file names.
    if not had_arch:
        return

    if owning_package.is_arch_all:
        _error(
            f'The file "{path.fs_path}" looks like an architecture specific packager provided file for'
            f" {owning_package.name} of type {definition.stem}."
            " However, the package in question is arch:all. The use of architecture specific files"
            " for arch:all packages does not make sense."
        )

    if not definition.allow_architecture_segment:
        _error(
            f'The file "{path.fs_path}" looks like an architecture specific packager provided file for'
            f" {owning_package.name} of type {definition.stem}."
            " However, this file type does not allow architecture specific variants. The file type was registered"
            f" by {definition.debputy_plugin_metadata.plugin_name} in case you disagree and want"
            " to file a bug/feature request."
        )

193 

194 

def _split_path(
    packager_provided_files: Mapping[str, PackagerProvidedFileClassSpec],
    binary_packages: Mapping[str, BinaryPackage],
    main_binary_package: str,
    max_periods_in_package_name: int,
    path: VirtualPath,
    *,
    allow_fuzzy_matches: bool = False,
    period2stems: Optional[Mapping[int, Sequence[str]]] = None,
) -> Iterable[PackagerProvidedFile]:
    """Yield every packager provided file interpretation of ``path``.

    The file name is decomposed into an optional package prefix, an optional
    name segment, the stem and an optional architecture suffix. One
    ``PackagerProvidedFile`` is yielded per package the file applies to.

    :param packager_provided_files: Known definitions keyed by stem.
    :param binary_packages: Binary packages keyed by name.
    :param main_binary_package: Package that owns files without a package prefix.
    :param max_periods_in_package_name: Highest number of periods in any
      package name; bounds how much of the file name can be a package prefix.
    :param path: The file to analyse.
    :param allow_fuzzy_matches: Also accept a trailing ".in" and a "@" after
      the package name, and skip the hard errors of ``_check_mismatches``.
    :param period2stems: Known stems grouped by period count; enables typo
      detection when non-empty.
    """
    if "." not in path.name:
        # The whole file name is the stem: no package, name or arch segment.
        definition = packager_provided_files.get(path.name)
        if definition is None:
            return
        target_packages: Iterable[str]
        if definition.packageless_is_fallback_for_all_packages:
            target_packages = binary_packages
        else:
            target_packages = (main_binary_package,)
        for package_name in target_packages:
            yield PackagerProvidedFile(
                path=path,
                package_name=package_name,
                installed_as_basename=package_name,
                provided_key=".UNNAMED.",
                definition=definition,
                match_priority=0,
                fuzzy_match=False,
                uses_explicit_package_name=False,
                name_segment=None,
                architecture_restriction=None,
            )
        return

    def _expected_path(corrected_basename: str, fuzzy_match: bool) -> str:
        # Path the file should have had, given the corrected basename.
        parent_dir = path.parent_dir
        parent_path = parent_dir.path + "/" if parent_dir is not None else ""
        corrected_path = f"{parent_path}{corrected_basename}"
        if fuzzy_match and path.name.endswith(".in"):
            corrected_path += ".in"
        return corrected_path

    for (
        owning_package_name,
        basename,
        explicit_package,
        bug_950723,
    ) in _find_package_name_prefix(
        binary_packages,
        main_binary_package,
        max_periods_in_package_name,
        path,
        allow_fuzzy_matches=allow_fuzzy_matches,
    ):
        owning_package = binary_packages[owning_package_name]
        match_priority = 1 if explicit_package else 0
        fuzzy_match = False
        arch_restriction: Optional[str] = None

        if allow_fuzzy_matches and basename.endswith(".in") and len(basename) > 3:
            basename = basename[:-3]
            fuzzy_match = True

        if "." in basename:
            remaining, last_word = basename.rsplit(".", 1)
            # We cannot use "resolved_architecture" as it would return "all".
            if last_word == owning_package.package_deb_architecture_variable("ARCH"):
                match_priority = 3
                basename = remaining
                arch_restriction = last_word
            elif last_word == owning_package.package_deb_architecture_variable(
                "ARCH_OS"
            ):
                match_priority = 2
                basename = remaining
                arch_restriction = last_word
            elif last_word == "all" and owning_package.is_arch_all:
                # This case does not make sense, but we detect it, so we can
                # report an error via _check_mismatches.
                match_priority = -1
                basename = remaining
                arch_restriction = last_word

        install_as_name, definition, typoed_stem = _find_definition(
            packager_provided_files,
            basename,
            period2stems=period2stems,
            had_arch=bool(arch_restriction),
        )
        if definition is None:
            continue

        # Note: bug_950723 implies allow_fuzzy_matches
        if bug_950723 and not definition.bug_950723:
            continue

        if not allow_fuzzy_matches:
            # LSP/Lint checks here but should not use `_check_mismatches` as
            # the hard error disrupts them.
            _check_mismatches(
                path,
                definition,
                owning_package,
                install_as_name,
                arch_restriction is not None,
            )

        expected_path: Optional[str] = None
        is_fallback_for_all_packages = (
            definition.packageless_is_fallback_for_all_packages
            and install_as_name is None
            and not explicit_package
            and arch_restriction is None
        )
        if is_fallback_for_all_packages:
            if typoed_stem is not None:
                expected_path = _expected_path(definition.stem, fuzzy_match)
            for package_name in binary_packages:
                yield PackagerProvidedFile(
                    path=path,
                    package_name=package_name,
                    installed_as_basename=(
                        f"{package_name}@" if bug_950723 else package_name
                    ),
                    # The unnamed fallback must share the ".UNNAMED." key with
                    # package specific and architecture specific files of the
                    # same type; otherwise those can never take precedence
                    # over it. (The two values used to be swapped.)
                    provided_key=".UNNAMED@." if bug_950723 else ".UNNAMED.",
                    definition=definition,
                    match_priority=match_priority,
                    fuzzy_match=fuzzy_match,
                    uses_explicit_package_name=False,
                    name_segment=None,
                    architecture_restriction=None,
                    expected_path=expected_path,
                )
            continue

        if install_as_name is not None:
            provided_key = install_as_name
            installed_as_basename = install_as_name
        else:
            provided_key = ".UNNAMED."
            installed_as_basename = owning_package_name
        if bug_950723:
            provided_key = f"{provided_key}@"
            installed_as_basename = f"{installed_as_basename}@"
            package_prefix = f"{owning_package_name}@"
        else:
            package_prefix = owning_package_name

        if typoed_stem:
            # Rebuild the file name as it should have been spelled. This is
            # kept separate from installed_as_basename so that a typo match
            # does not change the name the file is installed as.
            corrected_basename = definition.stem
            if install_as_name is not None:
                corrected_basename = f"{install_as_name}.{corrected_basename}"
            if explicit_package:
                corrected_basename = f"{package_prefix}.{corrected_basename}"
            if arch_restriction is not None and arch_restriction != "all":
                corrected_basename = f"{corrected_basename}.{arch_restriction}"
            expected_path = _expected_path(corrected_basename, fuzzy_match)

        yield PackagerProvidedFile(
            path=path,
            package_name=owning_package_name,
            installed_as_basename=installed_as_basename,
            provided_key=provided_key,
            definition=definition,
            match_priority=match_priority,
            fuzzy_match=fuzzy_match,
            uses_explicit_package_name=bool(explicit_package),
            name_segment=install_as_name,
            architecture_restriction=arch_restriction,
            expected_path=expected_path,
        )

381 

382 

383def _period_stem(stems: Iterable[str]) -> Mapping[int, Sequence[str]]: 

384 result: Dict[int, List[str]] = {} 

385 for stem in stems: 

386 period_count = stem.count(".") 

387 matched_stems = result.get(period_count) 

388 if not matched_stems: 

389 matched_stems = [stem] 

390 result[period_count] = matched_stems 

391 else: 

392 matched_stems.append(stem) 

393 return result 

394 

395 

def detect_all_packager_provided_files(
    packager_provided_files: Mapping[str, PackagerProvidedFileClassSpec],
    debian_dir: VirtualPath,
    binary_packages: Mapping[str, BinaryPackage],
    *,
    allow_fuzzy_matches: bool = False,
    detect_typos: bool = False,
    ignore_paths: Container[str] = frozenset(),
) -> Dict[str, PerPackagePackagerProvidedResult]:
    """Scan ``debian_dir`` for packager provided files, grouped per package.

    :param packager_provided_files: Known definitions keyed by stem.
    :param debian_dir: Directory whose direct entries are examined.
    :param binary_packages: Binary packages keyed by name.
    :param allow_fuzzy_matches: Passed on to the file name matching. It is
      also required (asserted) when no package is flagged as the main package.
    :param detect_typos: Also match stems that look like misspellings of known
      stems, provided ``CAN_DETECT_TYPOS`` is true.
    :param ignore_paths: Paths of entries to skip.
    :return: One result per binary package. An empty dict when there is no
      main package and no package has a "Package" field.
    """
    main_candidates = [
        pkg.name for pkg in binary_packages.values() if pkg.is_main_package
    ]
    if main_candidates:
        main_binary_package = main_candidates[0]
    else:
        assert allow_fuzzy_matches
        # Fall back to the first package that has a "Package" field.
        main_binary_package = next(
            (pkg.name for pkg in binary_packages.values() if "Package" in pkg.fields),
            None,
        )
        if main_binary_package is None:
            return {}

    # Per package: (definition stem, provided key) -> best match seen so far.
    provided_files: Dict[str, Dict[Tuple[str, str], PackagerProvidedFile]] = {
        package_name: {} for package_name in binary_packages
    }
    max_periods_in_package_name = max(name.count(".") for name in binary_packages)
    period2stems = (
        _period_stem(packager_provided_files.keys())
        if detect_typos and CAN_DETECT_TYPOS
        else {}
    )

    for entry in debian_dir.iterdir:
        if (
            entry.is_dir
            or entry.name.startswith(".")
            or entry.path in ignore_paths
            or entry.name in _KNOWN_NON_PPFS
        ):
            continue
        candidates = _split_path(
            packager_provided_files,
            binary_packages,
            main_binary_package,
            max_periods_in_package_name,
            entry,
            allow_fuzzy_matches=allow_fuzzy_matches,
            period2stems=period2stems,
        )
        for candidate in candidates:
            matches_for_package = provided_files[candidate.package_name]
            match_key = (candidate.definition.stem, candidate.provided_key)
            current = matches_for_package.get(match_key)
            # A new match replaces the current one unless the current one has
            # a strictly higher priority.
            if current is None or current.match_priority <= candidate.match_priority:
                matches_for_package[match_key] = candidate

    result = {}
    for package_name, matches_for_package in provided_files.items():
        auto_installable = []
        reserved_only = collections.defaultdict(list)
        for match in matches_for_package.values():
            if match.definition.reservation_only:
                reserved_only[match.definition.stem].append(match)
            else:
                auto_installable.append(match)
        result[package_name] = PerPackagePackagerProvidedResult(
            auto_installable,
            reserved_only,
        )

    return result