Coverage for src/debputy/packager_provided_files.py: 88%

237 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2026-04-19 20:37 +0000

1import collections.abc 

2import dataclasses 

3from typing import ( 

4 TYPE_CHECKING, 

5) 

6from collections.abc import Mapping, Iterable, Sequence, Container 

7 

8from debputy.packages import BinaryPackage 

9from debputy.plugin.api import VirtualPath 

10from debputy.plugin.api.impl_types import ( 

11 PackagerProvidedFileClassSpec, 

12) 

13from debputy.util import _error, CAN_DETECT_TYPOS, detect_possible_typo 

14 

15if TYPE_CHECKING: 

16 from debputy.plugin.api.feature_set import PluginProvidedFeatureSet 

17 

18 

19_KNOWN_NON_TYPO_EXTENSIONS = frozenset( 

20 { 

21 "conf", 

22 "config", 

23 "sh", 

24 "yml", 

25 "yaml", 

26 "json", 

27 "bash", 

28 "pl", 

29 "py", 

30 "md", 

31 "rst", 

32 # Fairly common image format in older packages 

33 "xpm", 

34 # Cruft of various kind (for unclean work directories). Just to avoid stupid false-positives. 

35 "bak", 

36 "tmp", 

37 "temp", 

38 "orig", 

39 "rej", 

40 } 

41) 

42 

43 

44@dataclasses.dataclass(frozen=True, slots=True) 

45class PackagerProvidedFile: 

46 path: VirtualPath 

47 package_name: str 

48 installed_as_basename: str 

49 provided_key: str 

50 definition: PackagerProvidedFileClassSpec 

51 match_priority: int = 0 

52 fuzzy_match: bool = False 

53 uses_explicit_package_name: bool = False 

54 name_segment: str | None = None 

55 architecture_restriction: str | None = None 

56 expected_path: str | None = None 

57 

58 def compute_dest(self) -> tuple[str, str]: 

59 return self.definition.compute_dest( 

60 self.installed_as_basename, 

61 owning_package=self.package_name, 

62 path=self.path, 

63 ) 

64 

65 

66@dataclasses.dataclass(slots=True) 

67class PerPackagePackagerProvidedResult: 

68 auto_installable: list[PackagerProvidedFile] 

69 reserved_only: dict[str, list[PackagerProvidedFile]] 

70 

71 

72def _find_package_name_prefix( 

73 binary_packages: Mapping[str, BinaryPackage], 

74 main_binary_package: str, 

75 max_periods_in_package_name: int, 

76 path: VirtualPath, 

77 *, 

78 allow_fuzzy_matches: bool = False, 

79) -> Iterable[tuple[str, str, bool, bool]]: 

80 if max_periods_in_package_name < 1: 

81 prefix, remaining = path.name.split(".", 1) 

82 package_name = prefix 

83 bug_950723 = False 

84 if allow_fuzzy_matches and package_name.endswith("@"): 

85 package_name = package_name[:-1] 

86 bug_950723 = True 

87 if package_name in binary_packages: 

88 yield package_name, remaining, True, bug_950723 

89 else: 

90 yield main_binary_package, path.name, False, False 

91 return 

92 

93 parts = path.name.split(".", max_periods_in_package_name + 1) 

94 for p in range(len(parts) - 1, 0, -1): 

95 name = ".".join(parts[0:p]) 

96 bug_950723 = False 

97 if allow_fuzzy_matches and name.endswith("@"): 97 ↛ 98line 97 didn't jump to line 98 because the condition on line 97 was never true

98 name = name[:-1] 

99 bug_950723 = True 

100 

101 if name in binary_packages: 

102 remaining = ".".join(parts[p:]) 

103 yield name, remaining, True, bug_950723 

104 # main package case 

105 yield main_binary_package, path.name, False, False 

106 

107 

108def _iterate_stem_splits( 

109 basename: str, 

110) -> collections.abc.Iterator[tuple[str, str | None, int]]: 

111 stem = basename 

112 period_count = stem.count(".") 

113 yield stem, None, period_count 

114 install_as_name = "" 

115 while period_count > 0: 

116 period_count -= 1 

117 install_as_name_part, stem = stem.split(".", 1) 

118 install_as_name = ( 

119 install_as_name + "." + install_as_name_part 

120 if install_as_name != "" 

121 else install_as_name_part 

122 ) 

123 yield stem, install_as_name, period_count 

124 

125 

def _find_definition(
    packager_provided_files: Mapping[str, PackagerProvidedFileClassSpec],
    basename: str,
    *,
    period2stems: Mapping[int, Sequence[str]] | None = None,
    had_arch: bool = False,
) -> tuple[str | None, PackagerProvidedFileClassSpec | None, str | None]:
    """Look up the definition matching a basename, optionally tolerating typos.

    :param packager_provided_files: Known definitions by stem.
    :param basename: The basename without any package or architecture part.
    :param period2stems: Known stems grouped by their number of periods. Typo
      detection is only attempted when this is non-empty.
    :param had_arch: Whether an architecture segment was stripped from the
      basename beforehand.
    :return: `(name_segment, definition, typoed_stem)`. `typoed_stem` is the
      stem as spelled in the file when the match came from typo detection and
      `None` for an exact match. All three are `None` when nothing matched.
    """
    for stem, install_as_name, period_count in _iterate_stem_splits(basename):
        exact_match = packager_provided_files.get(stem)
        if exact_match is not None:
            return install_as_name, exact_match, None

        candidate_stems = period2stems.get(period_count) if period2stems else None
        if not candidate_stems:
            continue

        # If the stem is also the extension and a known one at that, then
        # we do not consider it a typo match (to avoid false positives).
        #
        # We also ignore "foo.1" since manpages are kind of common.
        is_plain_extension = stem in _KNOWN_NON_TYPO_EXTENSIONS or stem.isdigit()
        if is_plain_extension and not had_arch:
            continue

        # Short stems get a tighter edit distance budget.
        max_edit_distance = 1 if len(stem) <= 3 else 2
        matches = detect_possible_typo(
            stem,
            candidate_stems,
            max_edit_distance=max_edit_distance,
        )
        # Only an unambiguous correction counts as a match.
        if matches is not None and len(matches) == 1:
            return install_as_name, packager_provided_files[matches[0]], stem
    return None, None, None

155 

156 

157def _check_mismatches( 

158 path: VirtualPath, 

159 definition: PackagerProvidedFileClassSpec, 

160 owning_package: BinaryPackage, 

161 install_as_name: str | None, 

162 had_arch: bool, 

163) -> None: 

164 if install_as_name is not None and not definition.allow_name_segment: 164 ↛ 165line 164 didn't jump to line 165 because the condition on line 164 was never true

165 _error( 

166 f'The file "{path.fs_path}" looks like a packager provided file for' 

167 f' {owning_package.name} of type {definition.stem} with the custom name "{install_as_name}".' 

168 " However, this file type does not allow custom naming. The file type was registered" 

169 f" by {definition.debputy_plugin_metadata.plugin_name} in case you disagree and want" 

170 " to file a bug/feature request." 

171 ) 

172 if had_arch: 

173 if owning_package.is_arch_all: 173 ↛ 174line 173 didn't jump to line 174 because the condition on line 173 was never true

174 _error( 

175 f'The file "{path.fs_path}" looks like an architecture specific packager provided file for' 

176 f" {owning_package.name} of type {definition.stem}." 

177 " However, the package in question is arch:all. The use of architecture specific files" 

178 " for arch:all packages does not make sense." 

179 ) 

180 if not definition.allow_architecture_segment: 180 ↛ 181line 180 didn't jump to line 181 because the condition on line 180 was never true

181 _error( 

182 f'The file "{path.fs_path}" looks like an architecture specific packager provided file for' 

183 f" {owning_package.name} of type {definition.stem}." 

184 " However, this file type does not allow architecture specific variants. The file type was registered" 

185 f" by {definition.debputy_plugin_metadata.plugin_name} in case you disagree and want" 

186 " to file a bug/feature request." 

187 ) 

188 

189 

190def _split_basename( 

191 basename: str, 

192 owning_package: BinaryPackage, 

193 *, 

194 has_explicit_package: bool = False, 

195 allow_fuzzy_matches: bool = False, 

196) -> tuple[str, int, str | None, bool]: 

197 match_priority = 1 if has_explicit_package else 0 

198 fuzzy_match = False 

199 arch_restriction: str | None = None 

200 if allow_fuzzy_matches and basename.endswith(".in") and len(basename) > 3: 200 ↛ 201line 200 didn't jump to line 201 because the condition on line 200 was never true

201 basename = basename[:-3] 

202 fuzzy_match = True 

203 

204 if "." in basename: 

205 remaining, last_word = basename.rsplit(".", 1) 

206 # We cannot use "resolved_architecture" as it would return "all". 

207 if last_word == owning_package.package_deb_architecture_variable("ARCH"): 

208 match_priority = 3 

209 basename = remaining 

210 arch_restriction = last_word 

211 elif last_word == owning_package.package_deb_architecture_variable("ARCH_OS"): 211 ↛ 212line 211 didn't jump to line 212 because the condition on line 211 was never true

212 match_priority = 2 

213 basename = remaining 

214 arch_restriction = last_word 

215 elif last_word == "all" and owning_package.is_arch_all: 215 ↛ 218line 215 didn't jump to line 218 because the condition on line 215 was never true

216 # This case does not make sense, but we detect it, so we can report an error 

217 # via _check_mismatches. 

218 match_priority = -1 

219 basename = remaining 

220 arch_restriction = last_word 

221 

222 return basename, match_priority, arch_restriction, fuzzy_match 

223 

224 

def _split_path(
    packager_provided_files: Mapping[str, PackagerProvidedFileClassSpec],
    binary_packages: Mapping[str, BinaryPackage],
    main_binary_package: str,
    max_periods_in_package_name: int,
    path: VirtualPath,
    *,
    allow_fuzzy_matches: bool = False,
    period2stems: Mapping[int, Sequence[str]] | None = None,
    known_static_non_ppf_names: Container[str] = frozenset(),
) -> Iterable[PackagerProvidedFile]:
    """Match one packaging file against the known packager provided file definitions.

    :param packager_provided_files: Known definitions by stem.
    :param binary_packages: The known binary packages by name.
    :param main_binary_package: Package assumed when the file name does not
      name one explicitly.
    :param max_periods_in_package_name: The largest number of periods in any
      known package name.
    :param path: The file to classify.
    :param allow_fuzzy_matches: Accept fuzzy matches (".in" suffix, "@" after
      the package name) and skip the hard-error consistency checks.
    :param period2stems: Known stems grouped by number of periods; enables
      typo detection when non-empty.
    :param known_static_non_ppf_names: "debian/<name>" paths that are known
      packaging files and therefore never packager provided files.
    :return: The matches for the file. Nothing is produced when the file does
      not match any definition.
    """
    owning_package_name = main_binary_package
    basename = path.name
    match_priority = 0
    if "." not in basename:
        # Without a period, the whole name must be the stem.
        definition = packager_provided_files.get(basename)
        if definition is None:
            return
        if definition.packageless_is_fallback_for_all_packages:
            yield from (
                PackagerProvidedFile(
                    path=path,
                    package_name=n,
                    installed_as_basename=n,
                    provided_key=".UNNAMED.",
                    definition=definition,
                    match_priority=match_priority,
                    fuzzy_match=False,
                    uses_explicit_package_name=False,
                    name_segment=None,
                    architecture_restriction=None,
                )
                for n, p in binary_packages.items()
                if p.package_type in definition.package_types
            )
        elif (
            binary_packages[owning_package_name].package_type
            in definition.package_types
        ):
            # TODO: raise some noise (a la `not allow fuzzy_matches`) if not matching the package type.
            yield PackagerProvidedFile(
                path=path,
                package_name=owning_package_name,
                installed_as_basename=owning_package_name,
                provided_key=".UNNAMED.",
                definition=definition,
                match_priority=match_priority,
                fuzzy_match=False,
                uses_explicit_package_name=False,
                name_segment=None,
                architecture_restriction=None,
            )
        return
    if f"debian/{path.name}" in known_static_non_ppf_names:
        return

    for (
        owning_package_name,
        basename,
        explicit_package,
        bug_950723,
    ) in _find_package_name_prefix(
        binary_packages,
        main_binary_package,
        max_periods_in_package_name,
        path,
        allow_fuzzy_matches=allow_fuzzy_matches,
    ):
        owning_package = binary_packages[owning_package_name]

        basename, match_priority, arch_restriction, fuzzy_match = _split_basename(
            basename,
            owning_package,
            has_explicit_package=explicit_package,
            allow_fuzzy_matches=allow_fuzzy_matches,
        )

        install_as_name, definition, typoed_stem = _find_definition(
            packager_provided_files,
            basename,
            period2stems=period2stems,
            had_arch=bool(arch_restriction),
        )
        if definition is None:
            continue

        # Note: bug_950723 implies allow_fuzzy_matches
        if bug_950723 and not definition.bug_950723:
            continue

        if owning_package.package_type not in definition.package_types:
            # TODO: raise some noise (a la `not allow fuzzy_matches`)
            continue

        if not allow_fuzzy_matches:
            # LSP/Lint checks here but should not use `_check_mismatches` as
            # the hard error disrupts them.
            _check_mismatches(
                path,
                definition,
                owning_package,
                install_as_name,
                arch_restriction is not None,
            )

        expected_path: str | None = None
        if (
            definition.packageless_is_fallback_for_all_packages
            and install_as_name is None
            and not explicit_package
            and arch_restriction is None
        ):
            if typoed_stem is not None:
                parent_path = (
                    path.parent_dir.path + "/" if path.parent_dir is not None else ""
                )
                expected_path = f"{parent_path}{definition.stem}"
                if fuzzy_match and path.name.endswith(".in"):
                    expected_path += ".in"
            # NOTE(review): unlike the period-less fallback above, this does not
            # filter the packages by `definition.package_types` - confirm whether
            # that difference is intended.
            yield from (
                PackagerProvidedFile(
                    path=path,
                    package_name=n,
                    installed_as_basename=f"{n}@" if bug_950723 else n,
                    # The plain case must use the same ".UNNAMED." key as every
                    # other unnamed match produced by this function. The "@"
                    # variant mirrors how the branch below derives its key.
                    provided_key=".UNNAMED.@" if bug_950723 else ".UNNAMED.",
                    definition=definition,
                    match_priority=match_priority,
                    fuzzy_match=fuzzy_match,
                    uses_explicit_package_name=False,
                    name_segment=None,
                    architecture_restriction=None,
                    expected_path=expected_path,
                )
                for n in binary_packages
            )
        else:
            provided_key = (
                install_as_name if install_as_name is not None else ".UNNAMED."
            )
            basename = (
                install_as_name if install_as_name is not None else owning_package_name
            )
            if bug_950723:
                provided_key = f"{provided_key}@"
                basename = f"{basename}@"
                package_prefix = f"{owning_package_name}@"
            else:
                package_prefix = owning_package_name
            if typoed_stem:
                # Rebuild the file name as it would have been spelled with the
                # correct stem, so it can be suggested to the user.
                parent_path = (
                    path.parent_dir.path + "/" if path.parent_dir is not None else ""
                )
                basename = definition.stem
                if install_as_name is not None:
                    basename = f"{install_as_name}.{basename}"
                if explicit_package:
                    basename = f"{package_prefix}.{basename}"
                if arch_restriction is not None and arch_restriction != "all":
                    basename = f"{basename}.{arch_restriction}"
                expected_path = f"{parent_path}{basename}"
                if fuzzy_match and path.name.endswith(".in"):
                    expected_path += ".in"
            yield PackagerProvidedFile(
                path=path,
                package_name=owning_package_name,
                installed_as_basename=basename,
                provided_key=provided_key,
                definition=definition,
                match_priority=match_priority,
                fuzzy_match=fuzzy_match,
                uses_explicit_package_name=bool(explicit_package),
                name_segment=install_as_name,
                architecture_restriction=arch_restriction,
                expected_path=expected_path,
            )
        # The first candidate interpretation that matches a definition wins.
        return

403 

404 

405def _period_stem(stems: Iterable[str]) -> Mapping[int, Sequence[str]]: 

406 result: dict[int, list[str]] = {} 

407 for stem in stems: 

408 period_count = stem.count(".") 

409 matched_stems = result.get(period_count) 

410 if not matched_stems: 

411 matched_stems = [stem] 

412 result[period_count] = matched_stems 

413 else: 

414 matched_stems.append(stem) 

415 return result 

416 

417 

418def _find_main_package_name( 

419 binary_packages: Mapping[str, BinaryPackage], 

420 *, 

421 allow_fuzzy_matches: bool = False, 

422) -> str | None: 

423 main_packages = [p.name for p in binary_packages.values() if p.is_main_package] 

424 if not main_packages: 424 ↛ 425line 424 didn't jump to line 425 because the condition on line 424 was never true

425 assert allow_fuzzy_matches 

426 return next( 

427 iter(p.name for p in binary_packages.values() if "Package" in p.fields), 

428 None, 

429 ) 

430 return main_packages[0] 

431 

432 

433@dataclasses.dataclass(slots=True, frozen=True) 

434class PackagingFileClassification: 

435 path: VirtualPath 

436 packager_provided_files_per_package: None | ( 

437 Mapping[str, Sequence[PackagerProvidedFile]] 

438 ) 

439 

440 

def classify_debian_packaging_files(
    plugin_feature_set: "PluginProvidedFeatureSet",
    debian_dir: VirtualPath,
    binary_packages: Mapping[str, BinaryPackage],
    *,
    allow_fuzzy_matches: bool = False,
    detect_typos: bool = False,
    ignore_paths: Container[str] = frozenset(),
) -> Iterable[PackagingFileClassification]:
    """Classify the files directly inside the packaging directory.

    :param plugin_feature_set: Source of the known packager provided file
      definitions and the known packaging files.
    :param debian_dir: The directory whose entries are classified.
      Sub-directories and entries starting with "." are skipped.
    :param binary_packages: The known binary packages by name.
    :param allow_fuzzy_matches: Passed on to the matching logic.
    :param detect_typos: Attempt typo detection of stems (only effective when
      `CAN_DETECT_TYPOS` is true).
    :param ignore_paths: Paths of entries to skip entirely.
    :return: One classification per considered entry, in iteration order.
      Nothing is produced when no main package can be determined.
    """
    packager_provided_files = plugin_feature_set.packager_provided_files
    known_static_non_ppf_names: frozenset[str] = frozenset(
        known_file.detection_value
        for known_file in plugin_feature_set.known_packaging_files.values()
        if known_file.detection_method == "path"
    )
    main_binary_package = _find_main_package_name(
        binary_packages,
        allow_fuzzy_matches=allow_fuzzy_matches,
    )
    if main_binary_package is None:
        return

    max_periods_in_package_name = max(name.count(".") for name in binary_packages)
    period2stems = (
        _period_stem(packager_provided_files.keys())
        if detect_typos and CAN_DETECT_TYPOS
        else {}
    )

    considered_entries = []
    # Best match seen so far per (package, stem, provided key).
    best_match_by_key: dict[tuple[str, str, str], PackagerProvidedFile] = {}

    for entry in debian_dir.iterdir():
        if (
            entry.is_dir
            or entry.name.startswith(".")
            or entry.path in ignore_paths
        ):
            continue
        considered_entries.append(entry)
        candidates = _split_path(
            packager_provided_files,
            binary_packages,
            main_binary_package,
            max_periods_in_package_name,
            entry,
            allow_fuzzy_matches=allow_fuzzy_matches,
            period2stems=period2stems,
            known_static_non_ppf_names=known_static_non_ppf_names,
        )
        for candidate in candidates:
            key = (
                candidate.package_name,
                candidate.definition.stem,
                candidate.provided_key,
            )
            current = best_match_by_key.get(key)
            # A match only loses to an already recorded match with a strictly
            # higher priority; on a tie, the later match replaces the earlier.
            if current is None or current.match_priority <= candidate.match_priority:
                best_match_by_key[key] = candidate

    # Regroup the winning matches: path -> package name -> matches.
    matches_by_path: dict[str, dict[str, list[PackagerProvidedFile]]] = {}
    for winner in best_match_by_key.values():
        path_name = winner.path.path
        matches_by_package = matches_by_path.get(path_name)
        if matches_by_package is None:
            matches_by_package = matches_by_path[path_name] = collections.defaultdict(
                list
            )
        matches_by_package[winner.package_name].append(winner)

    for entry in considered_entries:
        yield PackagingFileClassification(entry, matches_by_path.get(entry.path))

518 

519 

def detect_all_packager_provided_files(
    plugin_feature_set: "PluginProvidedFeatureSet",
    debian_dir: VirtualPath,
    binary_packages: Mapping[str, BinaryPackage],
    *,
    allow_fuzzy_matches: bool = False,
    detect_typos: bool = False,
    ignore_paths: Container[str] = frozenset(),
) -> dict[str, PerPackagePackagerProvidedResult]:
    """Collect the detected packager provided files for every binary package.

    The parameters are passed on unchanged to `classify_debian_packaging_files`.

    :return: One result per binary package (by name). Each result separates
      the matches whose definition is reservation-only (grouped by definition
      stem) from the remaining, auto-installable matches.
    """
    result = {
        name: PerPackagePackagerProvidedResult([], collections.defaultdict(list))
        for name in binary_packages
    }
    classifications = classify_debian_packaging_files(
        plugin_feature_set,
        debian_dir,
        binary_packages,
        allow_fuzzy_matches=allow_fuzzy_matches,
        detect_typos=detect_typos,
        ignore_paths=ignore_paths,
    )
    for classification in classifications:
        matches_by_package = classification.packager_provided_files_per_package
        if not matches_by_package:
            continue
        for package_name, matches in matches_by_package.items():
            package_result = result[package_name]
            reserved = package_result.reserved_only
            installable = package_result.auto_installable
            for match in matches:
                if match.definition.reservation_only:
                    reserved[match.definition.stem].append(match)
                else:
                    installable.append(match)

    return result