Coverage for src/debputy/packager_provided_files.py: 89%

234 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2026-01-26 19:30 +0000

1import collections.abc 

2import dataclasses 

3from typing import ( 

4 Dict, 

5 List, 

6 Optional, 

7 Tuple, 

8 TYPE_CHECKING, 

9 FrozenSet, 

10) 

11from collections.abc import Mapping, Iterable, Sequence, Container 

12 

13from debputy.packages import BinaryPackage 

14from debputy.plugin.api import VirtualPath 

15from debputy.plugin.api.impl_types import ( 

16 PackagerProvidedFileClassSpec, 

17 PluginProvidedKnownPackagingFile, 

18) 

19from debputy.util import _error, CAN_DETECT_TYPOS, detect_possible_typo 

20 

21if TYPE_CHECKING: 

22 from debputy.plugin.api.feature_set import PluginProvidedFeatureSet 

23 

24 

25_KNOWN_NON_TYPO_EXTENSIONS = frozenset( 

26 { 

27 "conf", 

28 "config", 

29 "sh", 

30 "yml", 

31 "yaml", 

32 "json", 

33 "bash", 

34 "pl", 

35 "py", 

36 "md", 

37 "rst", 

38 # Fairly common image format in older packages 

39 "xpm", 

40 # Cruft of various kind (for unclean work directories). Just to avoid stupid false-positives. 

41 "bak", 

42 "tmp", 

43 "temp", 

44 "orig", 

45 "rej", 

46 } 

47) 

48 

49 

@dataclasses.dataclass(slots=True, frozen=True)
class PackagerProvidedFile:
    """Immutable record of one file matched to a packager provided file definition.

    The optional fields default to the values of a plain, exact,
    package-less match.
    """

    # The file that was matched.
    path: VirtualPath
    # Name of the binary package the match applies to.
    package_name: str
    # Basename handed to the definition when computing the destination.
    installed_as_basename: str
    # Key identifying the match (the matcher uses ".UNNAMED."-style values
    # when the file name has no name segment).
    provided_key: str
    # The definition (file class) the file matched.
    definition: PackagerProvidedFileClassSpec
    # Ranking used when several files compete for the same key.
    match_priority: int = 0
    # Whether the match relied on fuzzy matching.
    fuzzy_match: bool = False
    # Whether the file name spelled out the package name.
    uses_explicit_package_name: bool = False
    # The name segment from the file name, if any.
    name_segment: str | None = None
    # The architecture segment from the file name, if any.
    architecture_restriction: str | None = None
    # The path the file was expected to have (set for typo matches).
    expected_path: str | None = None

    def compute_dest(self) -> tuple[str, str]:
        """Return whatever the definition's ``compute_dest`` yields for this file."""
        definition = self.definition
        return definition.compute_dest(
            self.installed_as_basename,
            path=self.path,
            owning_package=self.package_name,
        )

70 

71 

@dataclasses.dataclass(slots=True)
class PerPackagePackagerProvidedResult:
    """Packager provided files detected for one binary package."""

    # Matches whose definition is not "reservation only".
    auto_installable: list[PackagerProvidedFile]
    # "Reservation only" matches, grouped by the stem of their definition.
    reserved_only: dict[str, list[PackagerProvidedFile]]

76 

77 

def _find_package_name_prefix(
    binary_packages: Mapping[str, BinaryPackage],
    main_binary_package: str,
    max_periods_in_package_name: int,
    path: VirtualPath,
    *,
    allow_fuzzy_matches: bool = False,
) -> Iterable[tuple[str, str, bool, bool]]:
    """Yield the possible "owning package" interpretations of a file name.

    Each item is ``(package_name, remaining_basename, explicit, bug_950723)``:

    * ``explicit`` is True when the file name starts with a known package
      name; ``remaining_basename`` is then the text after that prefix.
    * ``bug_950723`` is True when the prefix was written with a trailing
      "@" (only recognised when ``allow_fuzzy_matches`` is set).

    The main package, paired with the full file name, is the fallback
    interpretation.

    :param binary_packages: Known binary packages keyed by name.
    :param main_binary_package: Package used for the fallback interpretation.
    :param max_periods_in_package_name: Highest number of periods found in
      any package name; bounds how many leading segments are tried.
    :param path: The file to analyse; only ``path.name`` is used. The name
      must contain a period when ``max_periods_in_package_name`` is below 1.
    :param allow_fuzzy_matches: Accept the "@"-suffixed package name form.
    """

    def _strip_at_suffix(candidate: str) -> tuple[str, bool]:
        # Returns the candidate without a trailing "@" and whether one was removed.
        if allow_fuzzy_matches and candidate.endswith("@"):
            return candidate[:-1], True
        return candidate, False

    file_name = path.name

    if max_periods_in_package_name < 1:
        # No package name contains a period, so only the first segment can
        # be a package name. Exactly one interpretation is produced here.
        first_segment, rest = file_name.split(".", 1)
        package_name, bug_950723 = _strip_at_suffix(first_segment)
        if package_name in binary_packages:
            yield package_name, rest, True, bug_950723
        else:
            yield main_binary_package, file_name, False, False
        return

    segments = file_name.split(".", max_periods_in_package_name + 1)
    # Try the longest candidate prefix first.
    for split_at in reversed(range(1, len(segments))):
        candidate, bug_950723 = _strip_at_suffix(".".join(segments[:split_at]))
        if candidate in binary_packages:
            yield candidate, ".".join(segments[split_at:]), True, bug_950723

    # main package case
    yield main_binary_package, file_name, False, False

112 

113 

114def _iterate_stem_splits( 

115 basename: str, 

116) -> collections.abc.Iterator[tuple[str, str | None, int]]: 

117 stem = basename 

118 period_count = stem.count(".") 

119 yield stem, None, period_count 

120 install_as_name = "" 

121 while period_count > 0: 

122 period_count -= 1 

123 install_as_name_part, stem = stem.split(".", 1) 

124 install_as_name = ( 

125 install_as_name + "." + install_as_name_part 

126 if install_as_name != "" 

127 else install_as_name_part 

128 ) 

129 yield stem, install_as_name, period_count 

130 

131 

def _find_definition(
    packager_provided_files: Mapping[str, PackagerProvidedFileClassSpec],
    basename: str,
    *,
    period2stems: Mapping[int, Sequence[str]] | None = None,
    had_arch: bool = False,
) -> tuple[str | None, PackagerProvidedFileClassSpec | None, str | None]:
    """Find the definition matching *basename*, optionally tolerating typos.

    :param packager_provided_files: Known definitions keyed by stem.
    :param basename: File name with package prefix and architecture suffix
      already removed.
    :param period2stems: Known stems grouped by the number of periods they
      contain. Typo detection is skipped when this is empty or None.
    :param had_arch: Whether an architecture suffix was removed from the
      name; when True the "known extension" exemption is not applied.
    :return: ``(name_segment, definition, typoed_stem)``. ``typoed_stem`` is
      the stem as written when the match came from typo detection and None
      for an exact match. All three are None when nothing matched.
    """
    for stem, install_as_name, period_count in _iterate_stem_splits(basename):
        exact = packager_provided_files.get(stem)
        if exact is not None:
            return install_as_name, exact, None

        typo_candidates = period2stems.get(period_count) if period2stems else None
        if not typo_candidates:
            continue

        # If the stem is also the extension and a known one at that, then
        # we do not consider it a typo match (to avoid false positives).
        #
        # We also ignore "foo.1" since manpages are kind of common.
        looks_like_plain_extension = (
            stem in _KNOWN_NON_TYPO_EXTENSIONS or stem.isdigit()
        )
        if looks_like_plain_extension and not had_arch:
            continue

        # Short stems get a tighter edit distance.
        allowed_distance = 1 if len(stem) <= 3 else 2
        matches = detect_possible_typo(
            stem,
            typo_candidates,
            max_edit_distance=allowed_distance,
        )
        # Only an unambiguous suggestion counts as a match.
        if matches is None or len(matches) != 1:
            continue
        return install_as_name, packager_provided_files[matches[0]], stem

    return None, None, None

161 

162 

def _check_mismatches(
    path: VirtualPath,
    definition: PackagerProvidedFileClassSpec,
    owning_package: BinaryPackage,
    install_as_name: str | None,
    had_arch: bool,
) -> None:
    """Report file names that use segments their definition does not permit.

    Calls ``_error`` (presumably fatal - confirm in ``debputy.util``) when:

    * the name has a name segment but the definition disallows one,
    * the name has an architecture segment but the package is arch:all, or
    * the name has an architecture segment but the definition disallows one.

    :param path: The file being checked (used for the message only).
    :param definition: The definition the file matched.
    :param owning_package: The package the file was attributed to.
    :param install_as_name: The name segment, or None when absent.
    :param had_arch: Whether the file name had an architecture segment.
    """

    def _arch_intro() -> str:
        # Built lazily so the path is only rendered when an error is reported.
        return (
            f'The file "{path.fs_path}" looks like an architecture specific packager provided file for'
            f" {owning_package.name} of type {definition.stem}."
        )

    if install_as_name is not None and not definition.allow_name_segment:
        message = (
            f'The file "{path.fs_path}" looks like a packager provided file for'
            f' {owning_package.name} of type {definition.stem} with the custom name "{install_as_name}".'
            " However, this file type does not allow custom naming. The file type was registered"
            f" by {definition.debputy_plugin_metadata.plugin_name} in case you disagree and want"
            " to file a bug/feature request."
        )
        _error(message)

    if not had_arch:
        return

    if owning_package.is_arch_all:
        message = (
            _arch_intro()
            + " However, the package in question is arch:all. The use of architecture specific files"
            " for arch:all packages does not make sense."
        )
        _error(message)

    if not definition.allow_architecture_segment:
        message = (
            _arch_intro()
            + " However, this file type does not allow architecture specific variants. The file type was registered"
            f" by {definition.debputy_plugin_metadata.plugin_name} in case you disagree and want"
            " to file a bug/feature request."
        )
        _error(message)

194 

195 

def _split_basename(
    basename: str,
    owning_package: BinaryPackage,
    *,
    has_explicit_package: bool = False,
    allow_fuzzy_matches: bool = False,
) -> tuple[str, int, str | None, bool]:
    """Strip the optional ".in" and architecture suffixes from *basename*.

    :param basename: File name with any package prefix already removed.
    :param owning_package: Package whose architecture values are compared
      against the last segment of the name.
    :param has_explicit_package: Whether the file name carried a package
      prefix; raises the base priority from 0 to 1.
    :param allow_fuzzy_matches: Also accept (and strip) a trailing ".in".
    :return: ``(basename, match_priority, arch_restriction, fuzzy_match)``.
      The priority is 3 for an "ARCH" suffix, 2 for an "ARCH_OS" suffix and
      -1 for an "all" suffix on an arch:all package.
    """
    fuzzy_match = False
    if allow_fuzzy_matches and len(basename) > 3 and basename.endswith(".in"):
        basename = basename.removesuffix(".in")
        fuzzy_match = True

    head, separator, last_word = basename.rpartition(".")
    if separator:
        arch_priority: int | None
        # We cannot use "resolved_architecture" as it would return "all".
        if last_word == owning_package.package_deb_architecture_variable("ARCH"):
            arch_priority = 3
        elif last_word == owning_package.package_deb_architecture_variable("ARCH_OS"):
            arch_priority = 2
        elif last_word == "all" and owning_package.is_arch_all:
            # This case does not make sense, but we detect it, so we can report an error
            # via _check_mismatches.
            arch_priority = -1
        else:
            arch_priority = None

        if arch_priority is not None:
            return head, arch_priority, last_word, fuzzy_match

    base_priority = 1 if has_explicit_package else 0
    return basename, base_priority, None, fuzzy_match

229 

230 

def _split_path(
    packager_provided_files: Mapping[str, PackagerProvidedFileClassSpec],
    binary_packages: Mapping[str, BinaryPackage],
    main_binary_package: str,
    max_periods_in_package_name: int,
    path: VirtualPath,
    *,
    allow_fuzzy_matches: bool = False,
    period2stems: Mapping[int, Sequence[str]] | None = None,
    known_static_non_ppf_names: Container[str] = frozenset(),
) -> Iterable[PackagerProvidedFile]:
    """Yield every packager provided file interpretation of *path*.

    A file name without a period is looked up directly as a stem. Any other
    name is split into an optional package prefix, an optional name segment,
    the stem and an optional architecture suffix. Each interpretation that
    resolves to a known definition is yielded.

    :param packager_provided_files: Known definitions keyed by stem.
    :param binary_packages: Known binary packages keyed by name.
    :param main_binary_package: Package that owns files without an explicit
      package prefix.
    :param max_periods_in_package_name: Highest number of periods in any
      package name.
    :param path: The file to classify.
    :param allow_fuzzy_matches: Accept ".in" suffixed names and the
      "@"-suffixed package form, and skip the hard mismatch errors.
    :param period2stems: Known stems grouped by period count; enables typo
      detection when non-empty.
    :param known_static_non_ppf_names: ``debian/<name>`` paths that must
      never be treated as packager provided files (applied to names that
      contain a period).
    """
    file_name = path.name

    if "." not in file_name:
        definition = packager_provided_files.get(file_name)
        if definition is None:
            return
        target_packages: Iterable[str]
        if definition.packageless_is_fallback_for_all_packages:
            target_packages = binary_packages
        else:
            target_packages = (main_binary_package,)
        for package_name in target_packages:
            yield PackagerProvidedFile(
                path=path,
                package_name=package_name,
                installed_as_basename=package_name,
                provided_key=".UNNAMED.",
                definition=definition,
                match_priority=0,
                fuzzy_match=False,
                uses_explicit_package_name=False,
                name_segment=None,
                architecture_restriction=None,
            )
        return

    if f"debian/{file_name}" in known_static_non_ppf_names:
        return

    def _expected_path_for(expected_basename: str, via_fuzzy_match: bool) -> str:
        # Path the file should have had, next to the actual file, keeping the
        # ".in" suffix when it was stripped for the match.
        parent_dir = path.parent_dir
        parent_prefix = parent_dir.path + "/" if parent_dir is not None else ""
        suffix = ".in" if via_fuzzy_match and file_name.endswith(".in") else ""
        return f"{parent_prefix}{expected_basename}{suffix}"

    for (
        owning_package_name,
        remaining_name,
        explicit_package,
        bug_950723,
    ) in _find_package_name_prefix(
        binary_packages,
        main_binary_package,
        max_periods_in_package_name,
        path,
        allow_fuzzy_matches=allow_fuzzy_matches,
    ):
        owning_package = binary_packages[owning_package_name]

        stem_candidate, match_priority, arch_restriction, fuzzy_match = _split_basename(
            remaining_name,
            owning_package,
            has_explicit_package=explicit_package,
            allow_fuzzy_matches=allow_fuzzy_matches,
        )

        install_as_name, definition, typoed_stem = _find_definition(
            packager_provided_files,
            stem_candidate,
            period2stems=period2stems,
            had_arch=bool(arch_restriction),
        )
        if definition is None:
            continue

        # Note: bug_950723 implies allow_fuzzy_matches
        if bug_950723 and not definition.bug_950723:
            continue

        if not allow_fuzzy_matches:
            # LSP/Lint checks here but should not use `_check_mismatches` as
            # the hard error disrupts them.
            _check_mismatches(
                path,
                definition,
                owning_package,
                install_as_name,
                arch_restriction is not None,
            )

        expected_path: str | None = None
        if (
            definition.packageless_is_fallback_for_all_packages
            and install_as_name is None
            and not explicit_package
            and arch_restriction is None
        ):
            if typoed_stem is not None:
                expected_path = _expected_path_for(definition.stem, fuzzy_match)
            for package_name in binary_packages:
                yield PackagerProvidedFile(
                    path=path,
                    package_name=package_name,
                    installed_as_basename=(
                        f"{package_name}@" if bug_950723 else package_name
                    ),
                    # The "@" variant of the key belongs to the bug_950723
                    # form; plain matches use ".UNNAMED." like every other
                    # branch so that competing files share one key.
                    provided_key=".UNNAMED@." if bug_950723 else ".UNNAMED.",
                    definition=definition,
                    match_priority=match_priority,
                    fuzzy_match=fuzzy_match,
                    uses_explicit_package_name=False,
                    name_segment=None,
                    architecture_restriction=None,
                    expected_path=expected_path,
                )
            continue

        at_marker = "@" if bug_950723 else ""
        if install_as_name is not None:
            provided_key = f"{install_as_name}{at_marker}"
            installed_as_basename = f"{install_as_name}{at_marker}"
        else:
            provided_key = f".UNNAMED.{at_marker}"
            installed_as_basename = f"{owning_package_name}{at_marker}"

        if typoed_stem:
            # Rebuild the file name with the corrected stem. This uses its
            # own variable so the installed basename is not overwritten.
            expected_basename = definition.stem
            if install_as_name is not None:
                expected_basename = f"{install_as_name}.{expected_basename}"
            if explicit_package:
                expected_basename = (
                    f"{owning_package_name}{at_marker}.{expected_basename}"
                )
            if arch_restriction is not None and arch_restriction != "all":
                expected_basename = f"{expected_basename}.{arch_restriction}"
            expected_path = _expected_path_for(expected_basename, fuzzy_match)

        yield PackagerProvidedFile(
            path=path,
            package_name=owning_package_name,
            installed_as_basename=installed_as_basename,
            provided_key=provided_key,
            definition=definition,
            match_priority=match_priority,
            fuzzy_match=fuzzy_match,
            uses_explicit_package_name=bool(explicit_package),
            name_segment=install_as_name,
            architecture_restriction=arch_restriction,
            expected_path=expected_path,
        )

400 

401 

402def _period_stem(stems: Iterable[str]) -> Mapping[int, Sequence[str]]: 

403 result: dict[int, list[str]] = {} 

404 for stem in stems: 

405 period_count = stem.count(".") 

406 matched_stems = result.get(period_count) 

407 if not matched_stems: 

408 matched_stems = [stem] 

409 result[period_count] = matched_stems 

410 else: 

411 matched_stems.append(stem) 

412 return result 

413 

414 

def _find_main_package_name(
    binary_packages: Mapping[str, BinaryPackage],
    *,
    allow_fuzzy_matches: bool = False,
) -> str | None:
    """Return the name of the main binary package.

    The first package flagged as main wins. When no package is flagged
    (only expected with ``allow_fuzzy_matches``), the first package that has
    a "Package" field is used instead, or None when there is none.
    """
    flagged_name = next(
        (pkg.name for pkg in binary_packages.values() if pkg.is_main_package),
        None,
    )
    if flagged_name is not None:
        return flagged_name

    assert allow_fuzzy_matches
    for pkg in binary_packages.values():
        if "Package" in pkg.fields:
            return pkg.name
    return None

428 

429 

@dataclasses.dataclass(frozen=True, slots=True)
class PackagingFileClassification:
    """Classification result for one file."""

    # The file that was classified.
    path: VirtualPath
    # Matches for this file grouped by package name, or None when the file
    # did not end up as a packager provided file.
    packager_provided_files_per_package: (
        Mapping[str, Sequence[PackagerProvidedFile]] | None
    )

436 

437 

def classify_debian_packaging_files(
    plugin_feature_set: "PluginProvidedFeatureSet",
    debian_dir: VirtualPath,
    binary_packages: Mapping[str, BinaryPackage],
    *,
    allow_fuzzy_matches: bool = False,
    detect_typos: bool = False,
    ignore_paths: Container[str] = frozenset(),
) -> Iterable[PackagingFileClassification]:
    """Classify the files directly inside *debian_dir*.

    Directories, dot-files and paths listed in *ignore_paths* are skipped.
    Every remaining file gets one ``PackagingFileClassification``, in
    directory iteration order. Nothing is yielded when no main package can
    be determined.

    When several files compete for the same (package, stem, key)
    combination, the one with the highest match priority is kept; on a tie
    the file seen last wins.

    :param plugin_feature_set: Source of the known definitions and of the
      known static packaging files.
    :param debian_dir: Directory whose entries are classified.
    :param binary_packages: Known binary packages keyed by name.
    :param allow_fuzzy_matches: Forwarded to the file name matcher.
    :param detect_typos: Enable typo detection (only effective when
      ``CAN_DETECT_TYPOS`` is true).
    :param ignore_paths: Paths to leave out entirely.
    """
    packager_provided_files = plugin_feature_set.packager_provided_files
    known_static_non_ppf_names: frozenset[str] = frozenset(
        known_file.detection_value
        for known_file in plugin_feature_set.known_packaging_files.values()
        if known_file.detection_method == "path"
    )

    main_binary_package = _find_main_package_name(
        binary_packages,
        allow_fuzzy_matches=allow_fuzzy_matches,
    )
    if main_binary_package is None:
        return

    max_periods_in_package_name = max(name.count(".") for name in binary_packages)
    period2stems: Mapping[int, Sequence[str]] = (
        _period_stem(packager_provided_files.keys())
        if detect_typos and CAN_DETECT_TYPOS
        else {}
    )

    considered_entries = []
    best_match_by_key: dict[tuple[str, str, str], PackagerProvidedFile] = {}

    for entry in debian_dir.iterdir:
        if entry.is_dir or entry.name.startswith("."):
            continue
        if entry.path in ignore_paths:
            continue
        considered_entries.append(entry)

        for candidate in _split_path(
            packager_provided_files,
            binary_packages,
            main_binary_package,
            max_periods_in_package_name,
            entry,
            allow_fuzzy_matches=allow_fuzzy_matches,
            period2stems=period2stems,
            known_static_non_ppf_names=known_static_non_ppf_names,
        ):
            match_key = (
                candidate.package_name,
                candidate.definition.stem,
                candidate.provided_key,
            )
            current = best_match_by_key.get(match_key)
            # Replace unless the stored match has a strictly higher priority.
            if current is None or current.match_priority <= candidate.match_priority:
                best_match_by_key[match_key] = candidate

    # path -> package name -> surviving matches
    matches_by_path: dict[str, dict[str, list[PackagerProvidedFile]]] = (
        collections.defaultdict(lambda: collections.defaultdict(list))
    )
    for surviving in best_match_by_key.values():
        matches_by_path[surviving.path.path][surviving.package_name].append(surviving)

    for entry in considered_entries:
        # ``get`` leaves files without any surviving match at None.
        yield PackagingFileClassification(entry, matches_by_path.get(entry.path))

515 

516 

def detect_all_packager_provided_files(
    plugin_feature_set: "PluginProvidedFeatureSet",
    debian_dir: VirtualPath,
    binary_packages: Mapping[str, BinaryPackage],
    *,
    allow_fuzzy_matches: bool = False,
    detect_typos: bool = False,
    ignore_paths: Container[str] = frozenset(),
) -> dict[str, PerPackagePackagerProvidedResult]:
    """Collect the packager provided files of every binary package.

    The files come from ``classify_debian_packaging_files``, which receives
    all arguments unchanged. For each package the matches are split into
    auto-installable ones and "reservation only" ones, the latter grouped
    by the stem of their definition.

    :return: One result per name in *binary_packages* (possibly empty).
    """
    result = {
        package_name: PerPackagePackagerProvidedResult(
            auto_installable=[],
            reserved_only=collections.defaultdict(list),
        )
        for package_name in binary_packages
    }

    classifications = classify_debian_packaging_files(
        plugin_feature_set,
        debian_dir,
        binary_packages,
        allow_fuzzy_matches=allow_fuzzy_matches,
        detect_typos=detect_typos,
        ignore_paths=ignore_paths,
    )
    for classified_path in classifications:
        matches_per_package = classified_path.packager_provided_files_per_package
        if not matches_per_package:
            continue
        for package_name, matches in matches_per_package.items():
            package_result = result[package_name]
            for match in matches:
                definition = match.definition
                if definition.reservation_only:
                    package_result.reserved_only[definition.stem].append(match)
                else:
                    package_result.auto_installable.append(match)

    return result