Coverage for src/debputy/packager_provided_files.py: 89%

234 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2025-10-12 15:06 +0000

1import collections 

2import dataclasses 

3from typing import ( 

4 Dict, 

5 List, 

6 Optional, 

7 Tuple, 

8 TYPE_CHECKING, 

9 FrozenSet, 

10) 

11from collections.abc import Mapping, Iterable, Sequence, Container 

12 

13from debputy.packages import BinaryPackage 

14from debputy.plugin.api import VirtualPath 

15from debputy.plugin.api.impl_types import ( 

16 PackagerProvidedFileClassSpec, 

17 PluginProvidedKnownPackagingFile, 

18) 

19from debputy.util import _error, CAN_DETECT_TYPOS, detect_possible_typo 

20 

21if TYPE_CHECKING: 

22 from debputy.plugin.api.feature_set import PluginProvidedFeatureSet 

23 

24 

# Stems that `_find_definition` never reports as a possible typo of a registered
# packager provided file stem (unless the file name carried an architecture
# suffix). They are ordinary file extensions, so a near match is most likely a
# false positive.
_KNOWN_NON_TYPO_EXTENSIONS = frozenset(
    {
        "conf",
        "config",
        "sh",
        "yml",
        "yaml",
        "json",
        "bash",
        "pl",
        "py",
        "md",
        "rst",
        # Fairly common image format in older packages
        "xpm",
        # Cruft of various kind (for unclean work directories). Just to avoid stupid false-positives.
        "bak",
        "tmp",
        "temp",
        "orig",
        "rej",
    }
)

48 

49 

@dataclasses.dataclass(frozen=True, slots=True)
class PackagerProvidedFile:
    """A file that was matched against a packager provided file definition.

    Instances are immutable (frozen dataclass).
    """

    # The file that was matched.
    path: VirtualPath
    # Name of the binary package the file was matched to.
    package_name: str
    # Basename handed to the definition when the destination is computed.
    installed_as_basename: str
    # Either the name segment of the file or an ".UNNAMED."-style placeholder;
    # combined with the package name and the definition stem it forms the key
    # used to de-duplicate matches.
    provided_key: str
    # The packager provided file definition the file matched.
    definition: PackagerProvidedFileClassSpec
    # When two files share the same key, the one with the lower priority is dropped.
    match_priority: int = 0
    # True when a trailing ".in" was stripped from the name to obtain the match.
    fuzzy_match: bool = False
    # True when the file name started with the name of a binary package.
    uses_explicit_package_name: bool = False
    # The custom name segment of the file name (if any).
    name_segment: str | None = None
    # The architecture suffix of the file name (if any).
    architecture_restriction: str | None = None
    # Only set when the stem was matched via typo detection: the path the file
    # would have had with the stem spelled as registered.
    expected_path: str | None = None

    def compute_dest(self) -> tuple[str, str]:
        """Ask the definition to compute the destination for this file.

        :return: Whatever `definition.compute_dest` returns for this file's
          basename, owning package and path.
        """
        spec = self.definition
        return spec.compute_dest(
            self.installed_as_basename,
            owning_package=self.package_name,
            path=self.path,
        )

70 

71 

@dataclasses.dataclass(slots=True)
class PerPackagePackagerProvidedResult:
    # Per-package result of `detect_all_packager_provided_files`.

    # Matched files whose definition is not `reservation_only`.
    auto_installable: list[PackagerProvidedFile]
    # Matched files whose definition is `reservation_only`, grouped by the
    # stem of their definition.
    reserved_only: dict[str, list[PackagerProvidedFile]]

76 

77 

def _find_package_name_prefix(
    binary_packages: Mapping[str, BinaryPackage],
    main_binary_package: str,
    max_periods_in_package_name: int,
    path: VirtualPath,
    *,
    allow_fuzzy_matches: bool = False,
) -> Iterable[tuple[str, str, bool, bool]]:
    """Yield the possible (package, remaining basename) splits of a file name.

    Each yielded tuple is `(package_name, remaining, explicit, bug_950723)`:

     * `package_name`: the binary package the file may belong to.
     * `remaining`: the part of the file name left once the package prefix is removed.
     * `explicit`: True when the file name started with the package name.
     * `bug_950723`: True when the package prefix had a trailing "@" that was
       stripped (only considered when `allow_fuzzy_matches` is set).

    The main binary package with the full file name is yielded as the fallback
    candidate. The file name is expected to contain at least one period.

    :param binary_packages: The known binary packages by name.
    :param main_binary_package: Name of the package used for the fallback candidate.
    :param max_periods_in_package_name: The highest number of periods in any
      binary package name.
    :param path: The file to analyze (only its name is used).
    :param allow_fuzzy_matches: Whether a trailing "@" on the package prefix is accepted.
    """
    file_name = path.name
    fallback = (main_binary_package, file_name, False, False)

    if max_periods_in_package_name < 1:
        # No package name contains a period, so only the first segment can be
        # a package name.
        candidate, rest = file_name.split(".", 1)
        stripped_at = bool(allow_fuzzy_matches and candidate.endswith("@"))
        if stripped_at:
            candidate = candidate[:-1]
        if candidate in binary_packages:
            yield candidate, rest, True, stripped_at
        else:
            yield fallback
        return

    segments = file_name.split(".", max_periods_in_package_name + 1)
    # Try the longest possible package name first.
    for cut in reversed(range(1, len(segments))):
        candidate = ".".join(segments[:cut])
        stripped_at = bool(allow_fuzzy_matches and candidate.endswith("@"))
        if stripped_at:
            candidate = candidate[:-1]

        if candidate in binary_packages:
            yield candidate, ".".join(segments[cut:]), True, stripped_at
    # main package case
    yield fallback

112 

113 

114def _iterate_stem_splits(basename: str) -> tuple[str, str, int]: 

115 stem = basename 

116 period_count = stem.count(".") 

117 yield stem, None, period_count 

118 install_as_name = "" 

119 while period_count > 0: 

120 period_count -= 1 

121 install_as_name_part, stem = stem.split(".", 1) 

122 install_as_name = ( 

123 install_as_name + "." + install_as_name_part 

124 if install_as_name != "" 

125 else install_as_name_part 

126 ) 

127 yield stem, install_as_name, period_count 

128 

129 

def _find_definition(
    packager_provided_files: Mapping[str, PackagerProvidedFileClassSpec],
    basename: str,
    *,
    period2stems: Mapping[int, Sequence[str]] | None = None,
    had_arch: bool = False,
) -> tuple[str | None, PackagerProvidedFileClassSpec | None, str | None]:
    """Find the packager provided file definition matching `basename`.

    Every split from `_iterate_stem_splits` is tried in order. An exact stem
    match wins; otherwise, when `period2stems` is provided and non-empty, the
    stem is checked for being a typo of exactly one known stem with the same
    number of periods.

    :param packager_provided_files: The known definitions by stem.
    :param basename: The basename with any package prefix and architecture
      suffix already removed.
    :param period2stems: Known stems grouped by the number of periods they
      contain. Typo detection is skipped when this is `None` or empty.
    :param had_arch: Whether an architecture suffix was removed from the name.
    :return: `(install_as_name, definition, typoed_stem)`. `typoed_stem` is
      only set when the match came from typo detection. All three are `None`
      when nothing matched.
    """
    for stem, install_as_name, period_count in _iterate_stem_splits(basename):
        exact_match = packager_provided_files.get(stem)
        if exact_match is not None:
            return install_as_name, exact_match, None

        candidates = period2stems.get(period_count) if period2stems else None
        if not candidates:
            continue

        # If the stem is also the extension and a known one at that, then
        # we do not consider it a typo match (to avoid false positives).
        #
        # We also ignore "foo.1" since manpages are kind of common.
        is_plain_extension = stem in _KNOWN_NON_TYPO_EXTENSIONS or stem.isdigit()
        if is_plain_extension and not had_arch:
            continue

        # Short stems get a tighter edit distance.
        max_edit_distance = 1 if len(stem) <= 3 else 2
        matches = detect_possible_typo(
            stem,
            candidates,
            max_edit_distance=max_edit_distance,
        )
        if matches is None or len(matches) != 1:
            continue
        return install_as_name, packager_provided_files[matches[0]], stem
    return None, None, None

159 

160 

def _check_mismatches(
    path: VirtualPath,
    definition: PackagerProvidedFileClassSpec,
    owning_package: BinaryPackage,
    install_as_name: str | None,
    had_arch: bool,
) -> None:
    """Report uses of a file name that the matched definition does not permit.

    Problems are reported via `_error`. The checks are: a name segment on a
    definition that does not allow one, an architecture suffix for an arch:all
    package, and an architecture suffix on a definition that does not allow one.

    :param path: The file that was matched.
    :param definition: The definition the file matched.
    :param owning_package: The package the file was matched to.
    :param install_as_name: The name segment of the file (if any).
    :param had_arch: Whether the file name had an architecture suffix.
    """
    name_segment_rejected = (
        install_as_name is not None and not definition.allow_name_segment
    )
    if name_segment_rejected:
        _error(
            f'The file "{path.fs_path}" looks like a packager provided file for'
            f' {owning_package.name} of type {definition.stem} with the custom name "{install_as_name}".'
            " However, this file type does not allow custom naming. The file type was registered"
            f" by {definition.debputy_plugin_metadata.plugin_name} in case you disagree and want"
            " to file a bug/feature request."
        )

    # The remaining checks only concern architecture specific files.
    if not had_arch:
        return

    if owning_package.is_arch_all:
        _error(
            f'The file "{path.fs_path}" looks like an architecture specific packager provided file for'
            f" {owning_package.name} of type {definition.stem}."
            " However, the package in question is arch:all. The use of architecture specific files"
            " for arch:all packages does not make sense."
        )
    if not definition.allow_architecture_segment:
        _error(
            f'The file "{path.fs_path}" looks like an architecture specific packager provided file for'
            f" {owning_package.name} of type {definition.stem}."
            " However, this file type does not allow architecture specific variants. The file type was registered"
            f" by {definition.debputy_plugin_metadata.plugin_name} in case you disagree and want"
            " to file a bug/feature request."
        )

192 

193 

def _split_basename(
    basename: str,
    owning_package: BinaryPackage,
    *,
    has_explicit_package: bool = False,
    allow_fuzzy_matches: bool = False,
) -> tuple[str, int, str | None, bool]:
    """Strip the optional ".in" and architecture suffixes from `basename`.

    :param basename: The file name with the package prefix (if any) removed.
    :param owning_package: The package the file is being matched against.
    :param has_explicit_package: Whether the file name had a package prefix.
    :param allow_fuzzy_matches: Whether a trailing ".in" may be stripped.
    :return: `(basename, match_priority, arch_restriction, fuzzy_match)`. The
      priority is 3 for a suffix equal to the package's "ARCH" variable, 2 for
      "ARCH_OS", -1 for "all" on an arch:all package, and otherwise 1 with an
      explicit package prefix or 0 without. `arch_restriction` is the removed
      suffix or `None`. `fuzzy_match` tells whether ".in" was stripped.
    """
    default_priority = 1 if has_explicit_package else 0
    fuzzy_match = False
    if allow_fuzzy_matches and basename.endswith(".in") and len(basename) > 3:
        basename = basename[:-3]
        fuzzy_match = True

    without_suffix, separator, suffix = basename.rpartition(".")
    if not separator:
        # No period, so there cannot be an architecture suffix.
        return basename, default_priority, None, fuzzy_match

    # We cannot use "resolved_architecture" as it would return "all".
    arch_variable = owning_package.package_deb_architecture_variable
    if suffix == arch_variable("ARCH"):
        arch_priority = 3
    elif suffix == arch_variable("ARCH_OS"):
        arch_priority = 2
    elif suffix == "all" and owning_package.is_arch_all:
        # This case does not make sense, but we detect it, so we can report an error
        # via _check_mismatches.
        arch_priority = -1
    else:
        return basename, default_priority, None, fuzzy_match

    return without_suffix, arch_priority, suffix, fuzzy_match

227 

228 

def _split_path(
    packager_provided_files: Mapping[str, PackagerProvidedFileClassSpec],
    binary_packages: Mapping[str, BinaryPackage],
    main_binary_package: str,
    max_periods_in_package_name: int,
    path: VirtualPath,
    *,
    allow_fuzzy_matches: bool = False,
    period2stems: Mapping[int, Sequence[str]] | None = None,
    known_static_non_ppf_names: Container[str] = frozenset(),
) -> Iterable[PackagerProvidedFile]:
    """Match a single file against the known packager provided file definitions.

    A name without a period is looked up directly as a stem. Otherwise each
    candidate from `_find_package_name_prefix` is tried in order and the first
    one that resolves to a definition produces the result.

    :param packager_provided_files: The known definitions by stem.
    :param binary_packages: The known binary packages by name.
    :param main_binary_package: Package that owns files without a package prefix.
    :param max_periods_in_package_name: The highest number of periods in any
      binary package name.
    :param path: The file to match.
    :param allow_fuzzy_matches: Accept ".in" suffixes and "@"-suffixed package
      prefixes. Also disables the hard errors from `_check_mismatches`.
    :param period2stems: Known stems grouped by period count, for typo
      detection. Typo detection is skipped when `None` or empty.
    :param known_static_non_ppf_names: "debian/<name>" paths that are never
      packager provided files. Only checked for names containing a period.
    :return: The matches for the file. Several are produced only when the
      definition is a packageless fallback for all packages.
    """
    owning_package_name = main_binary_package
    basename = path.name
    match_priority = 0
    # NOTE(review): `had_arch` is never reassigned in this function, so the
    # `not had_arch` test further down is always true.
    had_arch = False
    if "." not in basename:
        # The whole file name must be a stem, owned by the main package or
        # acting as the fallback for every package.
        definition = packager_provided_files.get(basename)
        if definition is None:
            return
        if definition.packageless_is_fallback_for_all_packages:
            yield from (
                PackagerProvidedFile(
                    path=path,
                    package_name=n,
                    installed_as_basename=n,
                    provided_key=".UNNAMED.",
                    definition=definition,
                    match_priority=match_priority,
                    fuzzy_match=False,
                    uses_explicit_package_name=False,
                    name_segment=None,
                    architecture_restriction=None,
                )
                for n in binary_packages
            )
        else:
            yield PackagerProvidedFile(
                path=path,
                package_name=owning_package_name,
                installed_as_basename=owning_package_name,
                provided_key=".UNNAMED.",
                definition=definition,
                match_priority=match_priority,
                fuzzy_match=False,
                uses_explicit_package_name=False,
                name_segment=None,
                architecture_restriction=None,
            )
        return
    if f"debian/{path.name}" in known_static_non_ppf_names:
        return

    for (
        owning_package_name,
        basename,
        explicit_package,
        bug_950723,
    ) in _find_package_name_prefix(
        binary_packages,
        main_binary_package,
        max_periods_in_package_name,
        path,
        allow_fuzzy_matches=allow_fuzzy_matches,
    ):
        owning_package = binary_packages[owning_package_name]

        basename, match_priority, arch_restriction, fuzzy_match = _split_basename(
            basename,
            owning_package,
            has_explicit_package=explicit_package,
            allow_fuzzy_matches=allow_fuzzy_matches,
        )

        install_as_name, definition, typoed_stem = _find_definition(
            packager_provided_files,
            basename,
            period2stems=period2stems,
            had_arch=bool(arch_restriction),
        )
        if definition is None:
            # This candidate did not resolve to a definition; try the next one.
            continue

        # Note: bug_950723 implies allow_fuzzy_matches
        if bug_950723 and not definition.bug_950723:
            continue

        if not allow_fuzzy_matches:
            # LSP/Lint checks here but should not use `_check_mismatches` as
            # the hard error disrupts them.
            _check_mismatches(
                path,
                definition,
                owning_package,
                install_as_name,
                arch_restriction is not None,
            )

        expected_path: str | None = None
        if (
            definition.packageless_is_fallback_for_all_packages
            and install_as_name is None
            and not had_arch
            and not explicit_package
            and arch_restriction is None
        ):
            if typoed_stem is not None:
                parent_path = (
                    path.parent_dir.path + "/" if path.parent_dir is not None else ""
                )
                expected_path = f"{parent_path}{definition.stem}"
                if fuzzy_match and path.name.endswith(".in"):
                    expected_path += ".in"
            # NOTE(review): `_find_package_name_prefix` only sets `bug_950723`
            # together with an explicit package, so `bug_950723` is always false
            # in this branch and the key is always ".UNNAMED@.". The period-less
            # branch above uses ".UNNAMED." and the branch below appends "@" only
            # when `bug_950723` is true, so this conditional looks inverted.
            # Confirm the intended key before changing it.
            yield from (
                PackagerProvidedFile(
                    path=path,
                    package_name=n,
                    installed_as_basename=f"{n}@" if bug_950723 else n,
                    provided_key=".UNNAMED." if bug_950723 else ".UNNAMED@.",
                    definition=definition,
                    match_priority=match_priority,
                    fuzzy_match=fuzzy_match,
                    uses_explicit_package_name=False,
                    name_segment=None,
                    architecture_restriction=None,
                    expected_path=expected_path,
                )
                for n in binary_packages
            )
        else:
            provided_key = (
                install_as_name if install_as_name is not None else ".UNNAMED."
            )
            basename = (
                install_as_name if install_as_name is not None else owning_package_name
            )
            if bug_950723:
                provided_key = f"{provided_key}@"
                basename = f"{basename}@"
                package_prefix = f"{owning_package_name}@"
            else:
                package_prefix = owning_package_name
            if typoed_stem:
                # Rebuild the file name with the stem spelled as registered.
                # NOTE(review): `basename` is overwritten here and is then also
                # passed as `installed_as_basename` below. Confirm that is
                # intended for typo matches.
                parent_path = (
                    path.parent_dir.path + "/" if path.parent_dir is not None else ""
                )
                basename = definition.stem
                if install_as_name is not None:
                    basename = f"{install_as_name}.{basename}"
                if explicit_package:
                    basename = f"{package_prefix}.{basename}"
                if arch_restriction is not None and arch_restriction != "all":
                    basename = f"{basename}.{arch_restriction}"
                expected_path = f"{parent_path}{basename}"
                if fuzzy_match and path.name.endswith(".in"):
                    expected_path += ".in"
            yield PackagerProvidedFile(
                path=path,
                package_name=owning_package_name,
                installed_as_basename=basename,
                provided_key=provided_key,
                definition=definition,
                match_priority=match_priority,
                fuzzy_match=fuzzy_match,
                uses_explicit_package_name=bool(explicit_package),
                name_segment=install_as_name,
                architecture_restriction=arch_restriction,
                expected_path=expected_path,
            )
        # The first candidate that resolves to a definition wins.
        return

398 

399 

def _period_stem(stems: Iterable[str]) -> Mapping[int, Sequence[str]]:
    """Group stems by the number of periods they contain.

    :param stems: The stems to group.
    :return: A mapping from period count to the stems with that many periods,
      in the order they were provided.
    """
    grouped: dict[int, list[str]] = {}
    for stem in stems:
        grouped.setdefault(stem.count("."), []).append(stem)
    return grouped

411 

412 

def _find_main_package_name(
    binary_packages: Mapping[str, BinaryPackage],
    *,
    allow_fuzzy_matches: bool = False,
) -> str | None:
    """Return the name of the package that owns files without a package prefix.

    The first package flagged as the main package is used. When none is
    flagged (only expected with `allow_fuzzy_matches`), the first package
    with a "Package" field is used instead.

    :param binary_packages: The known binary packages by name.
    :param allow_fuzzy_matches: Whether the caller is in fuzzy-match mode.
    :return: The package name, or `None` when no package qualifies.
    """
    for package in binary_packages.values():
        if package.is_main_package:
            return package.name

    assert allow_fuzzy_matches
    for package in binary_packages.values():
        if "Package" in package.fields:
            return package.name
    return None

426 

427 

@dataclasses.dataclass(slots=True, frozen=True)
class PackagingFileClassification:
    """What `classify_debian_packaging_files` determined for one file."""

    # The file that was classified.
    path: VirtualPath
    # The matches for the file grouped by package name, or `None` when the
    # file did not end up as a match for any package.
    packager_provided_files_per_package: (
        Mapping[str, Sequence[PackagerProvidedFile]] | None
    )

434 

435 

def classify_debian_packaging_files(
    plugin_feature_set: "PluginProvidedFeatureSet",
    debian_dir: VirtualPath,
    binary_packages: Mapping[str, BinaryPackage],
    *,
    allow_fuzzy_matches: bool = False,
    detect_typos: bool = False,
    ignore_paths: Container[str] = frozenset(),
) -> Iterable[PackagingFileClassification]:
    """Classify the files directly inside the debian directory.

    Directories, files whose name starts with a period and files listed in
    `ignore_paths` are skipped. Every other file gets one classification,
    whether or not it matched a packager provided file definition.

    When several files resolve to the same (package, stem, provided key), only
    one is kept: a later file replaces an earlier one unless the earlier one
    has a strictly higher match priority.

    :param plugin_feature_set: Provides the packager provided file definitions
      and the known packaging files.
    :param debian_dir: The directory to scan (not recursive).
    :param binary_packages: The known binary packages by name.
    :param allow_fuzzy_matches: Passed on to the matching helpers.
    :param detect_typos: Whether to look for typos of known stems. Only has
      an effect when `CAN_DETECT_TYPOS` is true.
    :param ignore_paths: Paths to skip.
    :return: The classifications in iteration order. Nothing is produced
      when no main package can be determined.
    """
    packager_provided_files = plugin_feature_set.packager_provided_files
    known_static_non_ppf_names: frozenset[str] = frozenset(
        {
            p.detection_value
            for p in plugin_feature_set.known_packaging_files.values()
            if p.detection_method == "path"
        }
    )
    main_binary_package = _find_main_package_name(
        binary_packages,
        allow_fuzzy_matches=allow_fuzzy_matches,
    )
    if main_binary_package is None:
        # This is a generator; a bare return is all that is needed to produce
        # nothing. (A returned value would never be seen by iterating callers.)
        return
    provided_files_by_key: dict[tuple[str, str, str], PackagerProvidedFile] = {}
    # Safe for `max`: a main package was found, so `binary_packages` is non-empty.
    max_periods_in_package_name = max(name.count(".") for name in binary_packages)
    period2stems: Mapping[int, Sequence[str]]
    if detect_typos and CAN_DETECT_TYPOS:
        period2stems = _period_stem(packager_provided_files.keys())
    else:
        period2stems = {}

    paths: list[VirtualPath] = []

    for entry in debian_dir.iterdir:
        if entry.is_dir or entry.name.startswith("."):
            continue
        if entry.path in ignore_paths:
            continue
        paths.append(entry)
        matching_ppfs = _split_path(
            packager_provided_files,
            binary_packages,
            main_binary_package,
            max_periods_in_package_name,
            entry,
            allow_fuzzy_matches=allow_fuzzy_matches,
            period2stems=period2stems,
            known_static_non_ppf_names=known_static_non_ppf_names,
        )
        for packager_provided_file in matching_ppfs:
            match_key = (
                packager_provided_file.package_name,
                packager_provided_file.definition.stem,
                packager_provided_file.provided_key,
            )
            existing = provided_files_by_key.get(match_key)
            if (
                existing is not None
                and existing.match_priority > packager_provided_file.match_priority
            ):
                # The file already recorded for this key is the better match.
                continue
            provided_files_by_key[match_key] = packager_provided_file

    # Regroup the surviving matches as path -> package name -> matches.
    paths2ppfs_per_package: dict[str, dict[str, list[PackagerProvidedFile]]] = {}
    for packager_provided_file in provided_files_by_key.values():
        package_name = packager_provided_file.package_name
        path_name = packager_provided_file.path.path
        ppfs_per_package = paths2ppfs_per_package.get(path_name)
        if ppfs_per_package is None:
            ppfs_per_package = collections.defaultdict(list)
            paths2ppfs_per_package[path_name] = ppfs_per_package
        ppfs_per_package[package_name].append(packager_provided_file)

    for entry in paths:
        yield PackagingFileClassification(
            entry,
            paths2ppfs_per_package.get(entry.path),
        )

513 

514 

def detect_all_packager_provided_files(
    plugin_feature_set: "PluginProvidedFeatureSet",
    debian_dir: VirtualPath,
    binary_packages: Mapping[str, BinaryPackage],
    *,
    allow_fuzzy_matches: bool = False,
    detect_typos: bool = False,
    ignore_paths: Container[str] = frozenset(),
) -> dict[str, PerPackagePackagerProvidedResult]:
    """Collect the packager provided files of every binary package.

    The files are classified via `classify_debian_packaging_files` and each
    match is sorted into its package's result according to whether its
    definition is `reservation_only`.

    :param plugin_feature_set: Passed on to `classify_debian_packaging_files`.
    :param debian_dir: The directory to scan.
    :param binary_packages: The known binary packages by name.
    :param allow_fuzzy_matches: Passed on to `classify_debian_packaging_files`.
    :param detect_typos: Passed on to `classify_debian_packaging_files`.
    :param ignore_paths: Passed on to `classify_debian_packaging_files`.
    :return: One result per binary package (including packages without any
      matches), keyed by package name.
    """
    result: dict[str, PerPackagePackagerProvidedResult] = {}
    for name in binary_packages:
        result[name] = PerPackagePackagerProvidedResult(
            [],
            collections.defaultdict(list),
        )

    classifications = classify_debian_packaging_files(
        plugin_feature_set,
        debian_dir,
        binary_packages,
        allow_fuzzy_matches=allow_fuzzy_matches,
        detect_typos=detect_typos,
        ignore_paths=ignore_paths,
    )
    for classification in classifications:
        matches_per_package = classification.packager_provided_files_per_package
        if not matches_per_package:
            continue
        for package_name, matches in matches_per_package.items():
            package_result = result[package_name]
            for match in matches:
                spec = match.definition
                if spec.reservation_only:
                    package_result.reserved_only[spec.stem].append(match)
                else:
                    package_result.auto_installable.append(match)

    return result