Coverage for src/debputy/packager_provided_files.py: 88%

233 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2025-09-07 09:27 +0000

1import collections 

2import dataclasses 

3from typing import ( 

4 Mapping, 

5 Iterable, 

6 Dict, 

7 List, 

8 Optional, 

9 Tuple, 

10 Sequence, 

11 Container, 

12 TYPE_CHECKING, 

13 FrozenSet, 

14) 

15 

16from debputy.packages import BinaryPackage 

17from debputy.plugin.api import VirtualPath 

18from debputy.plugin.api.impl_types import ( 

19 PackagerProvidedFileClassSpec, 

20 PluginProvidedKnownPackagingFile, 

21) 

22from debputy.util import _error, CAN_DETECT_TYPOS, detect_possible_typo 

23 

24if TYPE_CHECKING: 

25 from debputy.plugin.api.feature_set import PluginProvidedFeatureSet 

26 

27 

28_KNOWN_NON_TYPO_EXTENSIONS = frozenset( 

29 { 

30 "conf", 

31 "config", 

32 "sh", 

33 "yml", 

34 "yaml", 

35 "json", 

36 "bash", 

37 "pl", 

38 "py", 

39 "md", 

40 "rst", 

41 # Fairly common image format in older packages 

42 "xpm", 

43 # Cruft of various kind (for unclean work directories). Just to avoid stupid false-positives. 

44 "bak", 

45 "tmp", 

46 "temp", 

47 "orig", 

48 "rej", 

49 } 

50) 

51 

52 

@dataclasses.dataclass(slots=True, frozen=True)
class PackagerProvidedFile:
    """One interpretation of a file as a packager provided file.

    Instances are immutable. A single file on disk can give rise to several
    instances (for example one per binary package when the file acts as a
    fallback for all packages).
    """

    # The file that was matched.
    path: VirtualPath
    # Name of the binary package this interpretation is attributed to.
    package_name: str
    # Basename handed to the definition when computing the destination.
    installed_as_basename: str
    # Key used (together with package name and stem) to detect competing
    # files; the custom name segment or a placeholder for unnamed files.
    provided_key: str
    # The file type definition the file matched.
    definition: PackagerProvidedFileClassSpec
    # Higher values win when two files compete for the same key.
    match_priority: int = 0
    # True when the match relied on fuzzy rules (e.g. a stripped ".in" suffix).
    fuzzy_match: bool = False
    # True when the file name started with the name of a binary package.
    uses_explicit_package_name: bool = False
    # The custom name segment of the file name, if there was one.
    name_segment: Optional[str] = None
    # The architecture suffix of the file name, if there was one.
    architecture_restriction: Optional[str] = None
    # Set when the stem looked like a typo: the path the file probably should
    # have had.
    expected_path: Optional[str] = None

    def compute_dest(self) -> Tuple[str, str]:
        """Ask the definition where this file should be installed.

        :return: The two-string tuple produced by the definition's own
          ``compute_dest`` for this file's basename, package and path.
        """
        spec = self.definition
        return spec.compute_dest(
            self.installed_as_basename,
            owning_package=self.package_name,
            path=self.path,
        )

73 

74 

@dataclasses.dataclass(slots=True)
class PerPackagePackagerProvidedResult:
    """Packager provided files detected for a single binary package."""

    # Files whose definition is not "reservation only".
    auto_installable: List[PackagerProvidedFile]
    # Files whose definition is "reservation only", grouped by definition stem.
    reserved_only: Dict[str, List[PackagerProvidedFile]]

79 

80 

def _find_package_name_prefix(
    binary_packages: Mapping[str, BinaryPackage],
    main_binary_package: str,
    max_periods_in_package_name: int,
    path: VirtualPath,
    *,
    allow_fuzzy_matches: bool = False,
) -> Iterable[Tuple[str, str, bool, bool]]:
    """Yield the ways the file name can be split into package name and rest.

    Each yielded tuple is ``(package_name, remaining_name, explicit, bug_950723)``:

    * ``explicit`` is True when the file name started with the name of a
      known binary package; the remainder then excludes that prefix.
    * ``bug_950723`` is True when, under fuzzy matching, the package prefix
      carried a trailing "@" that was stripped before the lookup.

    The file name is expected to contain at least one period (the caller
    filters out period-less names before calling this).

    :param binary_packages: Known binary packages keyed by name.
    :param main_binary_package: Package used when no explicit prefix applies.
    :param max_periods_in_package_name: Largest number of periods found in
      any known package name; bounds how many prefixes are tried.
    :param path: The file being examined; only its name is used.
    :param allow_fuzzy_matches: Whether the trailing "@" variant is accepted.
    """

    def _strip_template_marker(candidate: str) -> Tuple[str, bool]:
        # Under fuzzy matching "pkg@" is treated as "pkg" and flagged.
        if allow_fuzzy_matches and candidate.endswith("@"):
            return candidate[:-1], True
        return candidate, False

    file_name = path.name
    if max_periods_in_package_name < 1:
        # No package name contains a period, so only the first segment can
        # possibly be a package name.
        head, tail = file_name.split(".", 1)
        candidate, templated = _strip_template_marker(head)
        if candidate in binary_packages:
            yield candidate, tail, True, templated
        else:
            yield main_binary_package, file_name, False, False
        return

    segments = file_name.split(".", max_periods_in_package_name + 1)
    # Try the longest candidate prefix first, then progressively shorter ones.
    for cut in reversed(range(1, len(segments))):
        candidate, templated = _strip_template_marker(".".join(segments[:cut]))
        if candidate in binary_packages:
            yield candidate, ".".join(segments[cut:]), True, templated
    # Finally, the interpretation where the file belongs to the main package.
    yield main_binary_package, file_name, False, False

115 

116 

117def _iterate_stem_splits(basename: str) -> Tuple[str, str, int]: 

118 stem = basename 

119 period_count = stem.count(".") 

120 yield stem, None, period_count 

121 install_as_name = "" 

122 while period_count > 0: 

123 period_count -= 1 

124 install_as_name_part, stem = stem.split(".", 1) 

125 install_as_name = ( 

126 install_as_name + "." + install_as_name_part 

127 if install_as_name != "" 

128 else install_as_name_part 

129 ) 

130 yield stem, install_as_name, period_count 

131 

132 

def _find_definition(
    packager_provided_files: Mapping[str, PackagerProvidedFileClassSpec],
    basename: str,
    *,
    period2stems: Optional[Mapping[int, Sequence[str]]] = None,
    had_arch: bool = False,
) -> Tuple[Optional[str], Optional[PackagerProvidedFileClassSpec], Optional[str]]:
    """Look up the file type definition matching ``basename``.

    Every split produced by `_iterate_stem_splits` is tried in order. An
    exact stem match wins immediately. Otherwise, when ``period2stems`` is
    provided, the stem is compared against known stems with the same number
    of periods to detect a likely typo.

    :param packager_provided_files: Known definitions keyed by stem.
    :param basename: File name with package prefix and architecture removed.
    :param period2stems: Known stems grouped by period count; enables typo
      detection when non-empty.
    :param had_arch: Whether an architecture suffix was stripped from the
      name. When False, well-known extensions and purely numeric stems are
      not considered typos.
    :return: ``(install_as_name, definition, typoed_stem)``. ``typoed_stem``
      is only set for typo matches; all three are ``None`` without a match.
    """
    for stem, install_as_name, period_count in _iterate_stem_splits(basename):
        exact_match = packager_provided_files.get(stem)
        if exact_match is not None:
            return install_as_name, exact_match, None

        typo_candidates = period2stems.get(period_count) if period2stems else None
        if not typo_candidates:
            continue
        # If the stem is also the extension and a known one at that, then
        # we do not consider it a typo match (to avoid false positives).
        #
        # We also ignore "foo.1" since manpages are kind of common.
        if not had_arch and (stem in _KNOWN_NON_TYPO_EXTENSIONS or stem.isdigit()):
            continue

        # Short stems get a tighter edit distance budget.
        allowed_distance = 1 if len(stem) <= 3 else 2
        suggestions = detect_possible_typo(
            stem,
            typo_candidates,
            max_edit_distance=allowed_distance,
        )
        # Only an unambiguous suggestion is accepted as a typo match.
        if suggestions is None or len(suggestions) != 1:
            continue
        return install_as_name, packager_provided_files[suggestions[0]], stem
    return None, None, None

162 

163 

def _check_mismatches(
    path: VirtualPath,
    definition: PackagerProvidedFileClassSpec,
    owning_package: BinaryPackage,
    install_as_name: Optional[str],
    had_arch: bool,
) -> None:
    """Report file names that use features their file type does not permit.

    Problems are reported through ``_error``. Three situations are checked:
    a custom name segment on a type that disallows it, an architecture
    suffix on a file for an arch:all package, and an architecture suffix on
    a type that disallows architecture specific variants.

    :param path: The file being checked (used in the messages).
    :param definition: The file type the file matched.
    :param owning_package: The package the file was attributed to.
    :param install_as_name: The custom name segment, if any.
    :param had_arch: Whether the file name carried an architecture suffix.
    """
    name_segment_rejected = (
        install_as_name is not None and not definition.allow_name_segment
    )
    if name_segment_rejected:
        _error(
            f'The file "{path.fs_path}" looks like a packager provided file for'
            f' {owning_package.name} of type {definition.stem} with the custom name "{install_as_name}".'
            " However, this file type does not allow custom naming. The file type was registered"
            f" by {definition.debputy_plugin_metadata.plugin_name} in case you disagree and want"
            " to file a bug/feature request."
        )

    # The remaining checks only concern architecture specific files.
    if not had_arch:
        return

    if owning_package.is_arch_all:
        _error(
            f'The file "{path.fs_path}" looks like an architecture specific packager provided file for'
            f" {owning_package.name} of type {definition.stem}."
            " However, the package in question is arch:all. The use of architecture specific files"
            " for arch:all packages does not make sense."
        )
    if not definition.allow_architecture_segment:
        _error(
            f'The file "{path.fs_path}" looks like an architecture specific packager provided file for'
            f" {owning_package.name} of type {definition.stem}."
            " However, this file type does not allow architecture specific variants. The file type was registered"
            f" by {definition.debputy_plugin_metadata.plugin_name} in case you disagree and want"
            " to file a bug/feature request."
        )

195 

196 

def _split_basename(
    basename: str,
    owning_package: BinaryPackage,
    *,
    has_explicit_package: bool = False,
    allow_fuzzy_matches: bool = False,
) -> Tuple[str, int, Optional[str], bool]:
    """Strip the optional ".in" and architecture suffixes from ``basename``.

    :param basename: File name without the package prefix.
    :param owning_package: Package the file is attributed to; supplies the
      architecture values the last segment is compared against.
    :param has_explicit_package: Whether the file name had an explicit
      package prefix (raises the base priority from 0 to 1).
    :param allow_fuzzy_matches: Whether a trailing ".in" may be stripped.
    :return: ``(basename, match_priority, arch_restriction, fuzzy_match)``.
      The priority is 3 for an ``ARCH`` suffix, 2 for ``ARCH_OS`` and -1 for
      the nonsensical "all" suffix on an arch:all package.
    """
    priority = 1 if has_explicit_package else 0
    was_fuzzy = False
    arch: Optional[str] = None

    if allow_fuzzy_matches and len(basename) > 3 and basename.endswith(".in"):
        basename = basename[:-3]
        was_fuzzy = True

    head, separator, last_word = basename.rpartition(".")
    if separator:
        arch_priority: Optional[int] = None
        # We cannot use "resolved_architecture" as it would return "all".
        if last_word == owning_package.package_deb_architecture_variable("ARCH"):
            arch_priority = 3
        elif last_word == owning_package.package_deb_architecture_variable("ARCH_OS"):
            arch_priority = 2
        elif last_word == "all" and owning_package.is_arch_all:
            # This case does not make sense, but we detect it, so we can
            # report an error via _check_mismatches.
            arch_priority = -1

        if arch_priority is not None:
            priority = arch_priority
            basename = head
            arch = last_word

    return basename, priority, arch, was_fuzzy

230 

231 

def _split_path(
    packager_provided_files: Mapping[str, PackagerProvidedFileClassSpec],
    binary_packages: Mapping[str, BinaryPackage],
    main_binary_package: str,
    max_periods_in_package_name: int,
    path: VirtualPath,
    *,
    allow_fuzzy_matches: bool = False,
    period2stems: Optional[Mapping[int, Sequence[str]]] = None,
    known_static_non_ppf_names: Container[str] = frozenset(),
) -> Iterable[PackagerProvidedFile]:
    """Yield every packager provided file interpretation of ``path``.

    A file name without a period is looked up directly as a stem. Any other
    name is decomposed into package prefix, name segment, stem and
    architecture suffix, trying each candidate package prefix in turn.

    :param packager_provided_files: Known definitions keyed by stem.
    :param binary_packages: Known binary packages keyed by name.
    :param main_binary_package: Package that owns files without an explicit
      package prefix.
    :param max_periods_in_package_name: Largest number of periods in any
      known package name.
    :param path: The file to interpret.
    :param allow_fuzzy_matches: Accept fuzzy variants (".in" suffix, "@"
      after the package name) and skip the hard mismatch errors.
    :param period2stems: Known stems grouped by period count; enables typo
      detection when non-empty.
    :param known_static_non_ppf_names: Paths of the form ``debian/<name>``
      that are known packaging files and must not be treated as packager
      provided files.
    :return: Zero or more `PackagerProvidedFile` instances for the path.
    """
    file_name = path.name

    if "." not in file_name:
        definition = packager_provided_files.get(file_name)
        if definition is None:
            return
        # A bare stem applies either to every package or to the main one.
        if definition.packageless_is_fallback_for_all_packages:
            target_packages: Iterable[str] = binary_packages
        else:
            target_packages = (main_binary_package,)
        for package_name in target_packages:
            yield PackagerProvidedFile(
                path=path,
                package_name=package_name,
                installed_as_basename=package_name,
                provided_key=".UNNAMED.",
                definition=definition,
                match_priority=0,
                fuzzy_match=False,
                uses_explicit_package_name=False,
                name_segment=None,
                architecture_restriction=None,
            )
        return

    if f"debian/{file_name}" in known_static_non_ppf_names:
        return

    def _expected_path_for(expected_name: str, fuzzy_match: bool) -> str:
        # Path the file should have had when its stem looks like a typo.
        parent_dir = path.parent_dir
        parent_prefix = parent_dir.path + "/" if parent_dir is not None else ""
        suffix = ".in" if fuzzy_match and file_name.endswith(".in") else ""
        return f"{parent_prefix}{expected_name}{suffix}"

    for (
        owning_package_name,
        remaining_name,
        explicit_package,
        bug_950723,
    ) in _find_package_name_prefix(
        binary_packages,
        main_binary_package,
        max_periods_in_package_name,
        path,
        allow_fuzzy_matches=allow_fuzzy_matches,
    ):
        owning_package = binary_packages[owning_package_name]

        stem_part, match_priority, arch_restriction, fuzzy_match = _split_basename(
            remaining_name,
            owning_package,
            has_explicit_package=explicit_package,
            allow_fuzzy_matches=allow_fuzzy_matches,
        )

        install_as_name, definition, typoed_stem = _find_definition(
            packager_provided_files,
            stem_part,
            period2stems=period2stems,
            had_arch=bool(arch_restriction),
        )
        if definition is None:
            continue

        # Note: bug_950723 implies allow_fuzzy_matches
        if bug_950723 and not definition.bug_950723:
            continue

        if not allow_fuzzy_matches:
            # LSP/Lint checks here but should not use `_check_mismatches` as
            # the hard error disrupts them.
            _check_mismatches(
                path,
                definition,
                owning_package,
                install_as_name,
                arch_restriction is not None,
            )

        expected_path: Optional[str] = None
        if (
            definition.packageless_is_fallback_for_all_packages
            and install_as_name is None
            and not explicit_package
            and arch_restriction is None
        ):
            if typoed_stem is not None:
                expected_path = _expected_path_for(definition.stem, fuzzy_match)
            for package_name in binary_packages:
                yield PackagerProvidedFile(
                    path=path,
                    package_name=package_name,
                    installed_as_basename=(
                        f"{package_name}@" if bug_950723 else package_name
                    ),
                    # The regular case must use the same ".UNNAMED." key as
                    # every other unnamed file; otherwise a fallback file
                    # never competes with (and is never overridden by) a
                    # package specific file of the same type. The arms of
                    # this conditional used to be swapped.
                    provided_key=".UNNAMED@." if bug_950723 else ".UNNAMED.",
                    definition=definition,
                    match_priority=match_priority,
                    fuzzy_match=fuzzy_match,
                    uses_explicit_package_name=False,
                    name_segment=None,
                    architecture_restriction=None,
                    expected_path=expected_path,
                )
            continue

        provided_key = install_as_name if install_as_name is not None else ".UNNAMED."
        installed_as = (
            install_as_name if install_as_name is not None else owning_package_name
        )
        package_prefix = owning_package_name
        if bug_950723:
            provided_key = f"{provided_key}@"
            installed_as = f"{installed_as}@"
            package_prefix = f"{owning_package_name}@"

        if typoed_stem:
            # Rebuild the file name with the corrected stem. This uses its
            # own variable so the suggestion does not leak into
            # `installed_as_basename` below.
            expected_name = definition.stem
            if install_as_name is not None:
                expected_name = f"{install_as_name}.{expected_name}"
            if explicit_package:
                expected_name = f"{package_prefix}.{expected_name}"
            if arch_restriction is not None and arch_restriction != "all":
                expected_name = f"{expected_name}.{arch_restriction}"
            expected_path = _expected_path_for(expected_name, fuzzy_match)

        yield PackagerProvidedFile(
            path=path,
            package_name=owning_package_name,
            installed_as_basename=installed_as,
            provided_key=provided_key,
            definition=definition,
            match_priority=match_priority,
            fuzzy_match=fuzzy_match,
            uses_explicit_package_name=bool(explicit_package),
            name_segment=install_as_name,
            architecture_restriction=arch_restriction,
            expected_path=expected_path,
        )

401 

402 

403def _period_stem(stems: Iterable[str]) -> Mapping[int, Sequence[str]]: 

404 result: Dict[int, List[str]] = {} 

405 for stem in stems: 

406 period_count = stem.count(".") 

407 matched_stems = result.get(period_count) 

408 if not matched_stems: 

409 matched_stems = [stem] 

410 result[period_count] = matched_stems 

411 else: 

412 matched_stems.append(stem) 

413 return result 

414 

415 

def _find_main_package_name(
    binary_packages: Mapping[str, BinaryPackage],
    *,
    allow_fuzzy_matches: bool = False,
) -> Optional[str]:
    """Return the name of the package that owns prefix-less files.

    The first package flagged as the main package wins. When no package is
    flagged (only expected under fuzzy matching), the first package whose
    fields contain "Package" is used instead.

    :param binary_packages: Known binary packages keyed by name.
    :param allow_fuzzy_matches: Whether the caller runs in fuzzy mode.
    :return: The package name, or ``None`` when no candidate exists.
    """
    for package in binary_packages.values():
        if package.is_main_package:
            return package.name

    # Outside fuzzy mode there must always be a main package.
    assert allow_fuzzy_matches
    for package in binary_packages.values():
        if "Package" in package.fields:
            return package.name
    return None

429 

430 

@dataclasses.dataclass(frozen=True, slots=True)
class PackagingFileClassification:
    """Classification result for one file in the packaging directory."""

    # The file that was classified.
    path: VirtualPath
    # Packager provided file interpretations grouped by package name, or
    # None when the file did not match any.
    packager_provided_files_per_package: Optional[Mapping[str, Sequence[PackagerProvidedFile]]]

437 

438 

def classify_debian_packaging_files(
    plugin_feature_set: "PluginProvidedFeatureSet",
    debian_dir: VirtualPath,
    binary_packages: Mapping[str, BinaryPackage],
    *,
    allow_fuzzy_matches: bool = False,
    detect_typos: bool = False,
    ignore_paths: Container[str] = frozenset(),
) -> Iterable[PackagingFileClassification]:
    """Classify the files directly inside ``debian_dir``.

    Directories, files whose name starts with "." and paths listed in
    ``ignore_paths`` are skipped. Every remaining file is yielded exactly
    once, together with the packager provided file interpretations that won
    for it (or ``None`` when it matched nothing or lost to other files).

    :param plugin_feature_set: Supplies the known packager provided file
      definitions and the known packaging files.
    :param debian_dir: The directory whose entries are classified.
    :param binary_packages: Known binary packages keyed by name.
    :param allow_fuzzy_matches: Passed on to the matching helpers.
    :param detect_typos: Enable typo detection (only effective when
      ``CAN_DETECT_TYPOS`` is true).
    :param ignore_paths: Paths that must not be classified.
    :return: An iterable of `PackagingFileClassification`; nothing is
      yielded when no main package can be determined.
    """
    packager_provided_files = plugin_feature_set.packager_provided_files
    known_static_non_ppf_names: FrozenSet[str] = frozenset(
        p.detection_value
        for p in plugin_feature_set.known_packaging_files.values()
        if p.detection_method == "path"
    )
    main_binary_package = _find_main_package_name(
        binary_packages,
        allow_fuzzy_matches=allow_fuzzy_matches,
    )
    if main_binary_package is None:
        # This is a generator function: a bare return simply ends the
        # iteration. (A returned value would never reach a consumer that
        # iterates over the result.)
        return

    max_periods_in_package_name = max(name.count(".") for name in binary_packages)
    period2stems: Mapping[int, Sequence[str]] = (
        _period_stem(packager_provided_files.keys())
        if detect_typos and CAN_DETECT_TYPOS
        else {}
    )

    # Winning file per (package name, stem, provided key).
    provided_files_by_key: Dict[Tuple[str, str, str], PackagerProvidedFile] = {}
    paths: List[VirtualPath] = []

    for entry in debian_dir.iterdir:
        if entry.is_dir or entry.name.startswith("."):
            continue
        if entry.path in ignore_paths:
            continue
        paths.append(entry)
        matching_ppfs = _split_path(
            packager_provided_files,
            binary_packages,
            main_binary_package,
            max_periods_in_package_name,
            entry,
            allow_fuzzy_matches=allow_fuzzy_matches,
            period2stems=period2stems,
            known_static_non_ppf_names=known_static_non_ppf_names,
        )
        for packager_provided_file in matching_ppfs:
            match_key = (
                packager_provided_file.package_name,
                packager_provided_file.definition.stem,
                packager_provided_file.provided_key,
            )
            existing = provided_files_by_key.get(match_key)
            # An already recorded file is only kept when it has a strictly
            # higher priority; on a tie the later file wins.
            if (
                existing is not None
                and existing.match_priority > packager_provided_file.match_priority
            ):
                continue
            provided_files_by_key[match_key] = packager_provided_file

    # Regroup the winners: path -> package name -> files.
    paths2ppfs_per_package: Dict[str, Dict[str, List[PackagerProvidedFile]]] = {}
    for packager_provided_file in provided_files_by_key.values():
        path_name = packager_provided_file.path.path
        ppfs_per_package = paths2ppfs_per_package.get(path_name)
        if ppfs_per_package is None:
            ppfs_per_package = collections.defaultdict(list)
            paths2ppfs_per_package[path_name] = ppfs_per_package
        ppfs_per_package[packager_provided_file.package_name].append(
            packager_provided_file
        )

    for entry in paths:
        yield PackagingFileClassification(
            entry,
            paths2ppfs_per_package.get(entry.path),
        )

516 

517 

def detect_all_packager_provided_files(
    plugin_feature_set: "PluginProvidedFeatureSet",
    debian_dir: VirtualPath,
    binary_packages: Mapping[str, BinaryPackage],
    *,
    allow_fuzzy_matches: bool = False,
    detect_typos: bool = False,
    ignore_paths: Container[str] = frozenset(),
) -> Dict[str, PerPackagePackagerProvidedResult]:
    """Collect the packager provided files of every binary package.

    The files are classified via `classify_debian_packaging_files` and then
    sorted, per package, into auto-installable files and files whose
    definition is "reservation only" (the latter grouped by stem).

    :param plugin_feature_set: Passed on to the classifier.
    :param debian_dir: The directory to scan.
    :param binary_packages: Known binary packages keyed by name.
    :param allow_fuzzy_matches: Passed on to the classifier.
    :param detect_typos: Passed on to the classifier.
    :param ignore_paths: Passed on to the classifier.
    :return: One result per package in ``binary_packages``, including
      packages without any matching file.
    """
    results: Dict[str, PerPackagePackagerProvidedResult] = {}
    for name in binary_packages:
        results[name] = PerPackagePackagerProvidedResult(
            [],
            collections.defaultdict(list),
        )

    classifications = classify_debian_packaging_files(
        plugin_feature_set,
        debian_dir,
        binary_packages,
        allow_fuzzy_matches=allow_fuzzy_matches,
        detect_typos=detect_typos,
        ignore_paths=ignore_paths,
    )
    for classification in classifications:
        per_package_files = classification.packager_provided_files_per_package
        if not per_package_files:
            continue
        for package_name, ppfs in per_package_files.items():
            package_result = results[package_name]
            installable = package_result.auto_installable
            reserved = package_result.reserved_only
            for ppf in ppfs:
                if ppf.definition.reservation_only:
                    reserved[ppf.definition.stem].append(ppf)
                else:
                    installable.append(ppf)

    return results