Coverage for src/debputy/packager_provided_files.py: 89%
234 statements
coverage.py v7.8.2, created at 2025-10-12 15:06 +0000
1import collections
2import dataclasses
3from typing import (
4 Dict,
5 List,
6 Optional,
7 Tuple,
8 TYPE_CHECKING,
9 FrozenSet,
10)
11from collections.abc import Mapping, Iterable, Sequence, Container
13from debputy.packages import BinaryPackage
14from debputy.plugin.api import VirtualPath
15from debputy.plugin.api.impl_types import (
16 PackagerProvidedFileClassSpec,
17 PluginProvidedKnownPackagingFile,
18)
19from debputy.util import _error, CAN_DETECT_TYPOS, detect_possible_typo
21if TYPE_CHECKING:
22 from debputy.plugin.api.feature_set import PluginProvidedFeatureSet
# Stems that are never treated as a typo of a packager provided file stem
# when they appear as a plain file extension (see `_find_definition`).
_KNOWN_NON_TYPO_EXTENSIONS = frozenset(
    {
        "conf",
        "config",
        "sh",
        "yml",
        "yaml",
        "json",
        "bash",
        "pl",
        "py",
        "md",
        "rst",
        # Fairly common image format in older packages
        "xpm",
        # Cruft of various kinds (for unclean work directories). Just to avoid
        # stupid false-positives.
        "bak",
        "tmp",
        "temp",
        "orig",
        "rej",
    }
)
50@dataclasses.dataclass(frozen=True, slots=True)
51class PackagerProvidedFile:
52 path: VirtualPath
53 package_name: str
54 installed_as_basename: str
55 provided_key: str
56 definition: PackagerProvidedFileClassSpec
57 match_priority: int = 0
58 fuzzy_match: bool = False
59 uses_explicit_package_name: bool = False
60 name_segment: str | None = None
61 architecture_restriction: str | None = None
62 expected_path: str | None = None
64 def compute_dest(self) -> tuple[str, str]:
65 return self.definition.compute_dest(
66 self.installed_as_basename,
67 owning_package=self.package_name,
68 path=self.path,
69 )
72@dataclasses.dataclass(slots=True)
73class PerPackagePackagerProvidedResult:
74 auto_installable: list[PackagerProvidedFile]
75 reserved_only: dict[str, list[PackagerProvidedFile]]
def _find_package_name_prefix(
    binary_packages: Mapping[str, BinaryPackage],
    main_binary_package: str,
    max_periods_in_package_name: int,
    path: VirtualPath,
    *,
    allow_fuzzy_matches: bool = False,
) -> Iterable[tuple[str, str, bool, bool]]:
    """Yield candidate splits of `path.name` into package name and remainder.

    Each candidate is a tuple of `(package_name, remaining_basename,
    explicit_package, bug_950723)`. `explicit_package` is True when the file
    name started with the name of a known binary package. `bug_950723` is
    True when that prefix was followed by a `@`, which is only recognised
    with `allow_fuzzy_matches`.

    :param binary_packages: Known binary packages keyed by name.
    :param main_binary_package: Package used when no explicit prefix matches.
    :param max_periods_in_package_name: The largest number of periods found
      in any known package name.
    :param path: The file to analyse.
    :param allow_fuzzy_matches: Whether to accept the `@` suffixed prefix.
    """
    file_name = path.name
    if max_periods_in_package_name < 1:
        # No package name contains a period, so only the first segment can
        # be a package name. `partition` (rather than `split` + unpacking)
        # keeps a dot-less name from raising; such a name falls through to
        # the main package, like in the multi-period case below.
        package_name, separator, remaining = file_name.partition(".")
        bug_950723 = False
        if allow_fuzzy_matches and package_name.endswith("@"):
            package_name = package_name[:-1]
            bug_950723 = True
        if separator and package_name in binary_packages:
            yield package_name, remaining, True, bug_950723
        else:
            yield main_binary_package, file_name, False, False
        return

    parts = file_name.split(".", max_periods_in_package_name + 1)
    # Try the longest possible package name first.
    for prefix_length in range(len(parts) - 1, 0, -1):
        name = ".".join(parts[:prefix_length])
        bug_950723 = False
        if allow_fuzzy_matches and name.endswith("@"):
            name = name[:-1]
            bug_950723 = True

        if name in binary_packages:
            remaining = ".".join(parts[prefix_length:])
            yield name, remaining, True, bug_950723
    # main package case
    yield main_binary_package, file_name, False, False
def _iterate_stem_splits(basename: str) -> Iterable[tuple[str, str | None, int]]:
    """Yield every way of splitting `basename` into a name segment and a stem.

    Each item is `(stem, install_as_name, period_count)`, where
    `period_count` is the number of periods left in `stem`. The first item
    is the whole basename with `install_as_name` set to None; every further
    item moves one more leading segment from the stem into
    `install_as_name`.
    """
    stem = basename
    period_count = stem.count(".")
    yield stem, None, period_count
    install_as_name = ""
    while period_count > 0:
        period_count -= 1
        leading_segment, stem = stem.split(".", 1)
        install_as_name = (
            f"{install_as_name}.{leading_segment}"
            if install_as_name
            else leading_segment
        )
        yield stem, install_as_name, period_count
def _find_definition(
    packager_provided_files: Mapping[str, PackagerProvidedFileClassSpec],
    basename: str,
    *,
    period2stems: Mapping[int, Sequence[str]] | None = None,
    had_arch: bool = False,
) -> tuple[str | None, PackagerProvidedFileClassSpec | None, str | None]:
    """Find the definition whose stem ends `basename`.

    :param packager_provided_files: Known definitions keyed by stem.
    :param basename: File name without package prefix and architecture.
    :param period2stems: Known stems grouped by the number of periods they
      contain. When empty or None, no typo detection is attempted.
    :param had_arch: Whether an architecture segment was stripped from the
      file name.
    :return: `(install_as_name, definition, typoed_stem)`. `typoed_stem` is
      the misspelled stem found in the file name when the definition was
      found via typo detection, and None for an exact match. All three are
      None when nothing matched.
    """
    for stem, install_as_name, period_count in _iterate_stem_splits(basename):
        definition = packager_provided_files.get(stem)
        if definition is not None:
            return install_as_name, definition, None
        if not period2stems:
            continue
        # Only stems with the same number of periods are typo candidates.
        stems = period2stems.get(period_count)

        if not stems:
            continue
        # If the stem is also the extension and a known one at that, then
        # we do not consider it a typo match (to avoid false positives).
        #
        # We also ignore "foo.1" since manpages are kind of common.
        if not had_arch and (stem in _KNOWN_NON_TYPO_EXTENSIONS or stem.isdigit()):
            continue
        max_edit_distance = 2 if len(stem) > 3 else 1
        matches = detect_possible_typo(stem, stems, max_edit_distance=max_edit_distance)
        # Only an unambiguous suggestion counts as a typo match.
        if matches is not None and len(matches) == 1:
            definition = packager_provided_files[matches[0]]
            return install_as_name, definition, stem
    return None, None, None
def _check_mismatches(
    path: VirtualPath,
    definition: PackagerProvidedFileClassSpec,
    owning_package: BinaryPackage,
    install_as_name: str | None,
    had_arch: bool,
) -> None:
    """Report file names using segments that their definition does not allow.

    Problems are reported via `_error`.

    :param path: The file being checked (used in the error message).
    :param definition: The definition the file matched.
    :param owning_package: The binary package the file is attributed to.
    :param install_as_name: The custom name segment, if the file had one.
    :param had_arch: Whether the file name had an architecture segment.
    """
    if install_as_name is not None and not definition.allow_name_segment:
        _error(
            f'The file "{path.fs_path}" looks like a packager provided file for'
            f' {owning_package.name} of type {definition.stem} with the custom name "{install_as_name}".'
            " However, this file type does not allow custom naming. The file type was registered"
            f" by {definition.debputy_plugin_metadata.plugin_name} in case you disagree and want"
            " to file a bug/feature request."
        )
    if had_arch:
        if owning_package.is_arch_all:
            _error(
                f'The file "{path.fs_path}" looks like an architecture specific packager provided file for'
                f" {owning_package.name} of type {definition.stem}."
                " However, the package in question is arch:all. The use of architecture specific files"
                " for arch:all packages does not make sense."
            )
        if not definition.allow_architecture_segment:
            _error(
                f'The file "{path.fs_path}" looks like an architecture specific packager provided file for'
                f" {owning_package.name} of type {definition.stem}."
                " However, this file type does not allow architecture specific variants. The file type was registered"
                f" by {definition.debputy_plugin_metadata.plugin_name} in case you disagree and want"
                " to file a bug/feature request."
            )
def _split_basename(
    basename: str,
    owning_package: BinaryPackage,
    *,
    has_explicit_package: bool = False,
    allow_fuzzy_matches: bool = False,
) -> tuple[str, int, str | None, bool]:
    """Strip the fuzzy `.in` suffix and the architecture segment from `basename`.

    :param basename: File name without the package prefix.
    :param owning_package: The binary package the file is attributed to.
    :param has_explicit_package: Whether the file name had a package prefix.
    :param allow_fuzzy_matches: Whether a trailing ".in" may be ignored.
    :return: `(basename, match_priority, arch_restriction, fuzzy_match)`,
      where `basename` has the recognised segments removed.
    """
    match_priority = 1 if has_explicit_package else 0
    fuzzy_match = False
    arch_restriction: str | None = None
    # The length check keeps a file named exactly ".in" untouched.
    if allow_fuzzy_matches and basename.endswith(".in") and len(basename) > 3:
        basename = basename[:-3]
        fuzzy_match = True

    if "." in basename:
        remaining, last_word = basename.rsplit(".", 1)
        # We cannot use "resolved_architecture" as it would return "all".
        if last_word == owning_package.package_deb_architecture_variable("ARCH"):
            match_priority = 3
            basename = remaining
            arch_restriction = last_word
        elif last_word == owning_package.package_deb_architecture_variable("ARCH_OS"):
            match_priority = 2
            basename = remaining
            arch_restriction = last_word
        elif last_word == "all" and owning_package.is_arch_all:
            # This case does not make sense, but we detect it, so we can report an error
            # via _check_mismatches.
            match_priority = -1
            basename = remaining
            arch_restriction = last_word

    return basename, match_priority, arch_restriction, fuzzy_match
def _expected_path_for_typo(
    path: VirtualPath,
    corrected_basename: str,
    fuzzy_match: bool,
) -> str:
    """Return the path `path` would have with its stem spelled correctly."""
    parent_dir = path.parent_dir
    parent_path = parent_dir.path + "/" if parent_dir is not None else ""
    expected_path = f"{parent_path}{corrected_basename}"
    # A fuzzy match had its ".in" suffix stripped; restore it in the suggestion.
    if fuzzy_match and path.name.endswith(".in"):
        expected_path += ".in"
    return expected_path


def _split_path(
    packager_provided_files: Mapping[str, PackagerProvidedFileClassSpec],
    binary_packages: Mapping[str, BinaryPackage],
    main_binary_package: str,
    max_periods_in_package_name: int,
    path: VirtualPath,
    *,
    allow_fuzzy_matches: bool = False,
    period2stems: Mapping[int, Sequence[str]] | None = None,
    known_static_non_ppf_names: Container[str] = frozenset(),
) -> Iterable[PackagerProvidedFile]:
    """Yield every packager provided file interpretation of `path`.

    :param packager_provided_files: Known definitions keyed by stem.
    :param binary_packages: Known binary packages keyed by name.
    :param main_binary_package: Package that owns files without a package
      prefix.
    :param max_periods_in_package_name: The largest number of periods found
      in any known package name.
    :param path: The file to analyse.
    :param allow_fuzzy_matches: Accept ".in" suffixes and `@` suffixed
      package prefixes, and skip the hard errors of `_check_mismatches`.
    :param period2stems: Known stems grouped by period count; enables typo
      detection when non-empty.
    :param known_static_non_ppf_names: Paths (as "debian/<name>") that are
      known packaging files and must not be treated as packager provided
      files.
    """
    file_name = path.name
    if "." not in file_name:
        # Without a period the name can only be a bare stem.
        definition = packager_provided_files.get(file_name)
        if definition is None:
            return
        target_packages: Iterable[str]
        if definition.packageless_is_fallback_for_all_packages:
            target_packages = binary_packages
        else:
            target_packages = (main_binary_package,)
        for package_name in target_packages:
            yield PackagerProvidedFile(
                path=path,
                package_name=package_name,
                installed_as_basename=package_name,
                provided_key=".UNNAMED.",
                definition=definition,
                match_priority=0,
                fuzzy_match=False,
                uses_explicit_package_name=False,
                name_segment=None,
                architecture_restriction=None,
            )
        return
    if f"debian/{file_name}" in known_static_non_ppf_names:
        return

    for (
        owning_package_name,
        remaining_name,
        explicit_package,
        bug_950723,
    ) in _find_package_name_prefix(
        binary_packages,
        main_binary_package,
        max_periods_in_package_name,
        path,
        allow_fuzzy_matches=allow_fuzzy_matches,
    ):
        owning_package = binary_packages[owning_package_name]

        stem_part, match_priority, arch_restriction, fuzzy_match = _split_basename(
            remaining_name,
            owning_package,
            has_explicit_package=explicit_package,
            allow_fuzzy_matches=allow_fuzzy_matches,
        )

        install_as_name, definition, typoed_stem = _find_definition(
            packager_provided_files,
            stem_part,
            period2stems=period2stems,
            had_arch=bool(arch_restriction),
        )
        if definition is None:
            continue

        # Note: bug_950723 implies allow_fuzzy_matches
        if bug_950723 and not definition.bug_950723:
            continue

        if not allow_fuzzy_matches:
            # LSP/Lint checks here but should not use `_check_mismatches` as
            # the hard error disrupts them.
            _check_mismatches(
                path,
                definition,
                owning_package,
                install_as_name,
                arch_restriction is not None,
            )

        expected_path: str | None = None
        is_packageless_fallback = (
            definition.packageless_is_fallback_for_all_packages
            and install_as_name is None
            and not explicit_package
            and arch_restriction is None
        )
        if is_packageless_fallback:
            if typoed_stem is not None:
                expected_path = _expected_path_for_typo(
                    path,
                    definition.stem,
                    fuzzy_match,
                )
            for package_name in binary_packages:
                yield PackagerProvidedFile(
                    path=path,
                    package_name=package_name,
                    installed_as_basename=(
                        f"{package_name}@" if bug_950723 else package_name
                    ),
                    # Use the same keys as every other unnamed file, so that
                    # this fallback and an explicit "<package>.<stem>" file
                    # compete on match_priority instead of both surviving.
                    provided_key=".UNNAMED.@" if bug_950723 else ".UNNAMED.",
                    definition=definition,
                    match_priority=match_priority,
                    fuzzy_match=fuzzy_match,
                    uses_explicit_package_name=False,
                    name_segment=None,
                    architecture_restriction=None,
                    expected_path=expected_path,
                )
            continue

        if install_as_name is not None:
            provided_key = install_as_name
            installed_as_basename = install_as_name
        else:
            provided_key = ".UNNAMED."
            installed_as_basename = owning_package_name
        package_prefix = owning_package_name
        if bug_950723:
            provided_key = f"{provided_key}@"
            installed_as_basename = f"{installed_as_basename}@"
            package_prefix = f"{package_prefix}@"
        if typoed_stem:
            # Rebuild the file name with the correct stem. This is kept in its
            # own variable so it does not leak into `installed_as_basename`.
            corrected_basename = definition.stem
            if install_as_name is not None:
                corrected_basename = f"{install_as_name}.{corrected_basename}"
            if explicit_package:
                corrected_basename = f"{package_prefix}.{corrected_basename}"
            if arch_restriction is not None and arch_restriction != "all":
                corrected_basename = f"{corrected_basename}.{arch_restriction}"
            expected_path = _expected_path_for_typo(
                path,
                corrected_basename,
                fuzzy_match,
            )
        yield PackagerProvidedFile(
            path=path,
            package_name=owning_package_name,
            installed_as_basename=installed_as_basename,
            provided_key=provided_key,
            definition=definition,
            match_priority=match_priority,
            fuzzy_match=fuzzy_match,
            uses_explicit_package_name=bool(explicit_package),
            name_segment=install_as_name,
            architecture_restriction=arch_restriction,
            expected_path=expected_path,
        )
def _period_stem(stems: Iterable[str]) -> Mapping[int, Sequence[str]]:
    """Group `stems` by the number of periods each one contains.

    Stems keep their input order within each group.
    """
    result: dict[int, list[str]] = {}
    for stem in stems:
        result.setdefault(stem.count("."), []).append(stem)
    return result
def _find_main_package_name(
    binary_packages: Mapping[str, BinaryPackage],
    *,
    allow_fuzzy_matches: bool = False,
) -> str | None:
    """Return the name of the first package flagged as the main package.

    When no package is flagged, which is only accepted with
    `allow_fuzzy_matches`, the first package that has a "Package" field is
    returned instead, or None if there is none.
    """
    for package in binary_packages.values():
        if package.is_main_package:
            return package.name
    assert allow_fuzzy_matches, "No main package found among the binary packages"
    return next(
        (p.name for p in binary_packages.values() if "Package" in p.fields),
        None,
    )
428@dataclasses.dataclass(slots=True, frozen=True)
429class PackagingFileClassification:
430 path: VirtualPath
431 packager_provided_files_per_package: None | (
432 Mapping[str, Sequence[PackagerProvidedFile]]
433 )
def classify_debian_packaging_files(
    plugin_feature_set: "PluginProvidedFeatureSet",
    debian_dir: VirtualPath,
    binary_packages: Mapping[str, BinaryPackage],
    *,
    allow_fuzzy_matches: bool = False,
    detect_typos: bool = False,
    ignore_paths: Container[str] = frozenset(),
) -> Iterable[PackagingFileClassification]:
    """Classify the files directly inside `debian_dir`.

    Directories, files whose name starts with a period and files whose path
    is in `ignore_paths` are skipped. Every other file is yielded exactly
    once, together with the packager provided files it was matched as (or
    None when it did not match, or lost to a match with higher priority).

    :param plugin_feature_set: Source of the packager provided file
      definitions and of the known packaging files.
    :param debian_dir: The directory to scan.
    :param binary_packages: Known binary packages keyed by name.
    :param allow_fuzzy_matches: Passed on to the file name matching.
    :param detect_typos: Attempt typo detection (only has an effect when
      `CAN_DETECT_TYPOS` is true).
    :param ignore_paths: Paths to leave out of the result entirely.
    """
    packager_provided_files = plugin_feature_set.packager_provided_files
    known_static_non_ppf_names: frozenset[str] = frozenset(
        p.detection_value
        for p in plugin_feature_set.known_packaging_files.values()
        if p.detection_method == "path"
    )
    main_binary_package = _find_main_package_name(
        binary_packages,
        allow_fuzzy_matches=allow_fuzzy_matches,
    )
    if main_binary_package is None:
        # This is a generator: a bare return simply yields nothing.
        return
    provided_files_by_key: dict[tuple[str, str, str], PackagerProvidedFile] = {}
    max_periods_in_package_name = max(name.count(".") for name in binary_packages)
    if detect_typos and CAN_DETECT_TYPOS:
        period2stems = _period_stem(packager_provided_files.keys())
    else:
        period2stems = {}

    paths = []

    for entry in debian_dir.iterdir:
        if entry.is_dir or entry.name.startswith("."):
            continue
        if entry.path in ignore_paths:
            continue
        paths.append(entry)
        matching_ppfs = _split_path(
            packager_provided_files,
            binary_packages,
            main_binary_package,
            max_periods_in_package_name,
            entry,
            allow_fuzzy_matches=allow_fuzzy_matches,
            period2stems=period2stems,
            known_static_non_ppf_names=known_static_non_ppf_names,
        )
        for packager_provided_file in matching_ppfs:
            match_key = (
                packager_provided_file.package_name,
                packager_provided_file.definition.stem,
                packager_provided_file.provided_key,
            )
            existing = provided_files_by_key.get(match_key)
            # Keep the existing entry only if it has strictly higher priority;
            # on a tie the file seen last wins.
            if (
                existing is not None
                and existing.match_priority > packager_provided_file.match_priority
            ):
                continue
            provided_files_by_key[match_key] = packager_provided_file

    # Regroup the surviving matches: path -> package name -> matches.
    paths2ppfs_per_package = {}
    for packager_provided_file in provided_files_by_key.values():
        package_name = packager_provided_file.package_name
        path_name = packager_provided_file.path.path
        ppfs_per_package = paths2ppfs_per_package.get(path_name)
        if ppfs_per_package is None:
            ppfs_per_package = collections.defaultdict(list)
            paths2ppfs_per_package[path_name] = ppfs_per_package
        ppfs_per_package[package_name].append(packager_provided_file)

    for entry in paths:
        yield PackagingFileClassification(
            entry,
            paths2ppfs_per_package.get(entry.path),
        )
def detect_all_packager_provided_files(
    plugin_feature_set: "PluginProvidedFeatureSet",
    debian_dir: VirtualPath,
    binary_packages: Mapping[str, BinaryPackage],
    *,
    allow_fuzzy_matches: bool = False,
    detect_typos: bool = False,
    ignore_paths: Container[str] = frozenset(),
) -> dict[str, PerPackagePackagerProvidedResult]:
    """Collect the packager provided files of every binary package.

    The parameters are passed on unchanged to
    `classify_debian_packaging_files`.

    :return: One result per name in `binary_packages`. Files whose
      definition is `reservation_only` are grouped by definition stem in
      `reserved_only`; all other files are listed in `auto_installable`.
    """
    result = {
        n: PerPackagePackagerProvidedResult([], collections.defaultdict(list))
        for n in binary_packages
    }
    for classified_path in classify_debian_packaging_files(
        plugin_feature_set,
        debian_dir,
        binary_packages,
        allow_fuzzy_matches=allow_fuzzy_matches,
        detect_typos=detect_typos,
        ignore_paths=ignore_paths,
    ):
        provided_files = classified_path.packager_provided_files_per_package
        if not provided_files:
            continue
        for package_name, provided_file_data in provided_files.items():
            per_package_result = result[package_name]
            # One pass sorts each file into exactly one of the two buckets.
            for packager_provided_file in provided_file_data:
                definition = packager_provided_file.definition
                if definition.reservation_only:
                    per_package_result.reserved_only[definition.stem].append(
                        packager_provided_file
                    )
                else:
                    per_package_result.auto_installable.append(
                        packager_provided_file
                    )

    return result