Coverage for src/debputy/packager_provided_files.py: 89%
234 statements
« prev ^ index » next coverage.py v7.8.2, created at 2026-01-26 19:30 +0000
« prev ^ index » next coverage.py v7.8.2, created at 2026-01-26 19:30 +0000
1import collections.abc
2import dataclasses
3from typing import (
4 Dict,
5 List,
6 Optional,
7 Tuple,
8 TYPE_CHECKING,
9 FrozenSet,
10)
11from collections.abc import Mapping, Iterable, Sequence, Container
13from debputy.packages import BinaryPackage
14from debputy.plugin.api import VirtualPath
15from debputy.plugin.api.impl_types import (
16 PackagerProvidedFileClassSpec,
17 PluginProvidedKnownPackagingFile,
18)
19from debputy.util import _error, CAN_DETECT_TYPOS, detect_possible_typo
21if TYPE_CHECKING:
22 from debputy.plugin.api.feature_set import PluginProvidedFeatureSet
25_KNOWN_NON_TYPO_EXTENSIONS = frozenset(
26 {
27 "conf",
28 "config",
29 "sh",
30 "yml",
31 "yaml",
32 "json",
33 "bash",
34 "pl",
35 "py",
36 "md",
37 "rst",
38 # Fairly common image format in older packages
39 "xpm",
40 # Cruft of various kind (for unclean work directories). Just to avoid stupid false-positives.
41 "bak",
42 "tmp",
43 "temp",
44 "orig",
45 "rej",
46 }
47)
50@dataclasses.dataclass(frozen=True, slots=True)
51class PackagerProvidedFile:
52 path: VirtualPath
53 package_name: str
54 installed_as_basename: str
55 provided_key: str
56 definition: PackagerProvidedFileClassSpec
57 match_priority: int = 0
58 fuzzy_match: bool = False
59 uses_explicit_package_name: bool = False
60 name_segment: str | None = None
61 architecture_restriction: str | None = None
62 expected_path: str | None = None
64 def compute_dest(self) -> tuple[str, str]:
65 return self.definition.compute_dest(
66 self.installed_as_basename,
67 owning_package=self.package_name,
68 path=self.path,
69 )
72@dataclasses.dataclass(slots=True)
73class PerPackagePackagerProvidedResult:
74 auto_installable: list[PackagerProvidedFile]
75 reserved_only: dict[str, list[PackagerProvidedFile]]
78def _find_package_name_prefix(
79 binary_packages: Mapping[str, BinaryPackage],
80 main_binary_package: str,
81 max_periods_in_package_name: int,
82 path: VirtualPath,
83 *,
84 allow_fuzzy_matches: bool = False,
85) -> Iterable[tuple[str, str, bool, bool]]:
86 if max_periods_in_package_name < 1:
87 prefix, remaining = path.name.split(".", 1)
88 package_name = prefix
89 bug_950723 = False
90 if allow_fuzzy_matches and package_name.endswith("@"):
91 package_name = package_name[:-1]
92 bug_950723 = True
93 if package_name in binary_packages:
94 yield package_name, remaining, True, bug_950723
95 else:
96 yield main_binary_package, path.name, False, False
97 return
99 parts = path.name.split(".", max_periods_in_package_name + 1)
100 for p in range(len(parts) - 1, 0, -1):
101 name = ".".join(parts[0:p])
102 bug_950723 = False
103 if allow_fuzzy_matches and name.endswith("@"): 103 ↛ 104line 103 didn't jump to line 104 because the condition on line 103 was never true
104 name = name[:-1]
105 bug_950723 = True
107 if name in binary_packages:
108 remaining = ".".join(parts[p:])
109 yield name, remaining, True, bug_950723
110 # main package case
111 yield main_binary_package, path.name, False, False
114def _iterate_stem_splits(
115 basename: str,
116) -> collections.abc.Iterator[tuple[str, str | None, int]]:
117 stem = basename
118 period_count = stem.count(".")
119 yield stem, None, period_count
120 install_as_name = ""
121 while period_count > 0:
122 period_count -= 1
123 install_as_name_part, stem = stem.split(".", 1)
124 install_as_name = (
125 install_as_name + "." + install_as_name_part
126 if install_as_name != ""
127 else install_as_name_part
128 )
129 yield stem, install_as_name, period_count
132def _find_definition(
133 packager_provided_files: Mapping[str, PackagerProvidedFileClassSpec],
134 basename: str,
135 *,
136 period2stems: Mapping[int, Sequence[str]] | None = None,
137 had_arch: bool = False,
138) -> tuple[str | None, PackagerProvidedFileClassSpec | None, str | None]:
139 for stem, install_as_name, period_count in _iterate_stem_splits(basename):
140 definition = packager_provided_files.get(stem)
141 if definition is not None:
142 return install_as_name, definition, None
143 if not period2stems:
144 continue
145 stems = period2stems.get(period_count)
147 if not stems:
148 continue
149 # If the stem is also the extension and a known one at that, then
150 # we do not consider it a typo match (to avoid false positives).
151 #
152 # We also ignore "foo.1" since manpages are kind of common.
153 if not had_arch and (stem in _KNOWN_NON_TYPO_EXTENSIONS or stem.isdigit()):
154 continue
155 max_edit_distance = 2 if len(stem) > 3 else 1
156 matches = detect_possible_typo(stem, stems, max_edit_distance=max_edit_distance)
157 if matches is not None and len(matches) == 1:
158 definition = packager_provided_files[matches[0]]
159 return install_as_name, definition, stem
160 return None, None, None
163def _check_mismatches(
164 path: VirtualPath,
165 definition: PackagerProvidedFileClassSpec,
166 owning_package: BinaryPackage,
167 install_as_name: str | None,
168 had_arch: bool,
169) -> None:
170 if install_as_name is not None and not definition.allow_name_segment: 170 ↛ 171line 170 didn't jump to line 171 because the condition on line 170 was never true
171 _error(
172 f'The file "{path.fs_path}" looks like a packager provided file for'
173 f' {owning_package.name} of type {definition.stem} with the custom name "{install_as_name}".'
174 " However, this file type does not allow custom naming. The file type was registered"
175 f" by {definition.debputy_plugin_metadata.plugin_name} in case you disagree and want"
176 " to file a bug/feature request."
177 )
178 if had_arch:
179 if owning_package.is_arch_all: 179 ↛ 180line 179 didn't jump to line 180 because the condition on line 179 was never true
180 _error(
181 f'The file "{path.fs_path}" looks like an architecture specific packager provided file for'
182 f" {owning_package.name} of type {definition.stem}."
183 " However, the package in question is arch:all. The use of architecture specific files"
184 " for arch:all packages does not make sense."
185 )
186 if not definition.allow_architecture_segment: 186 ↛ 187line 186 didn't jump to line 187 because the condition on line 186 was never true
187 _error(
188 f'The file "{path.fs_path}" looks like an architecture specific packager provided file for'
189 f" {owning_package.name} of type {definition.stem}."
190 " However, this file type does not allow architecture specific variants. The file type was registered"
191 f" by {definition.debputy_plugin_metadata.plugin_name} in case you disagree and want"
192 " to file a bug/feature request."
193 )
196def _split_basename(
197 basename: str,
198 owning_package: BinaryPackage,
199 *,
200 has_explicit_package: bool = False,
201 allow_fuzzy_matches: bool = False,
202) -> tuple[str, int, str | None, bool]:
203 match_priority = 1 if has_explicit_package else 0
204 fuzzy_match = False
205 arch_restriction: str | None = None
206 if allow_fuzzy_matches and basename.endswith(".in") and len(basename) > 3: 206 ↛ 207line 206 didn't jump to line 207 because the condition on line 206 was never true
207 basename = basename[:-3]
208 fuzzy_match = True
210 if "." in basename:
211 remaining, last_word = basename.rsplit(".", 1)
212 # We cannot use "resolved_architecture" as it would return "all".
213 if last_word == owning_package.package_deb_architecture_variable("ARCH"):
214 match_priority = 3
215 basename = remaining
216 arch_restriction = last_word
217 elif last_word == owning_package.package_deb_architecture_variable("ARCH_OS"): 217 ↛ 218line 217 didn't jump to line 218 because the condition on line 217 was never true
218 match_priority = 2
219 basename = remaining
220 arch_restriction = last_word
221 elif last_word == "all" and owning_package.is_arch_all: 221 ↛ 224line 221 didn't jump to line 224 because the condition on line 221 was never true
222 # This case does not make sense, but we detect it, so we can report an error
223 # via _check_mismatches.
224 match_priority = -1
225 basename = remaining
226 arch_restriction = last_word
228 return basename, match_priority, arch_restriction, fuzzy_match
231def _split_path(
232 packager_provided_files: Mapping[str, PackagerProvidedFileClassSpec],
233 binary_packages: Mapping[str, BinaryPackage],
234 main_binary_package: str,
235 max_periods_in_package_name: int,
236 path: VirtualPath,
237 *,
238 allow_fuzzy_matches: bool = False,
239 period2stems: Mapping[int, Sequence[str]] | None = None,
240 known_static_non_ppf_names=frozenset(),
241) -> Iterable[PackagerProvidedFile]:
242 owning_package_name = main_binary_package
243 basename = path.name
244 match_priority = 0
245 had_arch = False
246 if "." not in basename:
247 definition = packager_provided_files.get(basename)
248 if definition is None:
249 return
250 if definition.packageless_is_fallback_for_all_packages:
251 yield from (
252 PackagerProvidedFile(
253 path=path,
254 package_name=n,
255 installed_as_basename=n,
256 provided_key=".UNNAMED.",
257 definition=definition,
258 match_priority=match_priority,
259 fuzzy_match=False,
260 uses_explicit_package_name=False,
261 name_segment=None,
262 architecture_restriction=None,
263 )
264 for n in binary_packages
265 )
266 else:
267 yield PackagerProvidedFile(
268 path=path,
269 package_name=owning_package_name,
270 installed_as_basename=owning_package_name,
271 provided_key=".UNNAMED.",
272 definition=definition,
273 match_priority=match_priority,
274 fuzzy_match=False,
275 uses_explicit_package_name=False,
276 name_segment=None,
277 architecture_restriction=None,
278 )
279 return
280 if f"debian/{path.name}" in known_static_non_ppf_names:
281 return
283 for (
284 owning_package_name,
285 basename,
286 explicit_package,
287 bug_950723,
288 ) in _find_package_name_prefix(
289 binary_packages,
290 main_binary_package,
291 max_periods_in_package_name,
292 path,
293 allow_fuzzy_matches=allow_fuzzy_matches,
294 ):
295 owning_package = binary_packages[owning_package_name]
297 basename, match_priority, arch_restriction, fuzzy_match = _split_basename(
298 basename,
299 owning_package,
300 has_explicit_package=explicit_package,
301 allow_fuzzy_matches=allow_fuzzy_matches,
302 )
304 install_as_name, definition, typoed_stem = _find_definition(
305 packager_provided_files,
306 basename,
307 period2stems=period2stems,
308 had_arch=bool(arch_restriction),
309 )
310 if definition is None:
311 continue
313 # Note: bug_950723 implies allow_fuzzy_matches
314 if bug_950723 and not definition.bug_950723: 314 ↛ 315line 314 didn't jump to line 315 because the condition on line 314 was never true
315 continue
317 if not allow_fuzzy_matches:
318 # LSP/Lint checks here but should not use `_check_mismatches` as
319 # the hard error disrupts them.
320 _check_mismatches(
321 path,
322 definition,
323 owning_package,
324 install_as_name,
325 arch_restriction is not None,
326 )
328 expected_path: str | None = None
329 if (
330 definition.packageless_is_fallback_for_all_packages
331 and install_as_name is None
332 and not had_arch
333 and not explicit_package
334 and arch_restriction is None
335 ):
336 if typoed_stem is not None: 336 ↛ 337line 336 didn't jump to line 337 because the condition on line 336 was never true
337 parent_path = (
338 path.parent_dir.path + "/" if path.parent_dir is not None else ""
339 )
340 expected_path = f"{parent_path}{definition.stem}"
341 if fuzzy_match and path.name.endswith(".in"):
342 expected_path += ".in"
343 yield from (
344 PackagerProvidedFile(
345 path=path,
346 package_name=n,
347 installed_as_basename=f"{n}@" if bug_950723 else n,
348 provided_key=".UNNAMED." if bug_950723 else ".UNNAMED@.",
349 definition=definition,
350 match_priority=match_priority,
351 fuzzy_match=fuzzy_match,
352 uses_explicit_package_name=False,
353 name_segment=None,
354 architecture_restriction=None,
355 expected_path=expected_path,
356 )
357 for n in binary_packages
358 )
359 else:
360 provided_key = (
361 install_as_name if install_as_name is not None else ".UNNAMED."
362 )
363 basename = (
364 install_as_name if install_as_name is not None else owning_package_name
365 )
366 if bug_950723:
367 provided_key = f"{provided_key}@"
368 basename = f"{basename}@"
369 package_prefix = f"{owning_package_name}@"
370 else:
371 package_prefix = owning_package_name
372 if typoed_stem:
373 parent_path = (
374 path.parent_dir.path + "/" if path.parent_dir is not None else ""
375 )
376 basename = definition.stem
377 if install_as_name is not None: 377 ↛ 378line 377 didn't jump to line 378 because the condition on line 377 was never true
378 basename = f"{install_as_name}.{basename}"
379 if explicit_package:
380 basename = f"{package_prefix}.{basename}"
381 if arch_restriction is not None and arch_restriction != "all": 381 ↛ 382line 381 didn't jump to line 382 because the condition on line 381 was never true
382 basename = f"{basename}.{arch_restriction}"
383 expected_path = f"{parent_path}{basename}"
384 if fuzzy_match and path.name.endswith(".in"): 384 ↛ 385line 384 didn't jump to line 385 because the condition on line 384 was never true
385 expected_path += ".in"
386 yield PackagerProvidedFile(
387 path=path,
388 package_name=owning_package_name,
389 installed_as_basename=basename,
390 provided_key=provided_key,
391 definition=definition,
392 match_priority=match_priority,
393 fuzzy_match=fuzzy_match,
394 uses_explicit_package_name=bool(explicit_package),
395 name_segment=install_as_name,
396 architecture_restriction=arch_restriction,
397 expected_path=expected_path,
398 )
399 return
402def _period_stem(stems: Iterable[str]) -> Mapping[int, Sequence[str]]:
403 result: dict[int, list[str]] = {}
404 for stem in stems:
405 period_count = stem.count(".")
406 matched_stems = result.get(period_count)
407 if not matched_stems:
408 matched_stems = [stem]
409 result[period_count] = matched_stems
410 else:
411 matched_stems.append(stem)
412 return result
415def _find_main_package_name(
416 binary_packages: Mapping[str, BinaryPackage],
417 *,
418 allow_fuzzy_matches: bool = False,
419) -> str | None:
420 main_packages = [p.name for p in binary_packages.values() if p.is_main_package]
421 if not main_packages: 421 ↛ 422line 421 didn't jump to line 422 because the condition on line 421 was never true
422 assert allow_fuzzy_matches
423 return next(
424 iter(p.name for p in binary_packages.values() if "Package" in p.fields),
425 None,
426 )
427 return main_packages[0]
430@dataclasses.dataclass(slots=True, frozen=True)
431class PackagingFileClassification:
432 path: VirtualPath
433 packager_provided_files_per_package: None | (
434 Mapping[str, Sequence[PackagerProvidedFile]]
435 )
438def classify_debian_packaging_files(
439 plugin_feature_set: "PluginProvidedFeatureSet",
440 debian_dir: VirtualPath,
441 binary_packages: Mapping[str, BinaryPackage],
442 *,
443 allow_fuzzy_matches: bool = False,
444 detect_typos: bool = False,
445 ignore_paths: Container[str] = frozenset(),
446) -> Iterable[PackagingFileClassification]:
447 packager_provided_files = plugin_feature_set.packager_provided_files
448 known_static_non_ppf_names: frozenset[str] = frozenset(
449 {
450 p.detection_value
451 for p in plugin_feature_set.known_packaging_files.values()
452 if p.detection_method == "path"
453 }
454 )
455 main_binary_package = _find_main_package_name(
456 binary_packages,
457 allow_fuzzy_matches=allow_fuzzy_matches,
458 )
459 if main_binary_package is None: 459 ↛ 460line 459 didn't jump to line 460 because the condition on line 459 was never true
460 return
461 provided_files_by_key: dict[tuple[str, str, str], PackagerProvidedFile] = {}
462 max_periods_in_package_name = max(name.count(".") for name in binary_packages)
463 if detect_typos and CAN_DETECT_TYPOS:
464 period2stems = _period_stem(packager_provided_files.keys())
465 else:
466 period2stems = {}
468 paths = []
470 for entry in debian_dir.iterdir:
471 if entry.is_dir or entry.name.startswith("."):
472 continue
473 if entry.path in ignore_paths:
474 continue
475 paths.append(entry)
476 matching_ppfs = _split_path(
477 packager_provided_files,
478 binary_packages,
479 main_binary_package,
480 max_periods_in_package_name,
481 entry,
482 allow_fuzzy_matches=allow_fuzzy_matches,
483 period2stems=period2stems,
484 known_static_non_ppf_names=known_static_non_ppf_names,
485 )
486 for packager_provided_file in matching_ppfs:
487 match_key = (
488 packager_provided_file.package_name,
489 packager_provided_file.definition.stem,
490 packager_provided_file.provided_key,
491 )
492 existing = provided_files_by_key.get(match_key)
493 if (
494 existing is not None
495 and existing.match_priority > packager_provided_file.match_priority
496 ):
497 continue
498 provided_files_by_key[match_key] = packager_provided_file
500 paths2ppfs_per_package = dict[str, dict[str, list[PackagerProvidedFile]]]()
501 for packager_provided_file in provided_files_by_key.values():
502 package_name = packager_provided_file.package_name
503 path_name = packager_provided_file.path.path
504 ppfs_per_package = paths2ppfs_per_package.get(path_name)
505 if ppfs_per_package is None:
506 ppfs_per_package = collections.defaultdict(list)
507 paths2ppfs_per_package[path_name] = ppfs_per_package
508 ppfs_per_package[package_name].append(packager_provided_file)
510 for entry in paths:
511 yield PackagingFileClassification(
512 entry,
513 paths2ppfs_per_package.get(entry.path),
514 )
517def detect_all_packager_provided_files(
518 plugin_feature_set: "PluginProvidedFeatureSet",
519 debian_dir: VirtualPath,
520 binary_packages: Mapping[str, BinaryPackage],
521 *,
522 allow_fuzzy_matches: bool = False,
523 detect_typos: bool = False,
524 ignore_paths: Container[str] = frozenset(),
525) -> dict[str, PerPackagePackagerProvidedResult]:
526 result = {
527 n: PerPackagePackagerProvidedResult([], collections.defaultdict(list))
528 for n in binary_packages
529 }
530 for classified_path in classify_debian_packaging_files(
531 plugin_feature_set,
532 debian_dir,
533 binary_packages,
534 allow_fuzzy_matches=allow_fuzzy_matches,
535 detect_typos=detect_typos,
536 ignore_paths=ignore_paths,
537 ):
538 provided_files = classified_path.packager_provided_files_per_package
539 if not provided_files:
540 continue
541 for package_name, provided_file_data in provided_files.items():
542 per_package_result = result[package_name]
543 per_package_result.auto_installable.extend(
544 x for x in provided_file_data if not x.definition.reservation_only
545 )
546 reservation_only = per_package_result.reserved_only
547 for packager_provided_file in provided_file_data:
548 if not packager_provided_file.definition.reservation_only:
549 continue
550 reservation_only[packager_provided_file.definition.stem].append(
551 packager_provided_file
552 )
554 return result