Coverage for src/debputy/packager_provided_files.py: 88%
237 statements
« prev ^ index » next coverage.py v7.8.2, created at 2026-04-19 20:37 +0000
« prev ^ index » next coverage.py v7.8.2, created at 2026-04-19 20:37 +0000
1import collections.abc
2import dataclasses
3from typing import (
4 TYPE_CHECKING,
5)
6from collections.abc import Mapping, Iterable, Sequence, Container
8from debputy.packages import BinaryPackage
9from debputy.plugin.api import VirtualPath
10from debputy.plugin.api.impl_types import (
11 PackagerProvidedFileClassSpec,
12)
13from debputy.util import _error, CAN_DETECT_TYPOS, detect_possible_typo
15if TYPE_CHECKING:
16 from debputy.plugin.api.feature_set import PluginProvidedFeatureSet
19_KNOWN_NON_TYPO_EXTENSIONS = frozenset(
20 {
21 "conf",
22 "config",
23 "sh",
24 "yml",
25 "yaml",
26 "json",
27 "bash",
28 "pl",
29 "py",
30 "md",
31 "rst",
32 # Fairly common image format in older packages
33 "xpm",
34 # Cruft of various kind (for unclean work directories). Just to avoid stupid false-positives.
35 "bak",
36 "tmp",
37 "temp",
38 "orig",
39 "rej",
40 }
41)
44@dataclasses.dataclass(frozen=True, slots=True)
45class PackagerProvidedFile:
46 path: VirtualPath
47 package_name: str
48 installed_as_basename: str
49 provided_key: str
50 definition: PackagerProvidedFileClassSpec
51 match_priority: int = 0
52 fuzzy_match: bool = False
53 uses_explicit_package_name: bool = False
54 name_segment: str | None = None
55 architecture_restriction: str | None = None
56 expected_path: str | None = None
58 def compute_dest(self) -> tuple[str, str]:
59 return self.definition.compute_dest(
60 self.installed_as_basename,
61 owning_package=self.package_name,
62 path=self.path,
63 )
66@dataclasses.dataclass(slots=True)
67class PerPackagePackagerProvidedResult:
68 auto_installable: list[PackagerProvidedFile]
69 reserved_only: dict[str, list[PackagerProvidedFile]]
72def _find_package_name_prefix(
73 binary_packages: Mapping[str, BinaryPackage],
74 main_binary_package: str,
75 max_periods_in_package_name: int,
76 path: VirtualPath,
77 *,
78 allow_fuzzy_matches: bool = False,
79) -> Iterable[tuple[str, str, bool, bool]]:
80 if max_periods_in_package_name < 1:
81 prefix, remaining = path.name.split(".", 1)
82 package_name = prefix
83 bug_950723 = False
84 if allow_fuzzy_matches and package_name.endswith("@"):
85 package_name = package_name[:-1]
86 bug_950723 = True
87 if package_name in binary_packages:
88 yield package_name, remaining, True, bug_950723
89 else:
90 yield main_binary_package, path.name, False, False
91 return
93 parts = path.name.split(".", max_periods_in_package_name + 1)
94 for p in range(len(parts) - 1, 0, -1):
95 name = ".".join(parts[0:p])
96 bug_950723 = False
97 if allow_fuzzy_matches and name.endswith("@"): 97 ↛ 98line 97 didn't jump to line 98 because the condition on line 97 was never true
98 name = name[:-1]
99 bug_950723 = True
101 if name in binary_packages:
102 remaining = ".".join(parts[p:])
103 yield name, remaining, True, bug_950723
104 # main package case
105 yield main_binary_package, path.name, False, False
108def _iterate_stem_splits(
109 basename: str,
110) -> collections.abc.Iterator[tuple[str, str | None, int]]:
111 stem = basename
112 period_count = stem.count(".")
113 yield stem, None, period_count
114 install_as_name = ""
115 while period_count > 0:
116 period_count -= 1
117 install_as_name_part, stem = stem.split(".", 1)
118 install_as_name = (
119 install_as_name + "." + install_as_name_part
120 if install_as_name != ""
121 else install_as_name_part
122 )
123 yield stem, install_as_name, period_count
def _find_definition(
    packager_provided_files: Mapping[str, PackagerProvidedFileClassSpec],
    basename: str,
    *,
    period2stems: Mapping[int, Sequence[str]] | None = None,
    had_arch: bool = False,
) -> tuple[str | None, PackagerProvidedFileClassSpec | None, str | None]:
    """Resolve *basename* to a packager provided file definition.

    Every split from `_iterate_stem_splits` is tried in order.  An exact stem
    match wins immediately.  When *period2stems* is provided (non-empty), a
    stem that is not an exact match is additionally compared against the known
    stems with the same number of periods to detect typos.

    :return: ``(install_as_name, definition, typoed_stem)``.  ``typoed_stem``
      is the stem as written in the file name when the match came from typo
      detection and ``None`` for an exact match.  All three are ``None`` when
      nothing matched.
    """
    for stem, install_as_name, period_count in _iterate_stem_splits(basename):
        exact = packager_provided_files.get(stem)
        if exact is not None:
            return install_as_name, exact, None

        candidates = period2stems.get(period_count) if period2stems else None
        if not candidates:
            continue

        # A stem that doubles as a well-known extension is not treated as a
        # typo (avoids false positives); neither is a purely numeric one such
        # as the "1" of "foo.1", since manpages are common.  The exemption is
        # lifted when an architecture suffix was present.
        looks_like_plain_extension = (
            stem in _KNOWN_NON_TYPO_EXTENSIONS or stem.isdigit()
        )
        if not had_arch and looks_like_plain_extension:
            continue

        # Short stems get a tighter edit distance budget.
        if len(stem) > 3:
            max_edit_distance = 2
        else:
            max_edit_distance = 1
        matches = detect_possible_typo(
            stem,
            candidates,
            max_edit_distance=max_edit_distance,
        )
        # Only an unambiguous suggestion is accepted.
        if matches is not None and len(matches) == 1:
            return install_as_name, packager_provided_files[matches[0]], stem
    return None, None, None
157def _check_mismatches(
158 path: VirtualPath,
159 definition: PackagerProvidedFileClassSpec,
160 owning_package: BinaryPackage,
161 install_as_name: str | None,
162 had_arch: bool,
163) -> None:
164 if install_as_name is not None and not definition.allow_name_segment: 164 ↛ 165line 164 didn't jump to line 165 because the condition on line 164 was never true
165 _error(
166 f'The file "{path.fs_path}" looks like a packager provided file for'
167 f' {owning_package.name} of type {definition.stem} with the custom name "{install_as_name}".'
168 " However, this file type does not allow custom naming. The file type was registered"
169 f" by {definition.debputy_plugin_metadata.plugin_name} in case you disagree and want"
170 " to file a bug/feature request."
171 )
172 if had_arch:
173 if owning_package.is_arch_all: 173 ↛ 174line 173 didn't jump to line 174 because the condition on line 173 was never true
174 _error(
175 f'The file "{path.fs_path}" looks like an architecture specific packager provided file for'
176 f" {owning_package.name} of type {definition.stem}."
177 " However, the package in question is arch:all. The use of architecture specific files"
178 " for arch:all packages does not make sense."
179 )
180 if not definition.allow_architecture_segment: 180 ↛ 181line 180 didn't jump to line 181 because the condition on line 180 was never true
181 _error(
182 f'The file "{path.fs_path}" looks like an architecture specific packager provided file for'
183 f" {owning_package.name} of type {definition.stem}."
184 " However, this file type does not allow architecture specific variants. The file type was registered"
185 f" by {definition.debputy_plugin_metadata.plugin_name} in case you disagree and want"
186 " to file a bug/feature request."
187 )
190def _split_basename(
191 basename: str,
192 owning_package: BinaryPackage,
193 *,
194 has_explicit_package: bool = False,
195 allow_fuzzy_matches: bool = False,
196) -> tuple[str, int, str | None, bool]:
197 match_priority = 1 if has_explicit_package else 0
198 fuzzy_match = False
199 arch_restriction: str | None = None
200 if allow_fuzzy_matches and basename.endswith(".in") and len(basename) > 3: 200 ↛ 201line 200 didn't jump to line 201 because the condition on line 200 was never true
201 basename = basename[:-3]
202 fuzzy_match = True
204 if "." in basename:
205 remaining, last_word = basename.rsplit(".", 1)
206 # We cannot use "resolved_architecture" as it would return "all".
207 if last_word == owning_package.package_deb_architecture_variable("ARCH"):
208 match_priority = 3
209 basename = remaining
210 arch_restriction = last_word
211 elif last_word == owning_package.package_deb_architecture_variable("ARCH_OS"): 211 ↛ 212line 211 didn't jump to line 212 because the condition on line 211 was never true
212 match_priority = 2
213 basename = remaining
214 arch_restriction = last_word
215 elif last_word == "all" and owning_package.is_arch_all: 215 ↛ 218line 215 didn't jump to line 218 because the condition on line 215 was never true
216 # This case does not make sense, but we detect it, so we can report an error
217 # via _check_mismatches.
218 match_priority = -1
219 basename = remaining
220 arch_restriction = last_word
222 return basename, match_priority, arch_restriction, fuzzy_match
def _split_path(
    packager_provided_files: Mapping[str, PackagerProvidedFileClassSpec],
    binary_packages: Mapping[str, BinaryPackage],
    main_binary_package: str,
    max_periods_in_package_name: int,
    path: VirtualPath,
    *,
    allow_fuzzy_matches: bool = False,
    period2stems: Mapping[int, Sequence[str]] | None = None,
    known_static_non_ppf_names=frozenset(),
) -> Iterable[PackagerProvidedFile]:
    """Yield the packager provided file matches that *path* represents.

    A name without a period is looked up directly as a stem.  Otherwise each
    ``(package, remainder)`` candidate from `_find_package_name_prefix` is
    tried in order; candidates that do not resolve are skipped and the
    function stops after the first candidate that produced a match.

    :param packager_provided_files: Known definitions keyed by stem.
    :param binary_packages: Known packages keyed by name.
    :param main_binary_package: Package that period-less / prefix-less names
      are attributed to.
    :param max_periods_in_package_name: Passed to `_find_package_name_prefix`.
    :param path: The file to classify.
    :param allow_fuzzy_matches: Enables the fuzzy forms (``.in`` suffix,
      ``<package>@`` prefix) and disables the hard errors raised by
      `_check_mismatches`.
    :param period2stems: Stems grouped by period count for typo detection;
      falsy disables typo detection.
    :param known_static_non_ppf_names: ``debian/<name>`` paths that must never
      be treated as packager provided files (only applied to names containing
      a period).
    """
    owning_package_name = main_binary_package
    basename = path.name
    match_priority = 0
    # NOTE(review): `had_arch` is never reassigned in this function, so the
    # `not had_arch` test further down is always true.
    had_arch = False
    if "." not in basename:
        # Period-less name: the whole name must be the stem.
        definition = packager_provided_files.get(basename)
        if definition is None:
            return
        if definition.packageless_is_fallback_for_all_packages:
            # One match per package whose type the definition supports.
            yield from (
                PackagerProvidedFile(
                    path=path,
                    package_name=n,
                    installed_as_basename=n,
                    provided_key=".UNNAMED.",
                    definition=definition,
                    match_priority=match_priority,
                    fuzzy_match=False,
                    uses_explicit_package_name=False,
                    name_segment=None,
                    architecture_restriction=None,
                )
                for n, p in binary_packages.items()
                if p.package_type in definition.package_types
            )
        elif (
            binary_packages[owning_package_name].package_type
            in definition.package_types
        ):
            # TODO: raise some noise (a la `not allow fuzzy_matches`) if not matching the package type.
            yield PackagerProvidedFile(
                path=path,
                package_name=owning_package_name,
                installed_as_basename=owning_package_name,
                provided_key=".UNNAMED.",
                definition=definition,
                match_priority=match_priority,
                fuzzy_match=False,
                uses_explicit_package_name=False,
                name_segment=None,
                architecture_restriction=None,
            )
        return
    if f"debian/{path.name}" in known_static_non_ppf_names:
        return

    for (
        owning_package_name,
        basename,
        explicit_package,
        bug_950723,
    ) in _find_package_name_prefix(
        binary_packages,
        main_binary_package,
        max_periods_in_package_name,
        path,
        allow_fuzzy_matches=allow_fuzzy_matches,
    ):
        owning_package = binary_packages[owning_package_name]

        # Peel off a fuzzy ".in" suffix and any architecture suffix.
        basename, match_priority, arch_restriction, fuzzy_match = _split_basename(
            basename,
            owning_package,
            has_explicit_package=explicit_package,
            allow_fuzzy_matches=allow_fuzzy_matches,
        )

        install_as_name, definition, typoed_stem = _find_definition(
            packager_provided_files,
            basename,
            period2stems=period2stems,
            had_arch=bool(arch_restriction),
        )
        if definition is None:
            # This candidate split does not resolve; try the next one.
            continue

        # Note: bug_950723 implies allow_fuzzy_matches
        if bug_950723 and not definition.bug_950723:
            continue

        if owning_package.package_type not in definition.package_types:
            # TODO: raise some noise (a la `not allow fuzzy_matches`)
            continue

        if not allow_fuzzy_matches:
            # LSP/Lint checks here but should not use `_check_mismatches` as
            # the hard error disrupts them.
            _check_mismatches(
                path,
                definition,
                owning_package,
                install_as_name,
                arch_restriction is not None,
            )

        expected_path: str | None = None
        if (
            definition.packageless_is_fallback_for_all_packages
            and install_as_name is None
            and not had_arch
            and not explicit_package
            and arch_restriction is None
        ):
            # Unqualified file for a "fallback for all packages" definition:
            # emit one match per known package.
            if typoed_stem is not None:
                # The stem was a typo match; compute the corrected path.
                parent_path = (
                    path.parent_dir.path + "/" if path.parent_dir is not None else ""
                )
                expected_path = f"{parent_path}{definition.stem}"
                if fuzzy_match and path.name.endswith(".in"):
                    expected_path += ".in"
            # NOTE(review): `explicit_package` is false in this branch and
            # `_find_package_name_prefix` only reports `bug_950723` together
            # with an explicit package, so `bug_950723` is always false here
            # and the key is always ".UNNAMED@.".  That differs from the
            # ".UNNAMED." key used for the period-less case and in the `else`
            # branch - confirm whether the two key values are swapped.
            yield from (
                PackagerProvidedFile(
                    path=path,
                    package_name=n,
                    installed_as_basename=f"{n}@" if bug_950723 else n,
                    provided_key=".UNNAMED." if bug_950723 else ".UNNAMED@.",
                    definition=definition,
                    match_priority=match_priority,
                    fuzzy_match=fuzzy_match,
                    uses_explicit_package_name=False,
                    name_segment=None,
                    architecture_restriction=None,
                    expected_path=expected_path,
                )
                for n in binary_packages
            )
        else:
            provided_key = (
                install_as_name if install_as_name is not None else ".UNNAMED."
            )
            basename = (
                install_as_name if install_as_name is not None else owning_package_name
            )
            if bug_950723:
                provided_key = f"{provided_key}@"
                basename = f"{basename}@"
                package_prefix = f"{owning_package_name}@"
            else:
                package_prefix = owning_package_name
            if typoed_stem:
                # Rebuild the file name as it should have been spelled:
                # [<package>.][<name>.]<stem>[.<arch>][.in]
                # Note that `basename` is reused as scratch space here and the
                # rebuilt value is also what gets passed as
                # `installed_as_basename` below.
                parent_path = (
                    path.parent_dir.path + "/" if path.parent_dir is not None else ""
                )
                basename = definition.stem
                if install_as_name is not None:
                    basename = f"{install_as_name}.{basename}"
                if explicit_package:
                    basename = f"{package_prefix}.{basename}"
                if arch_restriction is not None and arch_restriction != "all":
                    basename = f"{basename}.{arch_restriction}"
                expected_path = f"{parent_path}{basename}"
                if fuzzy_match and path.name.endswith(".in"):
                    expected_path += ".in"
            yield PackagerProvidedFile(
                path=path,
                package_name=owning_package_name,
                installed_as_basename=basename,
                provided_key=provided_key,
                definition=definition,
                match_priority=match_priority,
                fuzzy_match=fuzzy_match,
                uses_explicit_package_name=bool(explicit_package),
                name_segment=install_as_name,
                architecture_restriction=arch_restriction,
                expected_path=expected_path,
            )
        # The first candidate that resolved wins; later (less specific)
        # candidates are not considered.
        return
405def _period_stem(stems: Iterable[str]) -> Mapping[int, Sequence[str]]:
406 result: dict[int, list[str]] = {}
407 for stem in stems:
408 period_count = stem.count(".")
409 matched_stems = result.get(period_count)
410 if not matched_stems:
411 matched_stems = [stem]
412 result[period_count] = matched_stems
413 else:
414 matched_stems.append(stem)
415 return result
418def _find_main_package_name(
419 binary_packages: Mapping[str, BinaryPackage],
420 *,
421 allow_fuzzy_matches: bool = False,
422) -> str | None:
423 main_packages = [p.name for p in binary_packages.values() if p.is_main_package]
424 if not main_packages: 424 ↛ 425line 424 didn't jump to line 425 because the condition on line 424 was never true
425 assert allow_fuzzy_matches
426 return next(
427 iter(p.name for p in binary_packages.values() if "Package" in p.fields),
428 None,
429 )
430 return main_packages[0]
433@dataclasses.dataclass(slots=True, frozen=True)
434class PackagingFileClassification:
435 path: VirtualPath
436 packager_provided_files_per_package: None | (
437 Mapping[str, Sequence[PackagerProvidedFile]]
438 )
def classify_debian_packaging_files(
    plugin_feature_set: "PluginProvidedFeatureSet",
    debian_dir: VirtualPath,
    binary_packages: Mapping[str, BinaryPackage],
    *,
    allow_fuzzy_matches: bool = False,
    detect_typos: bool = False,
    ignore_paths: Container[str] = frozenset(),
) -> Iterable[PackagingFileClassification]:
    """Classify the files directly inside *debian_dir*.

    Directories, entries whose name starts with ``.`` and entries whose path
    is in *ignore_paths* are skipped entirely.  Every other entry is yielded
    as a `PackagingFileClassification`, carrying its packager provided file
    matches grouped by package (or ``None`` when it has none).

    When several files compete for the same (package, stem, provided key),
    only the one with the highest match priority is kept; on a tie the file
    seen last wins.

    Nothing is yielded when no main package can be determined.

    :param plugin_feature_set: Source of the known packager provided file
      definitions and known packaging files.
    :param debian_dir: The directory to scan (non-recursively).
    :param binary_packages: Known packages keyed by name.
    :param allow_fuzzy_matches: Passed on to the matching helpers.
    :param detect_typos: Enable typo detection (only effective when
      ``CAN_DETECT_TYPOS`` is true).
    :param ignore_paths: Paths to leave out of the result.
    """
    ppf_definitions = plugin_feature_set.packager_provided_files
    static_non_ppf_names: frozenset[str] = frozenset(
        known_file.detection_value
        for known_file in plugin_feature_set.known_packaging_files.values()
        if known_file.detection_method == "path"
    )
    main_package_name = _find_main_package_name(
        binary_packages,
        allow_fuzzy_matches=allow_fuzzy_matches,
    )
    if main_package_name is None:
        return

    best_match_by_key: dict[tuple[str, str, str], PackagerProvidedFile] = {}
    max_periods = max(name.count(".") for name in binary_packages)
    period2stems = (
        _period_stem(ppf_definitions.keys())
        if detect_typos and CAN_DETECT_TYPOS
        else {}
    )

    considered_entries = []
    for entry in debian_dir.iterdir():
        if entry.is_dir or entry.name.startswith(".") or entry.path in ignore_paths:
            continue
        considered_entries.append(entry)
        candidates = _split_path(
            ppf_definitions,
            binary_packages,
            main_package_name,
            max_periods,
            entry,
            allow_fuzzy_matches=allow_fuzzy_matches,
            period2stems=period2stems,
            known_static_non_ppf_names=static_non_ppf_names,
        )
        for candidate in candidates:
            key = (
                candidate.package_name,
                candidate.definition.stem,
                candidate.provided_key,
            )
            incumbent = best_match_by_key.get(key)
            # Replace unless the already recorded match has strictly higher
            # priority.
            if (
                incumbent is None
                or incumbent.match_priority <= candidate.match_priority
            ):
                best_match_by_key[key] = candidate

    # Regroup the surviving matches: path -> package name -> matches.
    matches_by_path: dict[str, dict[str, list[PackagerProvidedFile]]] = {}
    for match in best_match_by_key.values():
        owner = match.package_name
        matched_path = match.path.path
        per_package = matches_by_path.get(matched_path)
        if per_package is None:
            per_package = matches_by_path[matched_path] = collections.defaultdict(
                list
            )
        per_package[owner].append(match)

    for entry in considered_entries:
        yield PackagingFileClassification(entry, matches_by_path.get(entry.path))
def detect_all_packager_provided_files(
    plugin_feature_set: "PluginProvidedFeatureSet",
    debian_dir: VirtualPath,
    binary_packages: Mapping[str, BinaryPackage],
    *,
    allow_fuzzy_matches: bool = False,
    detect_typos: bool = False,
    ignore_paths: Container[str] = frozenset(),
) -> dict[str, PerPackagePackagerProvidedResult]:
    """Collect the packager provided files in *debian_dir* per package.

    The files are classified with `classify_debian_packaging_files` (all
    arguments are passed through) and each match is filed under its package:
    matches whose definition is ``reservation_only`` go into ``reserved_only``
    keyed by the definition's stem, all others into ``auto_installable``.

    :return: One `PerPackagePackagerProvidedResult` for every package in
      *binary_packages*, keyed by package name (possibly with no matches).
    """
    result = {
        package_name: PerPackagePackagerProvidedResult(
            [],
            collections.defaultdict(list),
        )
        for package_name in binary_packages
    }
    classifications = classify_debian_packaging_files(
        plugin_feature_set,
        debian_dir,
        binary_packages,
        allow_fuzzy_matches=allow_fuzzy_matches,
        detect_typos=detect_typos,
        ignore_paths=ignore_paths,
    )
    for classification in classifications:
        matches_per_package = classification.packager_provided_files_per_package
        if not matches_per_package:
            continue
        for package_name, matches in matches_per_package.items():
            bucket = result[package_name]
            for match in matches:
                if match.definition.reservation_only:
                    bucket.reserved_only[match.definition.stem].append(match)
                else:
                    bucket.auto_installable.append(match)

    return result