Coverage for src/debputy/packager_provided_files.py: 88%
233 statements
« prev ^ index » next coverage.py v7.8.2, created at 2025-09-07 09:27 +0000
« prev ^ index » next coverage.py v7.8.2, created at 2025-09-07 09:27 +0000
1import collections
2import dataclasses
3from typing import (
4 Mapping,
5 Iterable,
6 Dict,
7 List,
8 Optional,
9 Tuple,
10 Sequence,
11 Container,
12 TYPE_CHECKING,
13 FrozenSet,
14)
16from debputy.packages import BinaryPackage
17from debputy.plugin.api import VirtualPath
18from debputy.plugin.api.impl_types import (
19 PackagerProvidedFileClassSpec,
20 PluginProvidedKnownPackagingFile,
21)
22from debputy.util import _error, CAN_DETECT_TYPOS, detect_possible_typo
24if TYPE_CHECKING:
25 from debputy.plugin.api.feature_set import PluginProvidedFeatureSet
# Trailing name parts that are never treated as a misspelled packager provided
# file stem by the typo detection (unless the file name also carried an
# architecture suffix).
_KNOWN_NON_TYPO_EXTENSIONS = frozenset(
    (
        "conf",
        "config",
        "sh",
        "yml",
        "yaml",
        "json",
        "bash",
        "pl",
        "py",
        "md",
        "rst",
        # Fairly common image format in older packages
        "xpm",
        # Cruft of various kinds (for unclean work directories). Listed only
        # to avoid silly false positives.
        "bak",
        "tmp",
        "temp",
        "orig",
        "rej",
    )
)
53@dataclasses.dataclass(frozen=True, slots=True)
54class PackagerProvidedFile:
55 path: VirtualPath
56 package_name: str
57 installed_as_basename: str
58 provided_key: str
59 definition: PackagerProvidedFileClassSpec
60 match_priority: int = 0
61 fuzzy_match: bool = False
62 uses_explicit_package_name: bool = False
63 name_segment: Optional[str] = None
64 architecture_restriction: Optional[str] = None
65 expected_path: Optional[str] = None
67 def compute_dest(self) -> Tuple[str, str]:
68 return self.definition.compute_dest(
69 self.installed_as_basename,
70 owning_package=self.package_name,
71 path=self.path,
72 )
75@dataclasses.dataclass(slots=True)
76class PerPackagePackagerProvidedResult:
77 auto_installable: List[PackagerProvidedFile]
78 reserved_only: Dict[str, List[PackagerProvidedFile]]
def _find_package_name_prefix(
    binary_packages: Mapping[str, BinaryPackage],
    main_binary_package: str,
    max_periods_in_package_name: int,
    path: VirtualPath,
    *,
    allow_fuzzy_matches: bool = False,
) -> Iterable[Tuple[str, str, bool, bool]]:
    """Yield the possible owners of ``path`` based on its file name prefix.

    Each yielded tuple is ``(package_name, remaining_basename,
    explicit_package, bug_950723)``:

    * ``explicit_package`` is True when the file name starts with the name
      of a known binary package; otherwise the main binary package is used
      and the full file name is returned as the remaining basename.
    * ``bug_950723`` is True when the package prefix was written with a
      trailing ``@`` (only recognised when ``allow_fuzzy_matches`` is set).

    :param binary_packages: Known binary packages keyed by name.
    :param main_binary_package: Package that owns files without a prefix.
    :param max_periods_in_package_name: Highest number of periods found in
      any binary package name; bounds how many prefixes are tried.
    :param path: The file being examined. Its name must contain a period
      when ``max_periods_in_package_name`` is below 1.
    :param allow_fuzzy_matches: Accept a trailing ``@`` on the prefix.
    """
    file_name = path.name
    implicit_owner = (main_binary_package, file_name, False, False)

    if max_periods_in_package_name < 1:
        # No package name contains a period, so only the first name part
        # can possibly be a package name.
        candidate, rest = file_name.split(".", 1)
        at_suffixed = bool(allow_fuzzy_matches and candidate.endswith("@"))
        if at_suffixed:
            candidate = candidate[:-1]
        if candidate in binary_packages:
            yield candidate, rest, True, at_suffixed
        else:
            yield implicit_owner
        return

    segments = file_name.split(".", max_periods_in_package_name + 1)
    # Try the longest possible prefix first and shrink it one part at a time.
    for cut in reversed(range(1, len(segments))):
        candidate = ".".join(segments[:cut])
        at_suffixed = bool(allow_fuzzy_matches and candidate.endswith("@"))
        if at_suffixed:
            candidate = candidate[:-1]
        if candidate not in binary_packages:
            continue
        yield candidate, ".".join(segments[cut:]), True, at_suffixed

    # The main package is always a candidate as well.
    yield implicit_owner
def _iterate_stem_splits(
    basename: str,
) -> Iterable[Tuple[str, Optional[str], int]]:
    """Yield every way of splitting ``basename`` into a name segment and a stem.

    The first item is the whole basename as the stem with no name segment
    (``None``). Each following item moves one more period-separated part
    from the front of the stem into the name segment.

    :param basename: The basename to split (package prefix and architecture
      suffix are expected to have been removed by the caller).
    :return: An iterable of ``(stem, name_segment, periods_in_stem)`` tuples.
    """
    remainder = basename
    periods_left = remainder.count(".")
    yield remainder, None, periods_left

    name_segment = ""
    while periods_left > 0:
        periods_left -= 1
        head, remainder = remainder.split(".", 1)
        # An empty accumulated name is treated the same as "nothing
        # accumulated yet", so a leading empty part is simply replaced.
        name_segment = f"{name_segment}.{head}" if name_segment != "" else head
        yield remainder, name_segment, periods_left
def _find_definition(
    packager_provided_files: Mapping[str, PackagerProvidedFileClassSpec],
    basename: str,
    *,
    period2stems: Optional[Mapping[int, Sequence[str]]] = None,
    had_arch: bool = False,
) -> Tuple[Optional[str], Optional[PackagerProvidedFileClassSpec], Optional[str]]:
    """Find the packager provided file definition matching ``basename``.

    Every split of ``basename`` into name segment and stem is tried in the
    order produced by ``_iterate_stem_splits``. An exact stem match wins
    immediately. When ``period2stems`` is provided (non-empty), a stem that
    does not match exactly is additionally compared against the known stems
    with the same number of periods to detect typos.

    :param packager_provided_files: Known definitions keyed by stem.
    :param basename: Basename without package prefix / architecture suffix.
    :param period2stems: Known stems grouped by the number of periods they
      contain. Typo detection is skipped when this is empty or ``None``.
    :param had_arch: Whether an architecture suffix was stripped from the
      file name. When set, well-known extensions and pure digits are not
      exempt from typo detection.
    :return: ``(name_segment, definition, typoed_stem)``. ``typoed_stem`` is
      the misspelled stem when the match came from typo detection and
      ``None`` for an exact match. All three are ``None`` without a match.
    """
    typo_detection_enabled = bool(period2stems)

    for candidate, name_segment, periods in _iterate_stem_splits(basename):
        exact = packager_provided_files.get(candidate)
        if exact is not None:
            return name_segment, exact, None

        comparable_stems = (
            period2stems.get(periods) if typo_detection_enabled else None
        )
        if not comparable_stems:
            continue

        # A candidate that is itself a well-known extension is not considered
        # a typo (to avoid false positives). The same goes for "foo.1" since
        # manpages are kind of common.
        looks_like_extension = (
            candidate in _KNOWN_NON_TYPO_EXTENSIONS or candidate.isdigit()
        )
        if looks_like_extension and not had_arch:
            continue

        # Short stems get a tighter edit distance budget.
        tolerance = 1 if len(candidate) <= 3 else 2
        suggestions = detect_possible_typo(
            candidate,
            comparable_stems,
            max_edit_distance=tolerance,
        )
        # Only an unambiguous suggestion counts as a match.
        if suggestions is None or len(suggestions) != 1:
            continue
        return name_segment, packager_provided_files[suggestions[0]], candidate

    return None, None, None
def _check_mismatches(
    path: VirtualPath,
    definition: PackagerProvidedFileClassSpec,
    owning_package: BinaryPackage,
    install_as_name: Optional[str],
    had_arch: bool,
) -> None:
    """Report file names that use features their file type does not allow.

    Problems are reported via ``_error``. The checks are, in order: a custom
    name segment on a type without ``allow_name_segment``, an architecture
    suffix on an arch:all package, and an architecture suffix on a type
    without ``allow_architecture_segment``.

    :param path: The offending file (used in the message only).
    :param definition: The matched file type.
    :param owning_package: The package that the file was matched to.
    :param install_as_name: The custom name segment, if the file had one.
    :param had_arch: Whether the file name had an architecture suffix.
    """
    has_custom_name = install_as_name is not None
    if has_custom_name and not definition.allow_name_segment:
        message = (
            f'The file "{path.fs_path}" looks like a packager provided file for'
            f' {owning_package.name} of type {definition.stem} with the custom name "{install_as_name}".'
            " However, this file type does not allow custom naming. The file type was registered"
            f" by {definition.debputy_plugin_metadata.plugin_name} in case you disagree and want"
            " to file a bug/feature request."
        )
        _error(message)

    if not had_arch:
        return

    if owning_package.is_arch_all:
        message = (
            f'The file "{path.fs_path}" looks like an architecture specific packager provided file for'
            f" {owning_package.name} of type {definition.stem}."
            " However, the package in question is arch:all. The use of architecture specific files"
            " for arch:all packages does not make sense."
        )
        _error(message)

    if not definition.allow_architecture_segment:
        message = (
            f'The file "{path.fs_path}" looks like an architecture specific packager provided file for'
            f" {owning_package.name} of type {definition.stem}."
            " However, this file type does not allow architecture specific variants. The file type was registered"
            f" by {definition.debputy_plugin_metadata.plugin_name} in case you disagree and want"
            " to file a bug/feature request."
        )
        _error(message)
def _split_basename(
    basename: str,
    owning_package: BinaryPackage,
    *,
    has_explicit_package: bool = False,
    allow_fuzzy_matches: bool = False,
) -> Tuple[str, int, Optional[str], bool]:
    """Strip fuzzy and architecture suffixes from ``basename``.

    :param basename: File name with any package prefix already removed.
    :param owning_package: Package the file belongs to; provides the
      architecture values the last name part is compared against.
    :param has_explicit_package: Whether the file name had a package prefix.
      Raises the priority of matches without an architecture suffix.
    :param allow_fuzzy_matches: Also strip a trailing ``.in`` (provided
      something is left in front of it).
    :return: ``(basename, match_priority, arch_restriction, fuzzy_match)``.
      The priority is 3 for an exact architecture suffix, 2 for an
      architecture OS suffix, -1 for ``all`` on an arch:all package, and
      otherwise 1 or 0 depending on ``has_explicit_package``.
    """
    default_priority = 1 if has_explicit_package else 0

    stripped_in_suffix = bool(
        allow_fuzzy_matches and basename.endswith(".in") and len(basename) > 3
    )
    if stripped_in_suffix:
        basename = basename[:-3]

    front, period, suffix = basename.rpartition(".")
    if not period:
        return basename, default_priority, None, stripped_in_suffix

    # We cannot use "resolved_architecture" as it would return "all".
    arch_variable = owning_package.package_deb_architecture_variable
    if suffix == arch_variable("ARCH"):
        arch_priority = 3
    elif suffix == arch_variable("ARCH_OS"):
        arch_priority = 2
    elif suffix == "all" and owning_package.is_arch_all:
        # This case does not make sense, but it is detected so an error can
        # be reported via _check_mismatches.
        arch_priority = -1
    else:
        return basename, default_priority, None, stripped_in_suffix

    return front, arch_priority, suffix, stripped_in_suffix
def _parent_dir_prefix(path: VirtualPath) -> str:
    """Return the parent directory path of ``path`` plus "/", or "" without a parent."""
    return path.parent_dir.path + "/" if path.parent_dir is not None else ""


def _split_path(
    packager_provided_files: Mapping[str, PackagerProvidedFileClassSpec],
    binary_packages: Mapping[str, BinaryPackage],
    main_binary_package: str,
    max_periods_in_package_name: int,
    path: VirtualPath,
    *,
    allow_fuzzy_matches: bool = False,
    period2stems: Optional[Mapping[int, Sequence[str]]] = None,
    known_static_non_ppf_names: Container[str] = frozenset(),
) -> Iterable[PackagerProvidedFile]:
    """Yield every packager provided file match for a single packaging file.

    A file name without a period is looked up directly as a stem. Any other
    name is decomposed into an optional package prefix, an optional name
    segment, the stem and an optional architecture suffix; every plausible
    package prefix is tried.

    :param packager_provided_files: Known definitions keyed by stem.
    :param binary_packages: Known binary packages keyed by name.
    :param main_binary_package: Package owning files without package prefix.
    :param max_periods_in_package_name: Highest number of periods in any
      binary package name.
    :param path: The packaging file to analyse.
    :param allow_fuzzy_matches: Enable lenient matching (trailing ``.in``,
      ``@`` suffixed package prefix) and skip the hard-error mismatch checks.
    :param period2stems: Known stems grouped by period count; enables typo
      detection when non-empty.
    :param known_static_non_ppf_names: ``debian/<name>`` paths that must
      never be treated as packager provided files.
    :return: The matches found; nothing is yielded for unrelated files.
    """
    file_name = path.name

    if "." not in file_name:
        definition = packager_provided_files.get(file_name)
        if definition is None:
            return
        # A package-less file applies either to every package (as a
        # fallback) or to the main package only.
        owners: Iterable[str] = (
            binary_packages
            if definition.packageless_is_fallback_for_all_packages
            else (main_binary_package,)
        )
        for owner in owners:
            yield PackagerProvidedFile(
                path=path,
                package_name=owner,
                installed_as_basename=owner,
                provided_key=".UNNAMED.",
                definition=definition,
                match_priority=0,
                fuzzy_match=False,
                uses_explicit_package_name=False,
                name_segment=None,
                architecture_restriction=None,
            )
        return

    if f"debian/{file_name}" in known_static_non_ppf_names:
        return

    for (
        owning_package_name,
        remainder,
        explicit_package,
        bug_950723,
    ) in _find_package_name_prefix(
        binary_packages,
        main_binary_package,
        max_periods_in_package_name,
        path,
        allow_fuzzy_matches=allow_fuzzy_matches,
    ):
        owning_package = binary_packages[owning_package_name]

        basename, match_priority, arch_restriction, fuzzy_match = _split_basename(
            remainder,
            owning_package,
            has_explicit_package=explicit_package,
            allow_fuzzy_matches=allow_fuzzy_matches,
        )

        install_as_name, definition, typoed_stem = _find_definition(
            packager_provided_files,
            basename,
            period2stems=period2stems,
            had_arch=bool(arch_restriction),
        )
        if definition is None:
            continue

        # Note: bug_950723 implies allow_fuzzy_matches
        if bug_950723 and not definition.bug_950723:
            continue

        if not allow_fuzzy_matches:
            # LSP/Lint checks here but should not use `_check_mismatches` as
            # the hard error disrupts them.
            _check_mismatches(
                path,
                definition,
                owning_package,
                install_as_name,
                arch_restriction is not None,
            )

        expected_path: Optional[str] = None
        if (
            definition.packageless_is_fallback_for_all_packages
            and install_as_name is None
            and not explicit_package
            and arch_restriction is None
        ):
            # Package-less file acting as the fallback for every package.
            if typoed_stem is not None:
                expected_path = f"{_parent_dir_prefix(path)}{definition.stem}"
                if fuzzy_match and file_name.endswith(".in"):
                    expected_path += ".in"
            for package_name in binary_packages:
                yield PackagerProvidedFile(
                    path=path,
                    package_name=package_name,
                    installed_as_basename=(
                        f"{package_name}@" if bug_950723 else package_name
                    ),
                    # The "@" marker belongs to the bug_950723 variant, in
                    # line with installed_as_basename. The plain variant uses
                    # the same ".UNNAMED." key as an explicit
                    # "<package>.<stem>" file, so the explicit (higher
                    # priority) file replaces this fallback when matches are
                    # de-duplicated by key.
                    provided_key=".UNNAMED@." if bug_950723 else ".UNNAMED.",
                    definition=definition,
                    match_priority=match_priority,
                    fuzzy_match=fuzzy_match,
                    uses_explicit_package_name=False,
                    name_segment=None,
                    architecture_restriction=None,
                    expected_path=expected_path,
                )
            continue

        if install_as_name is not None:
            provided_key = install_as_name
            installed_as = install_as_name
        else:
            provided_key = ".UNNAMED."
            installed_as = owning_package_name
        package_prefix = owning_package_name
        if bug_950723:
            provided_key = f"{provided_key}@"
            installed_as = f"{installed_as}@"
            package_prefix = f"{package_prefix}@"

        if typoed_stem:
            # Rebuild the file name the packager presumably meant to use.
            expected_name = definition.stem
            parent_prefix = _parent_dir_prefix(path)
            if install_as_name is not None:
                expected_name = f"{install_as_name}.{expected_name}"
            if explicit_package:
                expected_name = f"{package_prefix}.{expected_name}"
            if arch_restriction is not None and arch_restriction != "all":
                expected_name = f"{expected_name}.{arch_restriction}"
            expected_path = f"{parent_prefix}{expected_name}"
            if fuzzy_match and file_name.endswith(".in"):
                expected_path += ".in"
            # NOTE(review): for typo matches the reconstructed file name also
            # ends up as the installed basename. This mirrors the existing
            # behaviour; confirm whether that is intended.
            installed_as = expected_name

        yield PackagerProvidedFile(
            path=path,
            package_name=owning_package_name,
            installed_as_basename=installed_as,
            provided_key=provided_key,
            definition=definition,
            match_priority=match_priority,
            fuzzy_match=fuzzy_match,
            uses_explicit_package_name=bool(explicit_package),
            name_segment=install_as_name,
            architecture_restriction=arch_restriction,
            expected_path=expected_path,
        )
def _period_stem(stems: Iterable[str]) -> Mapping[int, Sequence[str]]:
    """Group ``stems`` by the number of periods each of them contains.

    :param stems: The stems to group.
    :return: A dict mapping a period count to the stems with that many
      periods, each list in input order.
    """
    grouped: Dict[int, List[str]] = {}
    for name in stems:
        grouped.setdefault(name.count("."), []).append(name)
    return grouped
def _find_main_package_name(
    binary_packages: Mapping[str, BinaryPackage],
    *,
    allow_fuzzy_matches: bool = False,
) -> Optional[str]:
    """Return the name of the package that owns package-less files.

    The first package flagged as the main package is used. Without such a
    package (only expected when ``allow_fuzzy_matches`` is set), the first
    package that has a ``Package`` field is used instead.

    :param binary_packages: Known binary packages keyed by name.
    :param allow_fuzzy_matches: Whether lenient matching is in effect.
    :return: The package name, or ``None`` if no package qualifies.
    """
    for package in binary_packages.values():
        if package.is_main_package:
            return package.name

    # Having no main package is only acceptable in the lenient mode.
    assert allow_fuzzy_matches
    for package in binary_packages.values():
        if "Package" in package.fields:
            return package.name
    return None
431@dataclasses.dataclass(slots=True, frozen=True)
432class PackagingFileClassification:
433 path: VirtualPath
434 packager_provided_files_per_package: Optional[
435 Mapping[str, Sequence[PackagerProvidedFile]]
436 ]
def classify_debian_packaging_files(
    plugin_feature_set: "PluginProvidedFeatureSet",
    debian_dir: VirtualPath,
    binary_packages: Mapping[str, BinaryPackage],
    *,
    allow_fuzzy_matches: bool = False,
    detect_typos: bool = False,
    ignore_paths: Container[str] = frozenset(),
) -> Iterable[PackagingFileClassification]:
    """Classify the files directly inside ``debian_dir``.

    This is a generator: one classification is produced for every file that
    is neither a directory, nor has a name starting with a period, nor is
    listed in ``ignore_paths``. Nothing is produced when no main package can
    be determined.

    When several files compete for the same (package, stem, key) slot, the
    one with the highest match priority is kept; on equal priority the file
    seen last wins.

    :param plugin_feature_set: Source of the packager provided file
      definitions and of the known static packaging file paths.
    :param debian_dir: Directory whose entries are classified.
    :param binary_packages: Known binary packages keyed by name.
    :param allow_fuzzy_matches: Enable lenient matching.
    :param detect_typos: Enable typo detection of stems (only effective when
      ``CAN_DETECT_TYPOS`` is true).
    :param ignore_paths: Paths that should be skipped entirely.
    :return: An iterable of classifications in directory iteration order.
    """
    packager_provided_files = plugin_feature_set.packager_provided_files
    known_static_non_ppf_names: FrozenSet[str] = frozenset(
        known_file.detection_value
        for known_file in plugin_feature_set.known_packaging_files.values()
        if known_file.detection_method == "path"
    )
    main_binary_package = _find_main_package_name(
        binary_packages,
        allow_fuzzy_matches=allow_fuzzy_matches,
    )
    if main_binary_package is None:
        # Without an owner for package-less files there is nothing to
        # classify. A bare return simply ends the generator.
        return

    max_periods_in_package_name = max(name.count(".") for name in binary_packages)
    period2stems: Mapping[int, Sequence[str]] = (
        _period_stem(packager_provided_files.keys())
        if detect_typos and CAN_DETECT_TYPOS
        else {}
    )

    best_match_by_key: Dict[Tuple[str, str, str], PackagerProvidedFile] = {}
    paths: List[VirtualPath] = []

    for entry in debian_dir.iterdir:
        if entry.is_dir or entry.name.startswith("."):
            continue
        if entry.path in ignore_paths:
            continue
        paths.append(entry)
        matching_ppfs = _split_path(
            packager_provided_files,
            binary_packages,
            main_binary_package,
            max_periods_in_package_name,
            entry,
            allow_fuzzy_matches=allow_fuzzy_matches,
            period2stems=period2stems,
            known_static_non_ppf_names=known_static_non_ppf_names,
        )
        for candidate in matching_ppfs:
            match_key = (
                candidate.package_name,
                candidate.definition.stem,
                candidate.provided_key,
            )
            existing = best_match_by_key.get(match_key)
            # Only a strictly higher priority protects an existing match.
            if (
                existing is not None
                and existing.match_priority > candidate.match_priority
            ):
                continue
            best_match_by_key[match_key] = candidate

    # Regroup the surviving matches: path -> package name -> matches.
    paths2ppfs_per_package: Dict[str, Dict[str, List[PackagerProvidedFile]]] = {}
    for winner in best_match_by_key.values():
        path_name = winner.path.path
        ppfs_per_package = paths2ppfs_per_package.get(path_name)
        if ppfs_per_package is None:
            ppfs_per_package = collections.defaultdict(list)
            paths2ppfs_per_package[path_name] = ppfs_per_package
        ppfs_per_package[winner.package_name].append(winner)

    for entry in paths:
        yield PackagingFileClassification(
            entry,
            paths2ppfs_per_package.get(entry.path),
        )
def detect_all_packager_provided_files(
    plugin_feature_set: "PluginProvidedFeatureSet",
    debian_dir: VirtualPath,
    binary_packages: Mapping[str, BinaryPackage],
    *,
    allow_fuzzy_matches: bool = False,
    detect_typos: bool = False,
    ignore_paths: Container[str] = frozenset(),
) -> Dict[str, PerPackagePackagerProvidedResult]:
    """Collect the packager provided files of every binary package.

    The files in ``debian_dir`` are classified via
    ``classify_debian_packaging_files`` and the matches are sorted into one
    result per package, separating reservation-only definitions from the
    rest.

    :param plugin_feature_set: Source of the known definitions.
    :param debian_dir: Directory to scan.
    :param binary_packages: Known binary packages keyed by name.
    :param allow_fuzzy_matches: Enable lenient matching.
    :param detect_typos: Enable typo detection of stems.
    :param ignore_paths: Paths that should be skipped entirely.
    :return: A dict with one entry per name in ``binary_packages``.
    """
    per_package = {
        name: PerPackagePackagerProvidedResult([], collections.defaultdict(list))
        for name in binary_packages
    }
    classifications = classify_debian_packaging_files(
        plugin_feature_set,
        debian_dir,
        binary_packages,
        allow_fuzzy_matches=allow_fuzzy_matches,
        detect_typos=detect_typos,
        ignore_paths=ignore_paths,
    )
    for classification in classifications:
        matches_by_package = classification.packager_provided_files_per_package
        if not matches_by_package:
            continue
        for package_name, matches in matches_by_package.items():
            bucket = per_package[package_name]
            for match in matches:
                definition = match.definition
                if definition.reservation_only:
                    bucket.reserved_only[definition.stem].append(match)
                else:
                    bucket.auto_installable.append(match)

    return per_package