Coverage for src/debputy/packager_provided_files.py: 86%
204 statements
« prev ^ index » next coverage.py v7.6.0, created at 2025-01-27 13:59 +0000
« prev ^ index » next coverage.py v7.6.0, created at 2025-01-27 13:59 +0000
1import collections
2import dataclasses
3from typing import Mapping, Iterable, Dict, List, Optional, Tuple, Sequence, Container
5from debputy.packages import BinaryPackage
6from debputy.plugin.api import VirtualPath
7from debputy.plugin.api.impl_types import PackagerProvidedFileClassSpec
8from debputy.util import _error, CAN_DETECT_TYPOS, detect_possible_typo
# Basenames in the debian directory that are skipped outright when looking for
# packager provided files (PPFs). This is a cheap exact-name check; the
# extension based _KNOWN_NON_TYPO_EXTENSIONS check is more general (and more
# expensive), and the two partly overlap.
_KNOWN_NON_PPFS = frozenset(
    (
        # Names that would otherwise be reported as typos of a real stem.
        "gbp.conf",  # Within two edits of `gbp.config` (dh_installdebconf)
        "salsa-ci.yml",  # Within two edits of `salsa-ci.wm` (dh_installwm)
        # Well-known packaging files that are never PPFs, so there is no
        # reason to check them.
        "clean",
        "control",
        "compat",
        "debputy.manifest",
        "rules",
        # NB: changelog and copyright are (de facto) PPFs, so they are
        # deliberately not listed here.
    )
)
# Trailing name segments that are never treated as a typo of a PPF stem
# (unless the name also carried an architecture segment).
_KNOWN_NON_TYPO_EXTENSIONS = frozenset(
    (
        # Common configuration, script and documentation extensions.
        "conf",
        "sh",
        "yml",
        "yaml",
        "json",
        "bash",
        "pl",
        "py",
        "md",
        # Image format that is fairly common in older packages.
        "xpm",
        # Left-overs found in unclean work directories; listed purely to
        # avoid silly false positives.
        "bak",
        "tmp",
        "temp",
        "orig",
        "rej",
    )
)
51@dataclasses.dataclass(frozen=True, slots=True)
52class PackagerProvidedFile:
53 path: VirtualPath
54 package_name: str
55 installed_as_basename: str
56 provided_key: str
57 definition: PackagerProvidedFileClassSpec
58 match_priority: int = 0
59 fuzzy_match: bool = False
60 uses_explicit_package_name: bool = False
61 name_segment: Optional[str] = None
62 architecture_restriction: Optional[str] = None
63 expected_path: Optional[str] = None
65 def compute_dest(self) -> Tuple[str, str]:
66 return self.definition.compute_dest(
67 self.installed_as_basename,
68 owning_package=self.package_name,
69 path=self.path,
70 )
73@dataclasses.dataclass(frozen=True, slots=True)
74class PerPackagePackagerProvidedResult:
75 auto_installable: List[PackagerProvidedFile]
76 reserved_only: Dict[str, List[PackagerProvidedFile]]
def _find_package_name_prefix(
    binary_packages: Mapping[str, BinaryPackage],
    main_binary_package: str,
    max_periods_in_package_name: int,
    path: VirtualPath,
    *,
    allow_fuzzy_matches: bool = False,
) -> Iterable[Tuple[str, str, bool, bool]]:
    """Yield candidate splits of a file name into package name and remainder.

    Each item is ``(package_name, remaining_name, explicit_package, bug_950723)``.
    ``explicit_package`` is true when the file name starts with the name of a
    known package. ``bug_950723`` is true when that prefix was only recognised
    after dropping a trailing ``@`` (only attempted with fuzzy matching).

    :param binary_packages: Known packages; only membership by name is used.
    :param main_binary_package: Package used when no prefix is recognised.
    :param max_periods_in_package_name: Highest number of periods in any
      known package name; bounds how many leading segments are tried.
    :param path: The file being examined (its name must contain a period when
      ``max_periods_in_package_name`` is below 1).
    :param allow_fuzzy_matches: Whether to also accept ``<package>@`` prefixes.
    """
    full_name = path.name

    if max_periods_in_package_name < 1:
        # No package name contains a period, so only the first segment can
        # possibly be a package name.
        candidate, rest = full_name.split(".", 1)
        had_at_suffix = bool(allow_fuzzy_matches and candidate.endswith("@"))
        if had_at_suffix:
            candidate = candidate[:-1]
        if candidate in binary_packages:
            yield candidate, rest, True, had_at_suffix
        else:
            yield main_binary_package, full_name, False, False
        return

    segments = full_name.split(".", max_periods_in_package_name + 1)
    # Try the longest prefix first, then progressively shorter ones.
    for cut in reversed(range(1, len(segments))):
        candidate = ".".join(segments[:cut])
        had_at_suffix = bool(allow_fuzzy_matches and candidate.endswith("@"))
        if had_at_suffix:
            candidate = candidate[:-1]
        if candidate in binary_packages:
            yield candidate, ".".join(segments[cut:]), True, had_at_suffix

    # Finally, the interpretation where the name has no package prefix at all.
    yield main_binary_package, full_name, False, False
def _iterate_stem_splits(basename: str) -> Iterable[Tuple[str, Optional[str], int]]:
    """Yield every way of splitting a basename into name segment and stem.

    Each item is ``(stem, install_as_name, period_count)``. The first item is
    the whole basename as the stem with no name segment (``None``). Each
    following item moves one more leading period-separated part from the stem
    into the name segment. ``period_count`` is the number of periods left in
    the yielded stem.

    For ``"a.b.c"`` this yields ``("a.b.c", None, 2)``, ``("b.c", "a", 1)``
    and ``("c", "a.b", 0)``.
    """
    stem = basename
    period_count = stem.count(".")
    # The whole name might be a stem on its own (no custom name segment).
    yield stem, None, period_count
    install_as_name = ""
    while period_count > 0:
        period_count -= 1
        install_as_name_part, stem = stem.split(".", 1)
        install_as_name = (
            install_as_name + "." + install_as_name_part
            if install_as_name != ""
            else install_as_name_part
        )
        yield stem, install_as_name, period_count
def _find_definition(
    packager_provided_files: Mapping[str, PackagerProvidedFileClassSpec],
    basename: str,
    *,
    period2stems: Optional[Mapping[int, Sequence[str]]] = None,
    had_arch: bool = False,
) -> Tuple[Optional[str], Optional[PackagerProvidedFileClassSpec], Optional[str]]:
    """Resolve a basename to a definition, optionally tolerating typos.

    :param packager_provided_files: Known definitions keyed by stem.
    :param basename: File name with any package prefix and architecture
      segment already removed.
    :param period2stems: Known stems grouped by how many periods they contain.
      When empty or ``None``, no typo detection is attempted.
    :param had_arch: Whether an architecture segment was stripped from the
      name; if so, the "looks like an ordinary extension" exemption is skipped.
    :return: ``(install_as_name, definition, typoed_stem)``. ``typoed_stem`` is
      the stem as written in the file name when the match came from typo
      detection and ``None`` for exact matches. All three are ``None`` when
      nothing matched.
    """
    for candidate, name_segment, periods in _iterate_stem_splits(basename):
        exact = packager_provided_files.get(candidate)
        if exact is not None:
            return name_segment, exact, None

        # From here on, we are only looking for possible typos.
        known_stems = period2stems.get(periods) if period2stems else None
        if not known_stems:
            continue

        # A candidate that is a well-known extension is not considered a typo
        # (to avoid false positives). Pure digits are exempt as well, since
        # "foo.1" style manpages are fairly common.
        looks_unrelated = (
            candidate in _KNOWN_NON_TYPO_EXTENSIONS or candidate.isdigit()
        )
        if looks_unrelated and not had_arch:
            continue

        # Short candidates get a tighter edit distance budget.
        tolerance = 1 if len(candidate) <= 3 else 2
        suggestions = detect_possible_typo(
            candidate,
            known_stems,
            max_edit_distance=tolerance,
        )
        # Only an unambiguous suggestion counts as a match.
        if suggestions is None or len(suggestions) != 1:
            continue
        return name_segment, packager_provided_files[suggestions[0]], candidate
    return None, None, None
def _check_mismatches(
    path: VirtualPath,
    definition: PackagerProvidedFileClassSpec,
    owning_package: BinaryPackage,
    install_as_name: Optional[str],
    had_arch: bool,
) -> None:
    """Report file names that use features their file type does not permit.

    Every problem is reported through ``_error``:

    * a custom name segment on a type that does not allow one,
    * an architecture segment on a file for an arch:all package,
    * an architecture segment on a type that does not allow one.

    :param path: The file being validated (used in the messages).
    :param definition: The file type that the file matched.
    :param owning_package: The package the file was attributed to.
    :param install_as_name: Custom name segment from the file name, if any.
    :param had_arch: Whether the file name had an architecture segment.
    """
    name_segment_forbidden = (
        install_as_name is not None and not definition.allow_name_segment
    )
    if name_segment_forbidden:
        _error(
            f'The file "{path.fs_path}" looks like a packager provided file for'
            f' {owning_package.name} of type {definition.stem} with the custom name "{install_as_name}".'
            " However, this file type does not allow custom naming. The file type was registered"
            f" by {definition.debputy_plugin_metadata.plugin_name} in case you disagree and want"
            " to file a bug/feature request."
        )

    # The remaining checks only concern architecture specific file names.
    if not had_arch:
        return

    if owning_package.is_arch_all:
        _error(
            f'The file "{path.fs_path}" looks like an architecture specific packager provided file for'
            f" {owning_package.name} of type {definition.stem}."
            " However, the package in question is arch:all. The use of architecture specific files"
            " for arch:all packages does not make sense."
        )

    if not definition.allow_architecture_segment:
        _error(
            f'The file "{path.fs_path}" looks like an architecture specific packager provided file for'
            f" {owning_package.name} of type {definition.stem}."
            " However, this file type does not allow architecture specific variants. The file type was registered"
            f" by {definition.debputy_plugin_metadata.plugin_name} in case you disagree and want"
            " to file a bug/feature request."
        )
def _split_path(
    packager_provided_files: Mapping[str, PackagerProvidedFileClassSpec],
    binary_packages: Mapping[str, BinaryPackage],
    main_binary_package: str,
    max_periods_in_package_name: int,
    path: VirtualPath,
    *,
    allow_fuzzy_matches: bool = False,
    period2stems: Optional[Mapping[int, Sequence[str]]] = None,
) -> Iterable[PackagerProvidedFile]:
    """Interpret one file name as a packager provided file, if possible.

    The file name is decomposed into an optional package prefix, an optional
    name segment, the stem and an optional architecture segment. Nothing is
    yielded when no definition matches. When the matched definition is a
    packageless fallback for all packages and the name has no package prefix,
    name segment or architecture segment, one match is yielded per known
    package. Otherwise a single match is yielded for the owning package.

    Match priorities assigned here: ``0`` without a package prefix, ``1`` with
    one, ``2`` / ``3`` when the last segment equals the package's ``ARCH_OS`` /
    ``ARCH`` architecture variable, and ``-1`` for ``all`` on an arch:all
    package (kept only so that it can be reported as an error).

    :param packager_provided_files: Known definitions keyed by stem.
    :param binary_packages: Known binary packages keyed by name.
    :param main_binary_package: Package owning files without a package prefix.
    :param max_periods_in_package_name: Highest number of periods in any
      known package name.
    :param path: The file to interpret.
    :param allow_fuzzy_matches: Accept relaxed names (``.in`` suffix,
      ``<package>@`` prefix) and skip the hard-error mismatch checks.
    :param period2stems: Stems grouped by period count for typo detection;
      empty or ``None`` disables typo detection.
    """
    owning_package_name = main_binary_package
    basename = path.name
    match_priority = 0
    if "." not in basename:
        # Without a period, the whole name must be the stem.
        definition = packager_provided_files.get(basename)
        if definition is None:
            return
        if definition.packageless_is_fallback_for_all_packages:
            yield from (
                PackagerProvidedFile(
                    path=path,
                    package_name=n,
                    installed_as_basename=n,
                    provided_key=".UNNAMED.",
                    definition=definition,
                    match_priority=match_priority,
                    fuzzy_match=False,
                    uses_explicit_package_name=False,
                    name_segment=None,
                    architecture_restriction=None,
                )
                for n in binary_packages
            )
        else:
            yield PackagerProvidedFile(
                path=path,
                package_name=owning_package_name,
                installed_as_basename=owning_package_name,
                provided_key=".UNNAMED.",
                definition=definition,
                match_priority=match_priority,
                fuzzy_match=False,
                uses_explicit_package_name=False,
                name_segment=None,
                architecture_restriction=None,
            )
        return

    for (
        owning_package_name,
        basename,
        explicit_package,
        bug_950723,
    ) in _find_package_name_prefix(
        binary_packages,
        main_binary_package,
        max_periods_in_package_name,
        path,
        allow_fuzzy_matches=allow_fuzzy_matches,
    ):
        owning_package = binary_packages[owning_package_name]
        match_priority = 1 if explicit_package else 0
        fuzzy_match = False
        arch_restriction: Optional[str] = None

        if allow_fuzzy_matches and basename.endswith(".in") and len(basename) > 3:
            basename = basename[:-3]
            fuzzy_match = True

        if "." in basename:
            remaining, last_word = basename.rsplit(".", 1)
            # We cannot use "resolved_architecture" as it would return "all".
            if last_word == owning_package.package_deb_architecture_variable("ARCH"):
                match_priority = 3
                basename = remaining
                arch_restriction = last_word
            elif last_word == owning_package.package_deb_architecture_variable(
                "ARCH_OS"
            ):
                match_priority = 2
                basename = remaining
                arch_restriction = last_word
            elif last_word == "all" and owning_package.is_arch_all:
                # This case does not make sense, but we detect it, so we can report an error
                # via _check_mismatches.
                match_priority = -1
                basename = remaining
                arch_restriction = last_word

        install_as_name, definition, typoed_stem = _find_definition(
            packager_provided_files,
            basename,
            period2stems=period2stems,
            had_arch=bool(arch_restriction),
        )
        if definition is None:
            # This interpretation of the name did not work out; try the next.
            continue

        # Note: bug_950723 implies allow_fuzzy_matches
        if bug_950723 and not definition.bug_950723:
            continue

        if not allow_fuzzy_matches:
            # LSP/Lint checks here but should not use `_check_mismatches` as
            # the hard error disrupts them.
            _check_mismatches(
                path,
                definition,
                owning_package,
                install_as_name,
                arch_restriction is not None,
            )

        expected_path: Optional[str] = None
        if (
            definition.packageless_is_fallback_for_all_packages
            and install_as_name is None
            and not explicit_package
            and arch_restriction is None
        ):
            if typoed_stem is not None:
                parent_path = (
                    path.parent_dir.path + "/" if path.parent_dir is not None else ""
                )
                expected_path = f"{parent_path}{definition.stem}"
                if fuzzy_match and path.name.endswith(".in"):
                    expected_path += ".in"
            yield from (
                PackagerProvidedFile(
                    path=path,
                    package_name=n,
                    installed_as_basename=f"{n}@" if bug_950723 else n,
                    # An ordinary unnamed file must use the same ".UNNAMED."
                    # key as every other unnamed match, otherwise competing
                    # files for the same stem are never compared on priority.
                    # (bug_950723 is only ever set together with an explicit
                    # package prefix, which this branch excludes.)
                    provided_key=".UNNAMED@." if bug_950723 else ".UNNAMED.",
                    definition=definition,
                    match_priority=match_priority,
                    fuzzy_match=fuzzy_match,
                    uses_explicit_package_name=False,
                    name_segment=None,
                    architecture_restriction=None,
                    expected_path=expected_path,
                )
                for n in binary_packages
            )
        else:
            provided_key = (
                install_as_name if install_as_name is not None else ".UNNAMED."
            )
            basename = (
                install_as_name if install_as_name is not None else owning_package_name
            )
            if bug_950723:
                provided_key = f"{provided_key}@"
                basename = f"{basename}@"
                package_prefix = f"{owning_package_name}@"
            else:
                package_prefix = owning_package_name
            if typoed_stem:
                # Reconstruct the file name that was presumably intended.
                # NOTE(review): this overwrites `basename`, so for typo matches
                # `installed_as_basename` below becomes the expected file name
                # rather than the name segment / package name. Left unchanged;
                # confirm whether consumers of typo matches rely on it.
                parent_path = (
                    path.parent_dir.path + "/" if path.parent_dir is not None else ""
                )
                basename = definition.stem
                if install_as_name is not None:
                    basename = f"{install_as_name}.{basename}"
                if explicit_package:
                    basename = f"{package_prefix}.{basename}"
                if arch_restriction is not None and arch_restriction != "all":
                    basename = f"{basename}.{arch_restriction}"
                expected_path = f"{parent_path}{basename}"
                if fuzzy_match and path.name.endswith(".in"):
                    expected_path += ".in"
            yield PackagerProvidedFile(
                path=path,
                package_name=owning_package_name,
                installed_as_basename=basename,
                provided_key=provided_key,
                definition=definition,
                match_priority=match_priority,
                fuzzy_match=fuzzy_match,
                uses_explicit_package_name=bool(explicit_package),
                name_segment=install_as_name,
                architecture_restriction=arch_restriction,
                expected_path=expected_path,
            )
        # Candidates arrive with the longest package prefix first and the
        # main package fallback last, so the first one that resolves wins.
        return
def _period_stem(stems: Iterable[str]) -> Mapping[int, Sequence[str]]:
    """Group stems by the number of periods they contain.

    :param stems: The stems to group.
    :return: Mapping from period count to the stems with that many periods,
      each group in the order the stems were seen.
    """
    grouped: Dict[int, List[str]] = {}
    for stem in stems:
        grouped.setdefault(stem.count("."), []).append(stem)
    return grouped
def detect_all_packager_provided_files(
    packager_provided_files: Mapping[str, PackagerProvidedFileClassSpec],
    debian_dir: VirtualPath,
    binary_packages: Mapping[str, BinaryPackage],
    *,
    allow_fuzzy_matches: bool = False,
    detect_typos: bool = False,
    ignore_paths: Container[str] = frozenset(),
) -> Dict[str, PerPackagePackagerProvidedResult]:
    """Scan a directory for packager provided files and group them by package.

    Directories, entries whose name starts with a period, entries listed in
    ``ignore_paths`` and well-known non-PPF names are skipped. When several
    files for one package share the same stem and provided key, the one with
    the highest match priority is kept (on a tie, the one seen last).

    :param packager_provided_files: Known definitions keyed by stem.
    :param debian_dir: Directory whose direct children are examined.
    :param binary_packages: Known binary packages keyed by name.
    :param allow_fuzzy_matches: Accept relaxed file names. Required when no
      package is flagged as the main package.
    :param detect_typos: Try to match misspelled stems (only effective when
      typo detection is available).
    :param ignore_paths: Paths that must not be considered.
    :return: One result per known package, or an empty dict when no main
      package could be determined.
    """
    main_packages = [
        pkg.name for pkg in binary_packages.values() if pkg.is_main_package
    ]
    if main_packages:
        main_binary_package = main_packages[0]
    else:
        assert allow_fuzzy_matches
        # Fall back to the first package that has a "Package" field.
        main_binary_package = next(
            iter(
                pkg.name for pkg in binary_packages.values() if "Package" in pkg.fields
            ),
            None,
        )
        if main_binary_package is None:
            return {}

    best_matches: Dict[str, Dict[Tuple[str, str], PackagerProvidedFile]] = {
        name: {} for name in binary_packages
    }
    max_periods_in_package_name = max(name.count(".") for name in binary_packages)
    period2stems = (
        _period_stem(packager_provided_files.keys())
        if detect_typos and CAN_DETECT_TYPOS
        else {}
    )

    for entry in debian_dir.iterdir:
        if entry.is_dir or entry.name.startswith("."):
            continue
        if entry.path in ignore_paths or entry.name in _KNOWN_NON_PPFS:
            continue
        candidates = _split_path(
            packager_provided_files,
            binary_packages,
            main_binary_package,
            max_periods_in_package_name,
            entry,
            allow_fuzzy_matches=allow_fuzzy_matches,
            period2stems=period2stems,
        )
        for candidate in candidates:
            matches_for_package = best_matches[candidate.package_name]
            match_key = (candidate.definition.stem, candidate.provided_key)
            current = matches_for_package.get(match_key)
            # Only a strictly higher priority protects an earlier match.
            if (
                current is None
                or current.match_priority <= candidate.match_priority
            ):
                matches_for_package[match_key] = candidate

    result = {}
    for package_name, matches in best_matches.items():
        auto_install_list = []
        reservation_only = collections.defaultdict(list)
        for matched_file in matches.values():
            if matched_file.definition.reservation_only:
                reservation_only[matched_file.definition.stem].append(matched_file)
            else:
                auto_install_list.append(matched_file)
        result[package_name] = PerPackagePackagerProvidedResult(
            auto_install_list,
            reservation_only,
        )

    return result