Coverage for src/debputy/lsp/ref_models/deb822_reference_parse_models.py: 73%
132 statements
« prev ^ index » next coverage.py v7.8.2, created at 2026-02-28 19:22 +0000
« prev ^ index » next coverage.py v7.8.2, created at 2026-02-28 19:22 +0000
1import re
2import sys
3from enum import Enum
4from typing import (
5 NotRequired,
6 Any,
7 Literal,
8)
9from collections.abc import Iterable
11from debputy.lsp.diagnostics import LintSeverity
12from debian._deb822_repro import (
13 LIST_SPACE_SEPARATED_INTERPRETATION,
14 LIST_COMMA_SEPARATED_INTERPRETATION,
15)
16from debian._deb822_repro.parsing import (
17 LIST_UPLOADERS_INTERPRETATION,
18 Deb822ParsedTokenList,
19 Interpretation,
20 _parse_whitespace_list_value,
21 Deb822ParsedValueElement,
22 _parsed_value_render_factory,
23 ListInterpretation,
24 _parse_separator_list_value,
25)
26from debian._deb822_repro.tokens import (
27 Deb822SpaceSeparatorToken,
28 _value_line_tokenizer,
29 _RE_WHITESPACE_SEPARATED_WORD_LIST,
30 Deb822ValueToken,
31 Deb822Token,
32 Deb822SeparatorToken,
33 Deb822WhitespaceToken,
34)
35from debputy.manifest_parser.declarative_parser import ParserGenerator
36from debputy.manifest_parser.tagging_types import DebputyParsedContent
# Single ParserGenerator instance; the module-level *_PARSER constants in this
# file are all produced by calling generate_parser() on it.
_DEB822_REFERENCE_DATA_PARSER_GENERATOR = ParserGenerator()

# FIXME: should go into python3-debian
# Matches one comma together with the (possibly empty) comma-free text on each
# side of it.  Used with finditer() to split a single word on its commas.
_RE_COMMA = re.compile("([^,]*),([^,]*)")

# Permitted values for the "usage_hint" attributes declared further down;
# "rare" is currently the only accepted value.
UsageHint = Literal["rare",]
@_value_line_tokenizer
def comma_or_space_split_tokenizer(v: str) -> Iterable[Deb822Token]:
    """Tokenize one value line whose items are separated by whitespace and/or commas.

    The line is first cut into whitespace-separated words.  A word without a
    comma becomes a single value token; a word containing commas is cut
    further, producing value tokens for the non-empty pieces and a separator
    token for every comma.

    :param v: A single line of the field value; it must not contain a newline.
    :return: The tokens (separators and values) in the order they appear.
    """
    assert "\n" not in v
    for match in _RE_WHITESPACE_SEPARATED_WORD_LIST.finditer(v):
        leading_space, word, trailing_space = match.groups()
        if leading_space:
            yield Deb822SpaceSeparatorToken(sys.intern(leading_space))
        if "," not in word:
            yield Deb822ValueToken(word)
        else:
            for comma_match in _RE_COMMA.finditer(word):
                left_piece, right_piece = comma_match.groups()
                if left_piece:
                    yield Deb822ValueToken(left_piece)
                # The comma is not really whitespace, but representing it with a
                # dedicated non-whitespace token is more trouble than it is worth.
                yield Deb822SpaceSeparatorToken(",")
                if right_piece:
                    yield Deb822ValueToken(right_piece)
        if trailing_space:
            yield Deb822SpaceSeparatorToken(sys.intern(trailing_space))
# FIXME: should go into python3-debian
# List interpretation for values separated by whitespace and/or commas.  It
# tokenizes with comma_or_space_split_tokenizer, reuses the whitespace-list
# value parser and creates "," separator tokens via the lambda.
# NOTE(review): the meaning of each positional argument is defined by
# ListInterpretation in python-debian -- confirm against its signature.
LIST_COMMA_OR_SPACE_SEPARATED_INTERPRETATION = ListInterpretation(
    comma_or_space_split_tokenizer,
    _parse_whitespace_list_value,
    Deb822ParsedValueElement,
    Deb822SpaceSeparatorToken,
    lambda: Deb822SpaceSeparatorToken(","),
    _parsed_value_render_factory,
)
class Deb822SemicolonToken(Deb822SeparatorToken):
    """Separator token emitted by the semicolon-separated list tokenizer.

    It stands for the semicolon found between two value tokens and always
    carries the text ";".
    """

    __slots__ = ()

    def __init__(self) -> None:
        super().__init__(";")
# Pattern driving the semicolon-separated list tokenizer.  Each finditer() match
# exposes five named groups, in this order: space_before_separator, separator,
# space_before_word, word and space_after_word (any of them may be unset/empty).
_RE_SEMICOLON_SEPARATED_WORD_LIST = re.compile(
    r"""
    # This regex is slightly complicated by the fact that it should work with
    # finditer and consume the entire value.
    #
    # To do this, we structure the regex so it always starts on a separator (except
    # for the first iteration, where we permit the absence of a separator)

    (?:                                      # Optional space followed by a mandatory separator unless
                                             # it is the start of the "line" (in which case, we
                                             # allow the semicolon to be omitted)
        ^
      |
        (?:
            (?P<space_before_separator>\s*)  # This space only occurs in practice if the line
                                             # starts with space + separator.
            (?P<separator> ;)
        )
    )

    # From here it is "optional space, maybe a word and then optional space" again.  One reason why
    # all of it is optional is to gracefully cope with trailing separator.
    (?P<space_before_word>\s*)
    # A word must neither start nor end with the separator or whitespace.  The first
    # character class has to exclude ";" (not ","): otherwise an empty element such as
    # the one in "a;;b" is swallowed into the next word (";b") and a value starting
    # with "," is not consumed at all.
    (?P<word> [^;\s] (?: [^;]*[^;\s])? )?    # "Words" can contain spaces for the separated list.
                                             # But surrounding whitespace is ignored
    (?P<space_after_word>\s*)
""",
    re.VERBOSE,
)
@_value_line_tokenizer
def comma_split_tokenizer(v: str) -> Iterable[Deb822Token]:
    """Tokenize one value line of a semicolon-separated list.

    Despite its name, this tokenizer splits on semicolons: it iterates over the
    matches of _RE_SEMICOLON_SEPARATED_WORD_LIST and emits a
    Deb822SemicolonToken for every separator found.

    :param v: A single line of the field value; it must not contain a newline.
    :return: Whitespace, semicolon and value tokens in the order they appear.
    """
    assert "\n" not in v
    for match in _RE_SEMICOLON_SEPARATED_WORD_LIST.finditer(v):
        (
            space_before_semicolon,
            semicolon,
            leading_space,
            word,
            trailing_space,
        ) = match.group(
            "space_before_separator",
            "separator",
            "space_before_word",
            "word",
            "space_after_word",
        )
        if space_before_semicolon:
            yield Deb822WhitespaceToken(sys.intern(space_before_semicolon))
        if semicolon:
            yield Deb822SemicolonToken()
        if leading_space:
            yield Deb822WhitespaceToken(sys.intern(leading_space))
        if word:
            yield Deb822ValueToken(word)
        if trailing_space:
            yield Deb822WhitespaceToken(sys.intern(trailing_space))
# Value parser for semicolon-separated lists: built from the generic separator
# list parser, with Deb822SemicolonToken instances recognised as the separator.
_parse_semicolon_list_value = _parse_separator_list_value(
    lambda x: isinstance(x, Deb822SemicolonToken)
)


# List interpretation for semicolon-separated values.  Note that the tokenizer
# passed here, comma_split_tokenizer, splits on ";" despite its name.
# NOTE(review): the meaning of each positional argument is defined by
# ListInterpretation in python-debian -- confirm against its signature.
LIST_SEMICOLON_SEPARATED_INTERPRETATION = ListInterpretation(
    comma_split_tokenizer,
    _parse_semicolon_list_value,
    Deb822ParsedValueElement,
    Deb822SemicolonToken,
    Deb822SemicolonToken,
    _parsed_value_render_factory,
)
# Declared up front so FieldValueClass.from_key can refer to it; the mapping
# itself is assigned after the enum has been defined.
_KEY2FIELD_VALUE_CLASS: dict[str, "FieldValueClass"]


class FieldValueClass(Enum):
    """Kinds of field values known to the reference data.

    Every member's value is a ``(key, interpretation)`` pair: the string key
    identifying the kind and the list interpretation used for such values
    (``None`` when the member has none).
    """

    SINGLE_VALUE = "single-value", LIST_SPACE_SEPARATED_INTERPRETATION
    SPACE_SEPARATED_LIST = "space-separated-list", LIST_SPACE_SEPARATED_INTERPRETATION
    BUILD_PROFILES_LIST = "build-profiles-list", None  # TODO
    COMMA_SEPARATED_LIST = "comma-separated-list", LIST_COMMA_SEPARATED_INTERPRETATION
    SEMICOLON_SEPARATED_LIST = (
        "semicolon-separated-list",
        LIST_SEMICOLON_SEPARATED_INTERPRETATION,
    )
    COMMA_SEPARATED_EMAIL_LIST = (
        "comma-separated-email-list",
        LIST_UPLOADERS_INTERPRETATION,
    )
    COMMA_OR_SPACE_SEPARATED_LIST = (
        "comma-or-space-separated-list",
        LIST_COMMA_OR_SPACE_SEPARATED_INTERPRETATION,
    )
    FREE_TEXT_FIELD = "free-text", None
    DEP5_FILE_LIST = "dep5-file-list", LIST_SPACE_SEPARATED_INTERPRETATION

    @classmethod
    def from_key(cls, key: str) -> "FieldValueClass":
        """Look up the member whose key is *key*.

        :raises KeyError: If no member uses that key.
        """
        return _KEY2FIELD_VALUE_CLASS[key]

    @property
    def key(self) -> str:
        """The string key of this member (first element of its value)."""
        member_key, _ = self.value
        return member_key

    def interpreter(self) -> Interpretation[Deb822ParsedTokenList[Any, Any]] | None:
        """Return this member's list interpretation (second element of its value), or None."""
        _, interpretation = self.value
        return interpretation
# TODO: Have the parser generator support enums better than this hack.
# Literal type built at import time from the keys of all FieldValueClass
# members; used as the annotation of Deb822Field.field_value_type.
FieldValueType = Literal[tuple(x.key for x in FieldValueClass)]
# Reverse lookup table (key string -> enum member) backing FieldValueClass.from_key.
_KEY2FIELD_VALUE_CLASS = {x.key: x for x in FieldValueClass}
# Documentation payload; every attribute is optional.  Used as the type of the
# "documentation" attribute of GenericVariable, StaticValue and Deb822Field.
class Documentation(DebputyParsedContent):
    synopsis: NotRequired[str]
    long_description: NotRequired[str]
    uris: NotRequired[list[str]]
# A named variable with optional documentation.  Element type of
# GenericVariablesReferenceData.variables and base class of DCtrlSubstvar.
class GenericVariable(DebputyParsedContent):
    name: str
    documentation: NotRequired[Documentation]
# GenericVariable extended with a mandatory "defined_by" and an optional
# "dh_sequence".  Element type of DctrlSubstvarsReferenceData.variables.
class DCtrlSubstvar(GenericVariable):
    defined_by: str
    dh_sequence: NotRequired[str]
# Top-level content type handed to generate_parser() to build
# GENERIC_VARIABLE_REFERENCE_DATA_PARSER: a list of GenericVariable entries.
class GenericVariablesReferenceData(DebputyParsedContent):
    variables: list[GenericVariable]
# Top-level content type handed to generate_parser() to build
# DCTRL_SUBSTVARS_REFERENCE_DATA_PARSER: a list of DCtrlSubstvar entries.
class DctrlSubstvarsReferenceData(DebputyParsedContent):
    variables: list[DCtrlSubstvar]
# An alternative name.  Element type of the "aliases" lists on StaticValue and
# Deb822Field.
class Alias(DebputyParsedContent):
    alias: str
    # Optional flag; when present its only permitted value is True.
    is_completion_suggestion: NotRequired[Literal[True]]
# One entry of Deb822Field.values.  Only "value" is mandatory.
class StaticValue(DebputyParsedContent):
    value: str
    documentation: NotRequired[Documentation]
    sort_key: NotRequired[str]
    # Optional flag; when present its only permitted value is True.
    is_exclusive: NotRequired[Literal[True]]
    usage_hint: NotRequired[UsageHint]
    aliases: NotRequired[list[Alias]]
# One entry of StanzaType.fields.  Only "canonical_name" and "field_value_type"
# are mandatory; all remaining attributes are optional.
class Deb822Field(DebputyParsedContent):
    canonical_name: str
    # One of the FieldValueClass keys (see FieldValueType).
    field_value_type: FieldValueType
    unknown_value_authority: NotRequired[str]
    # Accepts a LintSeverity or the literal "none".
    unknown_value_severity: NotRequired[LintSeverity | Literal["none"]]
    missing_field_authority: NotRequired[str]
    missing_field_severity: NotRequired[LintSeverity]
    default_value: NotRequired[str]
    warn_if_default: NotRequired[bool]
    usage_hint: NotRequired[UsageHint]
    documentation: NotRequired[Documentation]
    values: NotRequired[list[StaticValue]]
    replaced_by: NotRequired[str]
    # The flags below are one-sided: when present, each accepts only the single
    # literal value shown in its annotation (True, or False for
    # supports_substvars).
    is_obsolete_without_replacement: NotRequired[Literal[True]]
    spellcheck_value: NotRequired[Literal[True]]
    supports_substvars: NotRequired[Literal[False]]
    aliases: NotRequired[list[Alias]]
    inheritable_from_other_stanza: NotRequired[Literal[True]]
# One entry of Deb822ReferenceData.stanza_types: a stanza name together with
# the list of fields declared for it.
class StanzaType(DebputyParsedContent):
    stanza_name: str
    fields: list[Deb822Field]
# One entry of ReferenceDefinition.variables; both attributes are mandatory.
class ReferenceVariable(DebputyParsedContent):
    name: str
    fallback: str
# Type of the optional Deb822ReferenceData.definitions attribute; holds an
# optional list of ReferenceVariable entries.
class ReferenceDefinition(DebputyParsedContent):
    variables: NotRequired[list[ReferenceVariable]]
# Top-level content type handed to generate_parser() to build
# DEB822_REFERENCE_DATA_PARSER: optional definitions plus a mandatory list of
# stanza types.
class Deb822ReferenceData(DebputyParsedContent):
    definitions: NotRequired[ReferenceDefinition]
    stanza_types: list[StanzaType]
# Parser for content shaped like Deb822ReferenceData, created at import time.
DEB822_REFERENCE_DATA_PARSER = _DEB822_REFERENCE_DATA_PARSER_GENERATOR.generate_parser(
    Deb822ReferenceData
)


# Parser for content shaped like GenericVariablesReferenceData.
GENERIC_VARIABLE_REFERENCE_DATA_PARSER = (
    _DEB822_REFERENCE_DATA_PARSER_GENERATOR.generate_parser(
        GenericVariablesReferenceData
    )
)


# Parser for content shaped like DctrlSubstvarsReferenceData.
DCTRL_SUBSTVARS_REFERENCE_DATA_PARSER = (
    _DEB822_REFERENCE_DATA_PARSER_GENERATOR.generate_parser(DctrlSubstvarsReferenceData)
)