Coverage for src/debputy/lsp/ref_models/deb822_reference_parse_models.py: 73%
131 statements
« prev ^ index » next coverage.py v7.8.2, created at 2025-09-07 09:27 +0000
« prev ^ index » next coverage.py v7.8.2, created at 2025-09-07 09:27 +0000
1import re
2import sys
3from enum import Enum
4from typing import (
5 NotRequired,
6 List,
7 Any,
8 Optional,
9 Iterable,
10 Literal,
11 Dict,
12 Union,
13)
15from debputy.lsp.diagnostics import LintSeverity
16from debputy.lsp.vendoring._deb822_repro import (
17 LIST_SPACE_SEPARATED_INTERPRETATION,
18 LIST_COMMA_SEPARATED_INTERPRETATION,
19)
20from debputy.lsp.vendoring._deb822_repro.parsing import (
21 LIST_UPLOADERS_INTERPRETATION,
22 Deb822ParsedTokenList,
23 Interpretation,
24 _parse_whitespace_list_value,
25 Deb822ParsedValueElement,
26 _parsed_value_render_factory,
27 ListInterpretation,
28 _parse_separator_list_value,
29)
30from debputy.lsp.vendoring._deb822_repro.tokens import (
31 Deb822SpaceSeparatorToken,
32 _value_line_tokenizer,
33 _RE_WHITESPACE_SEPARATED_WORD_LIST,
34 Deb822ValueToken,
35 Deb822Token,
36 Deb822SeparatorToken,
37 Deb822WhitespaceToken,
38)
39from debputy.manifest_parser.declarative_parser import ParserGenerator
40from debputy.manifest_parser.tagging_types import DebputyParsedContent
# One generator instance is shared by every reference-data parser created in this module.
_DEB822_REFERENCE_DATA_PARSER_GENERATOR = ParserGenerator()

# FIXME: should go into python3-debian
# Captures the comma-free text on either side of a single comma.
_RE_COMMA = re.compile(r"([^,]*),([^,]*)")

# "rare" is the only usage hint defined so far.
UsageHint = Literal["rare"]
@_value_line_tokenizer
def comma_or_space_split_tokenizer(v: str) -> Iterable[Deb822Token]:
    """Tokenize one value line whose items are separated by whitespace and/or commas.

    Whitespace around a word and every comma inside a word are emitted as
    ``Deb822SpaceSeparatorToken``; the remaining text is emitted as
    ``Deb822ValueToken``.

    :param v: A single line of the field value; it must not contain a newline.
    :return: The tokens in the order they occur in ``v``.
    """
    assert "\n" not in v
    for list_match in _RE_WHITESPACE_SEPARATED_WORD_LIST.finditer(v):
        leading_space, word, trailing_space = list_match.groups()
        if leading_space:
            yield Deb822SpaceSeparatorToken(sys.intern(leading_space))
        if "," not in word:
            yield Deb822ValueToken(word)
        else:
            # Each _RE_COMMA match covers exactly one comma plus the comma-free
            # text directly around it.
            for comma_match in _RE_COMMA.finditer(word):
                text_before, text_after = comma_match.groups()
                if text_before:
                    yield Deb822ValueToken(text_before)
                # ... not quite a whitespace, but it is too much pain to make it a non-whitespace token.
                yield Deb822SpaceSeparatorToken(",")
                if text_after:
                    yield Deb822ValueToken(text_after)
        if trailing_space:
            yield Deb822SpaceSeparatorToken(sys.intern(trailing_space))
# FIXME: should go into python3-debian
# List interpretation built on the comma-or-space tokenizer. Commas are
# represented as space-separator tokens, so the whitespace list parser is reused
# and newly created separators are "," tokens.
LIST_COMMA_OR_SPACE_SEPARATED_INTERPRETATION = ListInterpretation(
    comma_or_space_split_tokenizer,
    _parse_whitespace_list_value,
    Deb822ParsedValueElement,
    Deb822SpaceSeparatorToken,
    lambda: Deb822SpaceSeparatorToken(","),
    _parsed_value_render_factory,
)
class Deb822SemicolonToken(Deb822SeparatorToken):
    """Used by the semicolon-separated list value parsers to denote a semicolon between two value tokens."""

    # No per-instance attributes beyond those of the base token class.
    __slots__ = ()

    def __init__(self) -> None:
        # The token text is always the literal semicolon.
        super().__init__(";")
# One match per list element when driven with ``finditer``; the matches tile the
# whole value. Groups, in order: space_before_separator, separator,
# space_before_word, word, space_after_word (each is None/empty when absent).
_RE_SEMICOLON_SEPARATED_WORD_LIST = re.compile(
    r"""
    # This regex is slightly complicated by the fact that it should work with
    # finditer and consume the entire value.
    #
    # To do this, we structure the regex so it always starts on a separator (except
    # for the first iteration, where we permit the absence of a separator)

    (?:                                      # Optional space followed by a mandatory separator unless
                                             # it is the start of the "line" (in which case, we
                                             # allow the separator to be omitted)
        ^
        |
        (?:
            (?P<space_before_separator>\s*)  # This space only occurs in practice if the line
                                             # starts with space + separator.
            (?P<separator> ;)
        )
    )

    # From here it is "optional space, maybe a word and then optional space" again. One reason why
    # all of it is optional is to gracefully cope with trailing separator.
    (?P<space_before_word>\s*)
    # A word must neither start nor end with the separator or whitespace. The first
    # character class has to exclude the semicolon: otherwise an empty element
    # ("a;;b") swallows the next separator into the word, and a word that starts
    # with a comma is not matched at all.
    (?P<word> [^;\s] (?: [^;]*[^;\s])? )?    # "Words" can contain spaces for the separated list.
                                             # But surrounding whitespace is ignored
    (?P<space_after_word>\s*)
""",
    re.VERBOSE,
)
@_value_line_tokenizer
def comma_split_tokenizer(v: str) -> Iterable[Deb822Token]:
    """Tokenize one value line of a semicolon-separated list.

    Despite the name, the separator handled here is the semicolon: each
    separator becomes a ``Deb822SemicolonToken``, whitespace runs become
    ``Deb822WhitespaceToken`` and words become ``Deb822ValueToken``.

    :param v: A single line of the field value; it must not contain a newline.
    :return: The tokens in the order they occur in ``v``.
    """
    assert "\n" not in v
    for element in _RE_SEMICOLON_SEPARATED_WORD_LIST.finditer(v):
        space_before_separator = element.group("space_before_separator")
        if space_before_separator:
            yield Deb822WhitespaceToken(sys.intern(space_before_separator))

        if element.group("separator"):
            yield Deb822SemicolonToken()

        space_before_word = element.group("space_before_word")
        if space_before_word:
            yield Deb822WhitespaceToken(sys.intern(space_before_word))

        word = element.group("word")
        if word:
            yield Deb822ValueToken(word)

        space_after_word = element.group("space_after_word")
        if space_after_word:
            yield Deb822WhitespaceToken(sys.intern(space_after_word))
# List parser that treats Deb822SemicolonToken instances as the element separator.
_parse_semicolon_list_value = _parse_separator_list_value(
    lambda token: isinstance(token, Deb822SemicolonToken)
)


# Interpretation for semicolon-separated list values; Deb822SemicolonToken is
# passed both as the separator token class and as the factory for new separators.
LIST_SEMICOLON_SEPARATED_INTERPRETATION = ListInterpretation(
    comma_split_tokenizer,
    _parse_semicolon_list_value,
    Deb822ParsedValueElement,
    Deb822SemicolonToken,
    Deb822SemicolonToken,
    _parsed_value_render_factory,
)

# Declared here, populated once FieldValueClass has been defined.
_KEY2FIELD_VALUE_CLASS: Dict[str, "FieldValueClass"]
class FieldValueClass(Enum):
    """Value classes that a deb822 field can be declared with.

    Every member's value is a ``(key, interpretation)`` pair. The interpretation
    is ``None`` for members that have no list interpretation attached.
    """

    SINGLE_VALUE = ("single-value", LIST_SPACE_SEPARATED_INTERPRETATION)
    SPACE_SEPARATED_LIST = ("space-separated-list", LIST_SPACE_SEPARATED_INTERPRETATION)
    BUILD_PROFILES_LIST = ("build-profiles-list", None)  # TODO
    COMMA_SEPARATED_LIST = ("comma-separated-list", LIST_COMMA_SEPARATED_INTERPRETATION)
    SEMICOLON_SEPARATED_LIST = (
        "semicolon-separated-list",
        LIST_SEMICOLON_SEPARATED_INTERPRETATION,
    )
    COMMA_SEPARATED_EMAIL_LIST = (
        "comma-separated-email-list",
        LIST_UPLOADERS_INTERPRETATION,
    )
    COMMA_OR_SPACE_SEPARATED_LIST = (
        "comma-or-space-separated-list",
        LIST_COMMA_OR_SPACE_SEPARATED_INTERPRETATION,
    )
    FREE_TEXT_FIELD = ("free-text", None)
    DEP5_FILE_LIST = ("dep5-file-list", LIST_SPACE_SEPARATED_INTERPRETATION)

    @classmethod
    def from_key(cls, key: str) -> "FieldValueClass":
        """Return the member registered under ``key``.

        :raises KeyError: If no member uses ``key``.
        """
        return _KEY2FIELD_VALUE_CLASS[key]

    @property
    def key(self) -> str:
        """The string key of this member (first element of its value)."""
        member_key, _ = self.value
        return member_key

    def interpreter(self) -> Optional[Interpretation[Deb822ParsedTokenList[Any, Any]]]:
        """Return the interpretation paired with this member, or ``None`` if it has none."""
        _, interpretation = self.value
        return interpretation
# TODO: Have the parser generator support enums better than this hack.
# Literal type made up of every FieldValueClass key.
FieldValueType = Literal[tuple(member.key for member in FieldValueClass)]
# Reverse lookup table used by FieldValueClass.from_key.
_KEY2FIELD_VALUE_CLASS = {member.key: member for member in FieldValueClass}
# Documentation block that other reference-data entries embed; every key is optional.
class Documentation(DebputyParsedContent):
    synopsis: NotRequired[str]
    long_description: NotRequired[str]
    uris: NotRequired[List[str]]
# A named variable with optional documentation.
class GenericVariable(DebputyParsedContent):
    name: str
    documentation: NotRequired[Documentation]
# GenericVariable extended with a mandatory `defined_by` and an optional `dh_sequence`.
class DCtrlSubstvar(GenericVariable):
    defined_by: str
    dh_sequence: NotRequired[str]
# Top-level document shape: a list of GenericVariable entries.
class GenericVariablesReferenceData(DebputyParsedContent):
    variables: List[GenericVariable]
# Top-level document shape: a list of DCtrlSubstvar entries.
class DctrlSubstvarsReferenceData(DebputyParsedContent):
    variables: List[DCtrlSubstvar]
# An alternative name. `is_completion_suggestion` may only be given as True;
# leave it out otherwise.
class Alias(DebputyParsedContent):
    alias: str
    is_completion_suggestion: NotRequired[Literal[True]]
# One entry of a field's `values` list. Only `value` is mandatory.
class StaticValue(DebputyParsedContent):
    value: str
    documentation: NotRequired[Documentation]
    sort_key: NotRequired[str]
    # May only be given as True; leave it out otherwise.
    is_exclusive: NotRequired[Literal[True]]
    usage_hint: NotRequired[UsageHint]
    aliases: NotRequired[List[Alias]]
# Description of a single field. Only `canonical_name` and `field_value_type`
# are mandatory.
class Deb822Field(DebputyParsedContent):
    canonical_name: str
    # One of the FieldValueClass keys.
    field_value_type: FieldValueType
    unknown_value_authority: NotRequired[str]
    # Either a lint severity or the literal "none".
    unknown_value_severity: NotRequired[Union[LintSeverity, Literal["none"]]]
    missing_field_authority: NotRequired[str]
    missing_field_severity: NotRequired[LintSeverity]
    default_value: NotRequired[str]
    warn_if_default: NotRequired[bool]
    usage_hint: NotRequired[UsageHint]
    documentation: NotRequired[Documentation]
    values: NotRequired[List[StaticValue]]
    replaced_by: NotRequired[str]
    # The flags below each accept a single literal value (True or False as
    # annotated); the other state is expressed by leaving the key out.
    is_obsolete_without_replacement: NotRequired[Literal[True]]
    spellcheck_value: NotRequired[Literal[True]]
    supports_substvars: NotRequired[Literal[False]]
    aliases: NotRequired[List[Alias]]
    inheritable_from_other_stanza: NotRequired[Literal[True]]
# A named stanza type together with the fields declared for it.
class StanzaType(DebputyParsedContent):
    stanza_name: str
    fields: List[Deb822Field]
# A variable definition: a name plus its fallback string.
class ReferenceVariable(DebputyParsedContent):
    name: str
    fallback: str
# Container for the optional list of ReferenceVariable entries.
class ReferenceDefinition(DebputyParsedContent):
    variables: NotRequired[List[ReferenceVariable]]
# Top-level document shape: optional `definitions` plus the mandatory list of
# stanza types.
class Deb822ReferenceData(DebputyParsedContent):
    definitions: NotRequired[ReferenceDefinition]
    stanza_types: List[StanzaType]
# The three parsers below are all generated by the same module-level
# ParserGenerator instance, one per top-level document shape.

# Parser for Deb822ReferenceData documents.
DEB822_REFERENCE_DATA_PARSER = _DEB822_REFERENCE_DATA_PARSER_GENERATOR.generate_parser(
    Deb822ReferenceData
)


# Parser for GenericVariablesReferenceData documents.
GENERIC_VARIABLE_REFERENCE_DATA_PARSER = (
    _DEB822_REFERENCE_DATA_PARSER_GENERATOR.generate_parser(
        GenericVariablesReferenceData
    )
)


# Parser for DctrlSubstvarsReferenceData documents.
DCTRL_SUBSTVARS_REFERENCE_DATA_PARSER = (
    _DEB822_REFERENCE_DATA_PARSER_GENERATOR.generate_parser(DctrlSubstvarsReferenceData)
)