Coverage for src/debputy/lsp/ref_models/deb822_reference_parse_models.py: 73%

132 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2026-02-28 19:22 +0000

1import re 

2import sys 

3from enum import Enum 

4from typing import ( 

5 NotRequired, 

6 Any, 

7 Literal, 

8) 

9from collections.abc import Iterable 

10 

11from debputy.lsp.diagnostics import LintSeverity 

12from debian._deb822_repro import ( 

13 LIST_SPACE_SEPARATED_INTERPRETATION, 

14 LIST_COMMA_SEPARATED_INTERPRETATION, 

15) 

16from debian._deb822_repro.parsing import ( 

17 LIST_UPLOADERS_INTERPRETATION, 

18 Deb822ParsedTokenList, 

19 Interpretation, 

20 _parse_whitespace_list_value, 

21 Deb822ParsedValueElement, 

22 _parsed_value_render_factory, 

23 ListInterpretation, 

24 _parse_separator_list_value, 

25) 

26from debian._deb822_repro.tokens import ( 

27 Deb822SpaceSeparatorToken, 

28 _value_line_tokenizer, 

29 _RE_WHITESPACE_SEPARATED_WORD_LIST, 

30 Deb822ValueToken, 

31 Deb822Token, 

32 Deb822SeparatorToken, 

33 Deb822WhitespaceToken, 

34) 

35from debputy.manifest_parser.declarative_parser import ParserGenerator 

36from debputy.manifest_parser.tagging_types import DebputyParsedContent 

37 

# Shared parser generator; the module-level `*_PARSER` constants are created by
# calling `generate_parser` on this instance.
_DEB822_REFERENCE_DATA_PARSER_GENERATOR = ParserGenerator()

# FIXME: should go into python3-debian
# Matches a single comma plus the (possibly empty) comma-free text on each side:
# group 1 is the text before the comma, group 2 the text after it.
_RE_COMMA = re.compile("([^,]*),([^,]*)")

# Closed set of accepted usage-hint strings; "rare" is currently the only member.
UsageHint = Literal["rare",]

44 

45 

@_value_line_tokenizer
def comma_or_space_split_tokenizer(v: str) -> Iterable[Deb822Token]:
    """Tokenize one value line whose items are separated by whitespace and/or commas.

    Each match of `_RE_WHITESPACE_SEPARATED_WORD_LIST` provides three groups
    (whitespace before, the word, whitespace after).  Whitespace groups are
    emitted as `Deb822SpaceSeparatorToken`.  A word without a comma becomes a
    single `Deb822ValueToken`; a word containing commas is split further via
    `_RE_COMMA`, and each comma is emitted as `Deb822SpaceSeparatorToken(",")`.

    :param v: A single line of the field value; it must not contain a newline.
    :return: The tokens in the order they occur in `v`.
    """
    assert "\n" not in v
    for found in _RE_WHITESPACE_SEPARATED_WORD_LIST.finditer(v):
        leading_ws, word, trailing_ws = found.groups()
        if leading_ws:
            yield Deb822SpaceSeparatorToken(sys.intern(leading_ws))
        if "," not in word:
            yield Deb822ValueToken(word)
        else:
            for comma_match in _RE_COMMA.finditer(word):
                left_part, right_part = comma_match.groups()
                if left_part:
                    yield Deb822ValueToken(left_part)
                # ... not quite a whitespace, but it is too much pain to make it a non-whitespace token.
                yield Deb822SpaceSeparatorToken(",")
                if right_part:
                    yield Deb822ValueToken(right_part)
        if trailing_ws:
            yield Deb822SpaceSeparatorToken(sys.intern(trailing_ws))

66 

67 

# FIXME: should go into python3-debian
# List interpretation for values whose items may be separated by whitespace,
# commas, or a mix of both.  `comma_or_space_split_tokenizer` emits commas as
# `Deb822SpaceSeparatorToken(",")`, which is presumably why the whitespace list
# parser and the space separator token class can be reused here.
# NOTE(review): the meaning of each positional argument is defined by
# `ListInterpretation` in python-debian - confirm there before reordering.
LIST_COMMA_OR_SPACE_SEPARATED_INTERPRETATION = ListInterpretation(
    comma_or_space_split_tokenizer,
    _parse_whitespace_list_value,
    Deb822ParsedValueElement,
    Deb822SpaceSeparatorToken,
    # Factory producing a fresh "," separator token on each call.
    lambda: Deb822SpaceSeparatorToken(","),
    _parsed_value_render_factory,
)

77 

78 

class Deb822SemicolonToken(Deb822SeparatorToken):
    """Separator token representing the semicolon between two value tokens.

    Emitted by the semicolon-separated list tokenizer; the token text is
    always the literal ";".
    """

    __slots__ = ()

    def __init__(self) -> None:
        # The text is fixed, so the constructor takes no arguments.
        super().__init__(";")

87 

88 

# Tokenizing pattern for semicolon-separated values, meant to be driven with
# `finditer`.  Only ";" separates items; commas are ordinary word characters.
# An empty item (e.g. the middle of "a;;b") yields a match whose `word` group
# is None.
_RE_SEMICOLON_SEPARATED_WORD_LIST = re.compile(
    r"""
    # This regex is slightly complicated by the fact that it should work with
    # finditer and consume the entire value.
    #
    # To do this, we structure the regex so it always starts on a separator (except
    # for the first iteration, where we permit the absence of a separator)

    (?:                                      # Optional space followed by a mandatory separator unless
                                             # it is the start of the "line" (in which case, we
                                             # allow the semicolon to be omitted)
        ^
        |
        (?:
            (?P<space_before_separator>\s*)  # This space only occurs in practice if the line
                                             # starts with space + separator.
            (?P<separator> ;)
        )
    )

    # From here it is "optional space, maybe a word and then optional space" again.  One reason why
    # all of it is optional is to gracefully cope with trailing separator.
    (?P<space_before_word>\s*)
    (?P<word> [^;\s] (?: [^;]*[^;\s])? )?    # "Words" can contain spaces and commas, but never
                                             # a semicolon.  Surrounding whitespace is ignored.
    (?P<space_after_word>\s*)
""",
    re.VERBOSE,
)

118 

119 

@_value_line_tokenizer
def comma_split_tokenizer(v: str) -> Iterable[Deb822Token]:
    """Tokenize one value line of a semicolon-separated list.

    Despite its name, this tokenizer splits on semicolons: it iterates over the
    matches of `_RE_SEMICOLON_SEPARATED_WORD_LIST` and emits, per match and in
    this order, whichever of the following parts are non-empty: whitespace
    before the separator, the separator (as `Deb822SemicolonToken`), whitespace
    before the word, the word (as `Deb822ValueToken`), and whitespace after
    the word.

    :param v: A single line of the field value; it must not contain a newline.
    :return: The tokens in the order they occur in `v`.
    """
    assert "\n" not in v
    for found in _RE_SEMICOLON_SEPARATED_WORD_LIST.finditer(v):
        ws_before_separator = found.group("space_before_separator")
        if ws_before_separator:
            yield Deb822WhitespaceToken(sys.intern(ws_before_separator))

        if found.group("separator"):
            yield Deb822SemicolonToken()

        ws_before_word = found.group("space_before_word")
        if ws_before_word:
            yield Deb822WhitespaceToken(sys.intern(ws_before_word))

        word = found.group("word")
        if word:
            yield Deb822ValueToken(word)

        ws_after_word = found.group("space_after_word")
        if ws_after_word:
            yield Deb822WhitespaceToken(sys.intern(ws_after_word))

138 

139 

# List-value parser built from `_parse_separator_list_value` with a predicate
# that recognizes `Deb822SemicolonToken` instances.
# NOTE(review): presumably the predicate marks which tokens act as item
# separators - confirm against python-debian.
_parse_semicolon_list_value = _parse_separator_list_value(
    lambda x: isinstance(x, Deb822SemicolonToken)
)


# List interpretation for semicolon-separated values.  `Deb822SemicolonToken`
# is passed twice: once as a class and once as a zero-argument callable.
# NOTE(review): the meaning of each positional argument is defined by
# `ListInterpretation` in python-debian - confirm there before reordering.
LIST_SEMICOLON_SEPARATED_INTERPRETATION = ListInterpretation(
    comma_split_tokenizer,  # Despite the name, this tokenizer splits on semicolons.
    _parse_semicolon_list_value,
    Deb822ParsedValueElement,
    Deb822SemicolonToken,
    Deb822SemicolonToken,
    _parsed_value_render_factory,
)

153 

# Annotation-only forward declaration: the mapping is assigned after
# `FieldValueClass` is defined and is read by `FieldValueClass.from_key`.
_KEY2FIELD_VALUE_CLASS: dict[str, "FieldValueClass"]

155 

156 

class FieldValueClass(Enum):
    """Kinds of field values, each paired with an optional list interpretation.

    Every member value is a `(key, interpretation)` tuple.  The key is the
    string name of the value class; the interpretation is `None` for members
    that have no list interpretation.
    """

    SINGLE_VALUE = (
        "single-value",
        LIST_SPACE_SEPARATED_INTERPRETATION,
    )
    SPACE_SEPARATED_LIST = (
        "space-separated-list",
        LIST_SPACE_SEPARATED_INTERPRETATION,
    )
    BUILD_PROFILES_LIST = (
        "build-profiles-list",
        None,  # TODO
    )
    COMMA_SEPARATED_LIST = (
        "comma-separated-list",
        LIST_COMMA_SEPARATED_INTERPRETATION,
    )
    SEMICOLON_SEPARATED_LIST = (
        "semicolon-separated-list",
        LIST_SEMICOLON_SEPARATED_INTERPRETATION,
    )
    COMMA_SEPARATED_EMAIL_LIST = (
        "comma-separated-email-list",
        LIST_UPLOADERS_INTERPRETATION,
    )
    COMMA_OR_SPACE_SEPARATED_LIST = (
        "comma-or-space-separated-list",
        LIST_COMMA_OR_SPACE_SEPARATED_INTERPRETATION,
    )
    FREE_TEXT_FIELD = (
        "free-text",
        None,
    )
    DEP5_FILE_LIST = (
        "dep5-file-list",
        LIST_SPACE_SEPARATED_INTERPRETATION,
    )

    @classmethod
    def from_key(cls, key: str) -> "FieldValueClass":
        """Return the member registered under `key`.

        :raises KeyError: If `key` is not present in the module-level lookup table.
        """
        return _KEY2FIELD_VALUE_CLASS[key]

    @property
    def key(self) -> str:
        """The string key, i.e. the first element of the member's value tuple."""
        member_key, _interpretation = self.value
        return member_key

    def interpreter(self) -> Interpretation[Deb822ParsedTokenList[Any, Any]] | None:
        """Return the member's interpretation (second tuple element), or `None`."""
        _member_key, interpretation = self.value
        return interpretation

187 

188 

# TODO: Have the parser generator support enums better than this hack.
# `Literal` type listing the key of every `FieldValueClass` member, in
# definition order; it is built at import time from the enum itself.
FieldValueType = Literal[tuple(x.key for x in FieldValueClass)]
# Lookup table from key string to enum member; backs `FieldValueClass.from_key`.
_KEY2FIELD_VALUE_CLASS = {x.key: x for x in FieldValueClass}

192 

193 

class Documentation(DebputyParsedContent):
    """Documentation attached to a reference-data entry; every key is optional."""

    # Text fields; NOTE(review): presumably a short summary and a longer body - confirm.
    synopsis: NotRequired[str]
    long_description: NotRequired[str]
    # List of URI strings.
    uris: NotRequired[list[str]]

198 

199 

class GenericVariable(DebputyParsedContent):
    """A named variable with optional documentation."""

    # Required variable name.
    name: str
    # Optional documentation for the variable.
    documentation: NotRequired[Documentation]

203 

204 

class DCtrlSubstvar(GenericVariable):
    """A `GenericVariable` with a required `defined_by` and an optional `dh_sequence`.

    NOTE(review): presumably describes a substitution variable for
    debian/control - confirm against the reference data.
    """

    defined_by: str
    dh_sequence: NotRequired[str]

208 

209 

class GenericVariablesReferenceData(DebputyParsedContent):
    """Top-level document holding a list of `GenericVariable` entries."""

    variables: list[GenericVariable]

212 

213 

class DctrlSubstvarsReferenceData(DebputyParsedContent):
    """Top-level document holding a list of `DCtrlSubstvar` entries."""

    variables: list[DCtrlSubstvar]

216 

217 

class Alias(DebputyParsedContent):
    """An alternative name, used by the `aliases` lists of fields and static values."""

    # The alias string itself.
    alias: str
    # Optional flag; when present it can only be True.
    is_completion_suggestion: NotRequired[Literal[True]]

221 

222 

class StaticValue(DebputyParsedContent):
    """One entry in the `values` list of a `Deb822Field`; only `value` is required."""

    value: str
    documentation: NotRequired[Documentation]
    sort_key: NotRequired[str]
    # Optional flag; when present it can only be True.
    is_exclusive: NotRequired[Literal[True]]
    # Restricted to the strings allowed by `UsageHint`.
    usage_hint: NotRequired[UsageHint]
    aliases: NotRequired[list[Alias]]

230 

231 

class Deb822Field(DebputyParsedContent):
    """Description of a single field; only `canonical_name` and `field_value_type` are required."""

    canonical_name: str
    # Must be one of the `FieldValueClass` keys (see `FieldValueType`).
    field_value_type: FieldValueType
    unknown_value_authority: NotRequired[str]
    # Either a `LintSeverity` or the literal "none".
    unknown_value_severity: NotRequired[LintSeverity | Literal["none"]]
    missing_field_authority: NotRequired[str]
    missing_field_severity: NotRequired[LintSeverity]
    default_value: NotRequired[str]
    warn_if_default: NotRequired[bool]
    # Restricted to the strings allowed by `UsageHint`.
    usage_hint: NotRequired[UsageHint]
    documentation: NotRequired[Documentation]
    # Optional list of `StaticValue` entries for this field.
    values: NotRequired[list[StaticValue]]
    replaced_by: NotRequired[str]
    # The following two flags can only be True when present.
    is_obsolete_without_replacement: NotRequired[Literal[True]]
    spellcheck_value: NotRequired[Literal[True]]
    # This flag can only be False when present.
    supports_substvars: NotRequired[Literal[False]]
    aliases: NotRequired[list[Alias]]
    # This flag can only be True when present.
    inheritable_from_other_stanza: NotRequired[Literal[True]]

250 

251 

class StanzaType(DebputyParsedContent):
    """A named stanza together with the list of `Deb822Field` entries declared for it."""

    stanza_name: str
    fields: list[Deb822Field]

255 

256 

class ReferenceVariable(DebputyParsedContent):
    """A variable entry consisting of a required `name` and a required `fallback` string."""

    name: str
    fallback: str

260 

261 

class ReferenceDefinition(DebputyParsedContent):
    """Container for the optional list of `ReferenceVariable` entries."""

    variables: NotRequired[list[ReferenceVariable]]

264 

265 

class Deb822ReferenceData(DebputyParsedContent):
    """Top-level document: optional `definitions` plus the required list of stanza types."""

    definitions: NotRequired[ReferenceDefinition]
    stanza_types: list[StanzaType]

269 

270 

# Parser generated for `Deb822ReferenceData` documents.
DEB822_REFERENCE_DATA_PARSER = _DEB822_REFERENCE_DATA_PARSER_GENERATOR.generate_parser(
    Deb822ReferenceData
)


# Parser generated for `GenericVariablesReferenceData` documents.
GENERIC_VARIABLE_REFERENCE_DATA_PARSER = (
    _DEB822_REFERENCE_DATA_PARSER_GENERATOR.generate_parser(
        GenericVariablesReferenceData
    )
)


# Parser generated for `DctrlSubstvarsReferenceData` documents.
DCTRL_SUBSTVARS_REFERENCE_DATA_PARSER = (
    _DEB822_REFERENCE_DATA_PARSER_GENERATOR.generate_parser(DctrlSubstvarsReferenceData)
)