Coverage for src/debputy/lsp/ref_models/deb822_reference_parse_models.py: 73%

131 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2025-09-07 09:27 +0000

1import re 

2import sys 

3from enum import Enum 

4from typing import ( 

5 NotRequired, 

6 List, 

7 Any, 

8 Optional, 

9 Iterable, 

10 Literal, 

11 Dict, 

12 Union, 

13) 

14 

15from debputy.lsp.diagnostics import LintSeverity 

16from debputy.lsp.vendoring._deb822_repro import ( 

17 LIST_SPACE_SEPARATED_INTERPRETATION, 

18 LIST_COMMA_SEPARATED_INTERPRETATION, 

19) 

20from debputy.lsp.vendoring._deb822_repro.parsing import ( 

21 LIST_UPLOADERS_INTERPRETATION, 

22 Deb822ParsedTokenList, 

23 Interpretation, 

24 _parse_whitespace_list_value, 

25 Deb822ParsedValueElement, 

26 _parsed_value_render_factory, 

27 ListInterpretation, 

28 _parse_separator_list_value, 

29) 

30from debputy.lsp.vendoring._deb822_repro.tokens import ( 

31 Deb822SpaceSeparatorToken, 

32 _value_line_tokenizer, 

33 _RE_WHITESPACE_SEPARATED_WORD_LIST, 

34 Deb822ValueToken, 

35 Deb822Token, 

36 Deb822SeparatorToken, 

37 Deb822WhitespaceToken, 

38) 

39from debputy.manifest_parser.declarative_parser import ParserGenerator 

40from debputy.manifest_parser.tagging_types import DebputyParsedContent 

41 

# Single generator instance from which all parsers in this module are produced.
_DEB822_REFERENCE_DATA_PARSER_GENERATOR = ParserGenerator()

# FIXME: should go into python3-debian
# Matches one comma together with the (possibly empty) comma-free text on
# either side of it; intended for use with `finditer` on a single word.
_RE_COMMA = re.compile(r"([^,]*),([^,]*)")

# The accepted usage hints; "rare" is currently the only one.
UsageHint = Literal["rare"]

48 

49 

@_value_line_tokenizer
def comma_or_space_split_tokenizer(v: str) -> Iterable[Deb822Token]:
    """Tokenize a value whose items are separated by whitespace and/or commas.

    Whitespace around a word is emitted as (interned) `Deb822SpaceSeparatorToken`s,
    every comma inside a word as `Deb822SpaceSeparatorToken(",")`, and the
    remaining text as `Deb822ValueToken`s.

    :param v: The text to tokenize. It must not contain a newline.
    :return: The tokens in the order they occur in `v`.
    """
    assert "\n" not in v
    for chunk in _RE_WHITESPACE_SEPARATED_WORD_LIST.finditer(v):
        leading_space, word, trailing_space = chunk.groups()
        if leading_space:
            yield Deb822SpaceSeparatorToken(sys.intern(leading_space))
        if "," not in word:
            yield Deb822ValueToken(word)
        else:
            # Each match covers exactly one comma plus the text around it.
            for comma_match in _RE_COMMA.finditer(word):
                left, right = comma_match.groups()
                if left:
                    yield Deb822ValueToken(left)
                # ... not quite a whitespace, but it is too much pain to make it a non-whitespace token.
                yield Deb822SpaceSeparatorToken(",")
                if right:
                    yield Deb822ValueToken(right)
        if trailing_space:
            yield Deb822SpaceSeparatorToken(sys.intern(trailing_space))

70 

71 

72# FIXME: should go into python3-debian 

def _comma_separator_token() -> Deb822SpaceSeparatorToken:
    """Zero-argument factory handed to `ListInterpretation`; builds a "," separator token."""
    return Deb822SpaceSeparatorToken(",")


# List interpretation for values split by `comma_or_space_split_tokenizer`.
LIST_COMMA_OR_SPACE_SEPARATED_INTERPRETATION = ListInterpretation(
    comma_or_space_split_tokenizer,
    _parse_whitespace_list_value,
    Deb822ParsedValueElement,
    Deb822SpaceSeparatorToken,
    _comma_separator_token,
    _parsed_value_render_factory,
)

81 

82 

class Deb822SemicolonToken(Deb822SeparatorToken):
    """Separator token for the semicolon between two values of a semicolon-separated list."""

    __slots__ = ()

    def __init__(self) -> None:
        # The token text is always the literal ";", so no argument is accepted.
        super().__init__(";")

91 

92 

# Splits a single-line value into semicolon-separated "words" when driven with
# `finditer`. Each match exposes five groups, in this order:
# space_before_separator, separator, space_before_word, word, space_after_word.
#
# A word must neither start nor end with a semicolon or whitespace. (The first
# character class used to exclude "," instead of ";", which let a leading ";"
# be swallowed into the word and silently dropped words starting with ",".)
_RE_SEMICOLON_SEPARATED_WORD_LIST = re.compile(
    r"""
    # This regex is slightly complicated by the fact that it should work with
    # finditer and consume the entire value.
    #
    # To do this, we structure the regex so it always starts on a separator (except
    # for the first iteration, where we permit the absence of a separator)

    (?:                                      # Optional space followed by a mandatory separator unless
                                             # it is the start of the "line" (in which case, we
                                             # allow the semicolon to be omitted)
        ^
        |
        (?:
            (?P<space_before_separator>\s*)  # This space only occurs in practice if the line
                                             # starts with space + separator.
            (?P<separator> ;)
        )
    )

    # From here it is "optional space, maybe a word and then optional space" again. One reason why
    # all of it is optional is to gracefully cope with trailing separator.
    (?P<space_before_word>\s*)
    (?P<word> [^;\s] (?: [^;]*[^;\s])? )?    # "Words" can contain spaces and commas for the
                                             # semicolon-separated list, but surrounding
                                             # whitespace is ignored
    (?P<space_after_word>\s*)
""",
    re.VERBOSE,
)

122 

123 

@_value_line_tokenizer
def comma_split_tokenizer(v: str) -> Iterable[Deb822Token]:
    """Tokenize a semicolon-separated value (despite the name, the separator is ";").

    For every match of `_RE_SEMICOLON_SEPARATED_WORD_LIST` the non-empty parts
    are emitted in order: whitespace as (interned) `Deb822WhitespaceToken`s, the
    separator as a `Deb822SemicolonToken` and the word as a `Deb822ValueToken`.

    :param v: The text to tokenize. It must not contain a newline.
    :return: The tokens in the order they occur in `v`.
    """
    assert "\n" not in v
    for hit in _RE_SEMICOLON_SEPARATED_WORD_LIST.finditer(v):
        pre_separator_space = hit.group("space_before_separator")
        if pre_separator_space:
            yield Deb822WhitespaceToken(sys.intern(pre_separator_space))

        if hit.group("separator"):
            yield Deb822SemicolonToken()

        pre_word_space = hit.group("space_before_word")
        if pre_word_space:
            yield Deb822WhitespaceToken(sys.intern(pre_word_space))

        word = hit.group("word")
        if word:
            yield Deb822ValueToken(word)

        post_word_space = hit.group("space_after_word")
        if post_word_space:
            yield Deb822WhitespaceToken(sys.intern(post_word_space))

142 

143 

def _is_semicolon_token(token: Deb822Token) -> bool:
    """Predicate handed to `_parse_separator_list_value` to recognise the separator."""
    return isinstance(token, Deb822SemicolonToken)


_parse_semicolon_list_value = _parse_separator_list_value(_is_semicolon_token)


# List interpretation for values split by `comma_split_tokenizer`, i.e. on ";".
LIST_SEMICOLON_SEPARATED_INTERPRETATION = ListInterpretation(
    comma_split_tokenizer,
    _parse_semicolon_list_value,
    Deb822ParsedValueElement,
    Deb822SemicolonToken,
    Deb822SemicolonToken,
    _parsed_value_render_factory,
)

157 

# Lookup table from a member's string key to the member itself. Only declared
# here; it is filled in after the class body, once the members exist.
_KEY2FIELD_VALUE_CLASS: Dict[str, "FieldValueClass"]


class FieldValueClass(Enum):
    """The kinds of field values.

    Every member's value is a `(key, interpretation)` pair, where `key` is the
    string form of the member and `interpretation` is either the list
    interpretation used for such values or `None`.
    """

    SINGLE_VALUE = ("single-value", LIST_SPACE_SEPARATED_INTERPRETATION)
    SPACE_SEPARATED_LIST = ("space-separated-list", LIST_SPACE_SEPARATED_INTERPRETATION)
    BUILD_PROFILES_LIST = ("build-profiles-list", None)  # TODO
    COMMA_SEPARATED_LIST = ("comma-separated-list", LIST_COMMA_SEPARATED_INTERPRETATION)
    SEMICOLON_SEPARATED_LIST = (
        "semicolon-separated-list",
        LIST_SEMICOLON_SEPARATED_INTERPRETATION,
    )
    COMMA_SEPARATED_EMAIL_LIST = (
        "comma-separated-email-list",
        LIST_UPLOADERS_INTERPRETATION,
    )
    COMMA_OR_SPACE_SEPARATED_LIST = (
        "comma-or-space-separated-list",
        LIST_COMMA_OR_SPACE_SEPARATED_INTERPRETATION,
    )
    FREE_TEXT_FIELD = ("free-text", None)
    DEP5_FILE_LIST = ("dep5-file-list", LIST_SPACE_SEPARATED_INTERPRETATION)

    @classmethod
    def from_key(cls, key: str) -> "FieldValueClass":
        """Return the member whose key is `key`.

        :raises KeyError: If no member has that key.
        """
        return _KEY2FIELD_VALUE_CLASS[key]

    @property
    def key(self) -> str:
        """The string key of this member (first element of its value)."""
        member_key, _ = self.value
        return member_key

    def interpreter(self) -> Optional[Interpretation[Deb822ParsedTokenList[Any, Any]]]:
        """Return the interpretation of this member (second element of its value), or `None`."""
        _, interpretation = self.value
        return interpretation


# TODO: Have the parser generator support enums better than this hack.
FieldValueType = Literal[tuple(member.key for member in FieldValueClass)]
_KEY2FIELD_VALUE_CLASS = {member.key: member for member in FieldValueClass}

196 

197 

# Documentation attached to a reference-data entry. Every key is optional.
class Documentation(DebputyParsedContent):
    synopsis: NotRequired[str]
    long_description: NotRequired[str]
    uris: NotRequired[List[str]]

202 

203 

# A variable identified by its (required) name, optionally documented.
class GenericVariable(DebputyParsedContent):
    name: str
    documentation: NotRequired[Documentation]

207 

208 

# A `GenericVariable` extended with a required `defined_by` and an optional
# `dh_sequence`. NOTE(review): presumably describes a debian/control substvar
# and which tool/sequence provides it - confirm against the reference data.
class DCtrlSubstvar(GenericVariable):
    defined_by: str
    dh_sequence: NotRequired[str]

212 

213 

# Root content type of `GENERIC_VARIABLE_REFERENCE_DATA_PARSER`: a required
# list of `GenericVariable` entries.
class GenericVariablesReferenceData(DebputyParsedContent):
    variables: List[GenericVariable]

216 

217 

# Root content type of `DCTRL_SUBSTVARS_REFERENCE_DATA_PARSER`: a required
# list of `DCtrlSubstvar` entries.
class DctrlSubstvarsReferenceData(DebputyParsedContent):
    variables: List[DCtrlSubstvar]

220 

221 

# An alternative name. `is_completion_suggestion` may be omitted; when given,
# the only accepted value is `True`.
class Alias(DebputyParsedContent):
    alias: str
    is_completion_suggestion: NotRequired[Literal[True]]

225 

226 

# One predefined value of a field. Only `value` is required. `is_exclusive`
# can only be given as `True`, and `usage_hint` must be one of the `UsageHint`
# literals.
class StaticValue(DebputyParsedContent):
    value: str
    documentation: NotRequired[Documentation]
    sort_key: NotRequired[str]
    is_exclusive: NotRequired[Literal[True]]
    usage_hint: NotRequired[UsageHint]
    aliases: NotRequired[List[Alias]]

234 

235 

# Description of one field. `canonical_name` and `field_value_type` are
# required; everything else is optional.
#
# * `field_value_type` must be one of the `FieldValueClass` keys.
# * `unknown_value_severity` accepts a `LintSeverity` or the literal "none".
# * Flags typed `Literal[True]` can only be given as `True`, while
#   `supports_substvars` can only be given as `False`.
class Deb822Field(DebputyParsedContent):
    canonical_name: str
    field_value_type: FieldValueType
    unknown_value_authority: NotRequired[str]
    unknown_value_severity: NotRequired[Union[LintSeverity, Literal["none"]]]
    missing_field_authority: NotRequired[str]
    missing_field_severity: NotRequired[LintSeverity]
    default_value: NotRequired[str]
    warn_if_default: NotRequired[bool]
    usage_hint: NotRequired[UsageHint]
    documentation: NotRequired[Documentation]
    values: NotRequired[List[StaticValue]]
    replaced_by: NotRequired[str]
    is_obsolete_without_replacement: NotRequired[Literal[True]]
    spellcheck_value: NotRequired[Literal[True]]
    supports_substvars: NotRequired[Literal[False]]
    aliases: NotRequired[List[Alias]]
    inheritable_from_other_stanza: NotRequired[Literal[True]]

254 

255 

# A named stanza type together with the fields declared for it; both keys are
# required.
class StanzaType(DebputyParsedContent):
    stanza_name: str
    fields: List[Deb822Field]

259 

260 

# A variable declared in the `definitions` part of the reference data; both the
# name and its fallback are required.
# NOTE(review): presumably `fallback` is used when no other value is supplied
# for the variable - confirm against the consumer of this data.
class ReferenceVariable(DebputyParsedContent):
    name: str
    fallback: str

264 

265 

# The `definitions` part of the reference data; currently it can only hold an
# optional list of `ReferenceVariable` entries.
class ReferenceDefinition(DebputyParsedContent):
    variables: NotRequired[List[ReferenceVariable]]

268 

269 

# Root content type of `DEB822_REFERENCE_DATA_PARSER`: optional `definitions`
# plus the required list of stanza types.
class Deb822ReferenceData(DebputyParsedContent):
    definitions: NotRequired[ReferenceDefinition]
    stanza_types: List[StanzaType]

273 

274 

# One parser per root content type, all produced by the module's shared
# parser generator (in the same order as before).
_generate_parser = _DEB822_REFERENCE_DATA_PARSER_GENERATOR.generate_parser

DEB822_REFERENCE_DATA_PARSER = _generate_parser(Deb822ReferenceData)

GENERIC_VARIABLE_REFERENCE_DATA_PARSER = _generate_parser(GenericVariablesReferenceData)

DCTRL_SUBSTVARS_REFERENCE_DATA_PARSER = _generate_parser(DctrlSubstvarsReferenceData)

289)