Coverage for src/debputy/lsp/languages/lsp_debian_changelog.py: 94%
191 statements
« prev ^ index » next coverage.py v7.8.2, created at 2025-09-07 09:27 +0000
« prev ^ index » next coverage.py v7.8.2, created at 2025-09-07 09:27 +0000
1import re
2from email.utils import parsedate_to_datetime
3from typing import Optional, Tuple
5from debputy.linting.lint_util import LintState
6from debputy.lsp.lsp_features import (
7 lsp_standard_handler,
8 SecondaryLanguage,
9 LanguageDispatchRule,
10 lint_diagnostics,
11)
12from debputy.lsp.quickfixes import (
13 propose_correct_text_quick_fix,
14)
15from debputy.lsp.spellchecking import spellcheck_line
16from debputy.lsprotocol.types import (
17 TEXT_DOCUMENT_WILL_SAVE_WAIT_UNTIL,
18 TEXT_DOCUMENT_CODE_ACTION,
19)
20from debputy.util import PKGVERSION_REGEX
22try:
23 from debputy.lsp.vendoring._deb822_repro.locatable import (
24 Position as TEPosition,
25 Range as TERange,
26 )
28 from pygls.server import LanguageServer
29 from pygls.workspace import TextDocument
30 from debputy.lsp.debputy_ls import DebputyLanguageServer
31except ImportError:
32 pass
# Same as Lintian
_MAXIMUM_WIDTH: int = 82
# Captures the source name (group 1) and the parenthesised version (group 2)
# at the start of an entry header line.
_HEADER_LINE = re.compile(r"^(\S+)\s*[(]([^)]+)[)]")  # TODO: Add rest
_DISPATCH_RULE = LanguageDispatchRule.new_rule(
    "debian/changelog",
    None,
    ("debian/changelog", "debian/changelog.dch"),
    [
        # emacs's name
        SecondaryLanguage("debian-changelog"),
        # vim's name
        SecondaryLanguage("debchangelog"),
        SecondaryLanguage("dch"),
    ],
)


# Indexed by `datetime.weekday()` (0 is Monday, 6 is Sunday).
_WEEKDAYS_BY_IDX = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
# Set variant of the above for membership tests.
_KNOWN_WEEK_DAYS = frozenset(_WEEKDAYS_BY_IDX)


lsp_standard_handler(_DISPATCH_RULE, TEXT_DOCUMENT_CODE_ACTION)
lsp_standard_handler(_DISPATCH_RULE, TEXT_DOCUMENT_WILL_SAVE_WAIT_UNTIL)

# Reference passed along with diagnostics about the changelog format.
DPM_DCH_SECTION = "Policy 4.4"
def _check_footer_date(
    lint_state: LintState,
    line: str,
    line_no: int,
    line_len: int,
    start_date_idx: int,
) -> None:
    """Validate the date at the end of a changelog sign-off line.

    At most one diagnostic is emitted; the checks stop at the first problem found:
    the text is too short to be a date, the day name is not a known three letter
    abbreviation, the day name is not followed by ", ", the remainder cannot be
    parsed as a date, or the day name does not match the parsed date.

    :param lint_state: Receiver of the diagnostics (via `emit_diagnostic`).
    :param line: The sign-off line being checked.
    :param line_no: Line number used for the diagnostic ranges.
    :param line_len: Length of `line`.
    :param start_date_idx: Index in `line` where the date is expected to start.
    """
    # 3 characters for the day name (Mon), then a comma plus a space followed by the
    # actual date. The 6 characters limit is a gross under estimation of the real
    # size.
    if line_len < start_date_idx + 6:
        text_range = _single_line_subrange(line_no, start_date_idx, line_len)
        lint_state.emit_diagnostic(
            text_range,
            "Expected a date in RFC822 format (Tue, 12 Mar 2024 12:34:56 +0000)",
            "error",
            DPM_DCH_SECTION,
        )
        return

    day_name_end = start_date_idx + 3
    date_start = start_date_idx + 5

    day_name_range = _single_line_subrange(line_no, start_date_idx, day_name_end)
    day_name = line[start_date_idx:day_name_end]
    if day_name not in _KNOWN_WEEK_DAYS:
        lint_state.emit_diagnostic(
            day_name_range,
            "Expected a three letter date here using US English format (Mon, Tue, ..., Sun)",
            "error",
            DPM_DCH_SECTION,
        )
        return

    sep = line[day_name_end:date_start]
    if sep != ", ":
        text_range = _single_line_subrange(line_no, day_name_end, date_start)
        lint_state.emit_diagnostic(
            text_range,
            f'Improper formatting of date. Expected ", " here, not "{sep}"',
            "error",
            DPM_DCH_SECTION,
        )
        return

    date_str = line[date_start:]
    try:
        # FIXME: this parser is too forgiving (it ignores trailing garbage)
        date = parsedate_to_datetime(date_str)
    except (TypeError, ValueError) as e:
        # Python before 3.10 raises TypeError rather than ValueError when the
        # text cannot be parsed at all; treat both as "not a valid date".
        error_range = _single_line_subrange(line_no, date_start, line_len)
        lint_state.emit_diagnostic(
            error_range,
            f"Unable to parse the date as a valid RFC822 date: {e}",
            "error",
            "debputy",
        )
        return

    weekday_idx = date.weekday()
    expected_week_day = _WEEKDAYS_BY_IDX[weekday_idx]
    if expected_week_day != day_name:
        # Appending "day" to the abbreviation only works for Mon, Fri and Sun
        # ("Tue" would become "Tueday"), so the full names are spelled out.
        full_day_names = (
            "Monday",
            "Tuesday",
            "Wednesday",
            "Thursday",
            "Friday",
            "Saturday",
            "Sunday",
        )
        lint_state.emit_diagnostic(
            day_name_range,
            f"The date was a {full_day_names[weekday_idx]}",
            "warning",
            "debputy",
            quickfixes=[propose_correct_text_quick_fix(expected_week_day)],
        )
def _check_email(
    lint_state: LintState,
    line: str,
    line_no: int,
    line_len: int,
) -> Tuple[int, int]:
    """Locate the `<email>` part of a sign-off line and report a missing bracket.

    The search starts at index 4, i.e. after the " -- " prefix of the line.

    :param lint_state: Receiver of the diagnostic (via `emit_diagnostic`).
    :param line: The sign-off line being checked.
    :param line_no: Line number used for the diagnostic range.
    :param line_len: Length of `line`.
    :return: The indexes of the opening "<" and the closing ">". When a bracket
      is missing, a diagnostic has been emitted and the corresponding index is
      a best-effort guess; the start index remains -1 when no guess was made.
    """
    open_idx = _offset_of(line, "<", 4)
    close_idx = _offset_of(line, ">", max(open_idx, 4))

    if 3 < open_idx < close_idx:
        # Both brackets found in the right order; nothing to report.
        return open_idx, close_idx

    if open_idx >= 3:
        # There is a "<" but no ">" after it.
        msg = 'Missing closing ">" to finish email address before the sign off date'
        diag_start = open_idx
        diag_end = _offset_of(
            line,
            " ",
            open_idx,
            offset_if_missing=line_len,
        )
    elif close_idx > -1:
        # There is a ">" but no "<". Guess that the address starts after the
        # last space before the ">".
        msg = 'Missing opening "<" to start the email address after the name'
        diag_start = 1 + _roffset_of(
            line,
            " ",
            4,
            close_idx - 1,
            offset_if_missing=4,
        )
        if diag_start > 4:
            open_idx = diag_start
        # For consistency, we always include the trailing `>`.
        diag_end = close_idx + 1
    else:
        # Neither bracket is present; flag the first word after the prefix.
        msg = 'Missing opening "<" to start the email address after the name'
        diag_start = 4
        diag_end = _offset_of(
            line,
            " ",
            diag_start,
            offset_if_missing=line_len,
        )

    lint_state.emit_diagnostic(
        _single_line_subrange(line_no, diag_start, diag_end),
        msg,
        "error",
        DPM_DCH_SECTION,
    )
    if close_idx < 0:
        close_idx = diag_end
    return open_idx, close_idx
def _check_footer_line(
    lint_state: LintState,
    line: str,
    line_no: int,
) -> None:
    """Check the sign-off line (" -- Name <email>  date") of a changelog entry.

    The line is checked left to right: the " -- " prefix, the email brackets,
    the name before the email, the gap between the email and the date and
    finally the date itself. Problems are reported through `lint_state`;
    checking stops early when a later part can no longer be located.

    :param lint_state: Receiver of the diagnostics (via `emit_diagnostic`).
    :param line: The line to check. It must start with " --".
    :param line_no: Line number used for the diagnostic ranges.
    """
    if line.rstrip() == " --":
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, 0, 3),
            'Missing "Name <email@example.com>"',
            "error",
            DPM_DCH_SECTION,
            # TODO: Add quick fix to insert all the relevant data.
        )
        return
    line_len = len(line)
    if not line.startswith(" -- "):
        # Pre-condition for this function being called.
        assert line.startswith(" --") and line_len > 3
        # The diagnostic covers the whole line, so the proposed text has to be
        # the whole corrected line. Proposing only the prefix would make the
        # quick fix discard the name, email and date.
        corrected_line = " -- " + line[3:].lstrip()
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, 0, line_len),
            'Start of sign-off line should be " -- ".',
            "error",
            DPM_DCH_SECTION,
            quickfixes=[propose_correct_text_quick_fix(corrected_line)],
        )
        return

    email_start_idx, email_end_idx = _check_email(
        lint_state,
        line,
        line_no,
        line_len,
    )

    start_of_email_identified = email_start_idx > 3
    end_of_email_identified = 4 < email_end_idx < line_len
    if not start_of_email_identified:
        return

    # Email valid, name might be missing
    name_start = 4
    name_end = email_start_idx
    name = line[name_start:name_end]
    if not name or name.isspace():
        # The range must always be at least one character width. There is no good direction
        # to expand it in. Picked left because we know there is a space character,
        if name_end - name_start < 1:
            name_start -= 1
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, name_start, name_end),
            "Missing name before email",
            "error",
            DPM_DCH_SECTION,
        )
    elif not name.endswith(" ") or len(name.strip()) != len(name) - 1:
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, name_start, name_end),
            "Non-standard spacing around the name",
            "warning",
            DPM_DCH_SECTION,
            quickfixes=[
                propose_correct_text_quick_fix(
                    name.strip() + " ",
                    proposed_title="Fix spacing",
                )
            ],
        )
    if not end_of_email_identified:
        # If we are unsure where the email is, we currently do not have the wherewithal to identify
        # where the date is. Technically, there are cases where we could identify the date and work
        # back from there. Not written because I thought it is too much effort for the value at the
        # time I put in this comment.
        #
        # Note this will already have triggered a diagnostic.
        return
    post_email = line[email_end_idx + 1 :]
    if not post_email or post_email.isspace():
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, 0, line_len),
            "Missing sign off date",
            "error",
            DPM_DCH_SECTION,
        )
        return

    # The email and the date must be separated by exactly two spaces. Spelled
    # as a repetition so the width cannot be lost to whitespace normalisation.
    expected_gap = " " * 2
    start_date_idx = email_end_idx + (len(post_email) - len(post_email.lstrip())) + 1
    space_len = start_date_idx - email_end_idx - 1
    if line[email_end_idx + 1 : start_date_idx] != expected_gap:
        correction = expected_gap
        diag_start = email_end_idx + 1
        diag_end = start_date_idx
        if not space_len:
            # If there is no spaces, then we mark the closing `>` and the following character instead if possible.
            #
            # Note the choice here of including both boundary characters is because some editors refuse to include
            # the lone `>` in a codeAction range with the user explicitly doing a selection range, so our range
            # detection will miss it. By including the following character, we ensure there is always a two
            # character range to place the cursor in the middle of and the editors tend to respect that as a valid
            # range (also, single character ranges are harder for the user to see).
            diag_start = email_end_idx
            diag_end = min(start_date_idx + 1, line_len)
            end_char = line[start_date_idx] if start_date_idx < line_len else ""
            correction = f">{expected_gap}{end_char}"

        lint_state.emit_diagnostic(
            _single_line_subrange(
                line_no,
                diag_start,
                diag_end,
            ),
            "Must be exactly two spaces between email and sign off date",
            "error",
            DPM_DCH_SECTION,
            quickfixes=[
                propose_correct_text_quick_fix(
                    correction,
                    proposed_title="Fix spacing between email and date",
                ),
            ],
        )
    _check_footer_date(lint_state, line, line_no, line_len, start_date_idx)
318def _offset_of(
319 text: str,
320 ch: str,
321 /,
322 start: Optional[int] = None,
323 end: Optional[int] = None,
324 *,
325 offset_if_missing: int = -1,
326) -> int:
327 try:
328 return text.index(ch, start, end)
329 except ValueError:
330 return offset_if_missing
333def _roffset_of(
334 text: str,
335 ch: str,
336 /,
337 start: Optional[int] = None,
338 end: Optional[int] = None,
339 *,
340 offset_if_missing: int = -1,
341) -> int:
342 try:
343 return text.rindex(ch, start, end)
344 except ValueError:
345 return offset_if_missing
def _single_line_subrange(
    line_no: int,
    character_start_pos: int,
    character_end_pos: int,
) -> "TERange":
    """Build a range whose start and end positions are on the same line.

    :param line_no: The line both positions are on.
    :param character_start_pos: Character offset of the start position.
    :param character_end_pos: Character offset of the end position.
    :return: The range between the two positions.
    """
    range_start = TEPosition(line_no, character_start_pos)
    range_end = TEPosition(line_no, character_end_pos)
    return TERange(range_start, range_end)
def _check_header_line(
    lint_state: LintState,
    line: str,
    line_no: int,
    entry_no: int,
) -> None:
    """Check the source name and the version in a changelog entry header.

    For the first entry, the source name is compared with the "Source" field
    of `lint_state.source_package` (when a source package is available). The
    version is checked against `PKGVERSION_REGEX` and, when it is valid, for
    the "dsfg" typo.

    :param lint_state: Provides the source package and receives diagnostics.
    :param line: The header line to check.
    :param line_no: Line number used for the diagnostic ranges.
    :param entry_no: Number of the entry, counted from 1.
    """
    header = _HEADER_LINE.search(line)
    if header is None:
        # Syntax error: TODO flag later
        return
    source_name, source_version = header.groups()

    source_pkg = lint_state.source_package
    if entry_no == 1 and source_pkg is not None:
        expected_name = source_pkg.fields.get("Source")
        if expected_name != source_name:
            name_from, name_to = header.span(1)
            if expected_name is None:
                msg = (
                    "The first entry must use the same source name as debian/control."
                    ' The d/control file is missing the "Source" field in its first stanza'
                )
            else:
                msg = (
                    "The first entry must use the same source name as debian/control."
                    f' Changelog uses: "{source_name}" while d/control uses: "{expected_name}"'
                )
            lint_state.emit_diagnostic(
                _single_line_subrange(line_no, name_from, name_to),
                msg,
                "error",
                "dpkg",  # man:deb-src-control(5) / #1089794
            )

    version_from, version_to = header.span(2)

    if PKGVERSION_REGEX.fullmatch(source_version):
        # Valid version; only look for the common misspelling of "dfsg".
        typo_at = source_version.find("dsfg")
        if typo_at >= 0:
            lint_state.emit_diagnostic(
                _single_line_subrange(
                    line_no,
                    version_from + typo_at,
                    version_from + typo_at + 4,
                ),
                'Typo of "dfsg" (Debian Free Software Guidelines)',
                "pedantic",
                "debputy",
                quickfixes=[propose_correct_text_quick_fix("dfsg")],
            )
        return

    valid_part = PKGVERSION_REGEX.search(source_version)
    if valid_part is None:
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, version_from, version_to),
            f'Cannot parse "{source_version}" as a Debian version.',
            "error",
            "Policy 5.6.12",
        )
        return

    # Part of the text matches a version; flag whatever surrounds that part.
    valid_from, valid_to = valid_part.span(0)
    invalid_ranges = []
    if valid_from > 0:
        invalid_ranges.append(
            _single_line_subrange(
                line_no,
                version_from,
                version_from + valid_from,
            )
        )
    if valid_to < len(source_version):
        invalid_ranges.append(
            _single_line_subrange(
                line_no,
                version_from + valid_to,
                version_to,
            )
        )
    for invalid_range in invalid_ranges:
        lint_state.emit_diagnostic(
            invalid_range,
            "This part cannot be parsed as a valid Debian version",
            "error",
            "Policy 5.6.12",
        )
@lint_diagnostics(_DISPATCH_RULE)
async def _lint_debian_changelog(lint_state: LintState) -> None:
    """Lint the first entries of a debian/changelog file.

    Header lines and sign-off lines are passed to their dedicated checkers.
    The remaining (change description) lines are checked for excessive width
    and spell checked. Linting stops when the header of the third entry is
    reached.

    :param lint_state: Provides the lines to check and receives diagnostics.
    """
    lines = lint_state.lines
    entry_no = 0
    entry_limit = 2
    # Budget for spell checking; reduced by the value `spellcheck_line` returns.
    max_words = 1000
    max_line_length = _MAXIMUM_WIDTH
    # Change description lines are indented by two spaces (the `[ Name ]` check
    # below relies on that). Spelled as a repetition so the width cannot be
    # lost to whitespace normalisation.
    body_indent = " " * 2
    for line_no, line in enumerate(lines):
        orig_line = line
        line = line.rstrip()
        if not line:
            continue
        if line.startswith(" --"):
            _check_footer_line(lint_state, line, line_no)
            continue
        if not line.startswith(body_indent):
            if not line[0].isspace():
                entry_no += 1
                # Figure out the right cut which may not be as simple as just the
                # top two.
                if entry_no > entry_limit:
                    break
                _check_header_line(
                    lint_state,
                    line,
                    line_no,
                    entry_no,
                )
            continue
        # Length without the line terminator. Stripping it explicitly (rather
        # than subtracting one) is also correct for a final line without a
        # newline and for "\r\n" terminated lines.
        orig_line_len = len(orig_line.rstrip("\r\n"))
        if orig_line_len > max_line_length:
            exceeded_line_range = _single_line_subrange(
                line_no,
                max_line_length,
                orig_line_len,
            )
            lint_state.emit_diagnostic(
                exceeded_line_range,
                f"Line exceeds {max_line_length} characters",
                "pedantic",
                "debputy",
            )
        if len(line) > 3 and line[2] == "[" and line[-1] == "]":
            # Do not spell check [ X ] as X is usually a name
            continue
        if max_words > 0:
            new_diagnostics = spellcheck_line(lint_state, line_no, line)
            max_words -= new_diagnostics