Coverage for src/debputy/lsp/languages/lsp_debian_changelog.py: 94%
215 statements
« prev ^ index » next coverage.py v7.8.2, created at 2026-02-14 10:41 +0000
« prev ^ index » next coverage.py v7.8.2, created at 2026-02-14 10:41 +0000
1import re
2from email.utils import parsedate_to_datetime
3from typing import TYPE_CHECKING, Sequence, Optional
5from debputy.linting.lint_util import LintState, te_range_to_lsp
6from debputy.lsp.lsp_features import (
7 lsp_standard_handler,
8 SecondaryLanguage,
9 LanguageDispatchRule,
10 lint_diagnostics,
11 lsp_document_link,
12)
13from debputy.lsp.quickfixes import (
14 propose_correct_text_quick_fix,
15)
16from debputy.lsp.spellchecking import spellcheck_line
17from debputy.util import PKGVERSION_REGEX
19try:
20 from debputy.lsp.vendoring._deb822_repro.locatable import (
21 Position as TEPosition,
22 Range as TERange,
23 )
25 from pygls.server import LanguageServer
26 from pygls.workspace import TextDocument
27 from debputy.lsp.debputy_ls import DebputyLanguageServer
28except ImportError:
29 pass
32if TYPE_CHECKING:
33 import lsprotocol.types as types
34else:
35 import debputy.lsprotocol.types as types
# Widest line accepted before a "pedantic" hint is emitted.  Same as Lintian.
_MAXIMUM_WIDTH: int = 82

# Captures "<source> (<version>" at the start of an entry header line.
_HEADER_LINE = re.compile(r"^(\S+)\s*[(]([^)]+)[)]")  # TODO: Add rest

# Ties every handler registered in this module to debian/changelog documents.
_DISPATCH_RULE = LanguageDispatchRule.new_rule(
    "debian/changelog",
    None,
    ("debian/changelog", "debian/changelog.dch"),
    [
        # emacs's name
        SecondaryLanguage("debian-changelog"),
        # vim's name
        SecondaryLanguage("debchangelog"),
        SecondaryLanguage("dch"),
    ],
)

# Abbreviated English day names; the list index is used with the value of
# `datetime.weekday()` when the sign-off date is validated.
_WEEKDAYS_BY_IDX = "Mon Tue Wed Thu Fri Sat Sun".split()
_KNOWN_WEEK_DAYS = frozenset(_WEEKDAYS_BY_IDX)

# Locates a "Closes: #1234, #5678" / "See: bug #1234" style reference list.
_BUG_LINKS_FINDER_REGEX = re.compile(
    r"""
    (?:closes:|see:?) \s* (?:bug \s*)? \#?\s?\d+ (?:, \s*(?:bug)? \#?\s?\d+)*
    """,
    re.IGNORECASE | re.VERBOSE,
)
# Picks out each bug number (group 1) inside a reference list found by the
# pattern above.
_INDIVIDUAL_BUGS_REGEX = re.compile(
    r"(?:bug\s?)?(?:#\s?)?(\d+)",
    re.IGNORECASE,
)

# Features that need no changelog specific code.
lsp_standard_handler(_DISPATCH_RULE, types.TEXT_DOCUMENT_CODE_ACTION)
lsp_standard_handler(_DISPATCH_RULE, types.TEXT_DOCUMENT_WILL_SAVE_WAIT_UNTIL)

# Authority reference attached to most sign-off line diagnostics.
DPM_DCH_SECTION = "Policy 4.4"
# Full English weekday names, indexed by `datetime.weekday()` (Monday == 0).
# Used for the human readable part of the weekday mismatch diagnostic.
_WEEKDAY_FULL_NAMES = (
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
    "Sunday",
)


def _check_footer_date(
    lint_state: LintState,
    line: str,
    line_no: int,
    line_len: int,
    start_date_idx: int,
) -> None:
    """Validate the date at the end of a changelog sign-off line.

    At most one diagnostic is emitted; the checks stop at the first problem.
    In order they cover: the remaining text being too short to be a date, an
    unknown three letter day name, a separator other than ", " after the day
    name, a date that cannot be parsed, and finally a day name that does not
    match the weekday of the parsed date (which comes with a quick fix).

    :param lint_state: Receiver of the diagnostics.
    :param line: The sign-off line being checked.
    :param line_no: Line number used for the diagnostic ranges.
    :param line_len: Length of `line`.
    :param start_date_idx: Offset in `line` where the date is expected to start.
    """
    # 3 characters for the day name (Mon), then a comma plus a space followed by the
    # actual date.  The 6 characters limit is a gross under estimation of the real
    # size.
    if line_len < start_date_idx + 6:
        text_range = _single_line_subrange(line_no, start_date_idx, line_len)
        lint_state.emit_diagnostic(
            text_range,
            "Expected a date in RFC822 format (Tue, 12 Mar 2024 12:34:56 +0000)",
            "error",
            DPM_DCH_SECTION,
        )
        return
    day_name_range = _single_line_subrange(line_no, start_date_idx, start_date_idx + 3)
    day_name = line[start_date_idx : start_date_idx + 3]
    if day_name not in _KNOWN_WEEK_DAYS:
        lint_state.emit_diagnostic(
            day_name_range,
            "Expected a three letter date here using US English format (Mon, Tue, ..., Sun)",
            "error",
            DPM_DCH_SECTION,
        )
        return

    date_str = line[start_date_idx + 5 :]

    sep = line[start_date_idx + 3 : start_date_idx + 5]
    if sep != ", ":
        text_range = _single_line_subrange(
            line_no,
            start_date_idx + 3,
            start_date_idx + 5,
        )
        lint_state.emit_diagnostic(
            text_range,
            f'Improper formatting of date. Expected ", " here, not "{sep}"',
            "error",
            DPM_DCH_SECTION,
        )
        return

    try:
        # FIXME: this parser is too forgiving (it ignores trailing garbage)
        date = parsedate_to_datetime(date_str)
    except ValueError as e:
        error_range = _single_line_subrange(line_no, start_date_idx + 5, line_len)
        lint_state.emit_diagnostic(
            error_range,
            # Formatting the exception itself (rather than `e.args[0]`) gives the
            # same text for a single-argument error and cannot raise IndexError.
            f"Unable to parse the date as a valid RFC822 date: {e}",
            "error",
            "debputy",
        )
        return
    weekday_idx = date.weekday()
    expected_week_day = _WEEKDAYS_BY_IDX[weekday_idx]
    if expected_week_day != day_name:
        # Spell the weekday out in full.  Appending "day" to the abbreviation only
        # works for Mon/Fri/Sun and produced "Tueday", "Wedday", "Thuday", "Satday".
        lint_state.emit_diagnostic(
            day_name_range,
            f"The date was a {_WEEKDAY_FULL_NAMES[weekday_idx]}",
            "warning",
            "debputy",
            quickfixes=[propose_correct_text_quick_fix(expected_week_day)],
        )
def _check_email(
    lint_state: LintState,
    line: str,
    line_no: int,
    line_len: int,
) -> tuple[int, int]:
    """Find the `<email>` part of a sign-off line and report a missing bracket.

    The search starts at offset 4, that is, after the " -- " prefix.  When both
    brackets are found in the right order nothing is reported.  Otherwise one
    "error" diagnostic is emitted and the returned offsets are a best effort
    guess.

    :param lint_state: Receiver of the diagnostics.
    :param line: The sign-off line being checked.
    :param line_no: Line number used for the diagnostic range.
    :param line_len: Length of `line`.
    :return: `(start, end)` offsets of the "<" and ">".  `start` remains -1 when
      no plausible start could be determined.
    """
    lt_idx = _offset_of(line, "<", 4)
    gt_idx = _offset_of(line, ">", max(lt_idx, 4))

    if 3 < lt_idx < gt_idx:
        # Both brackets present and in the right order; nothing to report.
        return lt_idx, gt_idx

    missing_open_msg = 'Missing opening "<" to start the email address after the name'
    if lt_idx >= 3:
        problem = 'Missing closing ">" to finish email address before the sign off date'
        mark_from = lt_idx
        mark_to = _offset_of(
            line,
            " ",
            lt_idx,
            offset_if_missing=line_len,
        )
    elif gt_idx > -1:
        problem = missing_open_msg
        last_space = _roffset_of(
            line,
            " ",
            4,
            gt_idx - 1,
            offset_if_missing=4,
        )
        mark_from = last_space + 1
        if mark_from > 4:
            # Treat the word ending in ">" as the email from here on.
            lt_idx = mark_from
        # For consistency, we always include the trailing `>`.
        mark_to = gt_idx + 1
    else:
        problem = missing_open_msg
        mark_from = 4
        mark_to = _offset_of(
            line,
            " ",
            mark_from,
            offset_if_missing=line_len,
        )

    lint_state.emit_diagnostic(
        _single_line_subrange(line_no, mark_from, mark_to),
        problem,
        "error",
        DPM_DCH_SECTION,
    )
    if gt_idx < 0:
        # No ">" anywhere: fall back to where the flagged text ends.
        gt_idx = mark_to
    return lt_idx, gt_idx
def _check_footer_line(
    lint_state: LintState,
    line: str,
    line_no: int,
) -> None:
    """Lint one changelog sign-off line (" -- Name <email>  date").

    The line is checked left to right: the " -- " prefix, the email address
    (via `_check_email`), the name in front of the email, the spacing between
    the email and the date, and finally the date (via `_check_footer_date`).
    Checking stops early whenever a problem makes it impossible to locate the
    following part.

    :param lint_state: Receiver of the diagnostics.
    :param line: The sign-off line.  It must start with " --" (asserted below).
    :param line_no: Line number used for the diagnostic ranges.
    """
    if line.rstrip() == " --":
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, 0, 3),
            'Missing "Name <email@example.com>"',
            "error",
            DPM_DCH_SECTION,
            # TODO: Add quick fix to insert all the relevant data.
        )
        return
    line_len = len(line)
    if not line.startswith(" -- "):
        # Pre-condition for this function being called.
        assert line.startswith(" --") and line_len > 3
        # The diagnostic covers the whole line, and the other quick fixes in this
        # file supply replacement text for exactly the diagnostic range.  The
        # proposed text must therefore carry the remainder of the line along;
        # proposing only " -- " would erase the name, email and date.
        corrected_line = " -- " + line[3:].lstrip()
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, 0, line_len),
            'Start of sign-off line should be " -- ".',
            "error",
            DPM_DCH_SECTION,
            quickfixes=[propose_correct_text_quick_fix(corrected_line)],
        )
        return

    email_start_idx, email_end_idx = _check_email(
        lint_state,
        line,
        line_no,
        line_len,
    )

    start_of_email_identified = email_start_idx > 3
    end_of_email_identified = 4 < email_end_idx < line_len
    if not start_of_email_identified:
        return

    # Email valid, name might be missing
    name_start = 4
    name_end = email_start_idx
    name = line[name_start:name_end]
    if not name or name.isspace():
        # The range must always be at least one character width. There is no good direction
        # to expand it in. Picked left because we know there is a space character,
        if name_end - name_start < 1:
            name_start -= 1
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, name_start, name_end),
            "Missing name before email",
            "error",
            DPM_DCH_SECTION,
        )
    elif not name.endswith(" ") or len(name.strip()) != len(name) - 1:
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, name_start, name_end),
            "Non-standard spacing around the name",
            "warning",
            DPM_DCH_SECTION,
            quickfixes=[
                propose_correct_text_quick_fix(
                    name.strip() + " ",
                    proposed_title="Fix spacing",
                )
            ],
        )
    if not end_of_email_identified:
        # If we are unsure where the email is, we currently do not have the wherewithal to identify
        # where the date is. Technically, there are cases where we could identify the date and work
        # back from there. Not written because I thought it is too much effort for the value at the
        # time I put in this comment.
        #
        # Note this will already have triggered a diagnostic.
        return
    post_email = line[email_end_idx + 1 :]
    if not post_email or post_email.isspace():
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, 0, line_len),
            "Missing sign off date",
            "error",
            DPM_DCH_SECTION,
        )
        return
    start_date_idx = email_end_idx + (len(post_email) - len(post_email.lstrip())) + 1
    space_len = start_date_idx - email_end_idx - 1
    # The separator has to be exactly two spaces, which is what the diagnostic
    # below tells the user.  Comparing the text (rather than `space_len`) also
    # rejects other whitespace such as tabs.
    if line[email_end_idx + 1 : start_date_idx] != "  ":
        correction = "  "
        diag_start = email_end_idx + 1
        diag_end = start_date_idx
        if not space_len:
            # If there is no spaces, then we mark the closing `>` and the following character instead if possible.
            #
            # Note the choice here of including both boundary characters is because some editors refuse to include
            # the lone `>` in a codeAction range with the user explicitly doing a selection range, so our range
            # detection will miss it. By including the following character, we ensure there is always a two
            # character range to place the cursor in the middle of and the editors tend to respect that as a valid
            # range (also, single character ranges are harder for the user to see).
            diag_start = email_end_idx
            diag_end = min(start_date_idx + 1, line_len)
            if start_date_idx < line_len:
                end_char = line[start_date_idx]
            else:
                end_char = ""
            correction = f">  {end_char}"

        lint_state.emit_diagnostic(
            _single_line_subrange(
                line_no,
                diag_start,
                diag_end,
            ),
            "Must be exactly two spaces between email and sign off date",
            "error",
            DPM_DCH_SECTION,
            quickfixes=[
                propose_correct_text_quick_fix(
                    correction,
                    proposed_title="Fix spacing between email and date",
                ),
            ],
        )
    _check_footer_date(lint_state, line, line_no, line_len, start_date_idx)
331def _offset_of(
332 text: str,
333 ch: str,
334 /,
335 start: int | None = None,
336 end: int | None = None,
337 *,
338 offset_if_missing: int = -1,
339) -> int:
340 try:
341 return text.index(ch, start, end)
342 except ValueError:
343 return offset_if_missing
346def _roffset_of(
347 text: str,
348 ch: str,
349 /,
350 start: int | None = None,
351 end: int | None = None,
352 *,
353 offset_if_missing: int = -1,
354) -> int:
355 try:
356 return text.rindex(ch, start, end)
357 except ValueError:
358 return offset_if_missing
def _single_line_subrange(
    line_no: int,
    character_start_pos: int,
    character_end_pos: int,
) -> "TERange":
    """Build a range whose start and end positions are on the same line.

    :param line_no: Line number used for both positions.
    :param character_start_pos: Character offset of the start position.
    :param character_end_pos: Character offset of the end position.
    :return: The range spanning the two positions.
    """
    range_start = TEPosition(line_no, character_start_pos)
    range_end = TEPosition(line_no, character_end_pos)
    return TERange(range_start, range_end)
def _check_header_line(
    lint_state: LintState,
    line: str,
    line_no: int,
    entry_no: int,
) -> None:
    """Lint the header line ("source (version) ...") of a changelog entry.

    Two things are checked.  For the first entry, the source name must equal
    the "Source" field of the source package known to `lint_state` (when there
    is one).  For every entry, the version must fully match
    `PKGVERSION_REGEX`; a version that does match is additionally checked for
    the "dsfg" typo.

    :param lint_state: Receiver of the diagnostics.
    :param line: The header line.
    :param line_no: Line number used for the diagnostic ranges.
    :param entry_no: 1-based number of the entry the header belongs to.
    """
    header = _HEADER_LINE.search(line)
    if header is None:
        # Syntax error: TODO flag later
        return
    source_name, source_version = header.groups()

    dctrl_source_pkg = lint_state.source_package
    if entry_no == 1 and dctrl_source_pkg is not None:
        expected_name = dctrl_source_pkg.fields.get("Source")
        if expected_name != source_name:
            common = "The first entry must use the same source name as debian/control."
            if expected_name is None:
                detail = ' The d/control file is missing the "Source" field in its first stanza'
            else:
                detail = f' Changelog uses: "{source_name}" while d/control uses: "{expected_name}"'
            name_from, name_to = header.span(1)
            lint_state.emit_diagnostic(
                _single_line_subrange(line_no, name_from, name_to),
                common + detail,
                "error",
                "dpkg",  # man:deb-src-control(5) / #1089794
            )

    version_from, version_to = header.span(2)

    if PKGVERSION_REGEX.fullmatch(source_version):
        # Valid version; only the well-known typo is left to look for.
        typo_index = source_version.find("dsfg")
        if typo_index != -1:
            typo_from = version_from + typo_index
            lint_state.emit_diagnostic(
                _single_line_subrange(line_no, typo_from, typo_from + 4),
                'Typo of "dfsg" (Debian Free Software Guidelines)',
                "pedantic",
                "debputy",
                quickfixes=[propose_correct_text_quick_fix("dfsg")],
            )
        return

    partial = PKGVERSION_REGEX.search(source_version)
    if partial is None:
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, version_from, version_to),
            f'Cannot parse "{source_version}" as a Debian version.',
            "error",
            "Policy 5.6.12",
        )
        return

    # Some part of the version is valid: flag only what surrounds that part.
    valid_from, valid_to = partial.span(0)
    invalid_ranges = []
    if valid_from > 0:
        invalid_ranges.append(
            _single_line_subrange(line_no, version_from, version_from + valid_from)
        )
    if valid_to < len(source_version):
        invalid_ranges.append(
            _single_line_subrange(line_no, version_from + valid_to, version_to)
        )
    for invalid_range in invalid_ranges:
        lint_state.emit_diagnostic(
            invalid_range,
            "This part cannot be parsed as a valid Debian version",
            "error",
            "Policy 5.6.12",
        )
@lint_diagnostics(_DISPATCH_RULE)
async def _lint_debian_changelog(lint_state: LintState) -> None:
    """Lint the top entries of a debian/changelog file.

    Each non-empty line is classified by its start: " --" lines are sign-off
    lines, lines without leading whitespace are entry headers, and the
    remaining indented lines are change descriptions.  Description lines are
    checked for length and spell checked.  Processing stops when the header of
    the third entry is reached.

    :param lint_state: Provides the lines to check and receives the diagnostics.
    """
    lines = lint_state.lines
    entry_no = 0
    entry_limit = 2
    # Budget for spell checking; reduced by whatever `spellcheck_line` returns.
    max_words = 1000
    max_line_length = _MAXIMUM_WIDTH
    for line_no, line in enumerate(lines):
        orig_line = line
        line = line.rstrip()
        if not line:
            continue
        if line.startswith(" --"):
            _check_footer_line(lint_state, line, line_no)
            continue
        if not line.startswith(" "):
            if not line[0].isspace():
                entry_no += 1
                # Figure out the right cut which may not be as simple as just the
                # top two.
                if entry_no > entry_limit:
                    break
                _check_header_line(
                    lint_state,
                    line,
                    line_no,
                    entry_no,
                )
            continue
        # Do not count the newline.  The last line of a file may come without
        # one; subtracting unconditionally would under-count that line by one
        # character and let a line that is one character too long slip through.
        orig_line_len = len(orig_line)
        if orig_line.endswith("\n"):
            orig_line_len -= 1
        if orig_line_len > max_line_length:
            exceeded_line_range = _single_line_subrange(
                line_no,
                max_line_length,
                orig_line_len,
            )
            lint_state.emit_diagnostic(
                exceeded_line_range,
                f"Line exceeds {max_line_length} characters",
                "pedantic",
                "debputy",
            )
        if len(line) > 3 and line[2] == "[" and line[-1] == "]":
            # Do not spell check [ X ] as X is usually a name
            continue
        if max_words > 0:
            new_diagnostics = spellcheck_line(lint_state, line_no, line)
            max_words -= new_diagnostics
@lsp_document_link(_DISPATCH_RULE)
def _debian_changelog_links(
    ls: "DebputyLanguageServer",
    params: types.DocumentLinkParams,
) -> Optional[Sequence[types.DocumentLink]]:
    """Produce links to bugs.debian.org for bug references in the changelog.

    Only indented lines are considered, and on each of them only the first
    "Closes: ..." / "See: ..." reference list.  Every bug number in that list
    becomes one link.  Scanning stops after the first line that brings the
    total to 100 links or more.

    :param ls: Language server used to look up the document.
    :param params: Request parameters; identifies the document by URI.
    :return: The links found, in document order.
    """
    doc = ls.workspace.get_text_document(params.text_document.uri)
    doc_lines = doc.lines
    found = []

    for line_no, text in enumerate(doc_lines):
        if not text.startswith(" "):
            continue
        reference_list = _BUG_LINKS_FINDER_REGEX.search(text)
        if reference_list is None:
            continue
        list_from, list_to = reference_list.span(0)
        # Restricting the scan with pos/endpos yields spans that are already
        # relative to the line, so no offset needs to be added afterwards.
        for bug in _INDIVIDUAL_BUGS_REGEX.finditer(text, list_from, list_to):
            bug_from, bug_to = bug.span()
            lsp_range = te_range_to_lsp(
                _single_line_subrange(line_no, bug_from, bug_to)
            )
            client_range = doc.position_codec.range_to_client_units(
                doc_lines,
                lsp_range,
            )
            target = f"https://bugs.debian.org/{bug.group(1)}"
            found.append(types.DocumentLink(client_range, target))

        if len(found) >= 100:
            break

    return found