Coverage for src/debputy/lsp/languages/lsp_debian_changelog.py: 94%
215 statements
« prev ^ index » next coverage.py v7.8.2, created at 2026-06-16 19:34 +0000
« prev ^ index » next coverage.py v7.8.2, created at 2026-06-16 19:34 +0000
1import re
2from email.utils import parsedate_to_datetime
3from typing import TYPE_CHECKING, Sequence, Optional
5from debputy.linting.lint_util import LintState, te_range_to_lsp
6from debputy.lsp.lsp_features import (
7 lsp_standard_handler,
8 SecondaryLanguage,
9 LanguageDispatchRule,
10 lint_diagnostics,
11 lsp_document_link,
12)
13from debputy.lsp.quickfixes import (
14 propose_correct_text_quick_fix,
15)
16from debputy.lsp.spellchecking import spellcheck_line
17from debputy.util import PKGVERSION_REGEX
19try:
20 from debian._deb822_repro.locatable import (
21 Position as TEPosition,
22 Range as TERange,
23 )
25 from pygls.server import LanguageServer
26 from pygls.workspace import TextDocument
27 from debputy.lsp.debputy_ls import DebputyLanguageServer
28except ImportError:
29 pass
32if TYPE_CHECKING:
33 import lsprotocol.types as types
34else:
35 import debputy.lsprotocol.types as types
# Lintian uses 82 because Parse::DebianChangelog adds an extra leading space.
# We never see that extra initial space, so our limit is one less than
# Lintian's.
_MAXIMUM_WIDTH: int = 81

# Captures the source name (group 1) and the parenthesised version (group 2)
# at the start of an entry header line.
_HEADER_LINE = re.compile(r"^(\S+)\s*[(]([^)]+)[)]")  # TODO: Add rest

_DISPATCH_RULE = LanguageDispatchRule.new_rule(
    "debian/changelog",
    None,
    ("debian/changelog", "debian/changelog.dch"),
    [
        # emacs's name
        SecondaryLanguage("debian-changelog"),
        # vim's name
        SecondaryLanguage("debchangelog"),
        SecondaryLanguage("dch"),
    ],
)


# Abbreviated day names in the order used by `datetime.weekday()`; the date
# check indexes this list with that value.
_WEEKDAYS_BY_IDX = "Mon Tue Wed Thu Fri Sat Sun".split()
_KNOWN_WEEK_DAYS = frozenset(_WEEKDAYS_BY_IDX)

# Finds a whole "closes: #1, #2" / "see #1" style bug reference list.
_BUG_LINKS_FINDER_REGEX = re.compile(
    r"""
    (?:closes:|see:?) \s* (?:bug \s*)? \#?\s?\d+ (?:, \s*(?:bug)? \#?\s?\d+)*
    """,
    re.IGNORECASE | re.VERBOSE,
)
# Picks the individual bug numbers (group 1) out of such a list.
_INDIVIDUAL_BUGS_REGEX = re.compile(r"(?:bug\s?)?(?:#\s?)?(\d+)", re.IGNORECASE)


lsp_standard_handler(_DISPATCH_RULE, types.TEXT_DOCUMENT_CODE_ACTION)
lsp_standard_handler(_DISPATCH_RULE, types.TEXT_DOCUMENT_WILL_SAVE_WAIT_UNTIL)

# Authority reference attached to the changelog format diagnostics.
DPM_DCH_SECTION = "Policy 4.4"
# Full English day names, indexed like `datetime.weekday()` (Monday is 0).
# Used for messages only; appending "day" to the three letter abbreviation
# yields non-words such as "Tueday" or "Wedday".
_FULL_WEEKDAY_NAMES = (
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
    "Sunday",
)


def _check_footer_date(
    lint_state: LintState,
    line: str,
    line_no: int,
    line_len: int,
    start_date_idx: int,
) -> None:
    """Validate the date part of a sign-off line.

    At most one diagnostic is emitted through `lint_state.emit_diagnostic`;
    the checks stop at the first problem found.

    :param lint_state: Receiver of the diagnostics.
    :param line: The sign-off line being checked.
    :param line_no: Line number used for the diagnostic ranges.
    :param line_len: Length of `line`.
    :param start_date_idx: Offset in `line` where the date is expected to start.
    """
    # 3 characters for the day name (Mon), then a comma plus a space followed by the
    # actual date. The 6 characters limit is a gross under estimation of the real
    # size.
    if line_len < start_date_idx + 6:
        text_range = _single_line_subrange(line_no, start_date_idx, line_len)
        lint_state.emit_diagnostic(
            text_range,
            "Expected a date in RFC822 format (Tue, 12 Mar 2024 12:34:56 +0000)",
            "error",
            DPM_DCH_SECTION,
        )
        return
    day_name_range = _single_line_subrange(line_no, start_date_idx, start_date_idx + 3)
    day_name = line[start_date_idx : start_date_idx + 3]
    if day_name not in _KNOWN_WEEK_DAYS:
        lint_state.emit_diagnostic(
            day_name_range,
            "Expected a three letter date here using US English format (Mon, Tue, ..., Sun)",
            "error",
            DPM_DCH_SECTION,
        )
        return

    date_str = line[start_date_idx + 5 :]

    if line[start_date_idx + 3 : start_date_idx + 5] != ", ":
        sep = line[start_date_idx + 3 : start_date_idx + 5]
        text_range = _single_line_subrange(
            line_no,
            start_date_idx + 3,
            start_date_idx + 5,
        )
        lint_state.emit_diagnostic(
            text_range,
            f'Improper formatting of date. Expected ", " here, not "{sep}"',
            "error",
            DPM_DCH_SECTION,
        )
        return

    try:
        # FIXME: this parser is too forgiving (it ignores trailing garbage)
        date = parsedate_to_datetime(date_str)
    except ValueError as e:
        error_range = _single_line_subrange(line_no, start_date_idx + 5, line_len)
        lint_state.emit_diagnostic(
            error_range,
            f"Unable to parse the date as a valid RFC822 date: {e.args[0]}",
            "error",
            "debputy",
        )
        return
    weekday_idx = date.weekday()
    expected_week_day = _WEEKDAYS_BY_IDX[weekday_idx]
    if expected_week_day != day_name:
        lint_state.emit_diagnostic(
            day_name_range,
            # Spell the day out in full; the quick fix still proposes the
            # three letter abbreviation that belongs in the file.
            f"The date was a {_FULL_WEEKDAY_NAMES[weekday_idx]}",
            "warning",
            "debputy",
            quickfixes=[propose_correct_text_quick_fix(expected_week_day)],
        )
def _check_email(
    lint_state: LintState,
    line: str,
    line_no: int,
    line_len: int,
) -> tuple[int, int]:
    """Locate the `<email>` part of a sign-off line, flagging it when malformed.

    The search starts at offset 4, i.e. after the " -- " prefix. When either
    the opening "<" or the closing ">" is missing, one diagnostic is emitted
    and the missing offset is estimated where possible.

    :param lint_state: Receiver of the diagnostics.
    :param line: The sign-off line being checked.
    :param line_no: Line number used for the diagnostic range.
    :param line_len: Length of `line`.
    :return: `(email_start_idx, email_end_idx)`. The start is -1 when the
      start of the email could not be determined.
    """
    email_start_idx = _offset_of(line, "<", 4)
    email_end_idx = _offset_of(line, ">", max(email_start_idx, 4))

    if not (3 < email_start_idx < email_end_idx):
        # Email invalid
        if email_start_idx >= 3:
            msg = 'Missing closing ">" to finish email address before the sign off date'
            diag_start = email_start_idx
            diag_end = _offset_of(
                line,
                " ",
                email_start_idx,
                offset_if_missing=line_len,
            )
        else:
            if email_end_idx > -1:
                # The email presumably starts right after the last space before
                # the ">". The fallback is 3 so that "no space found" becomes 4
                # after the "1 +", which keeps the first character of the
                # address inside the range and lets the guard below tell the
                # two cases apart.
                diag_start = 1 + _roffset_of(
                    line,
                    " ",
                    4,
                    email_end_idx - 1,
                    offset_if_missing=3,
                )
                if diag_start > 4:
                    email_start_idx = diag_start
                # For consistency, we always include the trailing `>`.
                diag_end = email_end_idx + 1
            else:
                diag_start = 4
                diag_end = _offset_of(
                    line,
                    " ",
                    diag_start,
                    offset_if_missing=line_len,
                )
            msg = 'Missing opening "<" to start the email address after the name'
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, diag_start, diag_end),
            msg,
            "error",
            DPM_DCH_SECTION,
        )
        if email_end_idx < 0:
            # No ">" at all; use the end of the flagged range as the best guess.
            email_end_idx = diag_end
    return email_start_idx, email_end_idx
def _check_footer_line(
    lint_state: LintState,
    line: str,
    line_no: int,
) -> None:
    """Lint a changelog sign-off line (" -- Name <email>  date").

    Checks, in order: the " -- " prefix, the email (via `_check_email`), the
    name before the email, the spacing between the email and the date, and
    finally the date itself (via `_check_footer_date`). Diagnostics are
    emitted through `lint_state.emit_diagnostic`.

    :param lint_state: Receiver of the diagnostics.
    :param line: The line to check; it must start with " --".
    :param line_no: Line number used for the diagnostic ranges.
    """
    if line.rstrip() == " --":
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, 0, 3),
            'Missing "Name <email@example.com>"',
            "error",
            DPM_DCH_SECTION,
            # TODO: Add quick fix to insert all the relevant data.
        )
        return
    line_len = len(line)
    if not line.startswith(" -- "):
        # Pre-condition for this function being called.
        assert line.startswith(" --") and line_len > 3
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, 0, line_len),
            'Start of sign-off line should be " -- ".',
            "error",
            DPM_DCH_SECTION,
            quickfixes=[propose_correct_text_quick_fix(" -- ")],
        )
        return

    email_start_idx, email_end_idx = _check_email(
        lint_state,
        line,
        line_no,
        line_len,
    )

    start_of_email_identified = email_start_idx > 3
    end_of_email_identified = 4 < email_end_idx < line_len
    if not start_of_email_identified:
        return

    # Email valid, name might be missing
    name_start = 4
    name_end = email_start_idx
    name = line[name_start:name_end]
    if not name or name.isspace():
        # The range must always be at least one character width. There is no good direction
        # to expand it in. Picked left because we know there is a space character,
        if name_end - name_start < 1:
            name_start -= 1
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, name_start, name_end),
            "Missing name before email",
            "error",
            DPM_DCH_SECTION,
        )
    elif not name.endswith(" ") or len(name.strip()) != len(name) - 1:
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, name_start, name_end),
            "Non-standard spacing around the name",
            "warning",
            DPM_DCH_SECTION,
            quickfixes=[
                propose_correct_text_quick_fix(
                    name.strip() + " ",
                    proposed_title="Fix spacing",
                )
            ],
        )
    if not end_of_email_identified:
        # If we are unsure where the email is, we currently do not have the wherewithal to identify
        # where the date is. Technically, there are cases where we could identify the date and work
        # back from there. Not written because I thought it is too much effort for the value at the
        # time I put in this comment.
        #
        # Note this will already have triggered a diagnostic.
        return
    post_email = line[email_end_idx + 1 :]
    if not post_email or post_email.isspace():
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, 0, line_len),
            "Missing sign off date",
            "error",
            DPM_DCH_SECTION,
        )
        return
    start_date_idx = email_end_idx + (len(post_email) - len(post_email.lstrip())) + 1
    space_len = start_date_idx - email_end_idx - 1
    # The separator must be exactly two plain spaces, matching the diagnostic
    # message below; anything else (one space, three spaces, tabs) is flagged.
    if line[email_end_idx + 1 : start_date_idx] != "  ":
        correction = "  "
        diag_start = email_end_idx + 1
        diag_end = start_date_idx
        if not space_len:
            # If there is no spaces, then we mark the closing `>` and the following character instead if possible.
            #
            # Note the choice here of including both boundary characters is because some editors refuse to include
            # the lone `>` in a codeAction range with the user explicitly doing a selection range, so our range
            # detection will miss it. By including the following character, we ensure there is always a two
            # character range to place the cursor in the middle of and the editors tend to respect that as a valid
            # range (also, single character ranges are harder for the user to see).
            diag_start = email_end_idx
            diag_end = min(start_date_idx + 1, line_len)
            if start_date_idx < line_len:
                end_char = line[start_date_idx]
            else:
                end_char = ""
            # Re-create both boundary characters with the two spaces in between.
            correction = f">  {end_char}"

        lint_state.emit_diagnostic(
            _single_line_subrange(
                line_no,
                diag_start,
                diag_end,
            ),
            "Must be exactly two spaces between email and sign off date",
            "error",
            DPM_DCH_SECTION,
            quickfixes=[
                propose_correct_text_quick_fix(
                    correction,
                    proposed_title="Fix spacing between email and date",
                ),
            ],
        )
    _check_footer_date(lint_state, line, line_no, line_len, start_date_idx)
333def _offset_of(
334 text: str,
335 ch: str,
336 /,
337 start: int | None = None,
338 end: int | None = None,
339 *,
340 offset_if_missing: int = -1,
341) -> int:
342 try:
343 return text.index(ch, start, end)
344 except ValueError:
345 return offset_if_missing
348def _roffset_of(
349 text: str,
350 ch: str,
351 /,
352 start: int | None = None,
353 end: int | None = None,
354 *,
355 offset_if_missing: int = -1,
356) -> int:
357 try:
358 return text.rindex(ch, start, end)
359 except ValueError:
360 return offset_if_missing
def _single_line_subrange(
    line_no: int,
    character_start_pos: int,
    character_end_pos: int,
) -> "TERange":
    """Build a range that starts and ends on the same line.

    :param line_no: The line both end points are on.
    :param character_start_pos: Character offset of the start position.
    :param character_end_pos: Character offset of the end position.
    :return: A `TERange` spanning the two positions.
    """
    range_start = TEPosition(line_no, character_start_pos)
    range_end = TEPosition(line_no, character_end_pos)
    return TERange(range_start, range_end)
def _check_header_line(
    lint_state: LintState,
    line: str,
    line_no: int,
    entry_no: int,
) -> None:
    """Lint the header line of a changelog entry.

    For the first entry, the source name is compared with the "Source" field
    of `lint_state.source_package` (when that is available). For every entry,
    the version is checked against `PKGVERSION_REGEX` and for the "dsfg" typo.

    :param lint_state: Receiver of the diagnostics.
    :param line: The header line.
    :param line_no: Line number used for the diagnostic ranges.
    :param entry_no: 1-based number of the entry the header belongs to.
    """
    header_match = _HEADER_LINE.search(line)
    if header_match is None:
        # Syntax error: TODO flag later
        return
    source_name, source_version = header_match.groups()

    control_source = lint_state.source_package
    if entry_no == 1 and control_source is not None:
        expected_name = control_source.fields.get("Source")
        if expected_name != source_name:
            msg = "The first entry must use the same source name as debian/control."
            if expected_name is None:
                msg += ' The d/control file is missing the "Source" field in its first stanza'
            else:
                msg += f' Changelog uses: "{source_name}" while d/control uses: "{expected_name}"'
            name_from, name_to = header_match.span(1)
            lint_state.emit_diagnostic(
                _single_line_subrange(line_no, name_from, name_to),
                msg,
                "error",
                "dpkg",  # man:deb-src-control(5) / #1089794
            )

    version_from, version_to = header_match.span(2)

    if PKGVERSION_REGEX.fullmatch(source_version):
        # Valid version; only the well-known typo is left to look for.
        typo_index = source_version.find("dsfg")
        if typo_index >= 0:
            typo_from = version_from + typo_index
            lint_state.emit_diagnostic(
                _single_line_subrange(line_no, typo_from, typo_from + 4),
                'Typo of "dfsg" (Debian Free Software Guidelines)',
                "pedantic",
                "debputy",
                quickfixes=[propose_correct_text_quick_fix("dfsg")],
            )
        return

    partial_match = PKGVERSION_REGEX.search(source_version)
    if not partial_match:
        # Nothing in the version looks valid; flag all of it.
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, version_from, version_to),
            f'Cannot parse "{source_version}" as a Debian version.',
            "error",
            "Policy 5.6.12",
        )
        return

    # Flag only the parts before and after the portion that did parse.
    valid_from, valid_to = partial_match.span(0)
    invalid_spans = []
    if valid_from > 0:
        invalid_spans.append((version_from, version_from + valid_from))
    if valid_to < len(source_version):
        invalid_spans.append((version_from + valid_to, version_to))
    for span_from, span_to in invalid_spans:
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, span_from, span_to),
            "This part cannot be parsed as a valid Debian version",
            "error",
            "Policy 5.6.12",
        )
@lint_diagnostics(_DISPATCH_RULE)
async def _lint_debian_changelog(lint_state: LintState) -> None:
    """Lint the first entries of a debian/changelog file.

    Header lines go to `_check_header_line` and sign-off lines to
    `_check_footer_line`. The remaining lines are checked for their length
    and spell checked. Processing stops at the header of the third entry.

    :param lint_state: Provides the lines to check and receives the diagnostics.
    """
    entries_seen = 0
    entry_limit = 2
    # Remaining budget; reduced by whatever `spellcheck_line` returns.
    spellcheck_budget = 1000
    width_limit = _MAXIMUM_WIDTH
    for line_no, raw_line in enumerate(lint_state.lines):
        line = raw_line.rstrip()
        if not line:
            continue

        if line.startswith(" --"):
            _check_footer_line(lint_state, line, line_no)
            continue

        if not line.startswith(" "):
            if not line[0].isspace():
                entries_seen += 1
                # Figure out the right cut which may not be as simple as just the
                # top two.
                if entries_seen > entry_limit:
                    break
                _check_header_line(lint_state, line, line_no, entries_seen)
            continue

        raw_line_len = len(raw_line)
        if raw_line_len > width_limit:
            # We shift the range one to the left. We do not want the range to include the newline
            # (each editor handle that differently).
            lint_state.emit_diagnostic(
                _single_line_subrange(line_no, width_limit - 1, raw_line_len - 1),
                f"Line exceeds {width_limit} characters",
                "pedantic",
                "debputy",
            )

        is_name_marker = len(line) > 3 and line[2] == "[" and line[-1] == "]"
        if is_name_marker:
            # Do not spell check [ X ] as X is usually a name
            continue

        if spellcheck_budget > 0:
            spellcheck_budget -= spellcheck_line(lint_state, line_no, line)
@lsp_document_link(_DISPATCH_RULE)
def _debian_changelog_links(
    ls: "DebputyLanguageServer",
    params: types.DocumentLinkParams,
) -> Optional[Sequence[types.DocumentLink]]:
    """Provide links to bugs.debian.org for bug references in the changelog.

    Only lines starting with a space are considered, and only the first bug
    reference list on each such line. Scanning stops after the line on which
    the number of links reaches 100.

    :param ls: The language server; used to look up the document.
    :param params: The request parameters naming the document.
    :return: The links found, in document order.
    """
    doc = ls.workspace.get_text_document(params.text_document.uri)
    lines = doc.lines
    links = []

    for line_no, line in enumerate(lines):
        if not line.startswith(" "):
            continue
        reference_list = _BUG_LINKS_FINDER_REGEX.search(line)
        if reference_list is None:
            continue
        # Offsets from the inner regex are relative to the reference list.
        list_offset = reference_list.start(0)
        for bug_match in _INDIVIDUAL_BUGS_REGEX.finditer(reference_list.group(0)):
            bug_id = bug_match.group(1)
            rel_start, rel_end = bug_match.span()
            te_range = _single_line_subrange(
                line_no,
                rel_start + list_offset,
                rel_end + list_offset,
            )
            client_range = doc.position_codec.range_to_client_units(
                lines,
                te_range_to_lsp(te_range),
            )
            links.append(
                types.DocumentLink(client_range, f"https://bugs.debian.org/{bug_id}")
            )

        if len(links) >= 100:
            break

    return links