Coverage for src/debputy/lsp/languages/lsp_debian_changelog.py: 94%
215 statements
« prev ^ index » next coverage.py v7.8.2, created at 2025-10-12 15:06 +0000
« prev ^ index » next coverage.py v7.8.2, created at 2025-10-12 15:06 +0000
1import asyncio
2import re
3from email.utils import parsedate_to_datetime
4from typing import TYPE_CHECKING, Sequence, Optional
6from debputy.linting.lint_util import LintState
7from debputy.lsp.lsp_features import (
8 lsp_standard_handler,
9 SecondaryLanguage,
10 LanguageDispatchRule,
11 lint_diagnostics,
12 lsp_document_link,
13)
14from debputy.lsp.quickfixes import (
15 propose_correct_text_quick_fix,
16)
17from debputy.lsp.spellchecking import spellcheck_line
18from debputy.util import PKGVERSION_REGEX
20try:
21 from debputy.lsp.vendoring._deb822_repro.locatable import (
22 Position as TEPosition,
23 Range as TERange,
24 )
26 from pygls.server import LanguageServer
27 from pygls.workspace import TextDocument
28 from debputy.lsp.debputy_ls import DebputyLanguageServer
29except ImportError:
30 pass
33if TYPE_CHECKING:
34 import lsprotocol.types as types
35else:
36 import debputy.lsprotocol.types as types
# Same as Lintian
_MAXIMUM_WIDTH: int = 82
# Captures the source package name (group 1) and the version inside the
# parentheses (group 2) at the start of an entry header line.
_HEADER_LINE = re.compile(r"^(\S+)\s*[(]([^)]+)[)]")  # TODO: Add rest
_DISPATCH_RULE = LanguageDispatchRule.new_rule(
    "debian/changelog",
    None,
    ("debian/changelog", "debian/changelog.dch"),
    [
        # emacs's name
        SecondaryLanguage("debian-changelog"),
        # vim's name
        SecondaryLanguage("debchangelog"),
        SecondaryLanguage("dch"),
    ],
)


# Abbreviated day names in the order used for indexing by ``datetime.weekday()``
# in this module (index 0 is "Mon").
_WEEKDAYS_BY_IDX = [
    "Mon",
    "Tue",
    "Wed",
    "Thu",
    "Fri",
    "Sat",
    "Sun",
]
_KNOWN_WEEK_DAYS = frozenset(_WEEKDAYS_BY_IDX)
# Finds a whole "Closes: ..." / "See: ..." bug list. Compiled with re.VERBOSE,
# so the literal whitespace inside the pattern is not significant.
_BUG_LINKS_FINDER_REGEX = re.compile(
    r"""
    (?:closes:|see:?) \s* (?:bug \s*)? \#?\s?\d+ (?:, \s*(?:bug)? \#?\s?\d+)*
    """,
    re.I | re.VERBOSE,
)
# Extracts each individual bug number (group 1) from a matched bug list.
_INDIVIDUAL_BUGS_REGEX = re.compile(
    r"""(?:bug\s?)?(?:#\s?)?(\d+)""",
    re.I,
)


lsp_standard_handler(_DISPATCH_RULE, types.TEXT_DOCUMENT_CODE_ACTION)
lsp_standard_handler(_DISPATCH_RULE, types.TEXT_DOCUMENT_WILL_SAVE_WAIT_UNTIL)

# Authority reference attached to most changelog diagnostics in this module.
DPM_DCH_SECTION = "Policy 4.4"
# Full English day names, index-aligned with ``datetime.weekday()`` (Monday is 0).
_WEEKDAY_FULL_NAMES = (
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
    "Sunday",
)


def _check_footer_date(
    lint_state: LintState,
    line: str,
    line_no: int,
    line_len: int,
    start_date_idx: int,
) -> None:
    """Validate the date part of a changelog sign-off line.

    Checks are done in order and the function stops after the first problem it
    reports through ``lint_state.emit_diagnostic``: date too short, unknown
    day name, wrong separator after the day name, unparsable date, and finally
    a day name that disagrees with the parsed date.

    :param lint_state: Receiver of the emitted diagnostics.
    :param line: The sign-off line being checked.
    :param line_no: Line number used when building diagnostic ranges.
    :param line_len: Length of ``line``.
    :param start_date_idx: Offset in ``line`` where the date starts.
    """
    # 3 characters for the day name (Mon), then a comma plus a space followed by the
    # actual date. The 6 characters limit is a gross under estimation of the real
    # size.
    if line_len < start_date_idx + 6:
        text_range = _single_line_subrange(line_no, start_date_idx, line_len)
        lint_state.emit_diagnostic(
            text_range,
            "Expected a date in RFC822 format (Tue, 12 Mar 2024 12:34:56 +0000)",
            "error",
            DPM_DCH_SECTION,
        )
        return

    day_name_end = start_date_idx + 3
    day_name_range = _single_line_subrange(line_no, start_date_idx, day_name_end)
    day_name = line[start_date_idx:day_name_end]
    if day_name not in _KNOWN_WEEK_DAYS:
        lint_state.emit_diagnostic(
            day_name_range,
            "Expected a three letter date here using US English format (Mon, Tue, ..., Sun)",
            "error",
            DPM_DCH_SECTION,
        )
        return

    date_start = start_date_idx + 5
    sep = line[day_name_end:date_start]
    if sep != ", ":
        text_range = _single_line_subrange(line_no, day_name_end, date_start)
        lint_state.emit_diagnostic(
            text_range,
            f'Improper formatting of date. Expected ", " here, not "{sep}"',
            "error",
            DPM_DCH_SECTION,
        )
        return

    date_str = line[date_start:]
    try:
        # FIXME: this parser is too forgiving (it ignores trailing garbage)
        date = parsedate_to_datetime(date_str)
    except ValueError as e:
        error_range = _single_line_subrange(line_no, date_start, line_len)
        lint_state.emit_diagnostic(
            error_range,
            # Format the exception itself rather than ``e.args[0]`` so that an
            # exception without arguments cannot trigger an IndexError here.
            f"Unable to parse the date as a valid RFC822 date: {e}",
            "error",
            "debputy",
        )
        return

    weekday_idx = date.weekday()
    expected_week_day = _WEEKDAYS_BY_IDX[weekday_idx]
    if expected_week_day != day_name:
        lint_state.emit_diagnostic(
            day_name_range,
            # Appending "day" to the abbreviation only works for Mon, Fri and
            # Sun ("Tueday", "Wedday", ...), so use the full name instead.
            f"The date was a {_WEEKDAY_FULL_NAMES[weekday_idx]}",
            "warning",
            "debputy",
            quickfixes=[propose_correct_text_quick_fix(expected_week_day)],
        )
def _check_email(
    lint_state: LintState,
    line: str,
    line_no: int,
    line_len: int,
) -> tuple[int, int]:
    """Locate the ``<email>`` part of a sign-off line and report broken brackets.

    The search starts at offset 4, i.e. after the `` -- `` prefix. When either
    bracket is missing, one diagnostic is emitted and a best-effort guess of
    the missing offset is returned instead.

    :param lint_state: Receiver of the emitted diagnostics.
    :param line: The sign-off line being checked.
    :param line_no: Line number used when building diagnostic ranges.
    :param line_len: Length of ``line``.
    :return: ``(email_start_idx, email_end_idx)``. ``email_start_idx`` is -1
      when neither ``<`` nor ``>`` could be found.
    """
    email_start_idx = _offset_of(line, "<", 4)
    email_end_idx = _offset_of(line, ">", max(email_start_idx, 4))

    if not (3 < email_start_idx < email_end_idx):
        # Email invalid
        if email_start_idx >= 3:
            # Found "<" but no ">" after it; mark up to the next space.
            msg = 'Missing closing ">" to finish email address before the sign off date'
            diag_start = email_start_idx
            diag_end = _offset_of(
                line,
                " ",
                email_start_idx,
                offset_if_missing=line_len,
            )
        else:
            if email_end_idx > -1:
                # Found ">" but no "<"; guess the start as the character
                # following the last space before the ">".
                # NOTE(review): with offset_if_missing=4 the result is always
                # at least 5, so the condition below is always true and the
                # character at offset 4 is skipped when there is no space.
                # Possibly 3 was intended - confirm before changing.
                diag_start = 1 + _roffset_of(
                    line,
                    " ",
                    4,
                    email_end_idx - 1,
                    offset_if_missing=4,
                )
                if diag_start > 4:
                    email_start_idx = diag_start
                # For consistency, we always include the trailing `>`.
                diag_end = email_end_idx + 1
            else:
                # Neither bracket found; mark the first word after the prefix.
                diag_start = 4
                diag_end = _offset_of(
                    line,
                    " ",
                    diag_start,
                    offset_if_missing=line_len,
                )
            msg = 'Missing opening "<" to start the email address after the name'
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, diag_start, diag_end),
            msg,
            "error",
            DPM_DCH_SECTION,
        )
        if email_end_idx < 0:
            # No ">" at all: report the end of the flagged span as the end.
            email_end_idx = diag_end
    return email_start_idx, email_end_idx
def _check_footer_line(
    lint_state: LintState,
    line: str,
    line_no: int,
) -> None:
    """Validate a changelog sign-off line (`` -- Name <email>  date``).

    The caller must only pass lines that start with `` --``. The prefix, the
    name, the email brackets and the spacing before the date are checked here;
    the date itself is handed to ``_check_footer_date``.

    :param lint_state: Receiver of the emitted diagnostics.
    :param line: The sign-off line being checked.
    :param line_no: Line number used when building diagnostic ranges.
    """
    if line.rstrip() == " --":
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, 0, 3),
            'Missing "Name <email@example.com>"',
            "error",
            DPM_DCH_SECTION,
            # TODO: Add quick fix to insert all the relevant data.
        )
        return
    line_len = len(line)
    if not line.startswith(" -- "):
        # Pre-condition for this function being called.
        assert line.startswith(" --") and line_len > 3
        # Only cover the malformed prefix (" --" plus any whitespace directly
        # after it). The quick fix replaces the text of the diagnostic range,
        # so a whole-line range would wipe the name, email and date.
        prefix_end = line_len - len(line[3:].lstrip())
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, 0, prefix_end),
            'Start of sign-off line should be " -- ".',
            "error",
            DPM_DCH_SECTION,
            quickfixes=[propose_correct_text_quick_fix(" -- ")],
        )
        return

    email_start_idx, email_end_idx = _check_email(
        lint_state,
        line,
        line_no,
        line_len,
    )

    start_of_email_identified = email_start_idx > 3
    end_of_email_identified = 4 < email_end_idx < line_len
    if not start_of_email_identified:
        return

    # Email valid, name might be missing
    name_start = 4
    name_end = email_start_idx
    name = line[name_start:name_end]
    if not name or name.isspace():
        # The range must always be at least one character width. There is no good direction
        # to expand it in. Picked left because we know there is a space character,
        if name_end - name_start < 1:
            name_start -= 1
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, name_start, name_end),
            "Missing name before email",
            "error",
            DPM_DCH_SECTION,
        )
    elif not name.endswith(" ") or len(name.strip()) != len(name) - 1:
        # Anything other than "name" followed by exactly one space.
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, name_start, name_end),
            "Non-standard spacing around the name",
            "warning",
            DPM_DCH_SECTION,
            quickfixes=[
                propose_correct_text_quick_fix(
                    name.strip() + " ",
                    proposed_title="Fix spacing",
                )
            ],
        )
    if not end_of_email_identified:
        # If we are unsure where the email is, we currently do not have the wherewithal to identify
        # where the date is. Technically, there are cases where we could identify the date and work
        # back from there. Not written because I thought it is too much effort for the value at the
        # time I put in this comment.
        #
        # Note this will already have triggered a diagnostic.
        return
    post_email = line[email_end_idx + 1 :]
    if not post_email or post_email.isspace():
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, 0, line_len),
            "Missing sign off date",
            "error",
            DPM_DCH_SECTION,
        )
        return
    start_date_idx = email_end_idx + (len(post_email) - len(post_email.lstrip())) + 1
    space_len = start_date_idx - email_end_idx - 1
    # The separator must be exactly two spaces, as the diagnostic message
    # below states.
    if line[email_end_idx + 1 : start_date_idx] != "  ":
        correction = "  "
        diag_start = email_end_idx + 1
        diag_end = start_date_idx
        if not space_len:
            # If there is no spaces, then we mark the closing `>` and the following character instead if possible.
            #
            # Note the choice here of including both boundary characters is because some editors refuse to include
            # the lone `>` in a codeAction range with the user explicitly doing a selection range, so our range
            # detection will miss it. By including the following character, we ensure there is always a two
            # character range to place the cursor in the middle of and the editors tend to respect that as a valid
            # range (also, single character ranges are harder for the user to see).
            diag_start = email_end_idx
            diag_end = min(start_date_idx + 1, line_len)
            if start_date_idx < line_len:
                end_char = line[start_date_idx]
            else:
                end_char = ""
            correction = f">  {end_char}"

        lint_state.emit_diagnostic(
            _single_line_subrange(
                line_no,
                diag_start,
                diag_end,
            ),
            "Must be exactly two spaces between email and sign off date",
            "error",
            DPM_DCH_SECTION,
            quickfixes=[
                propose_correct_text_quick_fix(
                    correction,
                    proposed_title="Fix spacing between email and date",
                ),
            ],
        )
    _check_footer_date(lint_state, line, line_no, line_len, start_date_idx)
332def _offset_of(
333 text: str,
334 ch: str,
335 /,
336 start: int | None = None,
337 end: int | None = None,
338 *,
339 offset_if_missing: int = -1,
340) -> int:
341 try:
342 return text.index(ch, start, end)
343 except ValueError:
344 return offset_if_missing
347def _roffset_of(
348 text: str,
349 ch: str,
350 /,
351 start: int | None = None,
352 end: int | None = None,
353 *,
354 offset_if_missing: int = -1,
355) -> int:
356 try:
357 return text.rindex(ch, start, end)
358 except ValueError:
359 return offset_if_missing
def _single_line_subrange(
    line_no: int,
    character_start_pos: int,
    character_end_pos: int,
) -> "TERange":
    """Build a range that starts and ends on the same line.

    :param line_no: The line number used for both ends of the range.
    :param character_start_pos: Character offset of the start position.
    :param character_end_pos: Character offset of the end position.
    :return: A ``TERange`` from the start position to the end position.
    """
    start_pos = TEPosition(line_no, character_start_pos)
    end_pos = TEPosition(line_no, character_end_pos)
    return TERange(start_pos, end_pos)
def _check_header_line(
    lint_state: LintState,
    line: str,
    line_no: int,
    entry_no: int,
) -> None:
    """Validate the header line (``source (version) ...``) of a changelog entry.

    For the first entry, the source name is compared with the ``Source`` field
    of ``lint_state.source_package`` when that is available. For every entry,
    the version is checked against ``PKGVERSION_REGEX`` and for the "dsfg" typo.

    :param lint_state: Receiver of the emitted diagnostics.
    :param line: The header line being checked.
    :param line_no: Line number used when building diagnostic ranges.
    :param entry_no: 1-based number of the changelog entry the line belongs to.
    """
    m = _HEADER_LINE.search(line)
    if not m:
        # Syntax error: TODO flag later
        return
    source_name, source_version = m.groups()
    dctrl_source_pkg = lint_state.source_package
    if entry_no == 1 and dctrl_source_pkg is not None:
        expected_name = dctrl_source_pkg.fields.get("Source")
        if expected_name != source_name:
            start_pos, end_pos = m.span(1)
            name_range = _single_line_subrange(line_no, start_pos, end_pos)
            if expected_name is None:
                msg = (
                    "The first entry must use the same source name as debian/control."
                    ' The d/control file is missing the "Source" field in its first stanza'
                )
            else:
                msg = (
                    "The first entry must use the same source name as debian/control."
                    f' Changelog uses: "{source_name}" while d/control uses: "{expected_name}"'
                )

            lint_state.emit_diagnostic(
                name_range,
                msg,
                "error",
                "dpkg",  # man:deb-src-control(5) / #1089794
            )
    if not PKGVERSION_REGEX.fullmatch(source_version):
        # Try to narrow the diagnostic down to the part(s) around the longest
        # piece that the version regex does accept.
        vm = PKGVERSION_REGEX.search(source_version)
        start_pos, end_pos = m.span(2)
        if vm:
            start_valid, end_valid = vm.span(0)
            invalid_ranges = []
            if start_valid > 0:
                # Unparsable text before the valid part.
                name_range = _single_line_subrange(
                    line_no,
                    start_pos,
                    start_pos + start_valid,
                )
                invalid_ranges.append(name_range)

            if end_valid < len(source_version):
                # Unparsable text after the valid part.
                name_range = _single_line_subrange(
                    line_no,
                    start_pos + end_valid,
                    end_pos,
                )
                invalid_ranges.append(name_range)

            for r in invalid_ranges:
                lint_state.emit_diagnostic(
                    r,
                    "This part cannot be parsed as a valid Debian version",
                    "error",
                    "Policy 5.6.12",
                )
        else:
            name_range = _single_line_subrange(line_no, start_pos, end_pos)
            lint_state.emit_diagnostic(
                name_range,
                f'Cannot parse "{source_version}" as a Debian version.',
                "error",
                "Policy 5.6.12",
            )
    elif "dsfg" in source_version:
        typo_index = source_version.index("dsfg")
        start_pos, end_pos = m.span(2)

        # Cover exactly the four characters of the typo so that the quick fix
        # replaces only them.
        name_range = _single_line_subrange(
            line_no,
            start_pos + typo_index,
            start_pos + typo_index + 4,
        )
        lint_state.emit_diagnostic(
            name_range,
            'Typo of "dfsg" (Debian Free Software Guidelines)',
            "pedantic",
            "debputy",
            quickfixes=[propose_correct_text_quick_fix("dfsg")],
        )
@lint_diagnostics(_DISPATCH_RULE)
async def _lint_debian_changelog(lint_state: LintState) -> None:
    """Lint the first entries of a ``debian/changelog`` file.

    Header and sign-off lines are validated by their dedicated helpers. All
    other non-empty lines are checked for length and spell checked. Processing
    stops once more than ``entry_limit`` entry headers have been seen.

    :param lint_state: Provides the lines to check and receives the diagnostics.
    """
    lines = lint_state.lines
    entry_no = 0
    entry_limit = 2
    max_words = 1000
    max_line_length = _MAXIMUM_WIDTH
    for line_no, line in enumerate(lines):
        orig_line = line
        line = line.rstrip()
        if not line:
            continue
        if line.startswith(" --"):
            _check_footer_line(lint_state, line, line_no)
            continue
        # NOTE(review): changelog detail lines are normally indented by two
        # spaces (and `line[2] == "["` below relies on that) - confirm whether
        # this prefix was meant to be two spaces.
        if not line.startswith(" "):
            if not line[0].isspace():
                entry_no += 1
                # Figure out the right cut which may not be as simple as just the
                # top two.
                if entry_no > entry_limit:
                    break
                _check_header_line(
                    lint_state,
                    line,
                    line_no,
                    entry_no,
                )
            continue
        # Length without the line terminator. Stripping it explicitly (rather
        # than subtracting 1) keeps the count right for a final line without a
        # newline and for CRLF line endings.
        orig_line_len = len(orig_line.rstrip("\r\n"))
        if orig_line_len > max_line_length:
            exceeded_line_range = _single_line_subrange(
                line_no,
                max_line_length,
                orig_line_len,
            )
            lint_state.emit_diagnostic(
                exceeded_line_range,
                f"Line exceeds {max_line_length} characters",
                "pedantic",
                "debputy",
            )
        if len(line) > 3 and line[2] == "[" and line[-1] == "]":
            # Do not spell check [ X ] as X is usually a name
            continue
        if max_words > 0:
            # The remaining budget is reduced by whatever spellcheck_line returns.
            new_diagnostics = spellcheck_line(lint_state, line_no, line)
            max_words -= new_diagnostics
@lsp_document_link(_DISPATCH_RULE)
def _debian_changelog_links(
    ls: "DebputyLanguageServer",
    params: types.DocumentLinkParams,
) -> Optional[Sequence[types.DocumentLink]]:
    """Provide document links for bug references in a changelog.

    Only the first "Closes:"/"See:" bug list on each line is considered. Each
    bug number in that list gets a link to ``https://bugs.debian.org/<bug>``.
    Scanning stops after the line on which the total reaches 100 links.

    :param ls: The language server used to look up the document.
    :param params: The request parameters, including the document URI.
    :return: The links found, in document order.
    """
    doc = ls.workspace.get_text_document(params.text_document.uri)
    lines = doc.lines
    links = []

    for line_no, line in enumerate(lines):
        # NOTE(review): changelog detail lines are normally indented by two
        # spaces - confirm whether this prefix was meant to be two spaces.
        if not line.startswith(" "):
            continue
        bug_line_match = _BUG_LINKS_FINDER_REGEX.search(line)
        if not bug_line_match:
            continue
        # Offsets of the individual bugs are relative to the matched list, so
        # shift them by where the list starts on the line.
        bug_offset = bug_line_match.start(0)
        for bug_match in _INDIVIDUAL_BUGS_REGEX.finditer(bug_line_match.group(0)):
            bug_id = bug_match.group(1)
            bug_span = bug_match.span()
            bug_range = _single_line_subrange(
                line_no,
                bug_span[0] + bug_offset,
                bug_span[1] + bug_offset,
            )
            links.append(
                types.DocumentLink(bug_range, f"https://bugs.debian.org/{bug_id}")
            )

        total_links = len(links)
        if total_links >= 100:
            break

    return links