Coverage for src/debputy/lsp/languages/lsp_debian_changelog.py: 94%

215 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2026-06-16 19:34 +0000

1import re 

2from email.utils import parsedate_to_datetime 

3from typing import TYPE_CHECKING, Sequence, Optional 

4 

5from debputy.linting.lint_util import LintState, te_range_to_lsp 

6from debputy.lsp.lsp_features import ( 

7 lsp_standard_handler, 

8 SecondaryLanguage, 

9 LanguageDispatchRule, 

10 lint_diagnostics, 

11 lsp_document_link, 

12) 

13from debputy.lsp.quickfixes import ( 

14 propose_correct_text_quick_fix, 

15) 

16from debputy.lsp.spellchecking import spellcheck_line 

17from debputy.util import PKGVERSION_REGEX 

18 

19try: 

20 from debian._deb822_repro.locatable import ( 

21 Position as TEPosition, 

22 Range as TERange, 

23 ) 

24 

25 from pygls.server import LanguageServer 

26 from pygls.workspace import TextDocument 

27 from debputy.lsp.debputy_ls import DebputyLanguageServer 

28except ImportError: 

29 pass 

30 

31 

32if TYPE_CHECKING: 

33 import lsprotocol.types as types 

34else: 

35 import debputy.lsprotocol.types as types 

36 

37 

# Lintian uses 82 because Parse::DebianChangelog adds an extra leading space.
# We do not get that extra space, so our limit is one less than Lintian's.
_MAXIMUM_WIDTH: int = 81

# Captures the source name (group 1) and the parenthesised version (group 2)
# at the start of an entry header line.
# TODO: Add rest
_HEADER_LINE = re.compile(
    r"^(\S+)\s*[(]([^)]+)[)]",
)

# Dispatch rule shared by every handler registered in this module. Besides the
# primary "debian/changelog" identifier and the two file names, it lists the
# editor-specific language names as secondary languages.
_DISPATCH_RULE = LanguageDispatchRule.new_rule(
    "debian/changelog",
    None,
    ("debian/changelog", "debian/changelog.dch"),
    [
        SecondaryLanguage("debian-changelog"),  # emacs's name
        SecondaryLanguage("debchangelog"),  # vim's name
        SecondaryLanguage("dch"),
    ],
)

55 

56 

# Three-letter weekday abbreviations; the list position is used as the lookup
# key for the result of `datetime.weekday()` (0 == "Mon").
_WEEKDAYS_BY_IDX = "Mon Tue Wed Thu Fri Sat Sun".split(" ")
# Same abbreviations as a set, for membership checks.
_KNOWN_WEEK_DAYS = frozenset(_WEEKDAYS_BY_IDX)

# Finds a "closes:" / "see" clause followed by one or more comma separated bug
# numbers. Compiled case-insensitively and in verbose mode, so the literal
# whitespace inside the pattern is not significant.
_BUG_LINKS_FINDER_REGEX = re.compile(
    r"""
    (?:closes:|see:?) \s* (?:bug \s*)? \#?\s?\d+ (?:, \s*(?:bug)? \#?\s?\d+)*
    """,
    flags=re.IGNORECASE | re.VERBOSE,
)
# Picks the individual bug numbers (group 1) out of a clause matched by the
# pattern above.
_INDIVIDUAL_BUGS_REGEX = re.compile(
    r"""(?:bug\s?)?(?:#\s?)?(\d+)""",
    flags=re.IGNORECASE,
)

77 

78 

# Register the standard handlers for code actions and will-save-wait-until
# requests for documents matched by the changelog dispatch rule.
lsp_standard_handler(_DISPATCH_RULE, types.TEXT_DOCUMENT_CODE_ACTION)
lsp_standard_handler(_DISPATCH_RULE, types.TEXT_DOCUMENT_WILL_SAVE_WAIT_UNTIL)

# Authority reference attached to the sign-off line diagnostics below.
DPM_DCH_SECTION = "Policy 4.4"

83 

84 

def _check_footer_date(
    lint_state: LintState,
    line: str,
    line_no: int,
    line_len: int,
    start_date_idx: int,
) -> None:
    """Validate the date part of a changelog sign-off line.

    The checks run in order and the function stops at the first problem it
    reports: the date being too short, an unknown day name, a wrong separator
    after the day name, an unparsable date and finally a day name that does
    not agree with the parsed date.

    :param lint_state: Receives the diagnostics via `emit_diagnostic`.
    :param line: The sign-off line being checked.
    :param line_no: Line number used for the diagnostic ranges.
    :param line_len: Length of `line`.
    :param start_date_idx: Offset in `line` where the date is expected to start.
    """
    # Full names spelled out explicitly: appending "day" to the abbreviation
    # only works for Mon, Fri and Sun (it yields "Tueday", "Wedday", ...).
    full_day_names = {
        "Mon": "Monday",
        "Tue": "Tuesday",
        "Wed": "Wednesday",
        "Thu": "Thursday",
        "Fri": "Friday",
        "Sat": "Saturday",
        "Sun": "Sunday",
    }

    # 3 characters for the day name (Mon), then a comma plus a space followed by the
    # actual date. The 6 characters limit is a gross under estimation of the real
    # size.
    if line_len < start_date_idx + 6:
        text_range = _single_line_subrange(line_no, start_date_idx, line_len)
        lint_state.emit_diagnostic(
            text_range,
            "Expected a date in RFC822 format (Tue, 12 Mar 2024 12:34:56 +0000)",
            "error",
            DPM_DCH_SECTION,
        )
        return

    day_name_range = _single_line_subrange(line_no, start_date_idx, start_date_idx + 3)
    day_name = line[start_date_idx : start_date_idx + 3]
    if day_name not in _KNOWN_WEEK_DAYS:
        lint_state.emit_diagnostic(
            day_name_range,
            "Expected a three letter date here using US English format (Mon, Tue, ..., Sun)",
            "error",
            DPM_DCH_SECTION,
        )
        return

    sep = line[start_date_idx + 3 : start_date_idx + 5]
    if sep != ", ":
        text_range = _single_line_subrange(
            line_no,
            start_date_idx + 3,
            start_date_idx + 5,
        )
        lint_state.emit_diagnostic(
            text_range,
            f'Improper formatting of date. Expected ", " here, not "{sep}"',
            "error",
            DPM_DCH_SECTION,
        )
        return

    # Everything after "Ddd, " is handed to the date parser.
    date_str = line[start_date_idx + 5 :]
    try:
        # FIXME: this parser is too forgiving (it ignores trailing garbage)
        date = parsedate_to_datetime(date_str)
    except ValueError as e:
        error_range = _single_line_subrange(line_no, start_date_idx + 5, line_len)
        lint_state.emit_diagnostic(
            error_range,
            f"Unable to parse the date as a valid RFC822 date: {e.args[0]}",
            "error",
            "debputy",
        )
        return

    expected_week_day = _WEEKDAYS_BY_IDX[date.weekday()]
    if expected_week_day != day_name:
        lint_state.emit_diagnostic(
            day_name_range,
            f"The date was a {full_day_names[expected_week_day]}",
            "warning",
            "debputy",
            # The quick fix still proposes the three letter abbreviation.
            quickfixes=[propose_correct_text_quick_fix(expected_week_day)],
        )

153 

154 

def _check_email(
    lint_state: LintState,
    line: str,
    line_no: int,
    line_len: int,
) -> tuple[int, int]:
    """Locate the `<email>` part of a sign-off line and report a broken one.

    The search for "<" starts at offset 4 (after the " -- " prefix) and the
    search for ">" starts at the "<" when one was found. When the pair is not
    well formed, exactly one diagnostic is emitted.

    :param lint_state: Receives the diagnostic via `emit_diagnostic`.
    :param line: The sign-off line being checked.
    :param line_no: Line number used for the diagnostic range.
    :param line_len: Length of `line`.
    :returns: `(email_start_idx, email_end_idx)`. The start is -1 when no start
      of the email could be determined. When ">" is missing, the end is the
      end of the reported diagnostic range instead.
    """
    email_start_idx = _offset_of(line, "<", 4)
    email_end_idx = _offset_of(line, ">", max(email_start_idx, 4))

    if 3 < email_start_idx < email_end_idx:
        # Both delimiters present and in order; nothing to report.
        return email_start_idx, email_end_idx

    # Email invalid
    if email_start_idx >= 3:
        msg = 'Missing closing ">" to finish email address before the sign off date'
        diag_start = email_start_idx
        diag_end = _offset_of(
            line,
            " ",
            email_start_idx,
            offset_if_missing=line_len,
        )
    else:
        if email_end_idx > -1:
            # Guess the start of the address as the character after the last
            # space before the ">". The "missing" value is 3 so that, with no
            # such space, the range starts at offset 4 (first character after
            # " -- ") rather than skipping that character.
            diag_start = 1 + _roffset_of(
                line,
                " ",
                4,
                email_end_idx - 1,
                offset_if_missing=3,
            )
            if diag_start > 4:
                # Only treat the guess as the email start when a separating
                # space was actually found.
                email_start_idx = diag_start
            # For consistency, we always include the trailing `>`.
            diag_end = email_end_idx + 1
        else:
            diag_start = 4
            diag_end = _offset_of(
                line,
                " ",
                diag_start,
                offset_if_missing=line_len,
            )
        msg = 'Missing opening "<" to start the email address after the name'

    lint_state.emit_diagnostic(
        _single_line_subrange(line_no, diag_start, diag_end),
        msg,
        "error",
        DPM_DCH_SECTION,
    )
    if email_end_idx < 0:
        email_end_idx = diag_end
    return email_start_idx, email_end_idx

206 

207 

def _check_footer_line(
    lint_state: LintState,
    line: str,
    line_no: int,
) -> None:
    """Validate a changelog sign-off line (` -- Name <email>  date`).

    Checks, in order: that anything follows the ` --` marker, that the marker
    is followed by a space, the email (via `_check_email`), the name before
    the email, the spacing between the email and the date and finally the
    date itself (via `_check_footer_date`).

    :param lint_state: Receives the diagnostics via `emit_diagnostic`.
    :param line: The line to check; it must start with " --".
    :param line_no: Line number used for the diagnostic ranges.
    """
    if line.rstrip() == " --":
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, 0, 3),
            'Missing "Name <email@example.com>"',
            "error",
            DPM_DCH_SECTION,
            # TODO: Add quick fix to insert all the relevant data.
        )
        return
    line_len = len(line)
    if not line.startswith(" -- "):
        # Pre-condition for this function being called.
        assert line.startswith(" --") and line_len > 3
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, 0, line_len),
            'Start of sign-off line should be " -- ".',
            "error",
            DPM_DCH_SECTION,
            quickfixes=[
                # The diagnostic covers the whole line, so the proposed text
                # carries over everything after the marker. The other quick
                # fixes in this file pair the proposed text with exactly the
                # reported range, so proposing only " -- " here would drop the
                # name, email and date.
                propose_correct_text_quick_fix(" -- " + line[3:].lstrip()),
            ],
        )
        return

    email_start_idx, email_end_idx = _check_email(
        lint_state,
        line,
        line_no,
        line_len,
    )

    start_of_email_identified = email_start_idx > 3
    end_of_email_identified = 4 < email_end_idx < line_len
    if not start_of_email_identified:
        return

    # Email valid, name might be missing
    name_start = 4
    name_end = email_start_idx
    name = line[name_start:name_end]
    if not name or name.isspace():
        # The range must always be at least one character width. There is no good direction
        # to expand it in. Picked left because we know there is a space character,
        if name_end - name_start < 1:
            name_start -= 1
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, name_start, name_end),
            "Missing name before email",
            "error",
            DPM_DCH_SECTION,
        )
    elif not name.endswith(" ") or len(name.strip()) != len(name) - 1:
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, name_start, name_end),
            "Non-standard spacing around the name",
            "warning",
            DPM_DCH_SECTION,
            quickfixes=[
                propose_correct_text_quick_fix(
                    name.strip() + " ",
                    proposed_title="Fix spacing",
                )
            ],
        )
    if not end_of_email_identified:
        # If we are unsure where the email is, we currently do not have the wherewithal to identify
        # where the date is. Technically, there are cases where we could identify the date and work
        # back from there. Not written because I thought it is too much effort for the value at the
        # time I put in this comment.
        #
        # Note this will already have triggered a diagnostic.
        return
    post_email = line[email_end_idx + 1 :]
    if not post_email or post_email.isspace():
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, 0, line_len),
            "Missing sign off date",
            "error",
            DPM_DCH_SECTION,
        )
        return
    # The date starts at the first non-whitespace character after the email.
    start_date_idx = email_end_idx + (len(post_email) - len(post_email.lstrip())) + 1
    space_len = start_date_idx - email_end_idx - 1
    if line[email_end_idx + 1 : start_date_idx] != "  ":
        correction = "  "
        diag_start = email_end_idx + 1
        diag_end = start_date_idx
        if not space_len:
            # If there is no spaces, then we mark the closing `>` and the following character instead if possible.
            #
            # Note the choice here of including both boundary characters is because some editors refuse to include
            # the lone `>` in a codeAction range with the user explicitly doing a selection range, so our range
            # detection will miss it. By including the following character, we ensure there is always a two
            # character range to place the cursor in the middle of and the editors tend to respect that as a valid
            # range (also, single character ranges are harder for the user to see).
            diag_start = email_end_idx
            diag_end = min(start_date_idx + 1, line_len)
            if start_date_idx < line_len:
                end_char = line[start_date_idx]
            else:
                end_char = ""
            correction = f">  {end_char}"

        lint_state.emit_diagnostic(
            _single_line_subrange(
                line_no,
                diag_start,
                diag_end,
            ),
            "Must be exactly two spaces between email and sign off date",
            "error",
            DPM_DCH_SECTION,
            quickfixes=[
                propose_correct_text_quick_fix(
                    correction,
                    proposed_title="Fix spacing between email and date",
                ),
            ],
        )
    _check_footer_date(lint_state, line, line_no, line_len, start_date_idx)

331 

332 

333def _offset_of( 

334 text: str, 

335 ch: str, 

336 /, 

337 start: int | None = None, 

338 end: int | None = None, 

339 *, 

340 offset_if_missing: int = -1, 

341) -> int: 

342 try: 

343 return text.index(ch, start, end) 

344 except ValueError: 

345 return offset_if_missing 

346 

347 

348def _roffset_of( 

349 text: str, 

350 ch: str, 

351 /, 

352 start: int | None = None, 

353 end: int | None = None, 

354 *, 

355 offset_if_missing: int = -1, 

356) -> int: 

357 try: 

358 return text.rindex(ch, start, end) 

359 except ValueError: 

360 return offset_if_missing 

361 

362 

def _single_line_subrange(
    line_no: int,
    character_start_pos: int,
    character_end_pos: int,
) -> "TERange":
    """Build a range that starts and ends on the same line.

    :param line_no: Line used for both the start and the end position.
    :param character_start_pos: Character offset of the start position.
    :param character_end_pos: Character offset of the end position.
    :returns: A `TERange` spanning the two positions.
    """
    range_start = TEPosition(line_no, character_start_pos)
    range_end = TEPosition(line_no, character_end_pos)
    return TERange(range_start, range_end)

378 

379 

def _check_header_line(
    lint_state: LintState,
    line: str,
    line_no: int,
    entry_no: int,
) -> None:
    """Validate the header line of a changelog entry.

    For the first entry, the source name is compared with the "Source" field
    of `lint_state.source_package` (when that is available). For every entry,
    the version is checked against `PKGVERSION_REGEX`, and a valid version is
    additionally checked for the "dsfg" typo.

    :param lint_state: Receives the diagnostics via `emit_diagnostic`.
    :param line: The header line.
    :param line_no: Line number used for the diagnostic ranges.
    :param entry_no: 1-based number of the entry this header belongs to.
    """
    header = _HEADER_LINE.search(line)
    if header is None:
        # Syntax error: TODO flag later
        return
    source_name = header.group(1)
    source_version = header.group(2)

    dctrl_source_pkg = lint_state.source_package
    if entry_no == 1 and dctrl_source_pkg is not None:
        expected_name = dctrl_source_pkg.fields.get("Source")
        if expected_name != source_name:
            name_start, name_end = header.span(1)
            msg = "The first entry must use the same source name as debian/control."
            if expected_name is None:
                msg += ' The d/control file is missing the "Source" field in its first stanza'
            else:
                msg += f' Changelog uses: "{source_name}" while d/control uses: "{expected_name}"'
            lint_state.emit_diagnostic(
                _single_line_subrange(line_no, name_start, name_end),
                msg,
                "error",
                "dpkg",  # man:deb-src-control(5) / #1089794
            )

    version_start, version_end = header.span(2)

    if PKGVERSION_REGEX.fullmatch(source_version):
        # Valid version; only the well-known typo remains to be checked.
        typo_index = source_version.find("dsfg")
        if typo_index >= 0:
            typo_start = version_start + typo_index
            lint_state.emit_diagnostic(
                _single_line_subrange(line_no, typo_start, typo_start + 4),
                'Typo of "dfsg" (Debian Free Software Guidelines)',
                "pedantic",
                "debputy",
                quickfixes=[propose_correct_text_quick_fix("dfsg")],
            )
        return

    valid_part = PKGVERSION_REGEX.search(source_version)
    if valid_part is None:
        # No part of the text looks like a version; flag all of it.
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, version_start, version_end),
            f'Cannot parse "{source_version}" as a Debian version.',
            "error",
            "Policy 5.6.12",
        )
        return

    # Flag whatever surrounds the part that does look like a version.
    valid_start, valid_end = valid_part.span(0)
    invalid_spans = []
    if valid_start > 0:
        invalid_spans.append((version_start, version_start + valid_start))
    if valid_end < len(source_version):
        invalid_spans.append((version_start + valid_end, version_end))

    for span_start, span_end in invalid_spans:
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, span_start, span_end),
            "This part cannot be parsed as a valid Debian version",
            "error",
            "Policy 5.6.12",
        )

470 

471 

@lint_diagnostics(_DISPATCH_RULE)
async def _lint_debian_changelog(lint_state: LintState) -> None:
    """Lint the first entries of the changelog held by `lint_state`.

    Sign-off lines and header lines are delegated to `_check_footer_line` and
    `_check_header_line`. Lines starting with two spaces are checked for
    excessive length and spell checked. Processing stops at the header of the
    third entry.

    :param lint_state: Provides the lines and receives the diagnostics.
    """
    entry_limit = 2
    width_limit = _MAXIMUM_WIDTH
    entries_seen = 0
    # Decremented by whatever `spellcheck_line` returns; spell checking stops
    # once it is used up.
    spellcheck_budget = 1000

    for line_no, raw_line in enumerate(lint_state.lines):
        line = raw_line.rstrip()
        if not line:
            continue

        if line.startswith(" --"):
            _check_footer_line(lint_state, line, line_no)
            continue

        if not line.startswith("  "):
            if line[0].isspace():
                continue
            entries_seen += 1
            # Figure out the right cut which may not be as simple as just the
            # top two.
            if entries_seen > entry_limit:
                break
            _check_header_line(
                lint_state,
                line,
                line_no,
                entries_seen,
            )
            continue

        raw_line_len = len(raw_line)
        if raw_line_len > width_limit:
            # We shift the range one to the left. We do not want the range to include the newline
            # (each editor handle that differently).
            lint_state.emit_diagnostic(
                _single_line_subrange(
                    line_no,
                    width_limit - 1,
                    raw_line_len - 1,
                ),
                f"Line exceeds {width_limit} characters",
                "pedantic",
                "debputy",
            )

        is_bracketed = len(line) > 3 and line[2] == "[" and line[-1] == "]"
        if is_bracketed:
            # Do not spell check [ X ] as X is usually a name
            continue

        if spellcheck_budget > 0:
            spellcheck_budget -= spellcheck_line(lint_state, line_no, line)

522 

523 

@lsp_document_link(_DISPATCH_RULE)
def _debian_changelog_links(
    ls: "DebputyLanguageServer",
    params: types.DocumentLinkParams,
) -> Optional[Sequence[types.DocumentLink]]:
    """Provide document links for bug references in the changelog.

    Only lines starting with a space are considered, and on each such line
    only the first clause matched by `_BUG_LINKS_FINDER_REGEX`. Every bug
    number in that clause becomes a link to `https://bugs.debian.org/<number>`.
    No further lines are processed once 100 or more links were collected.

    :param ls: The language server, used to look up the document.
    :param params: The request parameters naming the document.
    :returns: The collected links (possibly empty).
    """
    doc = ls.workspace.get_text_document(params.text_document.uri)
    lines = doc.lines
    links = []

    for line_no, line in enumerate(lines):
        if not line.startswith(" "):
            continue
        clause = _BUG_LINKS_FINDER_REGEX.search(line)
        if clause is None:
            continue

        # Offsets from the inner matches are relative to the clause, so they
        # are shifted by where the clause starts in the line.
        clause_offset = clause.start(0)
        for bug_match in _INDIVIDUAL_BUGS_REGEX.finditer(clause.group(0)):
            rel_start, rel_end = bug_match.span()
            te_range = _single_line_subrange(
                line_no,
                rel_start + clause_offset,
                rel_end + clause_offset,
            )
            client_range = doc.position_codec.range_to_client_units(
                lines,
                te_range_to_lsp(te_range),
            )
            target = f"https://bugs.debian.org/{bug_match.group(1)}"
            links.append(types.DocumentLink(client_range, target))

        if len(links) >= 100:
            break

    return links