Coverage for src/debputy/lsp/languages/lsp_debian_changelog.py: 94%

215 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2026-02-14 10:41 +0000

1import re 

2from email.utils import parsedate_to_datetime 

3from typing import TYPE_CHECKING, Sequence, Optional 

4 

5from debputy.linting.lint_util import LintState, te_range_to_lsp 

6from debputy.lsp.lsp_features import ( 

7 lsp_standard_handler, 

8 SecondaryLanguage, 

9 LanguageDispatchRule, 

10 lint_diagnostics, 

11 lsp_document_link, 

12) 

13from debputy.lsp.quickfixes import ( 

14 propose_correct_text_quick_fix, 

15) 

16from debputy.lsp.spellchecking import spellcheck_line 

17from debputy.util import PKGVERSION_REGEX 

18 

19try: 

20 from debputy.lsp.vendoring._deb822_repro.locatable import ( 

21 Position as TEPosition, 

22 Range as TERange, 

23 ) 

24 

25 from pygls.server import LanguageServer 

26 from pygls.workspace import TextDocument 

27 from debputy.lsp.debputy_ls import DebputyLanguageServer 

28except ImportError: 

29 pass 

30 

31 

32if TYPE_CHECKING: 

33 import lsprotocol.types as types 

34else: 

35 import debputy.lsprotocol.types as types 

36 

37 

# Line-length limit used by the "Line exceeds ..." diagnostic. Same as Lintian.
_MAXIMUM_WIDTH: int = 82
# Start of an entry header line: group 1 is the leading run of non-whitespace
# characters (used as the source name), group 2 is the text between the
# parentheses (used as the version).
_HEADER_LINE = re.compile(r"^(\S+)\s*[(]([^)]+)[)]")  # TODO: Add rest
# Dispatch rule shared by every handler registered in this module.
# NOTE(review): the meaning of each positional argument is defined by
# `LanguageDispatchRule.new_rule` - confirm there before changing them.
_DISPATCH_RULE = LanguageDispatchRule.new_rule(
    "debian/changelog",
    None,
    ("debian/changelog", "debian/changelog.dch"),
    [
        # emacs's name
        SecondaryLanguage("debian-changelog"),
        # vim's name
        SecondaryLanguage("debchangelog"),
        SecondaryLanguage("dch"),
    ],
)

# Abbreviated day names, ordered so that the list index equals the value of
# `datetime.weekday()` (the sign-off date check indexes this list that way).
_WEEKDAYS_BY_IDX = [
    "Mon",
    "Tue",
    "Wed",
    "Thu",
    "Fri",
    "Sat",
    "Sun",
]
# Same names as a set, for membership tests on the day name of a sign-off date.
_KNOWN_WEEK_DAYS = frozenset(_WEEKDAYS_BY_IDX)
# Finds a whole bug reference such as "Closes: #123, #456" or "see bug 123".
# Compiled with re.VERBOSE, so literal whitespace in the pattern is ignored,
# and with re.I, so the keywords match case-insensitively.
_BUG_LINKS_FINDER_REGEX = re.compile(
    r"""
    (?:closes:|see:?) \s* (?:bug \s*)? \#?\s?\d+ (?:, \s*(?:bug)? \#?\s?\d+)*
    """,
    re.I | re.VERBOSE,
)
# Picks the individual bug numbers (group 1) out of the text matched by
# _BUG_LINKS_FINDER_REGEX.
_INDIVIDUAL_BUGS_REGEX = re.compile(
    r"""(?:bug\s?)?(?:#\s?)?(\d+)""",
    re.I,
)


# Register the standard handlers for code actions and will-save-wait-until
# for documents matched by the dispatch rule.
lsp_standard_handler(_DISPATCH_RULE, types.TEXT_DOCUMENT_CODE_ACTION)
lsp_standard_handler(_DISPATCH_RULE, types.TEXT_DOCUMENT_WILL_SAVE_WAIT_UNTIL)

# Reference passed along with most diagnostics emitted for the sign-off line.
DPM_DCH_SECTION = "Policy 4.4"

81 

82 

def _check_footer_date(
    lint_state: LintState,
    line: str,
    line_no: int,
    line_len: int,
    start_date_idx: int,
) -> None:
    """Validate the date at the end of a sign-off line.

    The checks run in order and the function returns after the first problem
    it reports, so at most one diagnostic is emitted per call:

    1. the remaining text is long enough to hold a date at all,
    2. the first three characters are a known abbreviated day name,
    3. the day name is followed by ", ",
    4. the rest parses via `email.utils.parsedate_to_datetime`,
    5. the day name matches the weekday of the parsed date.

    :param lint_state: Receives the diagnostics via `emit_diagnostic`.
    :param line: The sign-off line being checked.
    :param line_no: Line number used when building diagnostic ranges.
    :param line_len: Length of `line`.
    :param start_date_idx: Index in `line` where the date is expected to start.
    """
    # 3 characters for the day name (Mon), then a comma plus a space followed by the
    # actual date. The 6 characters limit is a gross under estimation of the real
    # size.
    if line_len < start_date_idx + 6:
        text_range = _single_line_subrange(line_no, start_date_idx, line_len)
        lint_state.emit_diagnostic(
            text_range,
            "Expected a date in RFC822 format (Tue, 12 Mar 2024 12:34:56 +0000)",
            "error",
            DPM_DCH_SECTION,
        )
        return

    day_name_end_idx = start_date_idx + 3
    date_text_idx = start_date_idx + 5

    day_name_range = _single_line_subrange(line_no, start_date_idx, day_name_end_idx)
    day_name = line[start_date_idx:day_name_end_idx]
    if day_name not in _KNOWN_WEEK_DAYS:
        lint_state.emit_diagnostic(
            day_name_range,
            "Expected a three letter date here using US English format (Mon, Tue, ..., Sun)",
            "error",
            DPM_DCH_SECTION,
        )
        return

    sep = line[day_name_end_idx:date_text_idx]
    if sep != ", ":
        text_range = _single_line_subrange(
            line_no,
            day_name_end_idx,
            date_text_idx,
        )
        lint_state.emit_diagnostic(
            text_range,
            f'Improper formatting of date. Expected ", " here, not "{sep}"',
            "error",
            DPM_DCH_SECTION,
        )
        return

    try:
        # FIXME: this parser is too forgiving (it ignores trailing garbage)
        date = parsedate_to_datetime(line[date_text_idx:])
    except ValueError as e:
        error_range = _single_line_subrange(line_no, date_text_idx, line_len)
        lint_state.emit_diagnostic(
            error_range,
            # Format the exception itself rather than `e.args[0]` so an
            # exception without arguments cannot raise IndexError here.
            f"Unable to parse the date as a valid RFC822 date: {e}",
            "error",
            "debputy",
        )
        return

    weekday_idx = date.weekday()
    expected_week_day = _WEEKDAYS_BY_IDX[weekday_idx]
    if expected_week_day != day_name:
        # Appending "day" to the abbreviation only works for Mon, Fri and Sun
        # ("Tue" + "day" is not a word), so spell the names out. The order
        # follows `datetime.weekday()`, where Monday is 0.
        full_day_names = (
            "Monday",
            "Tuesday",
            "Wednesday",
            "Thursday",
            "Friday",
            "Saturday",
            "Sunday",
        )
        lint_state.emit_diagnostic(
            day_name_range,
            f"The date was a {full_day_names[weekday_idx]}",
            "warning",
            "debputy",
            quickfixes=[propose_correct_text_quick_fix(expected_week_day)],
        )

151 

152 

def _check_email(
    lint_state: LintState,
    line: str,
    line_no: int,
    line_len: int,
) -> tuple[int, int]:
    """Locate the `<email>` part of a sign-off line and report a missing bracket.

    The search starts at index 4, i.e. after the " -- " prefix the caller has
    already verified. When both "<" and ">" are found in that order, nothing
    is reported. Otherwise exactly one diagnostic is emitted describing which
    bracket is missing, and best-effort indices are returned.

    :param lint_state: Receives the diagnostic via `emit_diagnostic`.
    :param line: The sign-off line being checked.
    :param line_no: Line number used when building the diagnostic range.
    :param line_len: Length of `line`.
    :returns: `(email_start_idx, email_end_idx)`. The start index is -1 when
      the start of the email could not be identified. The end index is the
      position of ">" when present, otherwise the end of the diagnostic range.
    """
    email_start_idx = _offset_of(line, "<", 4)
    email_end_idx = _offset_of(line, ">", max(email_start_idx, 4))

    if 3 < email_start_idx < email_end_idx:
        return email_start_idx, email_end_idx

    # Email invalid
    if email_start_idx > 3:
        # "<" was found, and ">" was searched for from there on, so reaching
        # this point means the closing bracket is absent.
        msg = 'Missing closing ">" to finish email address before the sign off date'
        diag_start = email_start_idx
        diag_end = _offset_of(
            line,
            " ",
            email_start_idx,
            offset_if_missing=line_len,
        )
    else:
        msg = 'Missing opening "<" to start the email address after the name'
        if email_end_idx > -1:
            # Guess the start of the email as the character after the last
            # space before the ">". When there is no such space, the address
            # starts right after the " -- " prefix (index 4). Hence the
            # fallback of 3: a fallback of 4 would skip the first character of
            # the address and make the guard below always true.
            diag_start = 1 + _roffset_of(
                line,
                " ",
                4,
                email_end_idx - 1,
                offset_if_missing=3,
            )
            if diag_start > 4:
                # There is text (a name) before the guessed email, so the
                # caller can continue with its name and date checks.
                email_start_idx = diag_start
            # For consistency, we always include the trailing `>`.
            diag_end = email_end_idx + 1
        else:
            diag_start = 4
            diag_end = _offset_of(
                line,
                " ",
                diag_start,
                offset_if_missing=line_len,
            )
    lint_state.emit_diagnostic(
        _single_line_subrange(line_no, diag_start, diag_end),
        msg,
        "error",
        DPM_DCH_SECTION,
    )
    if email_end_idx < 0:
        email_end_idx = diag_end
    return email_start_idx, email_end_idx

204 

205 

def _check_footer_line(
    lint_state: LintState,
    line: str,
    line_no: int,
) -> None:
    """Validate a sign-off line (` -- Name <email>  date`) and emit diagnostics.

    The caller must only pass lines that start with " --". The prefix, the
    name, the email brackets (via `_check_email`), the separator between email
    and date, and finally the date (via `_check_footer_date`) are checked in
    that order. Checking stops early when a problem makes the position of the
    following parts unknown.

    :param lint_state: Receives the diagnostics via `emit_diagnostic`.
    :param line: The sign-off line being checked.
    :param line_no: Line number used when building diagnostic ranges.
    """
    if line.rstrip() == " --":
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, 0, 3),
            'Missing "Name <email@example.com>"',
            "error",
            DPM_DCH_SECTION,
            # TODO: Add quick fix to insert all the relevant data.
        )
        return
    line_len = len(line)
    if not line.startswith(" -- "):
        # Pre-condition for this function being called.
        assert line.startswith(" --") and line_len > 3
        # Only cover the malformed prefix: " --" plus any whitespace directly
        # after it. The quick fix text replaces the diagnostic range, so a
        # range spanning the whole line would wipe the name, email and date.
        prefix_end = 3
        while prefix_end < line_len and line[prefix_end].isspace():
            prefix_end += 1
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, 0, prefix_end),
            'Start of sign-off line should be " -- ".',
            "error",
            DPM_DCH_SECTION,
            quickfixes=[propose_correct_text_quick_fix(" -- ")],
        )
        return

    email_start_idx, email_end_idx = _check_email(
        lint_state,
        line,
        line_no,
        line_len,
    )

    start_of_email_identified = email_start_idx > 3
    end_of_email_identified = 4 < email_end_idx < line_len
    if not start_of_email_identified:
        return

    # Email valid, name might be missing
    name_start = 4
    name_end = email_start_idx
    name = line[name_start:name_end]
    if not name or name.isspace():
        # The range must always be at least one character width. There is no good direction
        # to expand it in. Picked left because we know there is a space character,
        if name_end - name_start < 1:
            name_start -= 1
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, name_start, name_end),
            "Missing name before email",
            "error",
            DPM_DCH_SECTION,
        )
    elif not name.endswith(" ") or len(name.strip()) != len(name) - 1:
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, name_start, name_end),
            "Non-standard spacing around the name",
            "warning",
            DPM_DCH_SECTION,
            quickfixes=[
                propose_correct_text_quick_fix(
                    name.strip() + " ",
                    proposed_title="Fix spacing",
                )
            ],
        )
    if not end_of_email_identified:
        # If we are unsure where the email is, we currently do not have the wherewithal to identify
        # where the date is. Technically, there are cases where we could identify the date and work
        # back from there. Not written because I thought it is too much effort for the value at the
        # time I put in this comment.
        #
        # Note this will already have triggered a diagnostic.
        return
    post_email = line[email_end_idx + 1 :]
    if not post_email or post_email.isspace():
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, 0, line_len),
            "Missing sign off date",
            "error",
            DPM_DCH_SECTION,
        )
        return
    start_date_idx = email_end_idx + (len(post_email) - len(post_email.lstrip())) + 1
    space_len = start_date_idx - email_end_idx - 1
    # The separator must be exactly two spaces, matching the diagnostic text
    # below. Comparing against and correcting to one space would flag valid
    # lines and "fix" them into invalid ones.
    if line[email_end_idx + 1 : start_date_idx] != "  ":
        correction = "  "
        diag_start = email_end_idx + 1
        diag_end = start_date_idx
        if not space_len:
            # If there is no spaces, then we mark the closing `>` and the following character instead if possible.
            #
            # Note the choice here of including both boundary characters is because some editors refuse to include
            # the lone `>` in a codeAction range with the user explicitly doing a selection range, so our range
            # detection will miss it. By including the following character, we ensure there is always a two
            # character range to place the cursor in the middle of and the editors tend to respect that as a valid
            # range (also, single character ranges are harder for the user to see).
            diag_start = email_end_idx
            diag_end = min(start_date_idx + 1, line_len)
            if start_date_idx < line_len:
                end_char = line[start_date_idx]
            else:
                end_char = ""
            correction = f">  {end_char}"

        lint_state.emit_diagnostic(
            _single_line_subrange(
                line_no,
                diag_start,
                diag_end,
            ),
            "Must be exactly two spaces between email and sign off date",
            "error",
            DPM_DCH_SECTION,
            quickfixes=[
                propose_correct_text_quick_fix(
                    correction,
                    proposed_title="Fix spacing between email and date",
                ),
            ],
        )
    _check_footer_date(lint_state, line, line_no, line_len, start_date_idx)

329 

330 

331def _offset_of( 

332 text: str, 

333 ch: str, 

334 /, 

335 start: int | None = None, 

336 end: int | None = None, 

337 *, 

338 offset_if_missing: int = -1, 

339) -> int: 

340 try: 

341 return text.index(ch, start, end) 

342 except ValueError: 

343 return offset_if_missing 

344 

345 

346def _roffset_of( 

347 text: str, 

348 ch: str, 

349 /, 

350 start: int | None = None, 

351 end: int | None = None, 

352 *, 

353 offset_if_missing: int = -1, 

354) -> int: 

355 try: 

356 return text.rindex(ch, start, end) 

357 except ValueError: 

358 return offset_if_missing 

359 

360 

def _single_line_subrange(
    line_no: int,
    character_start_pos: int,
    character_end_pos: int,
) -> "TERange":
    """Build a range whose start and end positions are on the same line.

    :param line_no: Line used for both the start and the end position.
    :param character_start_pos: Character position of the start of the range.
    :param character_end_pos: Character position of the end of the range.
    """
    range_start = TEPosition(line_no, character_start_pos)
    range_end = TEPosition(line_no, character_end_pos)
    return TERange(range_start, range_end)

376 

377 

def _check_header_line(
    lint_state: LintState,
    line: str,
    line_no: int,
    entry_no: int,
) -> None:
    """Check the source name and version on an entry header line.

    For the first entry only, the source name is compared with the "Source"
    field of `lint_state.source_package` (when a source package is known).
    For every entry, the version is matched against `PKGVERSION_REGEX`; a
    version that does match is additionally checked for the "dsfg" typo.

    :param lint_state: Receives the diagnostics via `emit_diagnostic`.
    :param line: The header line being checked.
    :param line_no: Line number used when building diagnostic ranges.
    :param entry_no: 1-based number of the changelog entry the header opens.
    """
    header = _HEADER_LINE.search(line)
    if header is None:
        # Syntax error: TODO flag later
        return
    source_name = header.group(1)
    source_version = header.group(2)

    dctrl_source_pkg = lint_state.source_package
    if entry_no == 1 and dctrl_source_pkg is not None:
        expected_name = dctrl_source_pkg.fields.get("Source")
        if expected_name != source_name:
            if expected_name is None:
                msg = (
                    "The first entry must use the same source name as debian/control."
                    ' The d/control file is missing the "Source" field in its first stanza'
                )
            else:
                msg = (
                    "The first entry must use the same source name as debian/control."
                    f' Changelog uses: "{source_name}" while d/control uses: "{expected_name}"'
                )
            name_from, name_to = header.span(1)
            lint_state.emit_diagnostic(
                _single_line_subrange(line_no, name_from, name_to),
                msg,
                "error",
                "dpkg",  # man:deb-src-control(5) / #1089794
            )

    version_from, version_to = header.span(2)

    if PKGVERSION_REGEX.fullmatch(source_version):
        # Valid version; only the common "dsfg" misspelling is left to check.
        typo_index = source_version.find("dsfg")
        if typo_index >= 0:
            typo_from = version_from + typo_index
            lint_state.emit_diagnostic(
                _single_line_subrange(line_no, typo_from, typo_from + 4),
                'Typo of "dfsg" (Debian Free Software Guidelines)',
                "pedantic",
                "debputy",
                quickfixes=[propose_correct_text_quick_fix("dfsg")],
            )
        return

    partial = PKGVERSION_REGEX.search(source_version)
    if partial is None:
        # Nothing in the text looks like a version; flag all of it.
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, version_from, version_to),
            f'Cannot parse "{source_version}" as a Debian version.',
            "error",
            "Policy 5.6.12",
        )
        return

    # Some part of the text matches; flag only what is before and after it.
    valid_from, valid_to = partial.span(0)
    invalid_ranges = []
    if valid_from > 0:
        invalid_ranges.append(
            _single_line_subrange(line_no, version_from, version_from + valid_from)
        )
    if valid_to < len(source_version):
        invalid_ranges.append(
            _single_line_subrange(line_no, version_from + valid_to, version_to)
        )
    for invalid_range in invalid_ranges:
        lint_state.emit_diagnostic(
            invalid_range,
            "This part cannot be parsed as a valid Debian version",
            "error",
            "Policy 5.6.12",
        )

468 

469 

@lint_diagnostics(_DISPATCH_RULE)
async def _lint_debian_changelog(lint_state: LintState) -> None:
    """Lint the changelog lines in `lint_state` and emit diagnostics.

    Blank lines are skipped. Lines starting with " --" are checked as sign-off
    lines and lines starting with a non-whitespace character as entry headers.
    Linting stops once more than `entry_limit` headers have been seen. The
    remaining lines are checked against the maximum width and spell checked,
    except lines of the form `[ X ]`.

    :param lint_state: Provides the lines and receives the diagnostics.
    """
    lines = lint_state.lines
    entry_no = 0
    entry_limit = 2
    # Spell checking stops once this budget is used up.
    # NOTE(review): presumably `spellcheck_line` returns the number of
    # diagnostics it emitted - confirm against its definition.
    max_words = 1000
    max_line_length = _MAXIMUM_WIDTH
    for line_no, line in enumerate(lines):
        orig_line = line
        line = line.rstrip()
        if not line:
            continue
        if line.startswith(" --"):
            _check_footer_line(lint_state, line, line_no)
            continue
        # NOTE(review): the `line[2] == "["` check further down suggests body
        # lines are indented by two spaces - confirm this prefix literal has
        # not lost a space.
        if not line.startswith(" "):
            if not line[0].isspace():
                entry_no += 1
                # Figure out the right cut which may not be as simple as just the
                # top two.
                if entry_no > entry_limit:
                    break
                _check_header_line(
                    lint_state,
                    line,
                    line_no,
                    entry_no,
                )
            continue
        # Measure the unstripped line (trailing whitespace counts) but not its
        # newline. The last line of a file may have no newline, so only
        # subtract one when a newline is actually there.
        orig_line_len = len(orig_line)
        if orig_line.endswith("\n"):
            orig_line_len -= 1
        if orig_line_len > max_line_length:
            exceeded_line_range = _single_line_subrange(
                line_no,
                max_line_length,
                orig_line_len,
            )
            lint_state.emit_diagnostic(
                exceeded_line_range,
                f"Line exceeds {max_line_length} characters",
                "pedantic",
                "debputy",
            )
        if len(line) > 3 and line[2] == "[" and line[-1] == "]":
            # Do not spell check [ X ] as X is usually a name
            continue
        if max_words > 0:
            new_diagnostics = spellcheck_line(lint_state, line_no, line)
            max_words -= new_diagnostics

519 

520 

@lsp_document_link(_DISPATCH_RULE)
def _debian_changelog_links(
    ls: "DebputyLanguageServer",
    params: types.DocumentLinkParams,
) -> Optional[Sequence[types.DocumentLink]]:
    """Return document links for the bug references found in the changelog.

    Only lines starting with a space are searched, and only the first bug
    reference on each line is used. Every bug number in that reference becomes
    a link to `https://bugs.debian.org/<number>`. Scanning stops after the
    first line that brings the total to 100 links or more.

    :param ls: Language server used to look up the text document.
    :param params: Request parameters; only the document URI is read.
    """
    document = ls.workspace.get_text_document(params.text_document.uri)
    all_lines = document.lines
    found_links = []

    for row, text in enumerate(all_lines):
        if not text.startswith(" "):
            continue
        reference = _BUG_LINKS_FINDER_REGEX.search(text)
        if reference is None:
            continue
        # Spans of the individual bugs are relative to the matched reference
        # text, so they are shifted by where that text starts in the line.
        shift = reference.start(0)
        for single_bug in _INDIVIDUAL_BUGS_REGEX.finditer(reference.group(0)):
            rel_start, rel_end = single_bug.span()
            te_range = _single_line_subrange(row, rel_start + shift, rel_end + shift)
            client_range = document.position_codec.range_to_client_units(
                all_lines,
                te_range_to_lsp(te_range),
            )
            target = f"https://bugs.debian.org/{single_bug.group(1)}"
            found_links.append(types.DocumentLink(client_range, target))

        if len(found_links) >= 100:
            break

    return found_links