Coverage for src/debputy/lsp/languages/lsp_debian_changelog.py: 94%

215 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2025-10-12 15:06 +0000

1import asyncio 

2import re 

3from email.utils import parsedate_to_datetime 

4from typing import TYPE_CHECKING, Sequence, Optional 

5 

6from debputy.linting.lint_util import LintState 

7from debputy.lsp.lsp_features import ( 

8 lsp_standard_handler, 

9 SecondaryLanguage, 

10 LanguageDispatchRule, 

11 lint_diagnostics, 

12 lsp_document_link, 

13) 

14from debputy.lsp.quickfixes import ( 

15 propose_correct_text_quick_fix, 

16) 

17from debputy.lsp.spellchecking import spellcheck_line 

18from debputy.util import PKGVERSION_REGEX 

19 

20try: 

21 from debputy.lsp.vendoring._deb822_repro.locatable import ( 

22 Position as TEPosition, 

23 Range as TERange, 

24 ) 

25 

26 from pygls.server import LanguageServer 

27 from pygls.workspace import TextDocument 

28 from debputy.lsp.debputy_ls import DebputyLanguageServer 

29except ImportError: 

30 pass 

31 

32 

33if TYPE_CHECKING: 

34 import lsprotocol.types as types 

35else: 

36 import debputy.lsprotocol.types as types 

37 

38 

# Widest line tolerated before a "line too long" hint is emitted (same as Lintian).
_MAXIMUM_WIDTH: int = 82
# Start of an entry header: "<source-name> (<version>)".
_HEADER_LINE = re.compile(r"^(\S+)\s*[(]([^)]+)[)]")  # TODO: Add rest
# Binds the handlers in this module to debian/changelog documents.
_DISPATCH_RULE = LanguageDispatchRule.new_rule(
    "debian/changelog",
    None,
    ("debian/changelog", "debian/changelog.dch"),
    [
        # emacs's name
        SecondaryLanguage("debian-changelog"),
        # vim's name
        SecondaryLanguage("debchangelog"),
        SecondaryLanguage("dch"),
    ],
)

# Abbreviated day names ordered so that the index equals `datetime.weekday()`.
_WEEKDAYS_BY_IDX = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
_KNOWN_WEEK_DAYS = frozenset(_WEEKDAYS_BY_IDX)
# A whole bug reference such as "Closes: #123, #456" or "see bug 123".
_BUG_LINKS_FINDER_REGEX = re.compile(
    r"""
    (?:closes:|see:?) \s* (?:bug \s*)? \#?\s?\d+ (?:, \s*(?:bug)? \#?\s?\d+)*
    """,
    re.IGNORECASE | re.VERBOSE,
)
# One bug number inside a reference matched by the regex above.
_INDIVIDUAL_BUGS_REGEX = re.compile(
    r"""(?:bug\s?)?(?:#\s?)?(\d+)""",
    re.IGNORECASE,
)

lsp_standard_handler(_DISPATCH_RULE, types.TEXT_DOCUMENT_CODE_ACTION)
lsp_standard_handler(_DISPATCH_RULE, types.TEXT_DOCUMENT_WILL_SAVE_WAIT_UNTIL)

# Authority reference attached to diagnostics about the changelog format.
DPM_DCH_SECTION = "Policy 4.4"

82 

83 

def _check_footer_date(
    lint_state: LintState,
    line: str,
    line_no: int,
    line_len: int,
    start_date_idx: int,
) -> None:
    """Validate the date part of a sign-off line and report the first problem found.

    The checks run in order and stop at the first failure: minimum length,
    a known three letter day name, the ", " separator after it, the date being
    parsable, and finally the day name agreeing with the parsed date.

    :param lint_state: Receiver of the diagnostics.
    :param line: The sign-off line being checked.
    :param line_no: Line number used for the diagnostic ranges.
    :param line_len: Length of `line`.
    :param start_date_idx: Offset in `line` where the date starts.
    """
    # 3 characters for the day name (Mon), then a comma plus a space followed by the
    # actual date. The 6 characters limit is a gross under estimation of the real
    # size.
    if line_len < start_date_idx + 6:
        text_range = _single_line_subrange(line_no, start_date_idx, line_len)
        lint_state.emit_diagnostic(
            text_range,
            "Expected a date in RFC822 format (Tue, 12 Mar 2024 12:34:56 +0000)",
            "error",
            DPM_DCH_SECTION,
        )
        return

    day_name_end = start_date_idx + 3
    date_offset = start_date_idx + 5
    day_name_range = _single_line_subrange(line_no, start_date_idx, day_name_end)
    day_name = line[start_date_idx:day_name_end]
    if day_name not in _KNOWN_WEEK_DAYS:
        lint_state.emit_diagnostic(
            day_name_range,
            "Expected a three letter date here using US English format (Mon, Tue, ..., Sun)",
            "error",
            DPM_DCH_SECTION,
        )
        return

    sep = line[day_name_end:date_offset]
    if sep != ", ":
        text_range = _single_line_subrange(line_no, day_name_end, date_offset)
        lint_state.emit_diagnostic(
            text_range,
            f'Improper formatting of date. Expected ", " here, not "{sep}"',
            "error",
            DPM_DCH_SECTION,
        )
        return

    date_str = line[date_offset:]
    try:
        # FIXME: this parser is too forgiving (it ignores trailing garbage)
        date = parsedate_to_datetime(date_str)
    except ValueError as e:
        error_range = _single_line_subrange(line_no, date_offset, line_len)
        lint_state.emit_diagnostic(
            error_range,
            # `str(e)` rather than `e.args[0]`: it cannot fail on an exception
            # that carries no arguments.
            f"Unable to parse the date as a valid RFC822 date: {e}",
            "error",
            "debputy",
        )
        return

    weekday_idx = date.weekday()
    expected_week_day = _WEEKDAYS_BY_IDX[weekday_idx]
    if expected_week_day != day_name:
        # Appending "day" to the abbreviation only works for Mon, Fri and Sun
        # ("Tueday", "Wedday", ... otherwise), so the full names are spelled out.
        # The order matches `datetime.weekday()` (Monday is 0).
        full_day_names = (
            "Monday",
            "Tuesday",
            "Wednesday",
            "Thursday",
            "Friday",
            "Saturday",
            "Sunday",
        )
        lint_state.emit_diagnostic(
            day_name_range,
            f"The date was a {full_day_names[weekday_idx]}",
            "warning",
            "debputy",
            quickfixes=[propose_correct_text_quick_fix(expected_week_day)],
        )

152 

153 

def _check_email(
    lint_state: LintState,
    line: str,
    line_no: int,
    line_len: int,
) -> tuple[int, int]:
    """Find the `<email>` part of a sign-off line, reporting it when malformed.

    The search starts at offset 4, i.e. it skips the first four characters of
    the line. When either bracket is missing, one diagnostic is emitted and the
    missing offset is replaced by a best-effort guess where one is available.

    :param lint_state: Receiver of the diagnostics.
    :param line: The sign-off line being checked.
    :param line_no: Line number used for the diagnostic range.
    :param line_len: Length of `line`.
    :return: The offsets of the opening `<` and the closing `>`. The first is
      -1 when no start could be determined; the second is the end of the
      flagged range when there is no `>`.
    """
    email_start_idx = _offset_of(line, "<", 4)
    email_end_idx = _offset_of(line, ">", max(email_start_idx, 4))

    if 3 < email_start_idx < email_end_idx:
        # Both brackets are present and in the right order.
        return email_start_idx, email_end_idx

    # Email invalid
    if email_start_idx >= 3:
        msg = 'Missing closing ">" to finish email address before the sign off date'
        diag_start = email_start_idx
        diag_end = _offset_of(
            line,
            " ",
            email_start_idx,
            offset_if_missing=line_len,
        )
    elif email_end_idx > -1:
        msg = 'Missing opening "<" to start the email address after the name'
        # Guess that the address starts after the last space before the `>`.
        last_space = _roffset_of(
            line,
            " ",
            4,
            email_end_idx - 1,
            offset_if_missing=4,
        )
        diag_start = last_space + 1
        if diag_start > 4:
            email_start_idx = diag_start
        # For consistency, we always include the trailing `>`.
        diag_end = email_end_idx + 1
    else:
        msg = 'Missing opening "<" to start the email address after the name'
        diag_start = 4
        diag_end = _offset_of(
            line,
            " ",
            diag_start,
            offset_if_missing=line_len,
        )

    lint_state.emit_diagnostic(
        _single_line_subrange(line_no, diag_start, diag_end),
        msg,
        "error",
        DPM_DCH_SECTION,
    )
    if email_end_idx < 0:
        email_end_idx = diag_end
    return email_start_idx, email_end_idx

205 

206 

def _check_footer_line(
    lint_state: LintState,
    line: str,
    line_no: int,
) -> None:
    """Lint one sign-off line of the form ` -- Name <email>  date`.

    Checks, in order: that there is anything after ` --`, the ` -- ` prefix,
    the email (via `_check_email`), the name and the spacing around it, the
    two space separator between email and date, and finally the date itself
    (via `_check_footer_date`). Several of the checks end the linting of the
    line once they have reported a problem.

    :param lint_state: Receiver of the diagnostics.
    :param line: The line to check; it must start with " --".
    :param line_no: Line number used for the diagnostic ranges.
    """
    if line.rstrip() == " --":
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, 0, 3),
            'Missing "Name <email@example.com>"',
            "error",
            DPM_DCH_SECTION,
            # TODO: Add quick fix to insert all the relevant data.
        )
        return
    line_len = len(line)
    if not line.startswith(" -- "):
        # Pre-condition for this function being called.
        assert line.startswith(" --") and line_len > 3
        # NOTE(review): the range spans the whole line while the quick fix text
        # is only the prefix. If the quick fix replaces the flagged range, the
        # rest of the line would be dropped - confirm against
        # `propose_correct_text_quick_fix`.
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, 0, line_len),
            'Start of sign-off line should be " -- ".',
            "error",
            DPM_DCH_SECTION,
            quickfixes=[propose_correct_text_quick_fix(" -- ")],
        )
        return

    email_start_idx, email_end_idx = _check_email(
        lint_state,
        line,
        line_no,
        line_len,
    )

    start_of_email_identified = email_start_idx > 3
    end_of_email_identified = 4 < email_end_idx < line_len
    if not start_of_email_identified:
        return

    # Email valid, name might be missing
    name_start = 4
    name_end = email_start_idx
    name = line[name_start:name_end]
    if not name or name.isspace():
        # The range must always be at least one character width. There is no good direction
        # to expand it in. Picked left because we know there is a space character,
        if name_end - name_start < 1:
            name_start -= 1
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, name_start, name_end),
            "Missing name before email",
            "error",
            DPM_DCH_SECTION,
        )
    elif not name.endswith(" ") or len(name.strip()) != len(name) - 1:
        # Anything but "<name><single space>" is non-standard.
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, name_start, name_end),
            "Non-standard spacing around the name",
            "warning",
            DPM_DCH_SECTION,
            quickfixes=[
                propose_correct_text_quick_fix(
                    name.strip() + " ",
                    proposed_title="Fix spacing",
                )
            ],
        )
    if not end_of_email_identified:
        # If we are unsure where the email is, we currently do not have the wherewithal to identify
        # where the date is. Technically, there are cases where we could identify the date and work
        # back from there. Not written because I thought it is too much effort for the value at the
        # time I put in this comment.
        #
        # Note this will already have triggered a diagnostic.
        return

    post_email = line[email_end_idx + 1 :]
    if not post_email or post_email.isspace():
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, 0, line_len),
            "Missing sign off date",
            "error",
            DPM_DCH_SECTION,
        )
        return

    # The date starts at the first non-whitespace character after the `>`.
    space_len = len(post_email) - len(post_email.lstrip())
    start_date_idx = email_end_idx + space_len + 1
    # The separator must be exactly two spaces, so both the comparison and the
    # proposed corrections use two spaces.
    if line[email_end_idx + 1 : start_date_idx] != "  ":
        correction = "  "
        diag_start = email_end_idx + 1
        diag_end = start_date_idx
        if not space_len:
            # If there is no spaces, then we mark the closing `>` and the following character instead if possible.
            #
            # Note the choice here of including both boundary characters is because some editors refuse to include
            # the lone `>` in a codeAction range with the user explicitly doing a selection range, so our range
            # detection will miss it. By including the following character, we ensure there is always a two
            # character range to place the cursor in the middle of and the editors tend to respect that as a valid
            # range (also, single character ranges are harder for the user to see).
            diag_start = email_end_idx
            diag_end = min(start_date_idx + 1, line_len)
            end_char = line[start_date_idx] if start_date_idx < line_len else ""
            correction = f">  {end_char}"

        lint_state.emit_diagnostic(
            _single_line_subrange(
                line_no,
                diag_start,
                diag_end,
            ),
            "Must be exactly two spaces between email and sign off date",
            "error",
            DPM_DCH_SECTION,
            quickfixes=[
                propose_correct_text_quick_fix(
                    correction,
                    proposed_title="Fix spacing between email and date",
                ),
            ],
        )
    _check_footer_date(lint_state, line, line_no, line_len, start_date_idx)

330 

331 

332def _offset_of( 

333 text: str, 

334 ch: str, 

335 /, 

336 start: int | None = None, 

337 end: int | None = None, 

338 *, 

339 offset_if_missing: int = -1, 

340) -> int: 

341 try: 

342 return text.index(ch, start, end) 

343 except ValueError: 

344 return offset_if_missing 

345 

346 

347def _roffset_of( 

348 text: str, 

349 ch: str, 

350 /, 

351 start: int | None = None, 

352 end: int | None = None, 

353 *, 

354 offset_if_missing: int = -1, 

355) -> int: 

356 try: 

357 return text.rindex(ch, start, end) 

358 except ValueError: 

359 return offset_if_missing 

360 

361 

def _single_line_subrange(
    line_no: int,
    character_start_pos: int,
    character_end_pos: int,
) -> "TERange":
    """Build a range whose start and end are on the same line.

    :param line_no: The line both ends of the range are on.
    :param character_start_pos: Character offset of the start of the range.
    :param character_end_pos: Character offset of the end of the range.
    :return: The range from `character_start_pos` to `character_end_pos`.
    """
    range_start = TEPosition(line_no, character_start_pos)
    range_end = TEPosition(line_no, character_end_pos)
    return TERange(range_start, range_end)

377 

378 

def _check_header_line(
    lint_state: LintState,
    line: str,
    line_no: int,
    entry_no: int,
) -> None:
    """Lint the header line of one changelog entry (`source (version) ...`).

    For the first entry, the source name is compared with the "Source" field
    of `lint_state.source_package` when that is available. For every entry,
    the version is checked against `PKGVERSION_REGEX`; a version that is valid
    is additionally checked for the "dsfg" typo.

    :param lint_state: Receiver of the diagnostics.
    :param line: The header line.
    :param line_no: Line number used for the diagnostic ranges.
    :param entry_no: Number of the entry, where 1 is the first one in the file.
    """
    header = _HEADER_LINE.search(line)
    if header is None:
        # Syntax error: TODO flag later
        return
    source_name, source_version = header.groups()
    version_start, version_end = header.span(2)

    dctrl_source_pkg = lint_state.source_package
    if entry_no == 1 and dctrl_source_pkg is not None:
        expected_name = dctrl_source_pkg.fields.get("Source")
        if expected_name != source_name:
            name_start, name_end = header.span(1)
            if expected_name is None:
                msg = (
                    "The first entry must use the same source name as debian/control."
                    ' The d/control file is missing the "Source" field in its first stanza'
                )
            else:
                msg = (
                    "The first entry must use the same source name as debian/control."
                    f' Changelog uses: "{source_name}" while d/control uses: "{expected_name}"'
                )
            lint_state.emit_diagnostic(
                _single_line_subrange(line_no, name_start, name_end),
                msg,
                "error",
                "dpkg",  # man:deb-src-control(5) / #1089794
            )

    if PKGVERSION_REGEX.fullmatch(source_version):
        if "dsfg" in source_version:
            typo_start = version_start + source_version.index("dsfg")
            lint_state.emit_diagnostic(
                _single_line_subrange(line_no, typo_start, typo_start + 4),
                'Typo of "dfsg" (Debian Free Software Guidelines)',
                "pedantic",
                "debputy",
                quickfixes=[propose_correct_text_quick_fix("dfsg")],
            )
        return

    valid_part = PKGVERSION_REGEX.search(source_version)
    if valid_part is None:
        # No part of the version is recognisable; flag all of it.
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, version_start, version_end),
            f'Cannot parse "{source_version}" as a Debian version.',
            "error",
            "Policy 5.6.12",
        )
        return

    # Only flag what is before and after the part that does look like a version.
    start_valid, end_valid = valid_part.span(0)
    invalid_spans = []
    if start_valid > 0:
        invalid_spans.append((version_start, version_start + start_valid))
    if end_valid < len(source_version):
        invalid_spans.append((version_start + end_valid, version_end))
    invalid_ranges = [
        _single_line_subrange(line_no, span_start, span_end)
        for span_start, span_end in invalid_spans
    ]
    for invalid_range in invalid_ranges:
        lint_state.emit_diagnostic(
            invalid_range,
            "This part cannot be parsed as a valid Debian version",
            "error",
            "Policy 5.6.12",
        )

469 

470 

@lint_diagnostics(_DISPATCH_RULE)
async def _lint_debian_changelog(lint_state: LintState) -> None:
    """Lint the first entries of a debian/changelog file.

    Sign-off lines go to `_check_footer_line` and entry headers to
    `_check_header_line`. The remaining (indented) lines are checked for
    length and spell checked. Linting stops at the header of the third entry.

    :param lint_state: Provides the lines to lint and receives the diagnostics.
    """
    lines = lint_state.lines
    entry_no = 0
    entry_limit = 2
    # Budget for the spell checker; reduced by what `spellcheck_line` returns.
    max_words = 1000
    max_line_length = _MAXIMUM_WIDTH
    for line_no, orig_line in enumerate(lines):
        line = orig_line.rstrip()
        if not line:
            continue
        if line.startswith(" --"):
            _check_footer_line(lint_state, line, line_no)
            continue
        # NOTE(review): the `line[2] == "["` test below suggests that detail
        # lines are expected to be indented by two spaces, while this only
        # requires one leading space - confirm which one is intended.
        if not line.startswith(" "):
            if not line[0].isspace():
                entry_no += 1
                # Figure out the right cut which may not be as simple as just the
                # top two.
                if entry_no > entry_limit:
                    break
                _check_header_line(
                    lint_state,
                    line,
                    line_no,
                    entry_no,
                )
            continue
        # Length without the line terminator. Stripping the terminator (rather
        # than subtracting one) is also correct for a final line that has no
        # newline and for "\r\n" line endings.
        orig_line_len = len(orig_line.rstrip("\r\n"))
        if orig_line_len > max_line_length:
            exceeded_line_range = _single_line_subrange(
                line_no,
                max_line_length,
                orig_line_len,
            )
            lint_state.emit_diagnostic(
                exceeded_line_range,
                f"Line exceeds {max_line_length} characters",
                "pedantic",
                "debputy",
            )
        if len(line) > 3 and line[2] == "[" and line[-1] == "]":
            # Do not spell check [ X ] as X is usually a name
            continue
        if max_words > 0:
            new_diagnostics = spellcheck_line(lint_state, line_no, line)
            max_words -= new_diagnostics

520 

521 

@lsp_document_link(_DISPATCH_RULE)
def _debian_changelog_links(
    ls: "DebputyLanguageServer",
    params: types.DocumentLinkParams,
) -> Optional[Sequence[types.DocumentLink]]:
    """Provide links to bugs.debian.org for bug numbers in the changelog.

    Only lines starting with a space are considered, and only the first bug
    reference (e.g. "Closes: #123, #456") on each such line. Every bug number
    in that reference becomes a link. Scanning stops after the first line that
    brings the total to 100 links or more.

    :param ls: The language server, used to look up the document.
    :param params: Identifies the document to provide links for.
    :return: The links found (possibly none).
    """
    doc = ls.workspace.get_text_document(params.text_document.uri)
    links = []

    for line_no, line in enumerate(doc.lines):
        if not line.startswith(" "):
            continue
        reference = _BUG_LINKS_FINDER_REGEX.search(line)
        if reference is None:
            continue
        # The bug numbers are matched within the reference, so their offsets
        # must be shifted by where the reference starts on the line.
        reference_offset = reference.start(0)
        for bug in _INDIVIDUAL_BUGS_REGEX.finditer(reference.group(0)):
            bug_id = bug.group(1)
            rel_start, rel_end = bug.span()
            link_range = _single_line_subrange(
                line_no,
                reference_offset + rel_start,
                reference_offset + rel_end,
            )
            link = types.DocumentLink(link_range, f"https://bugs.debian.org/{bug_id}")
            links.append(link)

        if len(links) >= 100:
            break

    return links