Coverage for src/debputy/lsp/languages/lsp_debian_changelog.py: 94%

191 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2025-09-07 09:27 +0000

1import re 

2from email.utils import parsedate_to_datetime 

3from typing import Optional, Tuple 

4 

5from debputy.linting.lint_util import LintState 

6from debputy.lsp.lsp_features import ( 

7 lsp_standard_handler, 

8 SecondaryLanguage, 

9 LanguageDispatchRule, 

10 lint_diagnostics, 

11) 

12from debputy.lsp.quickfixes import ( 

13 propose_correct_text_quick_fix, 

14) 

15from debputy.lsp.spellchecking import spellcheck_line 

16from debputy.lsprotocol.types import ( 

17 TEXT_DOCUMENT_WILL_SAVE_WAIT_UNTIL, 

18 TEXT_DOCUMENT_CODE_ACTION, 

19) 

20from debputy.util import PKGVERSION_REGEX 

21 

22try: 

23 from debputy.lsp.vendoring._deb822_repro.locatable import ( 

24 Position as TEPosition, 

25 Range as TERange, 

26 ) 

27 

28 from pygls.server import LanguageServer 

29 from pygls.workspace import TextDocument 

30 from debputy.lsp.debputy_ls import DebputyLanguageServer 

31except ImportError: 

32 pass 

33 

34 

35# Same as Lintian 

36_MAXIMUM_WIDTH: int = 82 

37_HEADER_LINE = re.compile(r"^(\S+)\s*[(]([^)]+)[)]") # TODO: Add rest 

38_DISPATCH_RULE = LanguageDispatchRule.new_rule( 

39 "debian/changelog", 

40 None, 

41 ("debian/changelog", "debian/changelog.dch"), 

42 [ 

43 # emacs's name 

44 SecondaryLanguage("debian-changelog"), 

45 # vim's name 

46 SecondaryLanguage("debchangelog"), 

47 SecondaryLanguage("dch"), 

48 ], 

49) 

50 

51 

52_WEEKDAYS_BY_IDX = [ 

53 "Mon", 

54 "Tue", 

55 "Wed", 

56 "Thu", 

57 "Fri", 

58 "Sat", 

59 "Sun", 

60] 

61_KNOWN_WEEK_DAYS = frozenset(_WEEKDAYS_BY_IDX) 

62 

63 

64lsp_standard_handler(_DISPATCH_RULE, TEXT_DOCUMENT_CODE_ACTION) 

65lsp_standard_handler(_DISPATCH_RULE, TEXT_DOCUMENT_WILL_SAVE_WAIT_UNTIL) 

66 

67DPM_DCH_SECTION = "Policy 4.4" 

68 

69 

def _check_footer_date(
    lint_state: LintState,
    line: str,
    line_no: int,
    line_len: int,
    start_date_idx: int,
) -> None:
    """Validate the date at the end of a changelog sign-off line.

    The date is expected to look like ``Tue, 12 Mar 2024 12:34:56 +0000``.
    At most one diagnostic is emitted; the checks stop at the first problem:

    1. the remaining text is too short to hold a date,
    2. the day name is not one of ``Mon`` .. ``Sun``,
    3. the day name is not followed by ``", "``,
    4. the rest cannot be parsed by ``parsedate_to_datetime``,
    5. the day name does not match the weekday of the parsed date (a warning
       with a quick fix proposing the matching abbreviation).

    :param lint_state: Receiver of the diagnostics.
    :param line: The sign-off line being checked.
    :param line_no: Line number used for the diagnostic ranges.
    :param line_len: Length of ``line``.
    :param start_date_idx: Index in ``line`` where the date starts.
    """
    # 3 characters for the day name (Mon), then a comma plus a space followed by the
    # actual date. The 6 characters limit is a gross under estimation of the real
    # size.
    if line_len < start_date_idx + 6:
        text_range = _single_line_subrange(line_no, start_date_idx, line_len)
        lint_state.emit_diagnostic(
            text_range,
            "Expected a date in RFC822 format (Tue, 12 Mar 2024 12:34:56 +0000)",
            "error",
            DPM_DCH_SECTION,
        )
        return
    day_name_range = _single_line_subrange(line_no, start_date_idx, start_date_idx + 3)
    day_name = line[start_date_idx : start_date_idx + 3]
    if day_name not in _KNOWN_WEEK_DAYS:
        lint_state.emit_diagnostic(
            day_name_range,
            "Expected a three letter date here using US English format (Mon, Tue, ..., Sun)",
            "error",
            DPM_DCH_SECTION,
        )
        return

    date_str = line[start_date_idx + 5 :]

    if line[start_date_idx + 3 : start_date_idx + 5] != ", ":
        sep = line[start_date_idx + 3 : start_date_idx + 5]
        text_range = _single_line_subrange(
            line_no,
            start_date_idx + 3,
            start_date_idx + 5,
        )
        lint_state.emit_diagnostic(
            text_range,
            f'Improper formatting of date. Expected ", " here, not "{sep}"',
            "error",
            DPM_DCH_SECTION,
        )
        return

    try:
        # FIXME: this parser is too forgiving (it ignores trailing garbage)
        date = parsedate_to_datetime(date_str)
    except ValueError as e:
        error_range = _single_line_subrange(line_no, start_date_idx + 5, line_len)
        lint_state.emit_diagnostic(
            error_range,
            f"Unable to parse the date as a valid RFC822 date: {e.args[0]}",
            "error",
            "debputy",
        )
        return

    # Full day names in `datetime.weekday()` order. The message cannot be built
    # as "<abbreviation>day": that only spells Monday, Friday and Sunday
    # correctly and yields "Tueday", "Wedday", "Thuday" and "Satday" otherwise.
    full_day_names = (
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    )
    weekday_idx = date.weekday()
    expected_week_day = _WEEKDAYS_BY_IDX[weekday_idx]
    if expected_week_day != day_name:
        lint_state.emit_diagnostic(
            day_name_range,
            f"The date was a {full_day_names[weekday_idx]}",
            "warning",
            "debputy",
            quickfixes=[propose_correct_text_quick_fix(expected_week_day)],
        )

138 

139 

def _check_email(
    lint_state: LintState,
    line: str,
    line_no: int,
    line_len: int,
) -> Tuple[int, int]:
    """Locate the ``<email>`` part of a sign-off line and flag missing brackets.

    The search for ``<`` starts at index 4 and the search for ``>`` starts at
    the ``<`` (or at index 4 when there is none). When both are found in that
    order, nothing is emitted. Otherwise exactly one error diagnostic is
    emitted and the missing index is replaced by a guess where possible.

    :param lint_state: Receiver of the diagnostic.
    :param line: The sign-off line being checked.
    :param line_no: Line number used for the diagnostic range.
    :param line_len: Length of ``line``.
    :return: ``(email_start_idx, email_end_idx)``. The start index is ``-1``
      when there is no ``<`` and no guess was made.
    """
    open_idx = _offset_of(line, "<", 4)
    close_idx = _offset_of(line, ">", max(open_idx, 4))

    if 3 < open_idx < close_idx:
        # Both brackets are present and in the right order.
        return open_idx, close_idx

    if open_idx >= 3:
        # There is a "<" without a ">" after it: mark up to the next space.
        problem = 'Missing closing ">" to finish email address before the sign off date'
        mark_from = open_idx
        mark_to = _offset_of(line, " ", open_idx, offset_if_missing=line_len)
    else:
        problem = 'Missing opening "<" to start the email address after the name'
        if close_idx > -1:
            # Guess that the address starts after the last space before the ">".
            last_space = _roffset_of(
                line,
                " ",
                4,
                close_idx - 1,
                offset_if_missing=4,
            )
            mark_from = last_space + 1
            # NOTE(review): `last_space` is never below 4, so this guard always
            # holds - confirm whether `offset_if_missing` was meant to be 3.
            if mark_from > 4:
                open_idx = mark_from
            # For consistency, we always include the trailing `>`.
            mark_to = close_idx + 1
        else:
            # Neither bracket is present: mark the first word after " -- ".
            mark_from = 4
            mark_to = _offset_of(line, " ", mark_from, offset_if_missing=line_len)

    lint_state.emit_diagnostic(
        _single_line_subrange(line_no, mark_from, mark_to),
        problem,
        "error",
        DPM_DCH_SECTION,
    )
    if close_idx < 0:
        close_idx = mark_to
    return open_idx, close_idx

191 

192 

def _check_footer_line(
    lint_state: LintState,
    line: str,
    line_no: int,
) -> None:
    """Lint a changelog sign-off line (``" -- Name <email>"``, two spaces, date).

    The checks run in order and stop as soon as the rest of the line can no
    longer be located reliably: the `` -- `` prefix, the email brackets (via
    ``_check_email``), the maintainer name and its spacing, the presence of a
    date, the two-space gap before the date, and finally the date itself (via
    ``_check_footer_date``).

    :param lint_state: Receiver of the diagnostics.
    :param line: The sign-off line; the caller guarantees it starts with ``" --"``.
    :param line_no: Line number used for the diagnostic ranges.
    """
    if line.rstrip() == " --":
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, 0, 3),
            'Missing "Name <email@example.com>"',
            "error",
            DPM_DCH_SECTION,
            # TODO: Add quick fix to insert all the relevant data.
        )
        return
    line_len = len(line)
    if not line.startswith(" -- "):
        # Pre-condition for this function being called.
        assert line.startswith(" --") and line_len > 3
        # Only cover the malformed prefix (" --" plus any whitespace directly
        # after it). The quick fix text replaces the diagnosed range, so a
        # range spanning the whole line would delete the name, email and date.
        prefix_end = 3
        while prefix_end < line_len and line[prefix_end].isspace():
            prefix_end += 1
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, 0, prefix_end),
            'Start of sign-off line should be " -- ".',
            "error",
            DPM_DCH_SECTION,
            quickfixes=[propose_correct_text_quick_fix(" -- ")],
        )
        return

    email_start_idx, email_end_idx = _check_email(
        lint_state,
        line,
        line_no,
        line_len,
    )

    start_of_email_identified = email_start_idx > 3
    end_of_email_identified = 4 < email_end_idx < line_len
    if not start_of_email_identified:
        return

    # Email valid, name might be missing
    name_start = 4
    name_end = email_start_idx
    name = line[name_start:name_end]
    if not name or name.isspace():
        # The range must always be at least one character width. There is no good direction
        # to expand it in. Picked left because we know there is a space character,
        if name_end - name_start < 1:
            name_start -= 1
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, name_start, name_end),
            "Missing name before email",
            "error",
            DPM_DCH_SECTION,
        )
    elif not name.endswith(" ") or len(name.strip()) != len(name) - 1:
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, name_start, name_end),
            "Non-standard spacing around the name",
            "warning",
            DPM_DCH_SECTION,
            quickfixes=[
                propose_correct_text_quick_fix(
                    name.strip() + " ",
                    proposed_title="Fix spacing",
                )
            ],
        )
    if not end_of_email_identified:
        # If we are unsure where the email is, we currently do not have the wherewithal to identify
        # where the date is. Technically, there are cases where we could identify the date and work
        # back from there. Not written because I thought it is too much effort for the value at the
        # time I put in this comment.
        #
        # Note this will already have triggered a diagnostic.
        return
    post_email = line[email_end_idx + 1 :]
    if not post_email or post_email.isspace():
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, 0, line_len),
            "Missing sign off date",
            "error",
            DPM_DCH_SECTION,
        )
        return
    start_date_idx = email_end_idx + (len(post_email) - len(post_email.lstrip())) + 1
    space_len = start_date_idx - email_end_idx - 1
    # The email and the date must be separated by exactly two spaces. The gap
    # is spelled as an expression so the count cannot be lost to whitespace
    # normalisation.
    expected_gap = " " * 2
    if line[email_end_idx + 1 : start_date_idx] != expected_gap:
        correction = expected_gap
        diag_start = email_end_idx + 1
        diag_end = start_date_idx
        if not space_len:
            # If there is no spaces, then we mark the closing `>` and the following character instead if possible.
            #
            # Note the choice here of including both boundary characters is because some editors refuse to include
            # the lone `>` in a codeAction range with the user explicitly doing a selection range, so our range
            # detection will miss it. By including the following character, we ensure there is always a two
            # character range to place the cursor in the middle of and the editors tend to respect that as a valid
            # range (also, single character ranges are harder for the user to see).
            diag_start = email_end_idx
            diag_end = min(start_date_idx + 1, line_len)
            if start_date_idx < line_len:
                end_char = line[start_date_idx]
            else:
                end_char = ""
            correction = f">{expected_gap}{end_char}"

        lint_state.emit_diagnostic(
            _single_line_subrange(
                line_no,
                diag_start,
                diag_end,
            ),
            "Must be exactly two spaces between email and sign off date",
            "error",
            DPM_DCH_SECTION,
            quickfixes=[
                propose_correct_text_quick_fix(
                    correction,
                    proposed_title="Fix spacing between email and date",
                ),
            ],
        )
    _check_footer_date(lint_state, line, line_no, line_len, start_date_idx)

316 

317 

318def _offset_of( 

319 text: str, 

320 ch: str, 

321 /, 

322 start: Optional[int] = None, 

323 end: Optional[int] = None, 

324 *, 

325 offset_if_missing: int = -1, 

326) -> int: 

327 try: 

328 return text.index(ch, start, end) 

329 except ValueError: 

330 return offset_if_missing 

331 

332 

333def _roffset_of( 

334 text: str, 

335 ch: str, 

336 /, 

337 start: Optional[int] = None, 

338 end: Optional[int] = None, 

339 *, 

340 offset_if_missing: int = -1, 

341) -> int: 

342 try: 

343 return text.rindex(ch, start, end) 

344 except ValueError: 

345 return offset_if_missing 

346 

347 

def _single_line_subrange(
    line_no: int,
    character_start_pos: int,
    character_end_pos: int,
) -> "TERange":
    """Build a range whose start and end positions are on the same line.

    :param line_no: The line number used for both positions.
    :param character_start_pos: Character offset of the start position.
    :param character_end_pos: Character offset of the end position.
    :return: A ``TERange`` from the start position to the end position.
    """
    range_start = TEPosition(line_no, character_start_pos)
    range_end = TEPosition(line_no, character_end_pos)
    return TERange(range_start, range_end)

363 

364 

def _check_header_line(
    lint_state: LintState,
    line: str,
    line_no: int,
    entry_no: int,
) -> None:
    """Lint the header line of a changelog entry (``source (version) ...``).

    Lines that do not match ``_HEADER_LINE`` are silently skipped. For
    matching lines, two things are checked:

    * For the first entry only: the source name must equal the ``Source``
      field of ``lint_state.source_package`` (when a source package is known).
    * The version must match ``PKGVERSION_REGEX`` in full. When only a part
      matches, the text before and after that part is flagged. A fully valid
      version is additionally checked for the ``dsfg`` typo.

    :param lint_state: Receiver of the diagnostics; also provides the source package.
    :param line: The header line.
    :param line_no: Line number used for the diagnostic ranges.
    :param entry_no: 1-based number of the entry this header belongs to.
    """
    header = _HEADER_LINE.search(line)
    if header is None:
        # Syntax error: TODO flag later
        return
    source_name, source_version = header.groups()

    control_source = lint_state.source_package
    if entry_no == 1 and control_source is not None:
        expected_name = control_source.fields.get("Source")
        if expected_name != source_name:
            name_from, name_to = header.span(1)
            if expected_name is None:
                problem = (
                    "The first entry must use the same source name as debian/control."
                    ' The d/control file is missing the "Source" field in its first stanza'
                )
            else:
                problem = (
                    "The first entry must use the same source name as debian/control."
                    f' Changelog uses: "{source_name}" while d/control uses: "{expected_name}"'
                )
            lint_state.emit_diagnostic(
                _single_line_subrange(line_no, name_from, name_to),
                problem,
                "error",
                "dpkg",  # man:deb-src-control(5) / #1089794
            )

    version_from, version_to = header.span(2)

    if PKGVERSION_REGEX.fullmatch(source_version):
        # The version is valid; only the common "dsfg" typo is left to look for.
        typo_index = source_version.find("dsfg")
        if typo_index >= 0:
            typo_from = version_from + typo_index
            lint_state.emit_diagnostic(
                _single_line_subrange(line_no, typo_from, typo_from + 4),
                'Typo of "dfsg" (Debian Free Software Guidelines)',
                "pedantic",
                "debputy",
                quickfixes=[propose_correct_text_quick_fix("dfsg")],
            )
        return

    partial = PKGVERSION_REGEX.search(source_version)
    if partial is None:
        # Nothing in the version looks valid: flag all of it.
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, version_from, version_to),
            f'Cannot parse "{source_version}" as a Debian version.',
            "error",
            "Policy 5.6.12",
        )
        return

    # Part of the version is valid: flag what comes before and after that part.
    valid_from, valid_to = partial.span(0)
    unparsable = []
    if valid_from > 0:
        unparsable.append((version_from, version_from + valid_from))
    if valid_to < len(source_version):
        unparsable.append((version_from + valid_to, version_to))
    for bad_from, bad_to in unparsable:
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, bad_from, bad_to),
            "This part cannot be parsed as a valid Debian version",
            "error",
            "Policy 5.6.12",
        )

455 

456 

@lint_diagnostics(_DISPATCH_RULE)
async def _lint_debian_changelog(lint_state: LintState) -> None:
    """Emit diagnostics for a ``debian/changelog`` file.

    Lines are handled by kind:

    * blank lines are skipped,
    * lines starting with ``" --"`` go to ``_check_footer_line``,
    * lines that do not start with a space are entry headers and go to
      ``_check_header_line``; linting stops at the header of the third entry,
    * remaining lines are checked against the maximum width and spell
      checked, except ``[ Name ]`` lines, which usually hold a name.

    Spell checking stops once the budget of 1000 is used up; the budget is
    reduced by whatever ``spellcheck_line`` returns for each line.

    :param lint_state: Provides the lines to check and receives the diagnostics.
    """
    lines = lint_state.lines
    entry_no = 0
    entry_limit = 2
    max_words = 1000
    max_line_length = _MAXIMUM_WIDTH
    for line_no, line in enumerate(lines):
        orig_line = line
        line = line.rstrip()
        if not line:
            continue
        if line.startswith(" --"):
            _check_footer_line(lint_state, line, line_no)
            continue
        if not line.startswith(" "):
            if not line[0].isspace():
                entry_no += 1
                # Figure out the right cut which may not be as simple as just the
                # top two.
                if entry_no > entry_limit:
                    break
                _check_header_line(
                    lint_state,
                    line,
                    line_no,
                    entry_no,
                )
            continue
        # Exclude only the line terminator from the length. Blindly subtracting
        # one would under-count a final line that has no trailing newline and
        # over-count lines ending in "\r\n".
        orig_line_len = len(orig_line.rstrip("\r\n"))
        if orig_line_len > max_line_length:
            exceeded_line_range = _single_line_subrange(
                line_no,
                max_line_length,
                orig_line_len,
            )
            lint_state.emit_diagnostic(
                exceeded_line_range,
                f"Line exceeds {max_line_length} characters",
                "pedantic",
                "debputy",
            )
        if len(line) > 3 and line[2] == "[" and line[-1] == "]":
            # Do not spell check [ X ] as X is usually a name
            continue
        if max_words > 0:
            new_diagnostics = spellcheck_line(lint_state, line_no, line)
            max_words -= new_diagnostics