Coverage for src/debputy/lsp/languages/lsp_debian_changelog.py: 94%

1import asyncio

2import re

3from email.utils import parsedate_to_datetime

4from typing import TYPE_CHECKING, Sequence, Optional

6from debputy.linting.lint_util import LintState

7from debputy.lsp.lsp_features import (

8 lsp_standard_handler,

9 SecondaryLanguage,

10 LanguageDispatchRule,

11 lint_diagnostics,

12 lsp_document_link,

13)

14from debputy.lsp.quickfixes import (

15 propose_correct_text_quick_fix,

16)

17from debputy.lsp.spellchecking import spellcheck_line

18from debputy.util import PKGVERSION_REGEX

20try:

21 from debputy.lsp.vendoring._deb822_repro.locatable import (

22 Position as TEPosition,

23 Range as TERange,

24 )

26 from pygls.server import LanguageServer

27 from pygls.workspace import TextDocument

28 from debputy.lsp.debputy_ls import DebputyLanguageServer

29except ImportError:

30 pass

33if TYPE_CHECKING:

34 import lsprotocol.types as types

35else:

36 import debputy.lsprotocol.types as types

# Same as Lintian
_MAXIMUM_WIDTH: int = 82
# Start of an entry header line: group 1 is the source package name and group 2
# is the text between the parentheses (the version).
_HEADER_LINE = re.compile(r"^(\S+)\s*[(]([^)]+)[)]")  # TODO: Add rest
# Dispatch rule shared by every handler registered in this module.
_DISPATCH_RULE = LanguageDispatchRule.new_rule(
    "debian/changelog",
    None,
    ("debian/changelog", "debian/changelog.dch"),
    [
        # emacs's name
        SecondaryLanguage("debian-changelog"),
        # vim's name
        SecondaryLanguage("debchangelog"),
        SecondaryLanguage("dch"),
    ],
)


# Abbreviated weekday names; the sign-off date check indexes this list with
# `datetime.weekday()`, hence the Monday-first order.
_WEEKDAYS_BY_IDX = [
    "Mon",
    "Tue",
    "Wed",
    "Thu",
    "Fri",
    "Sat",
    "Sun",
]
_KNOWN_WEEK_DAYS = frozenset(_WEEKDAYS_BY_IDX)
# Finds a "closes:" / "see" bug reference list within a line. The pattern is
# compiled with re.VERBOSE, so the literal whitespace in it is not significant.
_BUG_LINKS_FINDER_REGEX = re.compile(
    r"""
    (?:closes:|see:?) \s* (?:bug \s*)? \#?\s?\d+ (?:, \s*(?:bug)? \#?\s?\d+)*
    """,
    re.I | re.VERBOSE,
)
# Extracts each bug number (group 1) from the text matched by the regex above.
_INDIVIDUAL_BUGS_REGEX = re.compile(
    r"""(?:bug\s?)?(?:#\s?)?(\d+)""",
    re.I,
)


lsp_standard_handler(_DISPATCH_RULE, types.TEXT_DOCUMENT_CODE_ACTION)
lsp_standard_handler(_DISPATCH_RULE, types.TEXT_DOCUMENT_WILL_SAVE_WAIT_UNTIL)

# Authority reference attached to the diagnostics about the changelog format.
DPM_DCH_SECTION = "Policy 4.4"

# Full English weekday names in `datetime.weekday()` order (Monday is 0). Used for
# human readable messages; appending "day" to the three letter abbreviation only
# works for some of the days ("Tue" + "day" is not a weekday).
_WEEKDAY_FULL_NAMES = (
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
    "Sunday",
)


def _check_footer_date(
    lint_state: LintState,
    line: str,
    line_no: int,
    line_len: int,
    start_date_idx: int,
) -> None:
    """Validate the date part of a sign-off line and emit diagnostics for problems.

    The checks are performed in order and the function stops at the first problem:
    the date is long enough to be a date at all, it starts with a known three letter
    weekday, the weekday is followed by ", ", the remainder parses as a date and the
    weekday matches the parsed date.

    :param lint_state: Receiver of the diagnostics.
    :param line: The sign-off line being checked.
    :param line_no: Line number of `line`, used for the diagnostic ranges.
    :param line_len: Length of `line`.
    :param start_date_idx: Offset in `line` where the date starts.
    """
    # 3 characters for the day name (Mon), then a comma plus a space followed by the
    # actual date. The 6 characters limit is a gross under estimation of the real
    # size.
    if line_len < start_date_idx + 6:
        text_range = _single_line_subrange(line_no, start_date_idx, line_len)
        lint_state.emit_diagnostic(
            text_range,
            "Expected a date in RFC822 format (Tue, 12 Mar 2024 12:34:56 +0000)",
            "error",
            DPM_DCH_SECTION,
        )
        return
    day_name_range = _single_line_subrange(line_no, start_date_idx, start_date_idx + 3)
    day_name = line[start_date_idx : start_date_idx + 3]
    if day_name not in _KNOWN_WEEK_DAYS:
        lint_state.emit_diagnostic(
            day_name_range,
            "Expected a three letter date here using US English format (Mon, Tue, ..., Sun)",
            "error",
            DPM_DCH_SECTION,
        )
        return

    date_str = line[start_date_idx + 5 :]

    if line[start_date_idx + 3 : start_date_idx + 5] != ", ":
        sep = line[start_date_idx + 3 : start_date_idx + 5]
        text_range = _single_line_subrange(
            line_no,
            start_date_idx + 3,
            start_date_idx + 5,
        )
        lint_state.emit_diagnostic(
            text_range,
            f'Improper formatting of date. Expected ", " here, not "{sep}"',
            "error",
            DPM_DCH_SECTION,
        )
        return

    try:
        # FIXME: this parser is too forgiving (it ignores trailing garbage)
        date = parsedate_to_datetime(date_str)
    except ValueError as e:
        error_range = _single_line_subrange(line_no, start_date_idx + 5, line_len)
        lint_state.emit_diagnostic(
            error_range,
            f"Unable to parse the date as a valid RFC822 date: {e.args[0]}",
            "error",
            "debputy",
        )
        return
    weekday_idx = date.weekday()
    expected_week_day = _WEEKDAYS_BY_IDX[weekday_idx]
    if expected_week_day != day_name:
        lint_state.emit_diagnostic(
            day_name_range,
            # Spell the weekday out in full; the abbreviation is what goes into the
            # file (via the quick fix), the full name is for the reader.
            f"The date was a {_WEEKDAY_FULL_NAMES[weekday_idx]}",
            "warning",
            "debputy",
            quickfixes=[propose_correct_text_quick_fix(expected_week_day)],
        )

152

153

def _check_email(
    lint_state: LintState,
    line: str,
    line_no: int,
    line_len: int,
) -> tuple[int, int]:
    """Locate the `<email>` part of a sign-off line, reporting a missing delimiter.

    The search for "<" starts at offset 4 (after the " -- " prefix) and the search
    for ">" starts at the "<" when one was found.

    :param lint_state: Receiver of the diagnostic (at most one is emitted).
    :param line: The sign-off line being checked.
    :param line_no: Line number of `line`, used for the diagnostic range.
    :param line_len: Length of `line`.
    :return: The (start, end) offsets of the email. When a delimiter is missing, the
      corresponding offset is a guess derived from the surrounding spaces; the start
      offset remains -1 when neither delimiter was found.
    """
    email_start_idx = _offset_of(line, "<", 4)
    email_end_idx = _offset_of(line, ">", max(email_start_idx, 4))

    if 3 < email_start_idx < email_end_idx:
        # Both delimiters are present and in the right order; nothing to report.
        return email_start_idx, email_end_idx

    # Email invalid
    if email_start_idx >= 3:
        # The "<" is there but no ">" follows it; flag up to the next space.
        msg = 'Missing closing ">" to finish email address before the sign off date'
        diag_start = email_start_idx
        diag_end = _offset_of(
            line,
            " ",
            email_start_idx,
            offset_if_missing=line_len,
        )
    else:
        msg = 'Missing opening "<" to start the email address after the name'
        if email_end_idx > -1:
            # Guess the start of the email as the character following the last
            # space before the ">".
            last_space_idx = _roffset_of(
                line,
                " ",
                4,
                email_end_idx - 1,
                offset_if_missing=4,
            )
            diag_start = 1 + last_space_idx
            if diag_start > 4:
                email_start_idx = diag_start
            # For consistency, we always include the trailing `>`.
            diag_end = email_end_idx + 1
        else:
            # Neither delimiter is present; flag the first word after the prefix.
            diag_start = 4
            diag_end = _offset_of(
                line,
                " ",
                diag_start,
                offset_if_missing=line_len,
            )

    lint_state.emit_diagnostic(
        _single_line_subrange(line_no, diag_start, diag_end),
        msg,
        "error",
        DPM_DCH_SECTION,
    )
    if email_end_idx < 0:
        # No ">" anywhere: report the end of the flagged range as the end offset.
        email_end_idx = diag_end
    return email_start_idx, email_end_idx

205

206

def _check_footer_line(
    lint_state: LintState,
    line: str,
    line_no: int,
) -> None:
    """Validate a sign-off line (` -- Name <email>  date`) and emit diagnostics.

    Checks, in order: the " -- " prefix, the email delimiters (via `_check_email`),
    the name and its spacing, the two space separator between the email and the
    date and finally the date itself (via `_check_footer_date`).

    :param lint_state: Receiver of the diagnostics.
    :param line: The line to check. It must start with " --".
    :param line_no: Line number of `line`, used for the diagnostic ranges.
    """
    if line.rstrip() == " --":
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, 0, 3),
            'Missing "Name <email@example.com>"',
            "error",
            DPM_DCH_SECTION,
            # TODO: Add quick fix to insert all the relevant data.
        )
        return
    line_len = len(line)
    if not line.startswith(" -- "):
        # Pre-condition for this function being called.
        assert line.startswith(" --") and line_len > 3
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, 0, line_len),
            'Start of sign-off line should be " -- ".',
            "error",
            DPM_DCH_SECTION,
            # The diagnostic range is the whole line, so the replacement must carry
            # the remainder of the line along. Proposing only the prefix would drop
            # the name, email and date when applied (assuming the quick fix replaces
            # the text of the diagnostic range, which is how it is used elsewhere in
            # this file).
            quickfixes=[propose_correct_text_quick_fix(" -- " + line[3:].lstrip())],
        )
        return

    email_start_idx, email_end_idx = _check_email(
        lint_state,
        line,
        line_no,
        line_len,
    )

    start_of_email_identified = email_start_idx > 3
    end_of_email_identified = 4 < email_end_idx < line_len
    if not start_of_email_identified:
        return

    # Email valid, name might be missing
    name_start = 4
    name_end = email_start_idx
    name = line[name_start:name_end]
    if not name or name.isspace():
        # The range must always be at least one character width. There is no good direction
        # to expand it in. Picked left because we know there is a space character,
        if name_end - name_start < 1:
            name_start -= 1
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, name_start, name_end),
            "Missing name before email",
            "error",
            DPM_DCH_SECTION,
        )
    elif not name.endswith(" ") or len(name.strip()) != len(name) - 1:
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, name_start, name_end),
            "Non-standard spacing around the name",
            "warning",
            DPM_DCH_SECTION,
            quickfixes=[
                propose_correct_text_quick_fix(
                    name.strip() + " ",
                    proposed_title="Fix spacing",
                )
            ],
        )
    if not end_of_email_identified:
        # If we are unsure where the email is, we currently do not have the wherewithal to identify
        # where the date is. Technically, there are cases where we could identify the date and work
        # back from there. Not written because I thought it is too much effort for the value at the
        # time I put in this comment.
        #
        # Note this will already have triggered a diagnostic.
        return
    post_email = line[email_end_idx + 1 :]
    if not post_email or post_email.isspace():
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, 0, line_len),
            "Missing sign off date",
            "error",
            DPM_DCH_SECTION,
        )
        return
    start_date_idx = email_end_idx + (len(post_email) - len(post_email.lstrip())) + 1
    space_len = start_date_idx - email_end_idx - 1
    # The separator is exactly two spaces, matching the diagnostic text below.
    if line[email_end_idx + 1 : start_date_idx] != "  ":
        correction = "  "
        diag_start = email_end_idx + 1
        diag_end = start_date_idx
        if not space_len:
            # If there is no spaces, then we mark the closing `>` and the following character instead if possible.
            #
            # Note the choice here of including both boundary characters is because some editors refuse to include
            # the lone `>` in a codeAction range with the user explicitly doing a selection range, so our range
            # detection will miss it. By including the following character, we ensure there is always a two
            # character range to place the cursor in the middle of and the editors tend to respect that as a valid
            # range (also, single character ranges are harder for the user to see).
            diag_start = email_end_idx
            diag_end = min(start_date_idx + 1, line_len)
            if start_date_idx < line_len:
                end_char = line[start_date_idx]
            else:
                end_char = ""
            correction = f">  {end_char}"

        lint_state.emit_diagnostic(
            _single_line_subrange(
                line_no,
                diag_start,
                diag_end,
            ),
            "Must be exactly two spaces between email and sign off date",
            "error",
            DPM_DCH_SECTION,
            quickfixes=[
                propose_correct_text_quick_fix(
                    correction,
                    proposed_title="Fix spacing between email and date",
                ),
            ],
        )
    _check_footer_date(lint_state, line, line_no, line_len, start_date_idx)

330

331

332def _offset_of(

333 text: str,

334 ch: str,

335 /,

336 start: int | None = None,

337 end: int | None = None,

338 *,

339 offset_if_missing: int = -1,

340) -> int:

341 try:

342 return text.index(ch, start, end)

343 except ValueError:

344 return offset_if_missing

345

346

347def _roffset_of(

348 text: str,

349 ch: str,

350 /,

351 start: int | None = None,

352 end: int | None = None,

353 *,

354 offset_if_missing: int = -1,

355) -> int:

356 try:

357 return text.rindex(ch, start, end)

358 except ValueError:

359 return offset_if_missing

360

361

def _single_line_subrange(
    line_no: int,
    character_start_pos: int,
    character_end_pos: int,
) -> "TERange":
    """Create a range that starts and ends on the same line.

    :param line_no: The line that both ends of the range are on.
    :param character_start_pos: Character position of the start of the range.
    :param character_end_pos: Character position of the end of the range.
    :return: The range between the two positions.
    """
    range_start = TEPosition(line_no, character_start_pos)
    range_end = TEPosition(line_no, character_end_pos)
    return TERange(range_start, range_end)

377

378

def _check_header_line(
    lint_state: LintState,
    line: str,
    line_no: int,
    entry_no: int,
) -> None:
    """Validate the source name and the version of an entry header line.

    Lines that do not match `_HEADER_LINE` are silently skipped.

    :param lint_state: Receiver of the diagnostics; also provides the source
      package stanza that the source name of the first entry is compared with.
    :param line: The header line to check.
    :param line_no: Line number of `line`, used for the diagnostic ranges.
    :param entry_no: 1-based number of the entry the header belongs to. The source
      name is only compared for entry 1.
    """
    header_match = _HEADER_LINE.search(line)
    if header_match is None:
        # Syntax error: TODO flag later
        return
    source_name, source_version = header_match.groups()

    dctrl_source_pkg = lint_state.source_package
    if entry_no == 1 and dctrl_source_pkg is not None:
        expected_name = dctrl_source_pkg.fields.get("Source")
        if expected_name != source_name:
            name_start, name_end = header_match.span(1)
            name_range = _single_line_subrange(line_no, name_start, name_end)
            msg = "The first entry must use the same source name as debian/control."
            if expected_name is None:
                msg += ' The d/control file is missing the "Source" field in its first stanza'
            else:
                msg += f' Changelog uses: "{source_name}" while d/control uses: "{expected_name}"'

            lint_state.emit_diagnostic(
                name_range,
                msg,
                "error",
                "dpkg",  # man:deb-src-control(5) / #1089794
            )

    version_start, version_end = header_match.span(2)

    if PKGVERSION_REGEX.fullmatch(source_version):
        # The version is well-formed; only the common "dsfg" typo is left to check.
        typo_index = source_version.find("dsfg")
        if typo_index < 0:
            return
        typo_start = version_start + typo_index
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, typo_start, typo_start + 4),
            'Typo of "dfsg" (Debian Free Software Guidelines)',
            "pedantic",
            "debputy",
            quickfixes=[propose_correct_text_quick_fix("dfsg")],
        )
        return

    valid_part = PKGVERSION_REGEX.search(source_version)
    if not valid_part:
        # No part of the text looks like a version; flag all of it.
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, version_start, version_end),
            f'Cannot parse "{source_version}" as a Debian version.',
            "error",
            "Policy 5.6.12",
        )
        return

    # Part of the text is a valid version; flag what is before and after it.
    start_valid, end_valid = valid_part.span(0)
    invalid_ranges = []
    if start_valid > 0:
        invalid_ranges.append(
            _single_line_subrange(
                line_no,
                version_start,
                version_start + start_valid,
            )
        )
    if end_valid < len(source_version):
        invalid_ranges.append(
            _single_line_subrange(
                line_no,
                version_start + end_valid,
                version_end,
            )
        )
    for invalid_range in invalid_ranges:
        lint_state.emit_diagnostic(
            invalid_range,
            "This part cannot be parsed as a valid Debian version",
            "error",
            "Policy 5.6.12",
        )

469

470

@lint_diagnostics(_DISPATCH_RULE)
async def _lint_debian_changelog(lint_state: LintState) -> None:
    """Lint the changelog held by `lint_state`.

    Header lines and sign-off lines are validated by `_check_header_line` and
    `_check_footer_line`. Detail lines are checked for length and passed to the
    spell checker. Linting stops at the header of the third entry.

    :param lint_state: Provides the lines to check and receives the diagnostics.
    """
    lines = lint_state.lines
    entry_no = 0
    entry_limit = 2
    # Budget for the spell checker; the value returned by `spellcheck_line` is
    # deducted from it and spell checking stops when the budget is used up.
    max_words = 1000
    max_line_length = _MAXIMUM_WIDTH
    for line_no, line in enumerate(lines):
        orig_line = line
        line = line.rstrip()
        if not line:
            continue
        if line.startswith(" --"):
            _check_footer_line(lint_state, line, line_no)
            continue
        # Detail lines are indented by (at least) two spaces; the "[ Name ]" check
        # below relies on that when it looks at `line[2]`.
        if not line.startswith("  "):
            if not line[0].isspace():
                entry_no += 1
                # Figure out the right cut which may not be as simple as just the
                # top two.
                if entry_no > entry_limit:
                    break
                _check_header_line(
                    lint_state,
                    line,
                    line_no,
                    entry_no,
                )
            continue
        # Exclude the line terminator. Stripping it (rather than subtracting one)
        # keeps the length right for a final line without a trailing newline.
        orig_line_len = len(orig_line.rstrip("\r\n"))
        if orig_line_len > max_line_length:
            exceeded_line_range = _single_line_subrange(
                line_no,
                max_line_length,
                orig_line_len,
            )
            lint_state.emit_diagnostic(
                exceeded_line_range,
                f"Line exceeds {max_line_length} characters",
                "pedantic",
                "debputy",
            )
        if len(line) > 3 and line[2] == "[" and line[-1] == "]":
            # Do not spell check [ X ] as X is usually a name
            continue
        if max_words > 0:
            new_diagnostics = spellcheck_line(lint_state, line_no, line)
            max_words -= new_diagnostics

520

521

@lsp_document_link(_DISPATCH_RULE)
def _debian_changelog_links(
    ls: "DebputyLanguageServer",
    params: types.DocumentLinkParams,
) -> Optional[Sequence[types.DocumentLink]]:
    """Provide links to bugs.debian.org for the bug references in the changelog.

    Only the first bug reference list ("Closes: #1, #2") of a detail line is
    considered. Scanning stops after the line on which the number of links
    reaches 100.

    :param ls: The language server, used for looking up the document.
    :param params: The request parameters; identifies the document.
    :return: The links found (possibly none).
    """
    doc = ls.workspace.get_text_document(params.text_document.uri)
    lines = doc.lines
    links = []

    for line_no, line in enumerate(lines):
        # Bug references are only looked for in detail lines, which are indented by
        # two spaces. A single space prefix would also let sign-off lines
        # (" -- ...") through.
        if not line.startswith("  "):
            continue
        bug_line_match = _BUG_LINKS_FINDER_REGEX.search(line)
        if not bug_line_match:
            continue
        # The individual matches are relative to the matched text rather than to
        # the line.
        bug_offset = bug_line_match.start(0)
        for bug_match in _INDIVIDUAL_BUGS_REGEX.finditer(bug_line_match.group(0)):
            bug_id = bug_match.group(1)
            bug_span = bug_match.span()
            bug_range = _single_line_subrange(
                line_no,
                bug_span[0] + bug_offset,
                bug_span[1] + bug_offset,
            )
            links.append(
                types.DocumentLink(bug_range, f"https://bugs.debian.org/{bug_id}")
            )

        total_links = len(links)
        if total_links >= 100:
            break

    return links

Coverage for src/debputy/lsp/languages/lsp_debian_changelog.py: 94%

215 statements