clang-tools 23.0.0git
check_alphabetical_order.py
Go to the documentation of this file.
1#!/usr/bin/env python3
2#
3# ===-----------------------------------------------------------------------===#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9# ===-----------------------------------------------------------------------===#
10
11"""
12
13Clang-Tidy Alphabetical Order Checker
14=====================================
15
16Normalize Clang-Tidy documentation with deterministic sorting for linting/tests.
17
18Behavior:
19- Sort entries in docs/clang-tidy/checks/list.rst csv-table.
20- Sort key sections in docs/ReleaseNotes.rst.
21- Detect duplicated entries in 'Changes in existing checks'.
22
23Flags:
24 -o/--output Write normalized content to this path instead of updating docs.
25"""
26
27import argparse
28from collections import defaultdict
29import io
30from operator import itemgetter
31import os
32import re
33import sys
34from typing import (
35 DefaultDict,
36 Final,
37 Iterable,
38 List,
39 NamedTuple,
40 Optional,
41 Sequence,
42 Tuple,
43)
44
# Matches a :doc:`label <path>` or :doc:`label` reference anywhere in text and
# captures the label. Used to sort bullet items alphabetically in ReleaseNotes
# items by their label.
DOC_LABEL_RN_RE: Final = re.compile(r":doc:`(?P<label>[^`<]+)\s*(?:<[^>]+>)?`")

# Matches a single csv-table row line in list.rst that begins with a :doc:
# reference, capturing the label. Used to extract the sort key per row.
DOC_LINE_RE: Final = re.compile(r"^\s*:doc:`(?P<label>[^`<]+?)\s*<[^>]+>`.*$")


# Documentation paths resolved relative to this script's location.
# NOTE(review): "../.." assumes the script sits two directories below the
# clang-tools-extra root — verify if the script is ever relocated.
EXTRA_DIR: Final = os.path.join(os.path.dirname(__file__), "../..")
DOCS_DIR: Final = os.path.join(EXTRA_DIR, "docs")
CLANG_TIDY_DOCS_DIR: Final = os.path.join(DOCS_DIR, "clang-tidy")
CHECKS_DOCS_DIR: Final = os.path.join(CLANG_TIDY_DOCS_DIR, "checks")
LIST_DOC: Final = os.path.join(CHECKS_DOCS_DIR, "list.rst")
RELEASE_NOTES_DOC: Final = os.path.join(DOCS_DIR, "ReleaseNotes.rst")


# Type aliases used throughout this script.

# Label extracted from :doc:`...`.
CheckLabel = str
# A list of text lines (each line normally keeps its trailing "\n").
Lines = List[str]
# The lines of one bullet item: the "- ..." line plus its continuation lines.
BulletBlock = List[str]

# Pair of the extracted label and its block.
BulletItem = Tuple[CheckLabel, BulletBlock]

# Index of the first line of a bullet block within the full lines list.
BulletStart = int

# All occurrences for a given label.
DuplicateOccurrences = List[Tuple[BulletStart, BulletBlock]]
76
77
class BulletBlocks(NamedTuple):
    """Structured result of parsing a bullet-list section.

    - prefix: lines before the first bullet within the section range.
    - blocks: list of (label, block-lines) pairs for each bullet block.
    - suffix: lines after the last bullet within the section range.
    """

    prefix: Lines
    blocks: List[BulletItem]
    suffix: Lines
89
90
class ScannedBlocks(NamedTuple):
    """Result of scanning bullet blocks within a section range.

    - blocks_with_pos: list of (start_index, block_lines) for each bullet block.
    - next_index: index where scanning stopped; start of the suffix region.
    """

    blocks_with_pos: List[Tuple[BulletStart, BulletBlock]]
    next_index: int
100
101
def _scan_bullet_blocks(lines: Sequence[str], start: int, end: int) -> ScannedBlocks:
    """Scan consecutive bullet blocks and return (blocks_with_pos, next_index).

    Each entry in blocks_with_pos is a tuple of (start_index, block_lines).
    next_index is the index where scanning stopped (start of suffix).
    """
    i = start
    n = end
    blocks_with_pos: List[Tuple[BulletStart, BulletBlock]] = []
    while i < n:
        # Each outer iteration consumes exactly one bullet block; stop at the
        # first non-bullet line so trailing text becomes the caller's suffix.
        if not _is_bullet_start(lines[i]):
            break
        bstart = i
        i += 1
        # Absorb continuation lines until the next bullet begins.
        while i < n and not _is_bullet_start(lines[i]):
            if (
                i + 1 < n
                and set(lines[i + 1].rstrip("\n")) == {"^"}
                and lines[i].strip()
            ):
                # lines[i] is a heading title followed by a ^^^ underline:
                # the section ended, so do not swallow the heading into
                # this bullet block.
                break
            i += 1
        block: BulletBlock = list(lines[bstart:i])
        blocks_with_pos.append((bstart, block))
    return ScannedBlocks(blocks_with_pos, i)
127
128
def read_text(path: str) -> str:
    """Read the file at *path* and return its contents decoded as UTF-8."""
    with open(path, "r", encoding="utf-8") as handle:
        return handle.read()
132
133
def write_text(path: str, content: str) -> None:
    """Write *content* to *path* as UTF-8, keeping newlines verbatim."""
    with open(path, "w", encoding="utf-8", newline="") as handle:
        handle.write(content)
137
138
def _normalize_list_rst_lines(lines: Sequence[str]) -> List[str]:
    """Return normalized content of checks list.rst as a list of lines."""
    out: List[str] = []
    i = 0
    n = len(lines)

    def check_name(line: str) -> Tuple[int, CheckLabel]:
        # Sort key: rows with a :doc: reference sort first, by label; any
        # other row groups after them with an empty key (stable sort keeps
        # their relative order).
        if m := DOC_LINE_RE.match(line):
            return (0, m.group("label"))
        return (1, "")

    while i < n:
        line = lines[i]
        if line.lstrip().startswith(".. csv-table::"):
            out.append(line)
            i += 1

            # Copy the directive's option/header lines verbatim until the
            # first :doc: row appears (that row starts the sortable body).
            while i < n and (lines[i].startswith(" ") or lines[i].strip() == ""):
                if DOC_LINE_RE.match(lines[i]):
                    break
                out.append(lines[i])
                i += 1

            # Collect every indented row of the table body ...
            entries: List[str] = []
            while i < n and lines[i].startswith(" "):
                entries.append(lines[i])
                i += 1

            # ... then emit the rows sorted by their :doc: label.
            entries_sorted = sorted(entries, key=check_name)
            out.extend(entries_sorted)
            continue

        out.append(line)
        i += 1

    return out
175
176
def normalize_list_rst(data: str) -> str:
    """Normalize list.rst content and return it as a single string."""
    as_lines = data.splitlines(True)
    return "".join(_normalize_list_rst_lines(as_lines))
181
182
def find_heading(lines: Sequence[str], title: str) -> Optional[int]:
    """Find heading start index for a section underlined with ^ characters.

    Looks for a line equal to `title` immediately followed by a line made up
    solely of ^ characters of the same length, which is the ReleaseNotes
    style for the subsection headings handled here.

    Returns the index of the title line, or None if not found.
    """
    for idx in range(len(lines) - 1):
        if lines[idx].rstrip("\n") != title:
            continue
        underline = lines[idx + 1].rstrip("\n")
        if underline and set(underline) == {"^"} and len(underline) == len(title):
            return idx
    return None
201
202
def extract_label(text: str) -> str:
    """Return the :doc: label found in *text*, or *text* itself if none."""
    match = DOC_LABEL_RN_RE.search(text)
    return match.group("label").strip() if match else text
207
208
209def _is_bullet_start(line: str) -> bool:
210 return line.startswith("- ")
211
212
def _parse_bullet_blocks(lines: Sequence[str], start: int, end: int) -> BulletBlocks:
    """Split lines[start:end] into (prefix, labeled bullet blocks, suffix)."""
    cursor = start
    while cursor < end and not _is_bullet_start(lines[cursor]):
        cursor += 1
    prefix: Lines = list(lines[start:cursor])

    scanned = _scan_bullet_blocks(lines, cursor, end)
    # Key each block by the label extracted from its full text.
    blocks: List[BulletItem] = [
        (extract_label("".join(block)), block)
        for _, block in scanned.blocks_with_pos
    ]

    suffix: Lines = list(lines[scanned.next_index : end])
    return BulletBlocks(prefix, blocks, suffix)
229
230
def sort_blocks(blocks: Iterable[BulletItem]) -> List[BulletBlock]:
    """Return blocks sorted deterministically by their extracted label.

    Duplicates are preserved; merging is left to authors to handle manually.
    """
    ordered = sorted(blocks, key=lambda item: item[0])
    return [block for _, block in ordered]
237
238
def find_duplicate_entries(
    lines: Sequence[str], title: str
) -> List[Tuple[CheckLabel, DuplicateOccurrences]]:
    """Return detailed duplicate info as (key, [(start_idx, block_lines), ...]).

    start_idx is the 0-based index of the first line of the bullet block in
    the original lines list. Only keys with more than one occurrence are
    returned, and occurrences are listed in the order they appear.
    """
    bounds = _find_section_bounds(lines, title, None)
    if bounds is None:
        return []
    _, sec_start, sec_end = bounds

    # Skip the section prose before the first bullet.
    i = sec_start
    n = sec_end
    while i < n and not _is_bullet_start(lines[i]):
        i += 1

    # Group every bullet block by the label on its *first* line (unlike the
    # sorter, which keys on the whole block text); insertion order of the
    # grouped dict preserves the order of appearance.
    grouped: DefaultDict[CheckLabel, DuplicateOccurrences] = defaultdict(list)
    res = _scan_bullet_blocks(lines, i, n)
    for bstart, block in res.blocks_with_pos:
        grouped[extract_label(block[0])].append((bstart, block))

    # Keep only labels seen more than once; sort keys for stable output.
    result = [(key, occs) for key, occs in grouped.items() if len(occs) > 1]
    result.sort(key=itemgetter(0))
    return result
276
277
def _find_section_bounds(
    lines: Sequence[str], title: str, next_title: Optional[str]
) -> Optional[Tuple[int, int, int]]:
    """Return (h_start, sec_start, sec_end) for section `title`.

    - h_start: index of the section title line
    - sec_start: index of the first content line after underline
    - sec_end: index of the first line of the next section title (or end)
    """
    if (h_start := find_heading(lines, title)) is None:
        return None

    # Skip the title line and its ^^^ underline.
    sec_start = h_start + 2

    def scan_to_next_heading(idx: int) -> int:
        # Advance until a non-blank line followed by a ^^^ underline (i.e.
        # the next heading) or the end of the document.
        while idx < len(lines):
            if (
                idx + 1 < len(lines)
                and lines[idx].strip()
                and set(lines[idx + 1].rstrip("\n")) == {"^"}
            ):
                break
            idx += 1
        return idx

    # Prefer the explicitly named next section; fall back to scanning for
    # any heading-like underline when it is absent or not found.
    if next_title is not None and (h_end := find_heading(lines, next_title)) is not None:
        sec_end = h_end
    else:
        sec_end = scan_to_next_heading(sec_start)

    return h_start, sec_start, sec_end
320
321
def _normalize_release_notes_section(
    lines: Sequence[str], title: str, next_title: Optional[str]
) -> List[str]:
    """Normalize a single release-notes section and return updated lines.

    Bullet blocks inside the section are sorted by label, with a blank
    separator line maintained between consecutive blocks; lines outside the
    section are returned untouched.
    """
    if (bounds := _find_section_bounds(lines, title, next_title)) is None:
        return list(lines)
    _, sec_start, sec_end = bounds

    prefix, blocks, suffix = _parse_bullet_blocks(lines, sec_start, sec_end)
    sorted_blocks = sort_blocks(blocks)

    new_section: List[str] = []
    new_section.extend(prefix)
    for i_b, b in enumerate(sorted_blocks):
        # Insert a blank separator before every block after the first unless
        # the previous line is already blank.
        if i_b > 0 and (not new_section or new_section[-1].strip() != ""):
            new_section.append("\n")
        new_section.extend(b)
    new_section.extend(suffix)

    return list(lines[:sec_start]) + new_section + list(lines[sec_end:])
344
345
def normalize_release_notes(lines: Sequence[str]) -> str:
    """Sort the known ReleaseNotes sections and return the joined text."""
    sections = ["New checks", "New check aliases", "Changes in existing checks"]
    # Pair each section title with the title that follows it (None for last).
    followers = sections[1:] + [None]

    out = list(lines)
    # Sections are processed last-to-first; each pass rewrites one section.
    for title, next_title in reversed(list(zip(sections, followers))):
        out = _normalize_release_notes_section(out, title, next_title)

    return "".join(out)
357
358
def _emit_duplicate_report(lines: Sequence[str], title: str) -> Optional[str]:
    """Build a human-readable duplicate report for section *title*, or None."""
    duplicates = find_duplicate_entries(lines, title)
    if not duplicates:
        return None

    parts: List[str] = [
        f"Error: Duplicate entries in '{title}'.\n",
        "\nPlease merge these entries into a single bullet point.\n",
    ]
    for key, occurrences in duplicates:
        parts.append(f"\n-- Duplicate: {key}\n")
        for start_idx, block in occurrences:
            parts.append(f"- At line {start_idx + 1}:\n")
            parts.append("".join(block))
            # Keep each quoted block newline-terminated in the report.
            if not (block and block[-1].endswith("\n")):
                parts.append("\n")
    return "".join(parts)
373
374
def process_release_notes(out_path: str, rn_doc: str) -> int:
    """Normalize ReleaseNotes.rst, report ordering issues, then duplicates."""
    text = read_text(rn_doc)
    lines = text.splitlines(True)
    normalized = normalize_release_notes(lines)
    write_text(out_path, normalized)

    # Ordering issues take precedence; the diff is what fails the test.
    if normalized != text:
        sys.stderr.write(
            "\nEntries in 'clang-tools-extra/docs/ReleaseNotes.rst' are not alphabetically sorted.\n"
            "Fix the ordering by applying diff printed below.\n\n"
        )
        return 0

    # Ordering is clean; now enforce the no-duplicates rule.
    report = _emit_duplicate_report(lines, "Changes in existing checks")
    if report is None:
        return 0
    sys.stderr.write(report)
    return 3
394
395
def process_checks_list(out_path: str, list_doc: str) -> int:
    """Normalize the checks list csv-table; warn when it was out of order."""
    original = read_text(list_doc)
    normalized = normalize_list_rst(original)

    if normalized != original:
        sys.stderr.write(
            "\nChecks in 'clang-tools-extra/docs/clang-tidy/checks/list.rst' csv-table are not alphabetically sorted.\n"
            "Fix the ordering by applying diff printed below.\n\n"
        )

    write_text(out_path, normalized)
    return 0
408
409
def main(argv: Sequence[str]) -> int:
    """Entry point: normalize the docs in place, or write to -o/--output."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-o", "--output", dest="out", default=None)
    opts = parser.parse_args(argv)

    list_doc = os.path.normpath(LIST_DOC)
    rn_doc = os.path.normpath(RELEASE_NOTES_DOC)

    if opts.out:
        # Route by output filename: "release" selects the ReleaseNotes flow,
        # anything else the checks-list flow.
        if "release" in os.path.basename(opts.out).lower():
            return process_release_notes(opts.out, rn_doc)
        return process_checks_list(opts.out, list_doc)

    # No -o: rewrite both documents in place.
    process_checks_list(list_doc, list_doc)
    return process_release_notes(rn_doc, rn_doc)
427
428
if __name__ == "__main__":
    # Propagate the exit status (3 signals duplicate entries) to the shell.
    sys.exit(main(sys.argv[1:]))
List[Tuple[CheckLabel, DuplicateOccurrences]] find_duplicate_entries(Sequence[str] lines, str title)
List[BulletBlock] sort_blocks(Iterable[BulletItem] blocks)
List[str] _normalize_list_rst_lines(Sequence[str] lines)
Optional[int] find_heading(Sequence[str] lines, str title)
None write_text(str path, str content)
Optional[Tuple[int, int, int]] _find_section_bounds(Sequence[str] lines, str title, Optional[str] next_title)
int process_checks_list(str out_path, str list_doc)
List[str] _normalize_release_notes_section(Sequence[str] lines, str title, Optional[str] next_title)
Optional[str] _emit_duplicate_report(Sequence[str] lines, str title)
str normalize_release_notes(Sequence[str] lines)
ScannedBlocks _scan_bullet_blocks(Sequence[str] lines, int start, int end)
int process_release_notes(str out_path, str rn_doc)
BulletBlocks _parse_bullet_blocks(Sequence[str] lines, int start, int end)