# clang-tools 22.0.0git — Doxygen documentation-render header (not source code).
# check_alphabetical_order.py
# (Rendered page text: "Go to the documentation of this file.")
#!/usr/bin/env python3
#
# ===-----------------------------------------------------------------------===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ===-----------------------------------------------------------------------===#

"""

Clang-Tidy Alphabetical Order Checker
=====================================

Normalize Clang-Tidy documentation with deterministic sorting for linting/tests.

Behavior:
- Sort entries in docs/clang-tidy/checks/list.rst csv-table.
- Sort key sections in docs/ReleaseNotes.rst.
- Detect duplicated entries in 'Changes in existing checks'.

Flags:
  -o/--output  Write normalized content to this path instead of updating docs.
"""

import argparse
from collections import defaultdict
import io
from operator import itemgetter
import os
import re
import sys
from typing import (
    DefaultDict,
    Final,
    Iterable,
    List,
    NamedTuple,
    Optional,
    Sequence,
    Tuple,
)

# Matches a :doc:`label <path>` or :doc:`label` reference anywhere in text and
# captures the label. Used to sort bullet items alphabetically in ReleaseNotes
# items by their label.
DOC_LABEL_RN_RE: Final = re.compile(r":doc:`(?P<label>[^`<]+)\s*(?:<[^>]+>)?`")

# Matches a single csv-table row line in list.rst that begins with a :doc:
# reference, capturing the label. Used to extract the sort key per row.
DOC_LINE_RE: Final = re.compile(r"^\s*:doc:`(?P<label>[^`<]+?)\s*<[^>]+>`.*$")


# This script lives two directory levels below clang-tools-extra.
EXTRA_DIR: Final = os.path.join(os.path.dirname(__file__), "../..")
DOCS_DIR: Final = os.path.join(EXTRA_DIR, "docs")
CLANG_TIDY_DOCS_DIR: Final = os.path.join(DOCS_DIR, "clang-tidy")
CHECKS_DOCS_DIR: Final = os.path.join(CLANG_TIDY_DOCS_DIR, "checks")
LIST_DOC: Final = os.path.join(CHECKS_DOCS_DIR, "list.rst")
RELEASE_NOTES_DOC: Final = os.path.join(DOCS_DIR, "ReleaseNotes.rst")


# Label extracted from :doc:`...`.
CheckLabel = str
Lines = List[str]
BulletBlock = List[str]

# Pair of the extracted label and its block.
BulletItem = Tuple[CheckLabel, BulletBlock]

# Index of the first line of a bullet block within the full lines list.
BulletStart = int

# All occurrences for a given label.
DuplicateOccurrences = List[Tuple[BulletStart, BulletBlock]]
79 """Structured result of parsing a bullet-list section.
80
81 - prefix: lines before the first bullet within the section range.
82 - blocks: list of (label, block-lines) pairs for each bullet block.
83 - suffix: lines after the last bullet within the section range.
84 """
85
86 prefix: Lines
87 blocks: List[BulletItem]
88 suffix: Lines
89
90
92 """Result of scanning bullet blocks within a section range.
93
94 - blocks_with_pos: list of (start_index, block_lines) for each bullet block.
95 - next_index: index where scanning stopped; start of the suffix region.
96 """
97
98 blocks_with_pos: List[Tuple[BulletStart, BulletBlock]]
99 next_index: int
100
101
def _scan_bullet_blocks(lines: Sequence[str], start: int, end: int) -> ScannedBlocks:
    """Collect consecutive bullet blocks beginning at `start`.

    Returns a ScannedBlocks whose blocks_with_pos holds one
    (start_index, block_lines) pair per bullet, and whose next_index is the
    position where scanning stopped (i.e. the start of the suffix region).
    """
    blocks_with_pos: List[Tuple[BulletStart, BulletBlock]] = []
    pos = start
    while pos < end and _is_bullet_start(lines[pos]):
        block_start = pos
        pos += 1
        # Absorb continuation lines until the next bullet, or stop early when
        # the following line is a '^' heading underline (a new subsection).
        while pos < end and not _is_bullet_start(lines[pos]):
            at_heading = (
                pos + 1 < end
                and set(lines[pos + 1].rstrip("\n")) == {"^"}
                and bool(lines[pos].strip())
            )
            if at_heading:
                break
            pos += 1
        blocks_with_pos.append((block_start, list(lines[block_start:pos])))
    return ScannedBlocks(blocks_with_pos, pos)
127
128
def read_text(path: str) -> str:
    """Read and return the entire UTF-8 file at `path`."""
    with io.open(path, mode="r", encoding="utf-8") as handle:
        return handle.read()
132
133
def write_text(path: str, content: str) -> None:
    """Write `content` to `path` as UTF-8, with no newline translation."""
    # newline="" keeps any \r\n in the normalized text byte-for-byte.
    with io.open(path, mode="w", encoding="utf-8", newline="") as handle:
        handle.write(content)
137
138
def _normalize_list_rst_lines(lines: Sequence[str]) -> List[str]:
    """Return normalized content of checks list.rst as a list of lines."""

    def row_key(row: str) -> Tuple[int, CheckLabel]:
        # Rows carrying a :doc: reference sort first, keyed by label; any
        # other row sorts after them and keeps its relative order (the sort
        # is stable and all such rows share the empty key).
        match = DOC_LINE_RE.match(row)
        return (0, match.group("label")) if match else (1, "")

    result: List[str] = []
    idx, total = 0, len(lines)
    while idx < total:
        current = lines[idx]
        if not current.lstrip().startswith(".. csv-table::"):
            result.append(current)
            idx += 1
            continue

        # Copy the directive line plus its options/header verbatim, stopping
        # at the first real :doc: row.
        result.append(current)
        idx += 1
        while idx < total and (lines[idx].startswith(" ") or lines[idx].strip() == ""):
            if DOC_LINE_RE.match(lines[idx]):
                break
            result.append(lines[idx])
            idx += 1

        # Gather the indented table rows and emit them sorted by check label.
        rows: List[str] = []
        while idx < total and lines[idx].startswith(" "):
            rows.append(lines[idx])
            idx += 1
        result.extend(sorted(rows, key=row_key))

    return result
175
176
def normalize_list_rst(data: str) -> str:
    """Normalize list.rst content and return a string."""
    # splitlines(True) keeps line endings so the round-trip is lossless.
    return "".join(_normalize_list_rst_lines(data.splitlines(True)))
181
182
def find_heading(lines: Sequence[str], title: str) -> Optional[int]:
    """Locate a section heading underlined with '^' characters.

    A heading is a line equal to `title` immediately followed by a line of
    '^' characters of the same length — the ReleaseNotes subsection style.

    Returns the index of the title line, or None if not found.
    """
    for idx, candidate in enumerate(lines[:-1]):
        if candidate.rstrip("\n") != title:
            continue
        underline = lines[idx + 1].rstrip("\n")
        # Non-empty run of '^' whose length matches the title exactly.
        if underline and underline == "^" * len(title):
            return idx
    return None
201
202
def extract_label(text: str) -> str:
    """Return the :doc: label found in `text`, or `text` itself if absent."""
    found = DOC_LABEL_RN_RE.search(text)
    return found.group("label") if found else text
207
208
209def _is_bullet_start(line: str) -> bool:
210 return line.startswith("- ")
211
212
def _parse_bullet_blocks(lines: Sequence[str], start: int, end: int) -> BulletBlocks:
    """Split lines[start:end] into (prefix, labeled bullet blocks, suffix)."""
    # Everything before the first bullet is the section prefix.
    cursor = start
    while cursor < end and not _is_bullet_start(lines[cursor]):
        cursor += 1
    prefix: Lines = list(lines[start:cursor])

    # Label each scanned bullet block by its first line's :doc: reference.
    scanned = _scan_bullet_blocks(lines, cursor, end)
    labeled: List[BulletItem] = [
        (extract_label(block[0]), block) for _, block in scanned.blocks_with_pos
    ]

    suffix: Lines = list(lines[scanned.next_index : end])
    return BulletBlocks(prefix, labeled, suffix)
229
230
def sort_blocks(blocks: Iterable[BulletItem]) -> List[BulletBlock]:
    """Return blocks ordered by their extracted label (stable sort).

    Duplicates are preserved; merging is left to authors to handle manually.
    """
    ordered = sorted(blocks, key=lambda item: item[0])
    return [block for _, block in ordered]
237
238
def find_duplicate_entries(
    lines: Sequence[str], title: str
) -> List[Tuple[CheckLabel, DuplicateOccurrences]]:
    """Return detailed duplicate info as (key, [(start_idx, block_lines), ...]).

    start_idx is the 0-based index of the first line of the bullet block in
    the original lines list. Only keys with more than one occurrence are
    returned, and occurrences are listed in the order they appear.
    """
    bounds = _find_section_bounds(lines, title, None)
    if bounds is None:
        return []
    _, sec_start, sec_end = bounds

    # Skip any prose between the heading and the first bullet.
    i = sec_start
    while i < sec_end and not _is_bullet_start(lines[i]):
        i += 1

    # Group every occurrence by its extracted label, preserving order.
    grouped: DefaultDict[CheckLabel, DuplicateOccurrences] = defaultdict(list)
    for bstart, block in _scan_bullet_blocks(lines, i, sec_end).blocks_with_pos:
        grouped[extract_label(block[0])].append((bstart, block))

    # Keep only labels seen more than once, sorted by label for stable output.
    result = [(key, occs) for key, occs in grouped.items() if len(occs) > 1]
    result.sort(key=itemgetter(0))
    return result
276
277
def _find_section_bounds(
    lines: Sequence[str], title: str, next_title: Optional[str]
) -> Optional[Tuple[int, int, int]]:
    """Return (h_start, sec_start, sec_end) for section `title`.

    - h_start: index of the section title line
    - sec_start: index of the first content line after underline
    - sec_end: index of the first line of the next section title (or end)
    """
    if (h_start := find_heading(lines, title)) is None:
        return None

    # Content begins after the title line and its '^' underline.
    sec_start = h_start + 2

    def scan_to_next_heading(from_idx: int) -> int:
        # Advance until the cursor sits on a non-empty line whose successor
        # is a '^' underline (i.e. the next section title), or until EOF.
        idx = from_idx
        while idx + 1 < len(lines):
            if lines[idx].strip() and set(lines[idx + 1].rstrip("\n")) == {"^"}:
                break
            idx += 1
        return idx

    # Prefer the explicit next_title heading; otherwise scan forward for any
    # heading-like underline (also the fallback when next_title is absent).
    if next_title is not None and (h_end := find_heading(lines, next_title)) is not None:
        sec_end = h_end
    else:
        sec_end = scan_to_next_heading(sec_start)

    return h_start, sec_start, sec_end
312
313
def _normalize_release_notes_section(
    lines: Sequence[str], title: str, next_title: Optional[str]
) -> List[str]:
    """Normalize a single release-notes section and return updated lines.

    The section's bullet blocks are sorted by label; the prose before the
    first bullet and after the last bullet is preserved verbatim. If the
    section is missing, the input is returned unchanged (as a copy).
    """
    if (bounds := _find_section_bounds(lines, title, next_title)) is None:
        return list(lines)
    _, sec_start, sec_end = bounds

    prefix, blocks, suffix = _parse_bullet_blocks(lines, sec_start, sec_end)

    new_section: List[str] = []
    new_section.extend(prefix)
    for index, block in enumerate(sort_blocks(blocks)):
        # Keep exactly one blank line between consecutive bullet blocks.
        if index > 0 and (not new_section or new_section[-1].strip() != ""):
            new_section.append("\n")
        new_section.extend(block)
    new_section.extend(suffix)

    return list(lines[:sec_start]) + new_section + list(lines[sec_end:])
336
337
def normalize_release_notes(lines: Sequence[str]) -> str:
    """Sort the key ReleaseNotes sections and return the full text."""
    section_titles = ["New checks", "New check aliases", "Changes in existing checks"]
    # Pair each title with the one that follows it (None for the last), then
    # process back-to-front so earlier section bounds stay valid as we edit.
    followers = section_titles[1:] + [None]
    result = list(lines)
    for title, next_title in reversed(list(zip(section_titles, followers))):
        result = _normalize_release_notes_section(result, title, next_title)
    return "".join(result)
349
350
def _emit_duplicate_report(lines: Sequence[str], title: str) -> Optional[str]:
    """Render a human-readable duplicate report, or None when there are none."""
    duplicates = find_duplicate_entries(lines, title)
    if not duplicates:
        return None
    parts: List[str] = [f"Error: Duplicate entries in '{title}':\n"]
    for key, occurrences in duplicates:
        parts.append(f"\n-- Duplicate: {key}\n")
        for start_idx, block in occurrences:
            parts.append(f"- At line {start_idx + 1}:\n")
            parts.append("".join(block))
            # Keep entries visually separated even when a block lacks a
            # trailing newline.
            if not block or not block[-1].endswith("\n"):
                parts.append("\n")
    return "".join(parts)
364
365
def process_release_notes(out_path: str, rn_doc: str) -> int:
    """Normalize ReleaseNotes.rst into `out_path` and report problems.

    Returns 0 on success (ordering issues are reported on stderr but left for
    the caller's diff to fail on) and 3 when duplicate entries are detected.
    """
    original = read_text(rn_doc)
    original_lines = original.splitlines(True)
    normalized = normalize_release_notes(original_lines)
    write_text(out_path, normalized)

    # Prefer reporting ordering issues first; let diff fail the test.
    if original != normalized:
        sys.stderr.write(
            "\nEntries in 'clang-tools-extra/docs/ReleaseNotes.rst' are not alphabetically sorted.\n"
            "Fix the ordering by applying diff printed below.\n\n"
        )
        return 0

    # Ordering is clean then enforce duplicates.
    report = _emit_duplicate_report(original_lines, "Changes in existing checks")
    if report:
        sys.stderr.write(report)
        return 3
    return 0
385
386
def process_checks_list(out_path: str, list_doc: str) -> int:
    """Normalize the checks list csv-table and write it to `out_path`.

    Always returns 0; an out-of-order table is reported on stderr and left
    for the caller's diff to fail on.
    """
    original = read_text(list_doc)
    normalized = normalize_list_rst(original)

    if normalized != original:
        sys.stderr.write(
            "\nChecks in 'clang-tools-extra/docs/clang-tidy/checks/list.rst' csv-table are not alphabetically sorted.\n"
            "Fix the ordering by applying diff printed below.\n\n"
        )

    write_text(out_path, normalized)
    return 0
399
400
def main(argv: Sequence[str]) -> int:
    """CLI entry point; returns the process exit status."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-o", "--output", dest="out", default=None)
    options = parser.parse_args(argv)

    list_doc = os.path.normpath(LIST_DOC)
    rn_doc = os.path.normpath(RELEASE_NOTES_DOC)

    if options.out:
        # With -o, normalize only the document the output filename hints at.
        target = options.out
        if "release" in os.path.basename(target).lower():
            return process_release_notes(target, rn_doc)
        return process_checks_list(target, list_doc)

    # Default mode: rewrite both documents in place.
    process_checks_list(list_doc, list_doc)
    return process_release_notes(rn_doc, rn_doc)
418
419
if __name__ == "__main__":
    # Forward CLI args (minus the program name) and propagate the exit code.
    raise SystemExit(main(sys.argv[1:]))
# ---------------------------------------------------------------------------
# Doxygen-generated signature index (documentation-render residue, not code):
# List[Tuple[CheckLabel, DuplicateOccurrences]] find_duplicate_entries(Sequence[str] lines, str title)
# List[BulletBlock] sort_blocks(Iterable[BulletItem] blocks)
# List[str] _normalize_list_rst_lines(Sequence[str] lines)
# Optional[int] find_heading(Sequence[str] lines, str title)
# None write_text(str path, str content)
# Optional[Tuple[int, int, int]] _find_section_bounds(Sequence[str] lines, str title, Optional[str] next_title)
# int process_checks_list(str out_path, str list_doc)
# List[str] _normalize_release_notes_section(Sequence[str] lines, str title, Optional[str] next_title)
# Optional[str] _emit_duplicate_report(Sequence[str] lines, str title)
# str normalize_release_notes(Sequence[str] lines)
# ScannedBlocks _scan_bullet_blocks(Sequence[str] lines, int start, int end)
# int process_release_notes(str out_path, str rn_doc)
# BulletBlocks _parse_bullet_blocks(Sequence[str] lines, int start, int end)