clang-tools  14.0.0git
gen_std.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 #===- gen_std.py - ------------------------------------------*- python -*--===#
3 #
4 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 # See https://llvm.org/LICENSE.txt for license information.
6 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 #
8 #===------------------------------------------------------------------------===#
9 
10 """gen_std.py is a tool to generate a lookup table (from qualified names to
11 include headers) for C/C++ Standard Library symbols by parsing archived HTML
12 files from cppreference.
13 
14 Caveats and FIXMEs:
15  - only symbols directly in "std" namespace are added, we should also add std's
16  subnamespace symbols (e.g. chrono).
17  - symbols with multiple variants or defined in multiple headers aren't added,
18  e.g. std::move, std::swap
19 
20 Usage:
21  1. Install BeautifulSoup dependency, see instruction:
22  https://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-beautiful-soup
23  2. Download cppreference offline HTML files (e.g. html_book_20181028.zip) at
24  https://en.cppreference.com/w/Cppreference:Archives
25  3. Unzip the zip file from step 2 to directory </cppreference>, you should
26  get a "reference" directory in </cppreference>
27  4. Run the command:
28  // Generate C++ symbols
29  gen_std.py -cppreference </cppreference/reference> -language=cpp > StdSymbolMap.inc
30  // Generate C symbols
31  gen_std.py -cppreference </cppreference/reference> -language=c > CSymbolMap.inc
32 """
33 
34 
35 import cppreference_parser
36 import argparse
37 import datetime
38 import os
39 import sys
40 
# Banner emitted at the top of every generated .inc file. It has two "%s"
# slots, filled in by main(): the upper-cased language name and the date used
# as the cppreference version stamp. The trailing empty entry gives the banner
# its final newline.
CODE_PREFIX = "\n".join((
    "//===-- gen_std.py generated file -------------------------------*- C++ -*-===//",
    "//",
    "// Used to build a lookup table (qualified names => include headers) for %s",
    "// Standard Library symbols.",
    "//",
    "// Automatically generated file, DO NOT EDIT!",
    "//",
    "// Generated from cppreference offline HTML book (modified on %s).",
    "//===----------------------------------------------------------------------===//",
    "",
))
52 
def ParseArg():
    """Parse the command-line options of gen_std.py from sys.argv.

    Returns:
        An argparse.Namespace with two attributes:
          cppreference: path to the cppreference offline HTML directory.
          language: which symbols to generate, either 'c' or 'cpp'.

    Both options are required. argparse prints a usage message and exits
    (raising SystemExit) when either one is missing or when -language is
    not one of the supported values.
    """
    parser = argparse.ArgumentParser(description='Generate StdGen file')
    # No `default=` on either option: both are required, so a default value
    # could never take effect.
    parser.add_argument('-cppreference', metavar='PATH',
                        help='path to the cppreference offline HTML directory',
                        required=True)
    # Restrict the accepted values up front so an unsupported language is
    # reported as a usage error instead of failing later in main().
    parser.add_argument('-language',
                        choices=('c', 'cpp'),
                        help='Generate c or cpp symbols',
                        required=True)
    return parser.parse_args()
65 
66 
def main():
    """Generate the symbol table and print it to stdout.

    Reads the options via ParseArg(), selects the cppreference index pages
    for the requested language, passes them to
    cppreference_parser.GetSymbols(), and prints the CODE_PREFIX banner
    followed by one SYMBOL(name, namespace, header) line for every symbol
    that has exactly one header. Symbols with no header or with several
    headers are reported on stderr and left out of the table.

    Exits via sys.exit() with a message when the language is not supported
    or when the expected index directory does not exist.
    """
    args = ParseArg()
    # Each entry is (directory, page file name, third value); the third value
    # looks like the namespace qualifier of the symbols on that page (None for
    # C) -- confirm against cppreference_parser.GetSymbols.
    if args.language == 'cpp':
        page_root = os.path.join(args.cppreference, "en", "cpp")
        symbol_index_root = os.path.join(page_root, "symbol_index")
        parse_pages = [
            (page_root, "symbol_index.html", "std::"),
            # std sub-namespace symbols have separated pages.
            # We don't index std literal operators (e.g.
            # std::literals::chrono_literals::operator""d), these symbols
            # can't be accessed by std::<symbol_name>.
            # FIXME: index std::placeholders symbols, placeholders.html page
            # is different (which contains one entry for _1, _2, ..., _N), we
            # need special handling.
            (symbol_index_root, "chrono.html", "std::chrono::"),
            (symbol_index_root, "filesystem.html", "std::filesystem::"),
            (symbol_index_root, "pmr.html", "std::pmr::"),
            (symbol_index_root, "regex_constants.html",
             "std::regex_constants::"),
            (symbol_index_root, "this_thread.html", "std::this_thread::"),
        ]
    elif args.language == 'c':
        page_root = os.path.join(args.cppreference, "en", "c")
        symbol_index_root = page_root
        parse_pages = [(page_root, "index.html", None)]
    else:
        # Without this branch the names assigned above would be unbound and
        # the script would die with an unrelated-looking error below.
        sys.exit("Unsupported language %s, expected 'c' or 'cpp'!"
                 % args.language)

    if not os.path.exists(symbol_index_root):
        # sys.exit rather than the bare exit(): the latter is only installed
        # by the site module and is not guaranteed to exist.
        sys.exit("Path %s doesn't exist!" % symbol_index_root)

    symbols = cppreference_parser.GetSymbols(parse_pages)

    # We don't have version information from the unzipped offline HTML files,
    # so we use the modified time of the language's index.html as the version.
    index_page_path = os.path.join(page_root, "index.html")
    cppreference_modified_date = datetime.datetime.fromtimestamp(
        os.stat(index_page_path).st_mtime).strftime('%Y-%m-%d')
    # Single-argument print(...) behaves the same under Python 2 and 3.
    print(CODE_PREFIX % (args.language.upper(), cppreference_modified_date))
    for symbol in symbols:
        if len(symbol.headers) == 1:
            # SYMBOL(unqualified_name, namespace, header)
            print("SYMBOL(%s, %s, %s)" % (symbol.name, symbol.namespace,
                                          symbol.headers[0]))
        elif len(symbol.headers) == 0:
            sys.stderr.write("No header found for symbol %s\n" % symbol.name)
        else:
            # FIXME: support symbols with multiple headers (e.g. std::move).
            sys.stderr.write("Ambiguous header for symbol %s: %s\n" % (
                symbol.name, ', '.join(symbol.headers)))
115 
# Run the generator only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
gen_std.main
def main()
Definition: gen_std.py:67
clang::tidy::cppcoreguidelines::join
static std::string join(ArrayRef< SpecialMemberFunctionsCheck::SpecialMemberFunctionKind > SMFS, llvm::StringRef AndOr)
Definition: SpecialMemberFunctionsCheck.cpp:78
gen_std.ParseArg
def ParseArg()
Definition: gen_std.py:53
cppreference_parser.GetSymbols
def GetSymbols(parse_pages)
Definition: cppreference_parser.py:155