clang 19.0.0git
Macros.h
Go to the documentation of this file.
1//===--- Macros.h - Format C++ code -----------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains the main building blocks of macro support in
11/// clang-format.
12///
13/// In order to not violate the requirement that clang-format can format files
14/// in isolation, clang-format's macro support uses expansions users provide
15/// as part of clang-format's style configuration.
16///
17/// Macro definitions are of the form "MACRO(p1, p2)=p1 + p2", but only support
18/// one level of expansion (\see MacroExpander for a full description of what
19/// is supported).
20///
21/// As part of parsing, clang-format uses the MacroExpander to expand the
22/// spelled token streams into expanded token streams when it encounters a
23/// macro call. The UnwrappedLineParser continues to parse UnwrappedLines
24/// from the expanded token stream.
25/// After the expanded unwrapped lines are parsed, the MacroCallReconstructor
26/// matches the spelled token stream into unwrapped lines that best resemble the
27/// structure of the expanded unwrapped lines. These reconstructed unwrapped
28/// lines are aliasing the tokens in the expanded token stream, so that token
29/// annotations will be reused when formatting the spelled macro calls.
30///
31/// When formatting, clang-format annotates and formats the expanded unwrapped
32/// lines first, determining the token types. Next, it formats the spelled
33/// unwrapped lines, keeping the token types fixed, while allowing other
34/// formatting decisions to change.
35///
36//===----------------------------------------------------------------------===//
37
38#ifndef CLANG_LIB_FORMAT_MACROS_H
39#define CLANG_LIB_FORMAT_MACROS_H
40
41#include <list>
42#include <map>
43#include <string>
44#include <vector>
45
46#include "FormatToken.h"
47#include "llvm/ADT/ArrayRef.h"
48#include "llvm/ADT/DenseMap.h"
49#include "llvm/ADT/SmallVector.h"
50#include "llvm/ADT/StringRef.h"
51
52namespace clang {
53namespace format {
54
55struct UnwrappedLine;
56struct UnwrappedLineNode;
57
58/// Takes a set of macro definitions as strings and allows expanding calls to
59/// those macros.
60///
61/// For example:
62/// Definition: A(x, y)=x + y
63/// Call : A(int a = 1, 2)
64/// Expansion : int a = 1 + 2
65///
66/// Expansion does not check arity of the definition.
67/// If fewer arguments than expected are provided, the remaining parameters
68/// are considered empty:
69/// Call : A(a)
70/// Expansion: a +
71/// If more arguments than expected are provided, they will be discarded.
72///
73/// The expander does not support:
74/// - recursive expansion
75/// - stringification
76/// - concatenation
77/// - variadic macros
78///
79/// Furthermore, only a single expansion of each macro argument is supported,
80/// so that we cannot get conflicting formatting decisions from different
81/// expansions.
82/// Definition: A(x)=x+x
83/// Call : A(id)
84/// Expansion : id+x
85///
87public:
89
90 /// Construct a macro expander from a set of macro definitions.
91 /// Macro definitions must be encoded as UTF-8.
92 ///
93 /// Each entry in \p Macros must conform to the following simple
94 /// macro-definition language:
95 /// <definition> ::= <id> <expansion> | <id> "(" <params> ")" <expansion>
96 /// <params> ::= <id-list> | ""
97 /// <id-list> ::= <id> | <id> "," <params>
98 /// <expansion> ::= "=" <tail> | <eof>
99 /// <tail> ::= <tok> <tail> | <eof>
100 ///
101 /// Macros that cannot be parsed will be silently discarded.
102 ///
103 MacroExpander(const std::vector<std::string> &Macros,
104 clang::SourceManager &SourceMgr, const FormatStyle &Style,
105 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
106 IdentifierTable &IdentTable);
108
109 /// Returns whether any macro \p Name is defined, regardless of overloads.
110 bool defined(llvm::StringRef Name) const;
111
112 /// Returns whether there is an object-like overload, i.e. where the macro
113 /// has no arguments and should not consume subsequent parentheses.
114 bool objectLike(llvm::StringRef Name) const;
115
116 /// Returns whether macro \p Name provides an overload with the given arity.
117 bool hasArity(llvm::StringRef Name, unsigned Arity) const;
118
119 /// Returns the expanded stream of format tokens for \p ID, where
120 /// each element in \p OptionalArgs is a positional argument to the macro
121 /// call. If \p OptionalArgs is not set, the object-like overload is used.
122 /// If \p OptionalArgs is set, the overload with the arity equal to
123 /// \c OptionalArgs->size() is used.
125 expand(FormatToken *ID, std::optional<ArgsList> OptionalArgs) const;
126
127private:
128 struct Definition;
129 class DefinitionParser;
130
131 void parseDefinition(const std::string &Macro);
132
133 clang::SourceManager &SourceMgr;
134 const FormatStyle &Style;
135 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator;
136 IdentifierTable &IdentTable;
138 llvm::StringMap<llvm::DenseMap<int, Definition>> FunctionLike;
139 llvm::StringMap<Definition> ObjectLike;
140};
141
142/// Converts a sequence of UnwrappedLines containing expanded macros into a
143/// single UnwrappedLine containing the macro calls. This UnwrappedLine may be
144/// broken into child lines, in a way that best conveys the structure of the
145/// expanded code.
146///
147/// In the simplest case, a spelled UnwrappedLine contains one macro, and after
148/// expanding it we have one expanded UnwrappedLine. In general, macro
149/// expansions can span UnwrappedLines, and multiple macros can contribute
150/// tokens to the same line. We keep consuming expanded lines until:
151/// * all expansions that started have finished (we're not chopping any macros
152/// in half)
153/// * *and* we've reached the end of a *spelled* unwrapped line.
154///
155/// A single UnwrappedLine represents this chunk of code.
156///
157/// After this point, the state of the spelled/expanded stream is "in sync"
158/// (both at the start of an UnwrappedLine, with no macros open), so the
159/// Reconstructor can be thrown away and parsing can continue.
160///
161/// Given a mapping from the macro name identifier token in the macro call
162/// to the tokens of the macro call, for example:
163/// CLASSA -> CLASSA({public: void x();})
164///
165/// When getting the formatted lines of the expansion via the \c addLine method
166/// (each '->' specifies a call to \c addLine ):
167/// -> class A {
168/// -> public:
169/// -> void x();
170/// -> };
171///
172/// Creates the tree of unwrapped lines containing the macro call tokens so that
173/// the macro call tokens fit the semantic structure of the expanded formatted
174/// lines:
175/// -> CLASSA({
176/// -> public:
177/// -> void x();
178/// -> })
180public:
181 /// Create a Reconstructor whose resulting \p UnwrappedLine will start at
182 /// \p Level, using the map from name identifier token to the corresponding
183 /// tokens of the spelled macro call.
185 unsigned Level,
186 const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
187 &ActiveExpansions);
188
189 /// For the given \p Line, match all occurrences of tokens expanded from a
190 /// macro to unwrapped lines in the spelled macro call so that the resulting
191 /// tree of unwrapped lines best resembles the structure of unwrapped lines
192 /// passed in via \c addLine.
193 void addLine(const UnwrappedLine &Line);
194
195 /// Check whether at the current state there is no open macro expansion
196 /// that needs to be processed to finish a macro call.
197 /// Only when \c finished() is true, \c takeResult() can be called to retrieve
198 /// the resulting \c UnwrappedLine.
199 /// If there are multiple subsequent macro calls within an unwrapped line in
200 /// the spelled token stream, the calling code may also continue to call
201 /// \c addLine() when \c finished() is true.
202 bool finished() const { return ActiveExpansions.empty(); }
203
204 /// Retrieve the formatted \c UnwrappedLine containing the original
205 /// macro calls, formatted according to the expanded token stream received
206 /// via \c addLine().
207 /// Generally, this line tries to have the same structure as the expanded,
208 /// formatted unwrapped lines handed in via \c addLine(), with the exception
209 /// that for multiple top-level lines, each subsequent line will be the
210 /// child of the last token in its predecessor. This representation is chosen
211 /// because it is a precondition to the formatter that we get what looks like
212 /// a single statement in a single \c UnwrappedLine (i.e. matching parens).
213 ///
214 /// If a token in a macro argument is a child of a token in the expansion,
215 /// the parent will be the corresponding token in the macro call.
216 /// For example:
217 /// #define C(a, b) class C { a b
218 /// C(int x;, int y;)
219 /// would expand to
220 /// class C { int x; int y;
221 /// where in a formatted line "int x;" and "int y;" would both be new separate
222 /// lines.
223 ///
224 /// In the result, "int x;" will be a child of the opening parenthesis in "C("
225 /// and "int y;" will be a child of the "," token:
226 /// C (
227 /// \- int x;
228 /// ,
229 /// \- int y;
230 /// )
232
233private:
234 void add(FormatToken *Token, FormatToken *ExpandedParent, bool First,
235 unsigned Level);
236 void prepareParent(FormatToken *ExpandedParent, bool First, unsigned Level);
237 FormatToken *getParentInResult(FormatToken *Parent);
238 void reconstruct(FormatToken *Token);
239 void startReconstruction(FormatToken *Token);
240 bool reconstructActiveCallUntil(FormatToken *Token);
241 void endReconstruction(FormatToken *Token);
242 bool processNextReconstructed();
243 void finalize();
244
245 struct ReconstructedLine;
246
247 void appendToken(FormatToken *Token, ReconstructedLine *L = nullptr);
248 UnwrappedLine createUnwrappedLine(const ReconstructedLine &Line, int Level);
249 void debug(const ReconstructedLine &Line, int Level);
250 ReconstructedLine &parentLine();
251 ReconstructedLine *currentLine();
252 void debugParentMap() const;
253
254#ifndef NDEBUG
255 enum ReconstructorState {
256 Start, // No macro expansion was found in the input yet.
257 InProgress, // During a macro reconstruction.
258 Finalized, // Past macro reconstruction, the result is finalized.
259 };
260 ReconstructorState State = Start;
261#endif
262
263 // Node in which we build up the resulting unwrapped line; this type is
264 // analogous to UnwrappedLineNode.
265 struct LineNode {
266 LineNode() = default;
267 LineNode(FormatToken *Tok) : Tok(Tok) {}
268 FormatToken *Tok = nullptr;
270 };
271
272 // Line in which we build up the resulting unwrapped line.
273 // FIXME: Investigate changing UnwrappedLine to a pointer type and using it
274 // instead of rolling our own type.
275 struct ReconstructedLine {
276 explicit ReconstructedLine(unsigned Level) : Level(Level) {}
277 unsigned Level;
279 };
280
281 // The line in which we collect the resulting reconstructed output.
282 // To reduce special cases in the algorithm, the first level of the line
283 // contains a single null token that has the reconstructed incoming
284 // lines as children.
285 // In the end, we stitch the lines together so that each subsequent line
286 // is a child of the last token of the previous line. This is necessary
287 // in order to format the overall expression as a single logical line -
288 // if we created separate lines, we'd format them with their own top-level
289 // indent depending on the semantic structure, which is not desired.
290 ReconstructedLine Result;
291
292 // Stack of currently "open" lines, where each line's predecessor's last
293 // token is the parent token for that line.
294 llvm::SmallVector<ReconstructedLine *> ActiveReconstructedLines;
295
296 // Maps from the expanded token to the token that takes its place in the
297 // reconstructed token stream in terms of parent-child relationships.
298 // Note that it might take multiple steps to arrive at the correct
299 // parent in the output.
300 // Given: #define C(a, b) []() { a; b; }
301 // And a call: C(f(), g())
302 // The structure in the incoming formatted unwrapped line will be:
303 // []() {
304 // |- f();
305 // \- g();
306 // }
307 // with f and g being children of the opening brace.
308 // In the reconstructed call:
309 // C(f(), g())
310 // \- f()
311 // \- g()
312 // We want f to be a child of the opening parenthesis and g to be a child
313 // of the comma token in the macro call.
314 // Thus, we map
315 // { -> (
316 // and add
317 // ( -> ,
318 // once we're past the comma in the reconstruction.
319 llvm::DenseMap<FormatToken *, FormatToken *>
320 SpelledParentToReconstructedParent;
321
322 // Keeps track of a single expansion while we're reconstructing tokens it
323 // generated.
324 struct Expansion {
325 // The identifier token of the macro call.
326 FormatToken *ID;
327 // Our current position in the reconstruction.
328 std::list<UnwrappedLineNode>::iterator SpelledI;
329 // The end of the reconstructed token sequence.
330 std::list<UnwrappedLineNode>::iterator SpelledE;
331 };
332
333 // Stack of macro calls for which we're in the middle of an expansion.
334 llvm::SmallVector<Expansion> ActiveExpansions;
335
336 struct MacroCallState {
337 MacroCallState(ReconstructedLine *Line, FormatToken *ParentLastToken,
338 FormatToken *MacroCallLParen);
339
340 ReconstructedLine *Line;
341
342 // The last token in the parent line or expansion, or nullptr if the macro
343 // expansion is on a top-level line.
344 //
345 // For example, in the macro call:
346 // auto f = []() { ID(1); };
347 // The MacroCallState for ID will have '{' as ParentLastToken.
348 //
349 // In the macro call:
350 // ID(ID(void f()));
351 // The MacroCallState of the outer ID will have nullptr as ParentLastToken,
352 // while the MacroCallState for the inner ID will have the '(' of the outer
353 // ID as ParentLastToken.
354 //
355 // In the macro call:
356 // ID2(a, ID(b));
357 // The MacroCallState of ID will have ',' as ParentLastToken.
358 FormatToken *ParentLastToken;
359
360 // The l_paren of this MacroCallState's macro call.
361 FormatToken *MacroCallLParen;
362 };
363
364 // Keeps track of the lines into which the opening brace/parenthesis &
365 // argument separating commas for each level in the macro call go in order to
366 // put the corresponding closing brace/parenthesis into the same line in the
367 // output and keep track of which parents in the expanded token stream map to
368 // which tokens in the reconstructed stream.
369 // When an opening brace/parenthesis has children, we want the structure of
370 // the output line to be:
371 // |- MACRO
372 // |- (
373 // | \- <argument>
374 // |- ,
375 // | \- <argument>
376 // \- )
377 llvm::SmallVector<MacroCallState> MacroCallStructure;
378
379 // Maps from identifier of the macro call to an unwrapped line containing
380 // all tokens of the macro call.
381 const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
382 &IdToReconstructed;
383};
384
385} // namespace format
386} // namespace clang
387
388#endif
NodeId Parent
Definition: ASTDiff.cpp:191
static char ID
Definition: Arena.cpp:183
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
Implements an efficient mapping from strings to IdentifierInfo nodes.
This class handles loading and caching of source files into memory.
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
Converts a sequence of UnwrappedLines containing expanded macros into a single UnwrappedLine containi...
Definition: Macros.h:179
void addLine(const UnwrappedLine &Line)
For the given Line, match all occurrences of tokens expanded from a macro to unwrapped lines in the sp...
UnwrappedLine takeResult() &&
Retrieve the formatted UnwrappedLine containing the original macro calls, formatted according to the e...
bool finished() const
Check whether at the current state there is no open macro expansion that needs to be processed to fin...
Definition: Macros.h:202
Takes a set of macro definitions as strings and allows expanding calls to those macros.
Definition: Macros.h:86
bool defined(llvm::StringRef Name) const
Returns whether any macro Name is defined, regardless of overloads.
llvm::SmallVector< FormatToken *, 8 > expand(FormatToken *ID, std::optional< ArgsList > OptionalArgs) const
Returns the expanded stream of format tokens for ID, where each element in Args is a positional argum...
bool objectLike(llvm::StringRef Name) const
Returns whether there is an object-like overload, i.e.
bool hasArity(llvm::StringRef Name, unsigned Arity) const
Returns whether macro Name provides an overload with the given arity.
The JSON file list parser is used to communicate input to InstallAPI.
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:282
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...