clang 19.0.0git
UnwrappedLineParser.h
Go to the documentation of this file.
1//===--- UnwrappedLineParser.h - Format C++ code ----------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains the declaration of the UnwrappedLineParser,
11/// which turns a stream of tokens into UnwrappedLines.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H
16#define LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H
17
18#include "Macros.h"
19#include <stack>
20
21namespace clang {
22namespace format {
23
24struct UnwrappedLineNode;
25
26/// An unwrapped line is a sequence of \c Token, that we would like to
27/// put on a single line if there was no column limit.
28///
29/// This is used as a main interface between the \c UnwrappedLineParser and the
30/// \c UnwrappedLineFormatter. The key property is that changing the formatting
31/// within an unwrapped line does not affect any other unwrapped lines.
33 UnwrappedLine() = default;
34
35 /// The \c Tokens comprising this \c UnwrappedLine.
36 std::list<UnwrappedLineNode> Tokens;
37
38 /// The indent level of the \c UnwrappedLine.
39 unsigned Level = 0;
40
41 /// The \c PPBranchLevel (adjusted for header guards) if this line is a
42 /// \c InMacroBody line, and 0 otherwise.
43 unsigned PPLevel = 0;
44
45 /// Whether this \c UnwrappedLine is part of a preprocessor directive.
46 bool InPPDirective = false;
47 /// Whether this \c UnwrappedLine is part of a pragma directive.
48 bool InPragmaDirective = false;
49 /// Whether it is part of a macro body.
50 bool InMacroBody = false;
51
52 bool MustBeDeclaration = false;
53
54 /// Whether the parser has seen \c decltype(auto) in this line.
55 bool SeenDecltypeAuto = false;
56
57 /// \c True if this line should be indented by ContinuationIndent in
58 /// addition to the normal indention level.
59 bool IsContinuation = false;
60
61 /// If this \c UnwrappedLine closes a block in a sequence of lines,
62 /// \c MatchingOpeningBlockLineIndex stores the index of the corresponding
63 /// opening line. Otherwise, \c MatchingOpeningBlockLineIndex must be
64 /// \c kInvalidIndex.
66
67 /// If this \c UnwrappedLine opens a block, stores the index of the
68 /// line with the corresponding closing brace.
70
71 static const size_t kInvalidIndex = -1;
72
73 unsigned FirstStartColumn = 0;
74};
75
76/// Interface for users of the UnwrappedLineParser to receive the parsed lines.
77/// Parsing a single snippet of code can lead to multiple runs, where each
78/// run is a coherent view of the file.
79///
80/// For example, different runs are generated:
81/// - for different combinations of #if blocks
82/// - when macros are involved, for the expanded code and the as-written code
83///
84/// Some tokens will only be visible in a subset of the runs.
85/// For each run, \c UnwrappedLineParser will call \c consumeUnwrappedLine
86/// for each parsed unwrapped line, and then \c finishRun to indicate
87/// that the set of unwrapped lines before is one coherent view of the
88/// code snippet to be formatted.
90public:
92 virtual void consumeUnwrappedLine(const UnwrappedLine &Line) = 0;
93 virtual void finishRun() = 0;
94};
95
97
99public:
100 UnwrappedLineParser(SourceManager &SourceMgr, const FormatStyle &Style,
101 const AdditionalKeywords &Keywords,
102 unsigned FirstStartColumn, ArrayRef<FormatToken *> Tokens,
103 UnwrappedLineConsumer &Callback,
104 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
105 IdentifierTable &IdentTable);
106
107 void parse();
108
109private:
110 enum class IfStmtKind { // Classifies a statement by its if/else shape.
111 NotIf, // Not an if statement.
112 IfOnly, // An if statement without the else clause.
113 IfElse, // An if statement followed by else but not else if.
114 IfElseIf // An if statement followed by else if.
115 };
116
117 void reset();
118 void parseFile();
119 bool precededByCommentOrPPDirective() const;
120 bool parseLevel(const FormatToken *OpeningBrace = nullptr,
121 IfStmtKind *IfKind = nullptr,
122 FormatToken **IfLeftBrace = nullptr);
123 bool mightFitOnOneLine(UnwrappedLine &Line,
124 const FormatToken *OpeningBrace = nullptr) const;
125 FormatToken *parseBlock(bool MustBeDeclaration = false,
126 unsigned AddLevels = 1u, bool MunchSemi = true,
127 bool KeepBraces = true, IfStmtKind *IfKind = nullptr,
128 bool UnindentWhitesmithsBraces = false);
129 void parseChildBlock();
130 void parsePPDirective();
131 void parsePPDefine();
132 void parsePPIf(bool IfDef);
133 void parsePPElse();
134 void parsePPEndIf();
135 void parsePPPragma();
136 void parsePPUnknown();
137 void readTokenWithJavaScriptASI();
138 void parseStructuralElement(const FormatToken *OpeningBrace = nullptr,
139 IfStmtKind *IfKind = nullptr,
140 FormatToken **IfLeftBrace = nullptr,
141 bool *HasDoWhile = nullptr,
142 bool *HasLabel = nullptr);
143 bool tryToParseBracedList();
144 bool parseBracedList(bool IsAngleBracket = false, bool IsEnum = false);
145 bool parseParens(TokenType AmpAmpTokenType = TT_Unknown);
146 void parseSquare(bool LambdaIntroducer = false);
147 void keepAncestorBraces();
148 void parseUnbracedBody(bool CheckEOF = false);
149 void handleAttributes();
150 bool handleCppAttributes();
151 bool isBlockBegin(const FormatToken &Tok) const;
152 FormatToken *parseIfThenElse(IfStmtKind *IfKind, bool KeepBraces = false,
153 bool IsVerilogAssert = false);
154 void parseTryCatch();
155 void parseLoopBody(bool KeepBraces, bool WrapRightBrace);
156 void parseForOrWhileLoop(bool HasParens = true);
157 void parseDoWhile();
158 void parseLabel(bool LeftAlignLabel = false);
159 void parseCaseLabel();
160 void parseSwitch();
161 void parseNamespace();
162 bool parseModuleImport();
163 void parseNew();
164 void parseAccessSpecifier();
165 bool parseEnum();
166 bool parseStructLike();
167 bool parseRequires();
168 void parseRequiresClause(FormatToken *RequiresToken);
169 void parseRequiresExpression(FormatToken *RequiresToken);
170 void parseConstraintExpression();
171 void parseJavaEnumBody();
172 // Parses a record (aka class) as a top level element. If ParseAsExpr is true,
173 // parses the record as a child block, i.e. if the class declaration is an
174 // expression.
175 void parseRecord(bool ParseAsExpr = false);
176 void parseObjCLightweightGenerics();
177 void parseObjCMethod();
178 void parseObjCProtocolList();
179 void parseObjCUntilAtEnd();
180 void parseObjCInterfaceOrImplementation();
181 bool parseObjCProtocol();
182 void parseJavaScriptEs6ImportExport();
183 void parseStatementMacro();
184 void parseCSharpAttribute();
185 // Parse a C# generic type constraint: `where T : IComparable<T>`.
186 // See:
187 // https://docs.microsoft.com/en-us/dotnet/csharp/language-reference/keywords/where-generic-type-constraint
188 void parseCSharpGenericTypeConstraint();
189 bool tryToParseLambda();
190 bool tryToParseChildBlock();
191 bool tryToParseLambdaIntroducer();
192 bool tryToParsePropertyAccessor();
193 void tryToParseJSFunction();
194 bool tryToParseSimpleAttribute();
195 void parseVerilogHierarchyIdentifier();
196 void parseVerilogSensitivityList();
197 // Returns the number of levels of indentation in addition to the normal 1
198 // level for a block, used for indenting case labels.
199 unsigned parseVerilogHierarchyHeader();
200 void parseVerilogTable();
201 void parseVerilogCaseLabel();
202 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
203 parseMacroCall();
204
205 // Used by addUnwrappedLine to denote whether to keep or remove a level
206 // when resetting the line state. addUnwrappedLine defaults to Remove.
207 enum class LineLevel { Remove, Keep };
208
209 void addUnwrappedLine(LineLevel AdjustLevel = LineLevel::Remove);
210 bool eof() const;
211 // LevelDifference is the difference of levels after and before the current
212 // token. For example:
213 // - if the token is '{' and opens a block, LevelDifference is 1.
214 // - if the token is '}' and closes a block, LevelDifference is -1.
215 void nextToken(int LevelDifference = 0);
216 void readToken(int LevelDifference = 0);
217
218 // Decides which comment tokens should be added to the current line and which
219 // should be added as comments before the next token.
220 //
221 // Comments specifies the sequence of comment tokens to analyze. They get
222 // either pushed to the current line or added to the comments before the next
223 // token.
224 //
225 // NextTok specifies the next token. A null pointer NextTok is supported, and
226 // signifies either the absence of a next token, or that the next token
227 // shouldn't be taken into account for the analysis.
228 void distributeComments(const SmallVectorImpl<FormatToken *> &Comments,
229 const FormatToken *NextTok);
230
231 // Adds the comment preceding the next token to unwrapped lines.
232 void flushComments(bool NewlineBeforeNext);
233 void pushToken(FormatToken *Tok);
234 void calculateBraceTypes(bool ExpectClassBody = false);
235 void setPreviousRBraceType(TokenType Type);
236
237 // Marks a conditional compilation edge (for example, an '#if', '#ifdef',
238 // '#else' or merge conflict marker). If 'Unreachable' is true, assumes
239 // this branch either cannot be taken (for example '#if false'), or should
240 // not be taken in this round.
241 void conditionalCompilationCondition(bool Unreachable);
242 void conditionalCompilationStart(bool Unreachable);
243 void conditionalCompilationAlternative();
244 void conditionalCompilationEnd();
245
246 bool isOnNewLine(const FormatToken &FormatTok);
247
248 // Returns whether there is a macro expansion in the line, i.e. a token that
249 // was expanded from a macro call.
250 bool containsExpansion(const UnwrappedLine &Line) const;
251
252 // Compute hash of the current preprocessor branch.
253 // This is used to identify the different branches, and thus track if block
254 // open and close in the same branch.
255 size_t computePPHash() const;
256
257 bool parsingPPDirective() const { return CurrentLines != &Lines; } // True when CurrentLines does not point at Lines.
258
259 // FIXME: We are constantly running into bugs where Line.Level is incorrectly
260 // decremented below 0. Introduce a method to subtract from Line.Level
261 // and use that everywhere in the Parser.
262 std::unique_ptr<UnwrappedLine> Line;
263
264 // Lines that are created by macro expansion.
265 // When formatting code containing macro calls, we first format the expanded
266 // lines to set the token types correctly. Afterwards, we format the
267 // reconstructed macro calls, re-using the token types determined in the first
268 // step.
269 // ExpandedLines will be reset every time we create a new LineAndExpansion
270 // instance once a line containing macro calls has been parsed.
271 SmallVector<UnwrappedLine, 8> CurrentExpandedLines;
272
273 // Maps from the first token of a top-level UnwrappedLine that contains
274 // a macro call to the replacement UnwrappedLines expanded from the macro
275 // call.
276 llvm::DenseMap<FormatToken *, SmallVector<UnwrappedLine, 8>> ExpandedLines;
277
278 // Map from the macro identifier to a line containing the full unexpanded
279 // macro call.
280 llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>> Unexpanded;
281
282 // For recursive macro expansions, trigger reconstruction only on the
283 // outermost expansion.
284 bool InExpansion = false;
285
286 // Set while we reconstruct a macro call.
287 // For reconstruction, we feed the expanded lines into the reconstructor
288 // until it is finished.
289 std::optional<MacroCallReconstructor> Reconstruct;
290
291 // Comments are sorted into unwrapped lines by whether they are in the same
292 // line as the previous token, or not. If not, they belong to the next token.
293 // Since the next token might already be in a new unwrapped line, we need to
294 // store the comments belonging to that token.
295 SmallVector<FormatToken *, 1> CommentsBeforeNextToken;
296 FormatToken *FormatTok = nullptr;
297 bool MustBreakBeforeNextToken;
298
299 // The parsed lines. Only added to through \c CurrentLines.
301
302 // Preprocessor directives are parsed out-of-order from other unwrapped lines.
303 // Thus, we need to keep a list of preprocessor directives to be reported
304 // after an unwrapped line that has been started was finished.
305 SmallVector<UnwrappedLine, 4> PreprocessorDirectives;
306
307 // New unwrapped lines are added via CurrentLines.
308 // Usually points to \c &Lines. While parsing a preprocessor directive when
309 // there is an unfinished previous unwrapped line, will point to
310 // \c &PreprocessorDirectives.
311 SmallVectorImpl<UnwrappedLine> *CurrentLines;
312
313 // We store for each line whether it must be a declaration depending on
314 // whether we are in a compound statement or not.
315 llvm::BitVector DeclarationScopeStack;
316
317 const FormatStyle &Style;
318 bool IsCpp;
319 const AdditionalKeywords &Keywords;
320
321 llvm::Regex CommentPragmasRegex;
322
323 FormatTokenSource *Tokens;
324 UnwrappedLineConsumer &Callback;
325
326 ArrayRef<FormatToken *> AllTokens;
327
328 // Keeps a stack of the states of nested control statements (true if the
329 // statement contains more than some predefined number of nested statements).
330 SmallVector<bool, 8> NestedTooDeep;
331
332 // Keeps a stack of the states of nested lambdas (true if the return type of
333 // the lambda is `decltype(auto)`).
334 SmallVector<bool, 4> NestedLambdas;
335
336 // Whether the parser is parsing the body of a function whose return type is
337 // `decltype(auto)`.
338 bool IsDecltypeAutoFunction = false;
339
340 // Represents preprocessor branch type, so we can find matching
341 // #if/#else/#endif directives.
342 enum PPBranchKind {
343 PP_Conditional, // Any #if, #ifdef, #ifndef or #elif block outside #if 0
344 PP_Unreachable // #if 0 or a conditional preprocessor block inside #if 0
345 };
346
347 struct PPBranch { // One preprocessor branch: its kind plus an associated size_t value.
348 PPBranch(PPBranchKind Kind, size_t Line) : Kind(Kind), Line(Line) {}
349 PPBranchKind Kind; // PP_Conditional or PP_Unreachable.
350 size_t Line; // NOTE(review): what this value identifies is not visible in this header - confirm in the implementation.
351 };
352
353 // Keeps a stack of currently active preprocessor branching directives.
355
356 // The \c UnwrappedLineParser re-parses the code for each combination
357 // of preprocessor branches that can be taken.
358 // To that end, we take the same branch (#if, #else, or one of the #elif
359 // branches) for each nesting level of preprocessor branches.
360 // \c PPBranchLevel stores the current nesting level of preprocessor
361 // branches during one pass over the code.
362 int PPBranchLevel;
363
364 // Contains the current branch (#if, #else or one of the #elif branches)
365 // for each nesting level.
366 SmallVector<int, 8> PPLevelBranchIndex;
367
368 // Contains the maximum number of branches at each nesting level.
369 SmallVector<int, 8> PPLevelBranchCount;
370
371 // Contains the number of branches per nesting level we are currently
372 // in while parsing a preprocessor branch sequence.
373 // This is used to update PPLevelBranchCount at the end of a branch
374 // sequence.
375 std::stack<int> PPChainBranchIndex;
376
377 // Include guard search state. Used to fix up preprocessor indent levels
378 // so that include guards do not participate in indentation.
379 enum IncludeGuardState {
380 IG_Inited, // Search started, looking for #ifndef.
381 IG_IfNdefed, // #ifndef found, IncludeGuardToken points to condition.
382 IG_Defined, // Matching #define found, checking other requirements.
383 IG_Found, // All requirements met, need to fix indents.
384 IG_Rejected, // Search failed or never started.
385 };
386
387 // Current state of include guard search.
388 IncludeGuardState IncludeGuard;
389
390 // Points to the #ifndef condition for a potential include guard. Null unless
391 // IncludeGuard == IG_IfNdefed.
392 FormatToken *IncludeGuardToken;
393
394 // Contains the first start column where the source begins. This is zero for
395 // normal source code and may be nonzero when formatting a code fragment that
396 // does not start at the beginning of the file.
397 unsigned FirstStartColumn;
398
399 MacroExpander Macros;
400
401 friend class ScopedLineState;
403};
404
406 UnwrappedLineNode() : Tok(nullptr) {}
409 : Tok(Tok), Children(Children.begin(), Children.end()) {}
410
413};
414
415std::ostream &operator<<(std::ostream &Stream, const UnwrappedLine &Line);
416
417} // end namespace format
418} // end namespace clang
419
420#endif
This file contains the main building blocks of macro support in clang-format.
Implements an efficient mapping from strings to IdentifierInfo nodes.
This class handles loading and caching of source files into memory.
The base class of the type hierarchy.
Definition: Type.h:1607
Takes a set of macro definitions as strings and allows expanding calls to those macros.
Definition: Macros.h:80
Interface for users of the UnwrappedLineParser to receive the parsed lines.
virtual void consumeUnwrappedLine(const UnwrappedLine &Line)=0
std::ostream & operator<<(std::ostream &Stream, const UnwrappedLine &Line)
TokenType
Determines the semantic type of a syntactic token, e.g.
Definition: FormatToken.h:202
The JSON file list parser is used to communicate input to InstallAPI.
Represents a complete lambda introducer.
Definition: DeclSpec.h:2830
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:996
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:287
SmallVector< UnwrappedLine, 0 > Children
UnwrappedLineNode(FormatToken *Tok, llvm::ArrayRef< UnwrappedLine > Children={})
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...
unsigned PPLevel
The PPBranchLevel (adjusted for header guards) if this line is a InMacroBody line,...
bool InMacroBody
Whether it is part of a macro body.
std::list< UnwrappedLineNode > Tokens
The Tokens comprising this UnwrappedLine.
bool IsContinuation
True if this line should be indented by ContinuationIndent in addition to the normal indention level.
unsigned Level
The indent level of the UnwrappedLine.
bool InPragmaDirective
Whether this UnwrappedLine is part of a pragma directive.
bool InPPDirective
Whether this UnwrappedLine is part of a preprocessor directive.
bool SeenDecltypeAuto
Whether the parser has seen decltype(auto) in this line.
size_t MatchingClosingBlockLineIndex
If this UnwrappedLine opens a block, stores the index of the line with the corresponding closing brac...
static const size_t kInvalidIndex
size_t MatchingOpeningBlockLineIndex
If this UnwrappedLine closes a block in a sequence of lines, MatchingOpeningBlockLineIndex stores the...