clang 23.0.0git
UnwrappedLineParser.h
Go to the documentation of this file.
1//===--- UnwrappedLineParser.h - Format C++ code ----------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains the declaration of the UnwrappedLineParser,
11/// which turns a stream of tokens into UnwrappedLines.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H
16#define LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H
17
18#include "Macros.h"
19#include <stack>
20
21namespace clang {
22namespace format {
23
25
26/// An unwrapped line is a sequence of \c Token that we would like to
27/// put on a single line if there were no column limit.
28///
29/// This is used as a main interface between the \c UnwrappedLineParser and the
30/// \c UnwrappedLineFormatter. The key property is that changing the formatting
31/// within an unwrapped line does not affect any other unwrapped lines.
33 UnwrappedLine() = default;
34
35 /// The \c Tokens comprising this \c UnwrappedLine.
36 std::list<UnwrappedLineNode> Tokens;
37
38 /// The indent level of the \c UnwrappedLine.
39 unsigned Level = 0;
40
41 /// The \c PPBranchLevel (adjusted for header guards) if this line is a
42 /// \c InMacroBody line, and 0 otherwise.
43 unsigned PPLevel = 0;
44
45 /// Whether this \c UnwrappedLine is part of a preprocessor directive.
46 bool InPPDirective = false;
47 /// Whether this \c UnwrappedLine is part of a pragma directive.
48 bool InPragmaDirective = false;
49 /// Whether it is part of a macro body.
50 bool InMacroBody = false;
51 /// Whether it is a C++20 module/import declaration.
53
54 /// Nesting level of unbraced body of a control statement.
55 unsigned UnbracedBodyLevel = 0;
56
57 bool MustBeDeclaration = false;
58
59 /// Whether the parser has seen \c decltype(auto) in this line.
60 bool SeenDecltypeAuto = false;
61
62 /// \c True if this line should be indented by ContinuationIndent in
63 /// addition to the normal indentation level.
64 bool IsContinuation = false;
65
66 /// If this \c UnwrappedLine closes a block in a sequence of lines,
67 /// \c MatchingOpeningBlockLineIndex stores the index of the corresponding
68 /// opening line. Otherwise, \c MatchingOpeningBlockLineIndex must be
69 /// \c kInvalidIndex.
71
72 /// If this \c UnwrappedLine opens a block, stores the index of the
73 /// line with the corresponding closing brace.
75
76 static const size_t kInvalidIndex = -1;
77
78 unsigned FirstStartColumn = 0;
79};
80
81/// Interface for users of the UnwrappedLineParser to receive the parsed lines.
82/// Parsing a single snippet of code can lead to multiple runs, where each
83/// run is a coherent view of the file.
84///
85/// For example, different runs are generated:
86/// - for different combinations of #if blocks
87/// - when macros are involved, for the expanded code and the as-written code
88///
89/// Some tokens will only be visible in a subset of the runs.
90/// For each run, \c UnwrappedLineParser will call \c consumeUnwrappedLine
91/// for each parsed unwrapped line, and then \c finishRun to indicate
92/// that the set of unwrapped lines before is one coherent view of the
93/// code snippet to be formatted.
95public:
97 virtual void consumeUnwrappedLine(const UnwrappedLine &Line) = 0;
98 virtual void finishRun() = 0;
99};
100
102
104public:
105 UnwrappedLineParser(SourceManager &SourceMgr, const FormatStyle &Style,
106 const AdditionalKeywords &Keywords,
107 unsigned FirstStartColumn, ArrayRef<FormatToken *> Tokens,
108 UnwrappedLineConsumer &Callback,
109 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
110 IdentifierTable &IdentTable);
111
112 void parse();
113
114private:
// Classifies a statement by whether it is an if statement and, if so, by the
// kind of else clause that follows it.
115 enum class IfStmtKind {
116 NotIf, // Not an if statement.
117 IfOnly, // An if statement without the else clause.
118 IfElse, // An if statement followed by else but not else if.
119 IfElseIf // An if statement followed by else if.
120 };
121
122 void reset();
123 void parseFile();
124 bool precededByCommentOrPPDirective() const;
125 bool parseLevel(const FormatToken *OpeningBrace = nullptr,
126 IfStmtKind *IfKind = nullptr,
127 FormatToken **IfLeftBrace = nullptr);
128 bool mightFitOnOneLine(UnwrappedLine &Line,
129 const FormatToken *OpeningBrace = nullptr) const;
130 FormatToken *parseBlock(bool MustBeDeclaration = false,
131 unsigned AddLevels = 1u, bool MunchSemi = true,
132 bool KeepBraces = true, IfStmtKind *IfKind = nullptr,
133 bool UnindentWhitesmithsBraces = false);
134 void parseChildBlock();
135 void parsePPDirective();
136 void parsePPDefine();
137 void parsePPIf(bool IfDef);
138 void parsePPElse();
139 void parsePPEndIf();
140 void parsePPPragma();
141 void parsePPUnknown();
142 void readTokenWithJavaScriptASI();
143 void parseStructuralElement(const FormatToken *OpeningBrace = nullptr,
144 IfStmtKind *IfKind = nullptr,
145 FormatToken **IfLeftBrace = nullptr,
146 bool *HasDoWhile = nullptr,
147 bool *HasLabel = nullptr);
148 bool tryToParseBracedList();
149 bool parseBracedList(bool IsAngleBracket = false, bool IsEnum = false);
150 bool parseParens(TokenType StarAndAmpTokenType = TT_Unknown,
151 bool InMacroCall = false);
152 void parseSquare(bool LambdaIntroducer = false);
153 void keepAncestorBraces();
154 void parseUnbracedBody(bool CheckEOF = false);
155 void handleAttributes();
156 bool handleCppAttributes();
157 bool isBlockBegin(const FormatToken &Tok) const;
158 FormatToken *parseIfThenElse(IfStmtKind *IfKind, bool KeepBraces = false,
159 bool IsVerilogAssert = false);
160 void parseTryCatch();
161 void parseLoopBody(bool KeepBraces, bool WrapRightBrace);
162 void parseForOrWhileLoop(bool HasParens = true);
163 void parseDoWhile();
164 void parseLabel(FormatStyle::IndentGotoLabelStyle IndentGotoLabels =
165 FormatStyle::IGLS_OuterIndent);
166 void parseCaseLabel();
167 void parseSwitch(bool IsExpr);
168 void parseNamespace();
169 bool parseModuleDecl();
170 bool parseImportDecl();
171 void parseNew();
172 void parseAccessSpecifier();
173 bool parseEnum();
174 bool parseStructLike();
175 bool parseRequires(bool SeenEqual);
176 void parseRequiresClause();
177 void parseRequiresExpression();
178 void parseConstraintExpression();
179 void parseCppExportBlock();
180 void parseNamespaceOrExportBlock(unsigned AddLevels);
181 void parseJavaEnumBody();
182 // Parses a record (aka class) as a top level element. If ParseAsExpr is true,
183 // parses the record as a child block, i.e. if the class declaration is an
184 // expression.
185 void parseRecord(bool ParseAsExpr = false, bool IsJavaRecord = false);
186 void parseObjCLightweightGenerics();
187 void parseObjCMethod();
188 void parseObjCProtocolList();
189 void parseObjCUntilAtEnd();
190 void parseObjCInterfaceOrImplementation();
191 bool parseObjCProtocol();
192 void parseJavaScriptEs6ImportExport();
193 void parseStatementMacro();
194 void parseCSharpAttribute();
195 // Parse a C# generic type constraint: `where T : IComparable<T>`.
196 // See:
197 // https://docs.microsoft.com/en-us/dotnet/csharp/language-reference/keywords/where-generic-type-constraint
198 void parseCSharpGenericTypeConstraint();
199 bool tryToParseLambda();
200 bool tryToParseChildBlock();
201 bool tryToParseLambdaIntroducer();
202 bool tryToParsePropertyAccessor();
203 void tryToParseJSFunction();
204 bool tryToParseSimpleAttribute();
205 void parseVerilogHierarchyIdentifier();
206 void parseVerilogSensitivityList();
207 // Returns the number of levels of indentation in addition to the normal 1
208 // level for a block, used for indenting case labels.
209 unsigned parseVerilogHierarchyHeader();
210 void parseVerilogTable();
211 void parseVerilogCaseLabel();
212 // For import, export, and extern.
213 void parseVerilogExtern();
214 // Skip things that can precede the keywords like module.
215 void skipVerilogQualifiers();
216 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
217 parseMacroCall();
218
219 // Used by addUnwrappedLine to denote whether to keep or remove a level
220 // when resetting the line state. Remove is addUnwrappedLine's default.
221 enum class LineLevel { Remove, Keep };
222
223 void addUnwrappedLine(LineLevel AdjustLevel = LineLevel::Remove);
224 bool eof() const;
225 // LevelDifference is the difference of levels after and before the current
226 // token. For example:
227 // - if the token is '{' and opens a block, LevelDifference is 1.
228 // - if the token is '}' and closes a block, LevelDifference is -1.
229 void nextToken(int LevelDifference = 0);
230 void readToken(int LevelDifference = 0);
231
232 // Decides which comment tokens should be added to the current line and which
233 // should be added as comments before the next token.
234 //
235 // Comments specifies the sequence of comment tokens to analyze. They get
236 // either pushed to the current line or added to the comments before the next
237 // token.
238 //
239 // NextTok specifies the next token. A null pointer NextTok is supported, and
240 // signifies either the absence of a next token, or that the next token
241 // shouldn't be taken into account for the analysis.
242 void distributeComments(const ArrayRef<FormatToken *> &Comments,
243 const FormatToken *NextTok);
244
245 // Adds the comment preceding the next token to unwrapped lines.
246 void flushComments(bool NewlineBeforeNext);
247 void pushToken(FormatToken *Tok);
248 void calculateBraceTypes(bool ExpectClassBody = false);
249 void setPreviousRBraceType(TokenType Type);
250
251 // Marks a conditional compilation edge (for example, an '#if', '#ifdef',
252 // '#else' or merge conflict marker). If 'Unreachable' is true, assumes
253 // this branch either cannot be taken (for example '#if false'), or should
254 // not be taken in this round.
255 void conditionalCompilationCondition(bool Unreachable);
256 void conditionalCompilationStart(bool Unreachable);
257 void conditionalCompilationAlternative();
258 void conditionalCompilationEnd();
259
260 bool isOnNewLine(const FormatToken &FormatTok);
261
262 // Returns whether there is a macro expansion in the line, i.e. a token that
263 // was expanded from a macro call.
264 bool containsExpansion(const UnwrappedLine &Line) const;
265
266 // Compute hash of the current preprocessor branch.
267 // This is used to identify the different branches, and thus track if block
268 // open and close in the same branch.
269 size_t computePPHash() const;
270
// Returns true while CurrentLines points somewhere other than Lines.
// NOTE(review): per the comment on CurrentLines, that is the case while a
// preprocessor directive is parsed with an unfinished previous unwrapped
// line - confirm against the implementation file.
271 bool parsingPPDirective() const { return CurrentLines != &Lines; }
272
273 // FIXME: We are constantly running into bugs where Line.Level is incorrectly
274 // subtracted from beyond 0. Introduce a method to subtract from Line.Level
275 // and use that everywhere in the Parser.
276 std::unique_ptr<UnwrappedLine> Line;
277
278 // Lines that are created by macro expansion.
279 // When formatting code containing macro calls, we first format the expanded
280 // lines to set the token types correctly. Afterwards, we format the
281 // reconstructed macro calls, re-using the token types determined in the first
282 // step.
283 // ExpandedLines will be reset every time we create a new LineAndExpansion
284 // instance once a line containing macro calls has been parsed.
285 SmallVector<UnwrappedLine, 8> CurrentExpandedLines;
286
287 // Maps from the first token of a top-level UnwrappedLine that contains
288 // a macro call to the replacement UnwrappedLines expanded from the macro
289 // call.
290 llvm::DenseMap<FormatToken *, SmallVector<UnwrappedLine, 8>> ExpandedLines;
291
292 // Map from the macro identifier to a line containing the full unexpanded
293 // macro call.
294 llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>> Unexpanded;
295
296 // For recursive macro expansions, trigger reconstruction only on the
297 // outermost expansion.
298 bool InExpansion = false;
299
300 // Set while we reconstruct a macro call.
301 // For reconstruction, we feed the expanded lines into the reconstructor
302 // until it is finished.
303 std::optional<MacroCallReconstructor> Reconstruct;
304
305 // Comments are sorted into unwrapped lines by whether they are in the same
306 // line as the previous token, or not. If not, they belong to the next token.
307 // Since the next token might already be in a new unwrapped line, we need to
308 // store the comments belonging to that token.
309 SmallVector<FormatToken *, 1> CommentsBeforeNextToken;
310
311 FormatToken *FormatTok = nullptr;
312
313 // Has just finished parsing a preprocessor line.
314 bool AtEndOfPPLine;
315
316 // The parsed lines. Only added to through \c CurrentLines.
318
319 // Preprocessor directives are parsed out-of-order from other unwrapped lines.
320 // Thus, we need to keep a list of preprocessor directives to be reported
321 // after an unwrapped line that has been started was finished.
322 SmallVector<UnwrappedLine, 4> PreprocessorDirectives;
323
324 // New unwrapped lines are added via CurrentLines.
325 // Usually points to \c &Lines. While parsing a preprocessor directive when
326 // there is an unfinished previous unwrapped line, will point to
327 // \c &PreprocessorDirectives.
328 SmallVectorImpl<UnwrappedLine> *CurrentLines;
329
330 // We store for each line whether it must be a declaration depending on
331 // whether we are in a compound statement or not.
332 llvm::BitVector DeclarationScopeStack;
333
334 const FormatStyle &Style;
335 bool IsCpp;
336 LangOptions LangOpts;
337 const AdditionalKeywords &Keywords;
338
339 llvm::Regex CommentPragmasRegex;
340
341 FormatTokenSource *Tokens;
342 UnwrappedLineConsumer &Callback;
343
344 ArrayRef<FormatToken *> AllTokens;
345
346 // Keeps a stack of the states of nested control statements (true if the
347 // statement contains more than some predefined number of nested statements).
348 SmallVector<bool, 8> NestedTooDeep;
349
350 // Keeps a stack of the states of nested lambdas (true if the return type of
351 // the lambda is `decltype(auto)`).
352 SmallVector<bool, 4> NestedLambdas;
353
354 // Whether the parser is parsing the body of a function whose return type is
355 // `decltype(auto)`.
356 bool IsDecltypeAutoFunction = false;
357
358 // Represents preprocessor branch type, so we can find matching
359 // #if/#else/#endif directives.
360 enum PPBranchKind {
361 PP_Conditional, // Any #if, #ifdef, #ifndef, or #elif block outside #if 0
362 PP_Unreachable // #if 0 or a conditional preprocessor block inside #if 0
363 };
364
// Pairs a preprocessor branch kind with a line value. The constructor stores
// both arguments unchanged.
365 struct PPBranch {
366 PPBranch(PPBranchKind Kind, size_t Line) : Kind(Kind), Line(Line) {}
367 PPBranchKind Kind; // PP_Conditional or PP_Unreachable.
368 size_t Line; // NOTE(review): presumably the index of the line where the directive was seen - confirm.
369 };
370
371 // Keeps a stack of currently active preprocessor branching directives.
373
374 // The \c UnwrappedLineParser re-parses the code for each combination
375 // of preprocessor branches that can be taken.
376 // To that end, we take the same branch (#if, #else, or one of the #elif
377 // branches) for each nesting level of preprocessor branches.
378 // \c PPBranchLevel stores the current nesting level of preprocessor
379 // branches during one pass over the code.
380 int PPBranchLevel;
381
382 // Contains the current branch (#if, #else or one of the #elif branches)
383 // for each nesting level.
384 SmallVector<int, 8> PPLevelBranchIndex;
385
386 // Contains the maximum number of branches at each nesting level.
387 SmallVector<int, 8> PPLevelBranchCount;
388
389 // Contains the number of branches per nesting level we are currently
390 // in while parsing a preprocessor branch sequence.
391 // This is used to update PPLevelBranchCount at the end of a branch
392 // sequence.
393 std::stack<int> PPChainBranchIndex;
394
395 // Include guard search state. Used to fix up preprocessor indent levels
396 // so that include guards do not participate in indentation.
397 enum IncludeGuardState {
398 IG_Inited, // Search started, looking for #ifndef.
399 IG_IfNdefed, // #ifndef found, IncludeGuardToken points to condition.
400 IG_Defined, // Matching #define found, checking other requirements.
401 IG_Found, // All requirements met, need to fix indents.
402 IG_Rejected, // Search failed or never started.
403 };
404
405 // Current state of include guard search.
406 IncludeGuardState IncludeGuard;
407
// Maps a preprocessor directive indent style to an include guard search
// state: IG_Rejected for PPDIS_None and PPDIS_Leave, IG_Inited for every
// other style.
408 IncludeGuardState
409 getIncludeGuardState(FormatStyle::PPDirectiveIndentStyle Style) const {
410 return Style == FormatStyle::PPDIS_None || Style == FormatStyle::PPDIS_Leave
411 ? IG_Rejected
412 : IG_Inited;
413 }
414
415 // Points to the #ifndef condition for a potential include guard. Null unless
416 // IncludeGuard == IG_IfNdefed.
417 FormatToken *IncludeGuardToken;
418
419 // Contains the first start column where the source begins. This is zero for
420 // normal source code and may be nonzero when formatting a code fragment that
421 // does not start at the beginning of the file.
422 unsigned FirstStartColumn;
423
424 MacroExpander Macros;
425
426 friend class ScopedLineState;
428};
429
439
440std::ostream &operator<<(std::ostream &Stream, const UnwrappedLine &Line);
441
442} // end namespace format
443} // end namespace clang
444
445#endif
Token Tok
The Token.
SmallVector< AnnotatedLine *, 1 > Children
If this token starts a block, this contains all the unwrapped lines in it.
This file contains the main building blocks of macro support in clang-format.
Implements an efficient mapping from strings to IdentifierInfo nodes.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
This class handles loading and caching of source files into memory.
The base class of the type hierarchy.
Definition TypeBase.h:1875
Takes a set of macro definitions as strings and allows expanding calls to those macros.
Definition Macros.h:80
Interface for users of the UnwrappedLineParser to receive the parsed lines.
virtual void consumeUnwrappedLine(const UnwrappedLine &Line)=0
UnwrappedLineParser(SourceManager &SourceMgr, const FormatStyle &Style, const AdditionalKeywords &Keywords, unsigned FirstStartColumn, ArrayRef< FormatToken * > Tokens, UnwrappedLineConsumer &Callback, llvm::SpecificBumpPtrAllocator< FormatToken > &Allocator, IdentifierTable &IdentTable)
std::ostream & operator<<(std::ostream &Stream, const UnwrappedLine &Line)
TokenType
Determines the semantic type of a syntactic token, e.g.
The JSON file list parser is used to communicate input to InstallAPI.
nullptr
This class represents a compute construct, representing a 'Kind' of ‘parallel’, 'serial',...
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Represents a complete lambda introducer.
Definition DeclSpec.h:2884
A wrapper around a Token storing information about the whitespace characters preceding it.
SmallVector< UnwrappedLine, 0 > Children
UnwrappedLineNode(FormatToken *Tok, llvm::ArrayRef< UnwrappedLine > Children={})
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...
unsigned PPLevel
The PPBranchLevel (adjusted for header guards) if this line is a InMacroBody line,...
bool InMacroBody
Whether it is part of a macro body.
std::list< UnwrappedLineNode > Tokens
The Tokens comprising this UnwrappedLine.
bool IsContinuation
True if this line should be indented by ContinuationIndent in addition to the normal indentation level.
unsigned Level
The indent level of the UnwrappedLine.
unsigned UnbracedBodyLevel
Nesting level of unbraced body of a control statement.
bool InPragmaDirective
Whether this UnwrappedLine is part of a pragma directive.
bool IsModuleOrImportDecl
Whether it is a C++20 module/import declaration.
bool InPPDirective
Whether this UnwrappedLine is part of a preprocessor directive.
bool SeenDecltypeAuto
Whether the parser has seen decltype(auto) in this line.
size_t MatchingClosingBlockLineIndex
If this UnwrappedLine opens a block, stores the index of the line with the corresponding closing brac...
size_t MatchingOpeningBlockLineIndex
If this UnwrappedLine closes a block in a sequence of lines, MatchingOpeningBlockLineIndex stores the...