clang 20.0.0git
UnwrappedLineParser.cpp
Go to the documentation of this file.
1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains the implementation of the UnwrappedLineParser,
11/// which turns a stream of tokens into UnwrappedLines.
12///
13//===----------------------------------------------------------------------===//
14
15#include "UnwrappedLineParser.h"
16#include "FormatToken.h"
17#include "FormatTokenLexer.h"
18#include "FormatTokenSource.h"
19#include "Macros.h"
20#include "TokenAnnotator.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/StringRef.h"
24#include "llvm/Support/Debug.h"
25#include "llvm/Support/raw_os_ostream.h"
26#include "llvm/Support/raw_ostream.h"
27
28#include <algorithm>
29#include <utility>
30
31#define DEBUG_TYPE "format-parser"
32
33namespace clang {
34namespace format {
35
36namespace {
37
38void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
39 StringRef Prefix = "", bool PrintText = false) {
40 OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
41 << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
42 bool NewLine = false;
43 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
44 E = Line.Tokens.end();
45 I != E; ++I) {
46 if (NewLine) {
47 OS << Prefix;
48 NewLine = false;
49 }
50 OS << I->Tok->Tok.getName() << "["
51 << "T=" << (unsigned)I->Tok->getType()
52 << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
53 << "\"] ";
54 for (SmallVectorImpl<UnwrappedLine>::const_iterator
55 CI = I->Children.begin(),
56 CE = I->Children.end();
57 CI != CE; ++CI) {
58 OS << "\n";
59 printLine(OS, *CI, (Prefix + " ").str());
60 NewLine = true;
61 }
62 }
63 if (!NewLine)
64 OS << "\n";
65}
66
67LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
68 printLine(llvm::dbgs(), Line);
69}
70
71class ScopedDeclarationState {
72public:
73 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
74 bool MustBeDeclaration)
75 : Line(Line), Stack(Stack) {
76 Line.MustBeDeclaration = MustBeDeclaration;
77 Stack.push_back(MustBeDeclaration);
78 }
79 ~ScopedDeclarationState() {
80 Stack.pop_back();
81 if (!Stack.empty())
82 Line.MustBeDeclaration = Stack.back();
83 else
84 Line.MustBeDeclaration = true;
85 }
86
87private:
88 UnwrappedLine &Line;
89 llvm::BitVector &Stack;
90};
91
92} // end anonymous namespace
93
94std::ostream &operator<<(std::ostream &Stream, const UnwrappedLine &Line) {
95 llvm::raw_os_ostream OS(Stream);
96 printLine(OS, Line);
97 return Stream;
98}
99
101public:
103 bool SwitchToPreprocessorLines = false)
104 : Parser(Parser), OriginalLines(Parser.CurrentLines) {
105 if (SwitchToPreprocessorLines)
106 Parser.CurrentLines = &Parser.PreprocessorDirectives;
107 else if (!Parser.Line->Tokens.empty())
108 Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
109 PreBlockLine = std::move(Parser.Line);
110 Parser.Line = std::make_unique<UnwrappedLine>();
111 Parser.Line->Level = PreBlockLine->Level;
112 Parser.Line->PPLevel = PreBlockLine->PPLevel;
113 Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
114 Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
115 Parser.Line->UnbracedBodyLevel = PreBlockLine->UnbracedBodyLevel;
116 }
117
119 if (!Parser.Line->Tokens.empty())
120 Parser.addUnwrappedLine();
121 assert(Parser.Line->Tokens.empty());
122 Parser.Line = std::move(PreBlockLine);
123 if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
124 Parser.MustBreakBeforeNextToken = true;
125 Parser.CurrentLines = OriginalLines;
126 }
127
128private:
130
131 std::unique_ptr<UnwrappedLine> PreBlockLine;
132 SmallVectorImpl<UnwrappedLine> *OriginalLines;
133};
134
136public:
138 const FormatStyle &Style, unsigned &LineLevel)
140 Style.BraceWrapping.AfterControlStatement,
141 Style.BraceWrapping.IndentBraces) {}
143 bool WrapBrace, bool IndentBrace)
144 : LineLevel(LineLevel), OldLineLevel(LineLevel) {
145 if (WrapBrace)
146 Parser->addUnwrappedLine();
147 if (IndentBrace)
148 ++LineLevel;
149 }
150 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
151
152private:
153 unsigned &LineLevel;
154 unsigned OldLineLevel;
155};
156
158 SourceManager &SourceMgr, const FormatStyle &Style,
159 const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
161 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
162 IdentifierTable &IdentTable)
163 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
164 CurrentLines(&Lines), Style(Style), IsCpp(Style.isCpp()),
165 LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords),
166 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
167 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
168 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
169 ? IG_Rejected
170 : IG_Inited),
171 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
172 Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {
173 assert(IsCpp == LangOpts.CXXOperatorNames);
174}
175
176void UnwrappedLineParser::reset() {
177 PPBranchLevel = -1;
178 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
179 ? IG_Rejected
180 : IG_Inited;
181 IncludeGuardToken = nullptr;
182 Line.reset(new UnwrappedLine);
183 CommentsBeforeNextToken.clear();
184 FormatTok = nullptr;
185 MustBreakBeforeNextToken = false;
186 IsDecltypeAutoFunction = false;
187 PreprocessorDirectives.clear();
188 CurrentLines = &Lines;
189 DeclarationScopeStack.clear();
190 NestedTooDeep.clear();
191 NestedLambdas.clear();
192 PPStack.clear();
193 Line->FirstStartColumn = FirstStartColumn;
194
195 if (!Unexpanded.empty())
196 for (FormatToken *Token : AllTokens)
197 Token->MacroCtx.reset();
198 CurrentExpandedLines.clear();
199 ExpandedLines.clear();
200 Unexpanded.clear();
201 InExpansion = false;
202 Reconstruct.reset();
203}
204
206 IndexedTokenSource TokenSource(AllTokens);
207 Line->FirstStartColumn = FirstStartColumn;
208 do {
209 LLVM_DEBUG(llvm::dbgs() << "----\n");
210 reset();
211 Tokens = &TokenSource;
212 TokenSource.reset();
213
214 readToken();
215 parseFile();
216
217 // If we found an include guard then all preprocessor directives (other than
218 // the guard) are over-indented by one.
219 if (IncludeGuard == IG_Found) {
220 for (auto &Line : Lines)
221 if (Line.InPPDirective && Line.Level > 0)
222 --Line.Level;
223 }
224
225 // Create line with eof token.
226 assert(eof());
227 pushToken(FormatTok);
228 addUnwrappedLine();
229
230 // In a first run, format everything with the lines containing macro calls
231 // replaced by the expansion.
232 if (!ExpandedLines.empty()) {
233 LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
234 for (const auto &Line : Lines) {
235 if (!Line.Tokens.empty()) {
236 auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
237 if (it != ExpandedLines.end()) {
238 for (const auto &Expanded : it->second) {
239 LLVM_DEBUG(printDebugInfo(Expanded));
240 Callback.consumeUnwrappedLine(Expanded);
241 }
242 continue;
243 }
244 }
245 LLVM_DEBUG(printDebugInfo(Line));
246 Callback.consumeUnwrappedLine(Line);
247 }
248 Callback.finishRun();
249 }
250
251 LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
252 for (const UnwrappedLine &Line : Lines) {
253 LLVM_DEBUG(printDebugInfo(Line));
254 Callback.consumeUnwrappedLine(Line);
255 }
256 Callback.finishRun();
257 Lines.clear();
258 while (!PPLevelBranchIndex.empty() &&
259 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
260 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
261 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
262 }
263 if (!PPLevelBranchIndex.empty()) {
264 ++PPLevelBranchIndex.back();
265 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
266 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
267 }
268 } while (!PPLevelBranchIndex.empty());
269}
270
271void UnwrappedLineParser::parseFile() {
272 // The top-level context in a file always has declarations, except for pre-
273 // processor directives and JavaScript files.
274 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
275 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
276 MustBeDeclaration);
278 parseBracedList();
279 else
280 parseLevel();
281 // Make sure to format the remaining tokens.
282 //
283 // LK_TextProto is special since its top-level is parsed as the body of a
284 // braced list, which does not necessarily have natural line separators such
285 // as a semicolon. Comments after the last entry that have been determined to
286 // not belong to that line, as in:
287 // key: value
288 // // endfile comment
289 // do not have a chance to be put on a line of their own until this point.
290 // Here we add this newline before end-of-file comments.
291 if (Style.Language == FormatStyle::LK_TextProto &&
292 !CommentsBeforeNextToken.empty()) {
293 addUnwrappedLine();
294 }
295 flushComments(true);
296 addUnwrappedLine();
297}
298
299void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
300 do {
301 switch (FormatTok->Tok.getKind()) {
302 case tok::l_brace:
303 return;
304 default:
305 if (FormatTok->is(Keywords.kw_where)) {
306 addUnwrappedLine();
307 nextToken();
308 parseCSharpGenericTypeConstraint();
309 break;
310 }
311 nextToken();
312 break;
313 }
314 } while (!eof());
315}
316
317void UnwrappedLineParser::parseCSharpAttribute() {
318 int UnpairedSquareBrackets = 1;
319 do {
320 switch (FormatTok->Tok.getKind()) {
321 case tok::r_square:
322 nextToken();
323 --UnpairedSquareBrackets;
324 if (UnpairedSquareBrackets == 0) {
325 addUnwrappedLine();
326 return;
327 }
328 break;
329 case tok::l_square:
330 ++UnpairedSquareBrackets;
331 nextToken();
332 break;
333 default:
334 nextToken();
335 break;
336 }
337 } while (!eof());
338}
339
340bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
341 if (!Lines.empty() && Lines.back().InPPDirective)
342 return true;
343
344 const FormatToken *Previous = Tokens->getPreviousToken();
345 return Previous && Previous->is(tok::comment) &&
346 (Previous->IsMultiline || Previous->NewlinesBefore > 0);
347}
348
349/// \brief Parses a level, that is ???.
350/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
351/// \param IfKind The \p if statement kind in the level.
352/// \param IfLeftBrace The left brace of the \p if block in the level.
353/// \returns true if a simple block of if/else/for/while, or false otherwise.
354/// (A simple block has a single statement.)
355bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
356 IfStmtKind *IfKind,
357 FormatToken **IfLeftBrace) {
358 const bool InRequiresExpression =
359 OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
360 const bool IsPrecededByCommentOrPPDirective =
361 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
362 FormatToken *IfLBrace = nullptr;
363 bool HasDoWhile = false;
364 bool HasLabel = false;
365 unsigned StatementCount = 0;
366 bool SwitchLabelEncountered = false;
367
368 do {
369 if (FormatTok->isAttribute()) {
370 nextToken();
371 if (FormatTok->is(tok::l_paren))
372 parseParens();
373 continue;
374 }
375 tok::TokenKind Kind = FormatTok->Tok.getKind();
376 if (FormatTok->is(TT_MacroBlockBegin))
377 Kind = tok::l_brace;
378 else if (FormatTok->is(TT_MacroBlockEnd))
379 Kind = tok::r_brace;
380
381 auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
382 &HasLabel, &StatementCount] {
383 parseStructuralElement(OpeningBrace, IfKind, &IfLBrace,
384 HasDoWhile ? nullptr : &HasDoWhile,
385 HasLabel ? nullptr : &HasLabel);
386 ++StatementCount;
387 assert(StatementCount > 0 && "StatementCount overflow!");
388 };
389
390 switch (Kind) {
391 case tok::comment:
392 nextToken();
393 addUnwrappedLine();
394 break;
395 case tok::l_brace:
396 if (InRequiresExpression) {
397 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
398 } else if (FormatTok->Previous &&
399 FormatTok->Previous->ClosesRequiresClause) {
400 // We need the 'default' case here to correctly parse a function
401 // l_brace.
402 ParseDefault();
403 continue;
404 }
405 if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin)) {
406 if (tryToParseBracedList())
407 continue;
408 FormatTok->setFinalizedType(TT_BlockLBrace);
409 }
410 parseBlock();
411 ++StatementCount;
412 assert(StatementCount > 0 && "StatementCount overflow!");
413 addUnwrappedLine();
414 break;
415 case tok::r_brace:
416 if (OpeningBrace) {
417 if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
418 !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
419 return false;
420 }
421 if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
422 HasDoWhile || IsPrecededByCommentOrPPDirective ||
423 precededByCommentOrPPDirective()) {
424 return false;
425 }
426 const FormatToken *Next = Tokens->peekNextToken();
427 if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
428 return false;
429 if (IfLeftBrace)
430 *IfLeftBrace = IfLBrace;
431 return true;
432 }
433 nextToken();
434 addUnwrappedLine();
435 break;
436 case tok::kw_default: {
437 unsigned StoredPosition = Tokens->getPosition();
438 auto *Next = Tokens->getNextNonComment();
439 FormatTok = Tokens->setPosition(StoredPosition);
440 if (!Next->isOneOf(tok::colon, tok::arrow)) {
441 // default not followed by `:` or `->` is not a case label; treat it
442 // like an identifier.
443 parseStructuralElement();
444 break;
445 }
446 // Else, if it is 'default:', fall through to the case handling.
447 [[fallthrough]];
448 }
449 case tok::kw_case:
450 if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() ||
451 (Style.isJavaScript() && Line->MustBeDeclaration)) {
452 // Proto: there are no switch/case statements
453 // Verilog: Case labels don't have this word. We handle case
454 // labels including default in TokenAnnotator.
455 // JavaScript: A 'case: string' style field declaration.
456 ParseDefault();
457 break;
458 }
459 if (!SwitchLabelEncountered &&
460 (Style.IndentCaseLabels ||
461 (OpeningBrace && OpeningBrace->is(TT_SwitchExpressionLBrace)) ||
462 (Line->InPPDirective && Line->Level == 1))) {
463 ++Line->Level;
464 }
465 SwitchLabelEncountered = true;
466 parseStructuralElement();
467 break;
468 case tok::l_square:
469 if (Style.isCSharp()) {
470 nextToken();
471 parseCSharpAttribute();
472 break;
473 }
474 if (handleCppAttributes())
475 break;
476 [[fallthrough]];
477 default:
478 ParseDefault();
479 break;
480 }
481 } while (!eof());
482
483 return false;
484}
485
486void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
487 // We'll parse forward through the tokens until we hit
488 // a closing brace or eof - note that getNextToken() will
489 // parse macros, so this will magically work inside macro
490 // definitions, too.
491 unsigned StoredPosition = Tokens->getPosition();
492 FormatToken *Tok = FormatTok;
493 const FormatToken *PrevTok = Tok->Previous;
494 // Keep a stack of positions of lbrace tokens. We will
495 // update information about whether an lbrace starts a
496 // braced init list or a different block during the loop.
497 struct StackEntry {
498 FormatToken *Tok;
499 const FormatToken *PrevTok;
500 };
501 SmallVector<StackEntry, 8> LBraceStack;
502 assert(Tok->is(tok::l_brace));
503
504 do {
505 auto *NextTok = Tokens->getNextNonComment();
506
507 if (!Line->InMacroBody && !Style.isTableGen()) {
508 // Skip PPDirective lines and comments.
509 while (NextTok->is(tok::hash)) {
510 do {
511 NextTok = Tokens->getNextToken();
512 } while (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof));
513
514 while (NextTok->is(tok::comment))
515 NextTok = Tokens->getNextToken();
516 }
517 }
518
519 switch (Tok->Tok.getKind()) {
520 case tok::l_brace:
521 if (Style.isJavaScript() && PrevTok) {
522 if (PrevTok->isOneOf(tok::colon, tok::less)) {
523 // A ':' indicates this code is in a type, or a braced list
524 // following a label in an object literal ({a: {b: 1}}).
525 // A '<' could be an object used in a comparison, but that is nonsense
526 // code (can never return true), so more likely it is a generic type
527 // argument (`X<{a: string; b: number}>`).
528 // The code below could be confused by semicolons between the
529 // individual members in a type member list, which would normally
530 // trigger BK_Block. In both cases, this must be parsed as an inline
531 // braced init.
533 } else if (PrevTok->is(tok::r_paren)) {
534 // `) { }` can only occur in function or method declarations in JS.
535 Tok->setBlockKind(BK_Block);
536 }
537 } else {
538 Tok->setBlockKind(BK_Unknown);
539 }
540 LBraceStack.push_back({Tok, PrevTok});
541 break;
542 case tok::r_brace:
543 if (LBraceStack.empty())
544 break;
545 if (auto *LBrace = LBraceStack.back().Tok; LBrace->is(BK_Unknown)) {
546 bool ProbablyBracedList = false;
547 if (Style.Language == FormatStyle::LK_Proto) {
548 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
549 } else if (LBrace->isNot(TT_EnumLBrace)) {
550 // Using OriginalColumn to distinguish between ObjC methods and
551 // binary operators is a bit hacky.
552 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
553 NextTok->OriginalColumn == 0;
554
555 // Try to detect a braced list. Note that regardless how we mark inner
556 // braces here, we will overwrite the BlockKind later if we parse a
557 // braced list (where all blocks inside are by default braced lists),
558 // or when we explicitly detect blocks (for example while parsing
559 // lambdas).
560
561 // If we already marked the opening brace as braced list, the closing
562 // must also be part of it.
563 ProbablyBracedList = LBrace->is(TT_BracedListLBrace);
564
565 ProbablyBracedList = ProbablyBracedList ||
566 (Style.isJavaScript() &&
567 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
568 Keywords.kw_as));
569 ProbablyBracedList =
570 ProbablyBracedList || (IsCpp && NextTok->is(tok::l_paren));
571
572 // If there is a comma, semicolon or right paren after the closing
573 // brace, we assume this is a braced initializer list.
574 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
575 // braced list in JS.
576 ProbablyBracedList =
577 ProbablyBracedList ||
578 NextTok->isOneOf(tok::comma, tok::period, tok::colon,
579 tok::r_paren, tok::r_square, tok::ellipsis);
580
581 // Distinguish between braced list in a constructor initializer list
582 // followed by constructor body, or just adjacent blocks.
583 ProbablyBracedList =
584 ProbablyBracedList ||
585 (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
586 LBraceStack.back().PrevTok->isOneOf(tok::identifier,
587 tok::greater));
588
589 ProbablyBracedList =
590 ProbablyBracedList ||
591 (NextTok->is(tok::identifier) &&
592 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
593
594 ProbablyBracedList = ProbablyBracedList ||
595 (NextTok->is(tok::semi) &&
596 (!ExpectClassBody || LBraceStack.size() != 1));
597
598 ProbablyBracedList =
599 ProbablyBracedList ||
600 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
601
602 if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
603 // We can have an array subscript after a braced init
604 // list, but C++11 attributes are expected after blocks.
605 NextTok = Tokens->getNextToken();
606 ProbablyBracedList = NextTok->isNot(tok::l_square);
607 }
608
609 // Cpp macro definition body that is a nonempty braced list or block:
610 if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
611 !FormatTok->Previous && NextTok->is(tok::eof) &&
612 // A statement can end with only `;` (simple statement), a block
613 // closing brace (compound statement), or `:` (label statement).
614 // If PrevTok is a block opening brace, Tok ends an empty block.
615 !PrevTok->isOneOf(tok::semi, BK_Block, tok::colon)) {
616 ProbablyBracedList = true;
617 }
618 }
619 const auto BlockKind = ProbablyBracedList ? BK_BracedInit : BK_Block;
620 Tok->setBlockKind(BlockKind);
621 LBrace->setBlockKind(BlockKind);
622 }
623 LBraceStack.pop_back();
624 break;
625 case tok::identifier:
626 if (Tok->isNot(TT_StatementMacro))
627 break;
628 [[fallthrough]];
629 case tok::at:
630 case tok::semi:
631 case tok::kw_if:
632 case tok::kw_while:
633 case tok::kw_for:
634 case tok::kw_switch:
635 case tok::kw_try:
636 case tok::kw___try:
637 if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
638 LBraceStack.back().Tok->setBlockKind(BK_Block);
639 break;
640 default:
641 break;
642 }
643
644 PrevTok = Tok;
645 Tok = NextTok;
646 } while (Tok->isNot(tok::eof) && !LBraceStack.empty());
647
648 // Assume other blocks for all unclosed opening braces.
649 for (const auto &Entry : LBraceStack)
650 if (Entry.Tok->is(BK_Unknown))
651 Entry.Tok->setBlockKind(BK_Block);
652
653 FormatTok = Tokens->setPosition(StoredPosition);
654}
655
656// Sets the token type of the directly previous right brace.
657void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
658 if (auto Prev = FormatTok->getPreviousNonComment();
659 Prev && Prev->is(tok::r_brace)) {
660 Prev->setFinalizedType(Type);
661 }
662}
663
/// Mixes the std::hash of \p v into \p seed in place.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
669
670size_t UnwrappedLineParser::computePPHash() const {
671 size_t h = 0;
672 for (const auto &i : PPStack) {
673 hash_combine(h, size_t(i.Kind));
674 hash_combine(h, i.Line);
675 }
676 return h;
677}
678
679// Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
680// is not null, subtracts its length (plus the preceding space) when computing
681// the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
682// running the token annotator on it so that we can restore them afterward.
683bool UnwrappedLineParser::mightFitOnOneLine(
684 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
685 const auto ColumnLimit = Style.ColumnLimit;
686 if (ColumnLimit == 0)
687 return true;
688
689 auto &Tokens = ParsedLine.Tokens;
690 assert(!Tokens.empty());
691
692 const auto *LastToken = Tokens.back().Tok;
693 assert(LastToken);
694
695 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
696
697 int Index = 0;
698 for (const auto &Token : Tokens) {
699 assert(Token.Tok);
700 auto &SavedToken = SavedTokens[Index++];
701 SavedToken.Tok = new FormatToken;
702 SavedToken.Tok->copyFrom(*Token.Tok);
703 SavedToken.Children = std::move(Token.Children);
704 }
705
706 AnnotatedLine Line(ParsedLine);
707 assert(Line.Last == LastToken);
708
709 TokenAnnotator Annotator(Style, Keywords);
710 Annotator.annotate(Line);
711 Annotator.calculateFormattingInformation(Line);
712
713 auto Length = LastToken->TotalLength;
714 if (OpeningBrace) {
715 assert(OpeningBrace != Tokens.front().Tok);
716 if (auto Prev = OpeningBrace->Previous;
717 Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
718 Length -= ColumnLimit;
719 }
720 Length -= OpeningBrace->TokenText.size() + 1;
721 }
722
723 if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
724 assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
725 Length -= FirstToken->TokenText.size() + 1;
726 }
727
728 Index = 0;
729 for (auto &Token : Tokens) {
730 const auto &SavedToken = SavedTokens[Index++];
731 Token.Tok->copyFrom(*SavedToken.Tok);
732 Token.Children = std::move(SavedToken.Children);
733 delete SavedToken.Tok;
734 }
735
736 // If these change PPLevel needs to be used for get correct indentation.
737 assert(!Line.InMacroBody);
738 assert(!Line.InPPDirective);
739 return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
740}
741
742FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
743 unsigned AddLevels, bool MunchSemi,
744 bool KeepBraces,
745 IfStmtKind *IfKind,
746 bool UnindentWhitesmithsBraces) {
747 auto HandleVerilogBlockLabel = [this]() {
748 // ":" name
749 if (Style.isVerilog() && FormatTok->is(tok::colon)) {
750 nextToken();
751 if (Keywords.isVerilogIdentifier(*FormatTok))
752 nextToken();
753 }
754 };
755
756 // Whether this is a Verilog-specific block that has a special header like a
757 // module.
758 const bool VerilogHierarchy =
759 Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
760 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
761 (Style.isVerilog() &&
762 (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
763 "'{' or macro block token expected");
764 FormatToken *Tok = FormatTok;
765 const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
766 auto Index = CurrentLines->size();
767 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
768 FormatTok->setBlockKind(BK_Block);
769
770 // For Whitesmiths mode, jump to the next level prior to skipping over the
771 // braces.
772 if (!VerilogHierarchy && AddLevels > 0 &&
774 ++Line->Level;
775 }
776
777 size_t PPStartHash = computePPHash();
778
779 const unsigned InitialLevel = Line->Level;
780 if (VerilogHierarchy) {
781 AddLevels += parseVerilogHierarchyHeader();
782 } else {
783 nextToken(/*LevelDifference=*/AddLevels);
784 HandleVerilogBlockLabel();
785 }
786
787 // Bail out if there are too many levels. Otherwise, the stack might overflow.
788 if (Line->Level > 300)
789 return nullptr;
790
791 if (MacroBlock && FormatTok->is(tok::l_paren))
792 parseParens();
793
794 size_t NbPreprocessorDirectives =
795 !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
796 addUnwrappedLine();
797 size_t OpeningLineIndex =
798 CurrentLines->empty()
800 : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
801
802 // Whitesmiths is weird here. The brace needs to be indented for the namespace
803 // block, but the block itself may not be indented depending on the style
804 // settings. This allows the format to back up one level in those cases.
805 if (UnindentWhitesmithsBraces)
806 --Line->Level;
807
808 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
809 MustBeDeclaration);
810 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
811 Line->Level += AddLevels;
812
813 FormatToken *IfLBrace = nullptr;
814 const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace);
815
816 if (eof())
817 return IfLBrace;
818
819 if (MacroBlock ? FormatTok->isNot(TT_MacroBlockEnd)
820 : FormatTok->isNot(tok::r_brace)) {
821 Line->Level = InitialLevel;
822 FormatTok->setBlockKind(BK_Block);
823 return IfLBrace;
824 }
825
826 if (FormatTok->is(tok::r_brace)) {
827 FormatTok->setBlockKind(BK_Block);
828 if (Tok->is(TT_NamespaceLBrace))
829 FormatTok->setFinalizedType(TT_NamespaceRBrace);
830 }
831
832 const bool IsFunctionRBrace =
833 FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);
834
835 auto RemoveBraces = [=]() mutable {
836 if (!SimpleBlock)
837 return false;
838 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
839 assert(FormatTok->is(tok::r_brace));
840 const bool WrappedOpeningBrace = !Tok->Previous;
841 if (WrappedOpeningBrace && FollowedByComment)
842 return false;
843 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
844 if (KeepBraces && !HasRequiredIfBraces)
845 return false;
846 if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
847 const FormatToken *Previous = Tokens->getPreviousToken();
848 assert(Previous);
849 if (Previous->is(tok::r_brace) && !Previous->Optional)
850 return false;
851 }
852 assert(!CurrentLines->empty());
853 auto &LastLine = CurrentLines->back();
854 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
855 return false;
856 if (Tok->is(TT_ElseLBrace))
857 return true;
858 if (WrappedOpeningBrace) {
859 assert(Index > 0);
860 --Index; // The line above the wrapped l_brace.
861 Tok = nullptr;
862 }
863 return mightFitOnOneLine((*CurrentLines)[Index], Tok);
864 };
865 if (RemoveBraces()) {
866 Tok->MatchingParen = FormatTok;
867 FormatTok->MatchingParen = Tok;
868 }
869
870 size_t PPEndHash = computePPHash();
871
872 // Munch the closing brace.
873 nextToken(/*LevelDifference=*/-AddLevels);
874
875 // When this is a function block and there is an unnecessary semicolon
876 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
877 // it later).
878 if (Style.RemoveSemicolon && IsFunctionRBrace) {
879 while (FormatTok->is(tok::semi)) {
880 FormatTok->Optional = true;
881 nextToken();
882 }
883 }
884
885 HandleVerilogBlockLabel();
886
887 if (MacroBlock && FormatTok->is(tok::l_paren))
888 parseParens();
889
890 Line->Level = InitialLevel;
891
892 if (FormatTok->is(tok::kw_noexcept)) {
893 // A noexcept in a requires expression.
894 nextToken();
895 }
896
897 if (FormatTok->is(tok::arrow)) {
898 // Following the } or noexcept we can find a trailing return type arrow
899 // as part of an implicit conversion constraint.
900 nextToken();
901 parseStructuralElement();
902 }
903
904 if (MunchSemi && FormatTok->is(tok::semi))
905 nextToken();
906
907 if (PPStartHash == PPEndHash) {
908 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
909 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
910 // Update the opening line to add the forward reference as well
911 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
912 CurrentLines->size() - 1;
913 }
914 }
915
916 return IfLBrace;
917}
918
919static bool isGoogScope(const UnwrappedLine &Line) {
920 // FIXME: Closure-library specific stuff should not be hard-coded but be
921 // configurable.
922 if (Line.Tokens.size() < 4)
923 return false;
924 auto I = Line.Tokens.begin();
925 if (I->Tok->TokenText != "goog")
926 return false;
927 ++I;
928 if (I->Tok->isNot(tok::period))
929 return false;
930 ++I;
931 if (I->Tok->TokenText != "scope")
932 return false;
933 ++I;
934 return I->Tok->is(tok::l_paren);
935}
936
937static bool isIIFE(const UnwrappedLine &Line,
938 const AdditionalKeywords &Keywords) {
939 // Look for the start of an immediately invoked anonymous function.
940 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
941 // This is commonly done in JavaScript to create a new, anonymous scope.
942 // Example: (function() { ... })()
943 if (Line.Tokens.size() < 3)
944 return false;
945 auto I = Line.Tokens.begin();
946 if (I->Tok->isNot(tok::l_paren))
947 return false;
948 ++I;
949 if (I->Tok->isNot(Keywords.kw_function))
950 return false;
951 ++I;
952 return I->Tok->is(tok::l_paren);
953}
954
955static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
956 const FormatToken &InitialToken) {
957 tok::TokenKind Kind = InitialToken.Tok.getKind();
958 if (InitialToken.is(TT_NamespaceMacro))
959 Kind = tok::kw_namespace;
960
961 switch (Kind) {
962 case tok::kw_namespace:
963 return Style.BraceWrapping.AfterNamespace;
964 case tok::kw_class:
965 return Style.BraceWrapping.AfterClass;
966 case tok::kw_union:
967 return Style.BraceWrapping.AfterUnion;
968 case tok::kw_struct:
969 return Style.BraceWrapping.AfterStruct;
970 case tok::kw_enum:
971 return Style.BraceWrapping.AfterEnum;
972 default:
973 return false;
974 }
975}
976
977void UnwrappedLineParser::parseChildBlock() {
978 assert(FormatTok->is(tok::l_brace));
979 FormatTok->setBlockKind(BK_Block);
980 const FormatToken *OpeningBrace = FormatTok;
981 nextToken();
982 {
983 bool SkipIndent = (Style.isJavaScript() &&
984 (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
985 ScopedLineState LineState(*this);
986 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
987 /*MustBeDeclaration=*/false);
988 Line->Level += SkipIndent ? 0 : 1;
989 parseLevel(OpeningBrace);
990 flushComments(isOnNewLine(*FormatTok));
991 Line->Level -= SkipIndent ? 0 : 1;
992 }
993 nextToken();
994}
995
996void UnwrappedLineParser::parsePPDirective() {
997 assert(FormatTok->is(tok::hash) && "'#' expected");
998 ScopedMacroState MacroState(*Line, Tokens, FormatTok);
999
1000 nextToken();
1001
1002 if (!FormatTok->Tok.getIdentifierInfo()) {
1003 parsePPUnknown();
1004 return;
1005 }
1006
1007 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
1008 case tok::pp_define:
1009 parsePPDefine();
1010 return;
1011 case tok::pp_if:
1012 parsePPIf(/*IfDef=*/false);
1013 break;
1014 case tok::pp_ifdef:
1015 case tok::pp_ifndef:
1016 parsePPIf(/*IfDef=*/true);
1017 break;
1018 case tok::pp_else:
1019 case tok::pp_elifdef:
1020 case tok::pp_elifndef:
1021 case tok::pp_elif:
1022 parsePPElse();
1023 break;
1024 case tok::pp_endif:
1025 parsePPEndIf();
1026 break;
1027 case tok::pp_pragma:
1028 parsePPPragma();
1029 break;
1030 default:
1031 parsePPUnknown();
1032 break;
1033 }
1034}
1035
1036void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1037 size_t Line = CurrentLines->size();
1038 if (CurrentLines == &PreprocessorDirectives)
1039 Line += Lines.size();
1040
1041 if (Unreachable ||
1042 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1043 PPStack.push_back({PP_Unreachable, Line});
1044 } else {
1045 PPStack.push_back({PP_Conditional, Line});
1046 }
1047}
1048
1049void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1050 ++PPBranchLevel;
1051 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1052 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1053 PPLevelBranchIndex.push_back(0);
1054 PPLevelBranchCount.push_back(0);
1055 }
1056 PPChainBranchIndex.push(Unreachable ? -1 : 0);
1057 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1058 conditionalCompilationCondition(Unreachable || Skip);
1059}
1060
1061void UnwrappedLineParser::conditionalCompilationAlternative() {
1062 if (!PPStack.empty())
1063 PPStack.pop_back();
1064 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1065 if (!PPChainBranchIndex.empty())
1066 ++PPChainBranchIndex.top();
1067 conditionalCompilationCondition(
1068 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1069 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1070}
1071
1072void UnwrappedLineParser::conditionalCompilationEnd() {
1073 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1074 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1075 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1076 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1077 }
1078 // Guard against #endif's without #if.
1079 if (PPBranchLevel > -1)
1080 --PPBranchLevel;
1081 if (!PPChainBranchIndex.empty())
1082 PPChainBranchIndex.pop();
1083 if (!PPStack.empty())
1084 PPStack.pop_back();
1085}
1086
1087void UnwrappedLineParser::parsePPIf(bool IfDef) {
1088 bool IfNDef = FormatTok->is(tok::pp_ifndef);
1089 nextToken();
1090 bool Unreachable = false;
1091 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1092 Unreachable = true;
1093 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1094 Unreachable = true;
1095 conditionalCompilationStart(Unreachable);
1096 FormatToken *IfCondition = FormatTok;
1097 // If there's a #ifndef on the first line, and the only lines before it are
1098 // comments, it could be an include guard.
1099 bool MaybeIncludeGuard = IfNDef;
1100 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1101 for (auto &Line : Lines) {
1102 if (Line.Tokens.front().Tok->isNot(tok::comment)) {
1103 MaybeIncludeGuard = false;
1104 IncludeGuard = IG_Rejected;
1105 break;
1106 }
1107 }
1108 }
1109 --PPBranchLevel;
1110 parsePPUnknown();
1111 ++PPBranchLevel;
1112 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1113 IncludeGuard = IG_IfNdefed;
1114 IncludeGuardToken = IfCondition;
1115 }
1116}
1117
1118void UnwrappedLineParser::parsePPElse() {
1119 // If a potential include guard has an #else, it's not an include guard.
1120 if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1121 IncludeGuard = IG_Rejected;
1122 // Don't crash when there is an #else without an #if.
1123 assert(PPBranchLevel >= -1);
1124 if (PPBranchLevel == -1)
1125 conditionalCompilationStart(/*Unreachable=*/true);
1126 conditionalCompilationAlternative();
1127 --PPBranchLevel;
1128 parsePPUnknown();
1129 ++PPBranchLevel;
1130}
1131
1132void UnwrappedLineParser::parsePPEndIf() {
1133 conditionalCompilationEnd();
1134 parsePPUnknown();
1135 // If the #endif of a potential include guard is the last thing in the file,
1136 // then we found an include guard.
1137 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1139 IncludeGuard = IG_Found;
1140 }
1141}
1142
1143void UnwrappedLineParser::parsePPDefine() {
1144 nextToken();
1145
1146 if (!FormatTok->Tok.getIdentifierInfo()) {
1147 IncludeGuard = IG_Rejected;
1148 IncludeGuardToken = nullptr;
1149 parsePPUnknown();
1150 return;
1151 }
1152
1153 if (IncludeGuard == IG_IfNdefed &&
1154 IncludeGuardToken->TokenText == FormatTok->TokenText) {
1155 IncludeGuard = IG_Defined;
1156 IncludeGuardToken = nullptr;
1157 for (auto &Line : Lines) {
1158 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1159 IncludeGuard = IG_Rejected;
1160 break;
1161 }
1162 }
1163 }
1164
1165 // In the context of a define, even keywords should be treated as normal
1166 // identifiers. Setting the kind to identifier is not enough, because we need
1167 // to treat additional keywords like __except as well, which are already
1168 // identifiers. Setting the identifier info to null interferes with include
1169 // guard processing above, and changes preprocessing nesting.
1170 FormatTok->Tok.setKind(tok::identifier);
1172 nextToken();
1173 if (FormatTok->Tok.getKind() == tok::l_paren &&
1174 !FormatTok->hasWhitespaceBefore()) {
1175 parseParens();
1176 }
1178 Line->Level += PPBranchLevel + 1;
1179 addUnwrappedLine();
1180 ++Line->Level;
1181
1182 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
1183 assert((int)Line->PPLevel >= 0);
1184 Line->InMacroBody = true;
1185
1186 if (Style.SkipMacroDefinitionBody) {
1187 while (!eof()) {
1188 FormatTok->Finalized = true;
1189 FormatTok = Tokens->getNextToken();
1190 }
1191 addUnwrappedLine();
1192 return;
1193 }
1194
1195 // Errors during a preprocessor directive can only affect the layout of the
1196 // preprocessor directive, and thus we ignore them. An alternative approach
1197 // would be to use the same approach we use on the file level (no
1198 // re-indentation if there was a structural error) within the macro
1199 // definition.
1200 parseFile();
1201}
1202
1203void UnwrappedLineParser::parsePPPragma() {
1204 Line->InPragmaDirective = true;
1205 parsePPUnknown();
1206}
1207
1208void UnwrappedLineParser::parsePPUnknown() {
1209 do {
1210 nextToken();
1211 } while (!eof());
1213 Line->Level += PPBranchLevel + 1;
1214 addUnwrappedLine();
1215}
1216
1217// Here we exclude certain tokens that are not usually the first token in an
1218// unwrapped line. This is used in attempt to distinguish macro calls without
1219// trailing semicolons from other constructs split to several lines.
1220static bool tokenCanStartNewLine(const FormatToken &Tok) {
1221 // Semicolon can be a null-statement, l_square can be a start of a macro or
1222 // a C++11 attribute, but this doesn't seem to be common.
1223 return !Tok.isOneOf(tok::semi, tok::l_brace,
1224 // Tokens that can only be used as binary operators and a
1225 // part of overloaded operator names.
1226 tok::period, tok::periodstar, tok::arrow, tok::arrowstar,
1227 tok::less, tok::greater, tok::slash, tok::percent,
1228 tok::lessless, tok::greatergreater, tok::equal,
1229 tok::plusequal, tok::minusequal, tok::starequal,
1230 tok::slashequal, tok::percentequal, tok::ampequal,
1231 tok::pipeequal, tok::caretequal, tok::greatergreaterequal,
1232 tok::lesslessequal,
1233 // Colon is used in labels, base class lists, initializer
1234 // lists, range-based for loops, ternary operator, but
1235 // should never be the first token in an unwrapped line.
1236 tok::colon,
1237 // 'noexcept' is a trailing annotation.
1238 tok::kw_noexcept);
1239}
1240
1241static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1242 const FormatToken *FormatTok) {
1243 // FIXME: This returns true for C/C++ keywords like 'struct'.
1244 return FormatTok->is(tok::identifier) &&
1245 (!FormatTok->Tok.getIdentifierInfo() ||
1246 !FormatTok->isOneOf(
1247 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1248 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1249 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1250 Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1251 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1252 Keywords.kw_instanceof, Keywords.kw_interface,
1253 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1254}
1255
1256static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1257 const FormatToken *FormatTok) {
1258 return FormatTok->Tok.isLiteral() ||
1259 FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1260 mustBeJSIdent(Keywords, FormatTok);
1261}
1262
1263// isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1264// when encountered after a value (see mustBeJSIdentOrValue).
1265static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1266 const FormatToken *FormatTok) {
1267 return FormatTok->isOneOf(
1268 tok::kw_return, Keywords.kw_yield,
1269 // conditionals
1270 tok::kw_if, tok::kw_else,
1271 // loops
1272 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1273 // switch/case
1274 tok::kw_switch, tok::kw_case,
1275 // exceptions
1276 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1277 // declaration
1278 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1279 Keywords.kw_async, Keywords.kw_function,
1280 // import/export
1281 Keywords.kw_import, tok::kw_export);
1282}
1283
1284// Checks whether a token is a type in K&R C (aka C78).
1285static bool isC78Type(const FormatToken &Tok) {
1286 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1287 tok::kw_unsigned, tok::kw_float, tok::kw_double,
1288 tok::identifier);
1289}
1290
1291// This function checks whether a token starts the first parameter declaration
1292// in a K&R C (aka C78) function definition, e.g.:
1293// int f(a, b)
1294// short a, b;
1295// {
1296// return a + b;
1297// }
1298static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1299 const FormatToken *FuncName) {
1300 assert(Tok);
1301 assert(Next);
1302 assert(FuncName);
1303
1304 if (FuncName->isNot(tok::identifier))
1305 return false;
1306
1307 const FormatToken *Prev = FuncName->Previous;
1308 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1309 return false;
1310
1311 if (!isC78Type(*Tok) &&
1312 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1313 return false;
1314 }
1315
1316 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1317 return false;
1318
1319 Tok = Tok->Previous;
1320 if (!Tok || Tok->isNot(tok::r_paren))
1321 return false;
1322
1323 Tok = Tok->Previous;
1324 if (!Tok || Tok->isNot(tok::identifier))
1325 return false;
1326
1327 return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1328}
1329
1330bool UnwrappedLineParser::parseModuleImport() {
1331 assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1332
1333 if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1334 !Token->Tok.getIdentifierInfo() &&
1335 !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
1336 return false;
1337 }
1338
1339 nextToken();
1340 while (!eof()) {
1341 if (FormatTok->is(tok::colon)) {
1342 FormatTok->setFinalizedType(TT_ModulePartitionColon);
1343 }
1344 // Handle import <foo/bar.h> as we would an include statement.
1345 else if (FormatTok->is(tok::less)) {
1346 nextToken();
1347 while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1348 // Mark tokens up to the trailing line comments as implicit string
1349 // literals.
1350 if (FormatTok->isNot(tok::comment) &&
1351 !FormatTok->TokenText.starts_with("//")) {
1352 FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1353 }
1354 nextToken();
1355 }
1356 }
1357 if (FormatTok->is(tok::semi)) {
1358 nextToken();
1359 break;
1360 }
1361 nextToken();
1362 }
1363
1364 addUnwrappedLine();
1365 return true;
1366}
1367
1368// readTokenWithJavaScriptASI reads the next token and terminates the current
1369// line if JavaScript Automatic Semicolon Insertion must
1370// happen between the current token and the next token.
1371//
1372// This method is conservative - it cannot cover all edge cases of JavaScript,
1373// but only aims to correctly handle certain well known cases. It *must not*
1374// return true in speculative cases.
1375void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1376 FormatToken *Previous = FormatTok;
1377 readToken();
1378 FormatToken *Next = FormatTok;
1379
1380 bool IsOnSameLine =
1381 CommentsBeforeNextToken.empty()
1382 ? Next->NewlinesBefore == 0
1383 : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1384 if (IsOnSameLine)
1385 return;
1386
1387 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1388 bool PreviousStartsTemplateExpr =
1389 Previous->is(TT_TemplateString) && Previous->TokenText.ends_with("${");
1390 if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1391 // If the line contains an '@' sign, the previous token might be an
1392 // annotation, which can precede another identifier/value.
1393 bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1394 return LineNode.Tok->is(tok::at);
1395 });
1396 if (HasAt)
1397 return;
1398 }
1399 if (Next->is(tok::exclaim) && PreviousMustBeValue)
1400 return addUnwrappedLine();
1401 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1402 bool NextEndsTemplateExpr =
1403 Next->is(TT_TemplateString) && Next->TokenText.starts_with("}");
1404 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1405 (PreviousMustBeValue ||
1406 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1407 tok::minusminus))) {
1408 return addUnwrappedLine();
1409 }
1410 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1411 isJSDeclOrStmt(Keywords, Next)) {
1412 return addUnwrappedLine();
1413 }
1414}
1415
1416void UnwrappedLineParser::parseStructuralElement(
1417 const FormatToken *OpeningBrace, IfStmtKind *IfKind,
1418 FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1419 if (Style.Language == FormatStyle::LK_TableGen &&
1420 FormatTok->is(tok::pp_include)) {
1421 nextToken();
1422 if (FormatTok->is(tok::string_literal))
1423 nextToken();
1424 addUnwrappedLine();
1425 return;
1426 }
1427
1428 if (IsCpp) {
1429 while (FormatTok->is(tok::l_square) && handleCppAttributes()) {
1430 }
1431 } else if (Style.isVerilog()) {
1432 if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
1433 parseForOrWhileLoop(/*HasParens=*/false);
1434 return;
1435 }
1436 if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
1437 parseForOrWhileLoop();
1438 return;
1439 }
1440 if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
1441 Keywords.kw_assume, Keywords.kw_cover)) {
1442 parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1443 return;
1444 }
1445
1446 // Skip things that can exist before keywords like 'if' and 'case'.
1447 while (true) {
1448 if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
1449 Keywords.kw_unique0)) {
1450 nextToken();
1451 } else if (FormatTok->is(tok::l_paren) &&
1452 Tokens->peekNextToken()->is(tok::star)) {
1453 parseParens();
1454 } else {
1455 break;
1456 }
1457 }
1458 }
1459
1460 // Tokens that only make sense at the beginning of a line.
1461 if (FormatTok->isAccessSpecifierKeyword()) {
1462 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1463 Style.isCSharp()) {
1464 nextToken();
1465 } else {
1466 parseAccessSpecifier();
1467 }
1468 return;
1469 }
1470 switch (FormatTok->Tok.getKind()) {
1471 case tok::kw_asm:
1472 nextToken();
1473 if (FormatTok->is(tok::l_brace)) {
1474 FormatTok->setFinalizedType(TT_InlineASMBrace);
1475 nextToken();
1476 while (FormatTok && !eof()) {
1477 if (FormatTok->is(tok::r_brace)) {
1478 FormatTok->setFinalizedType(TT_InlineASMBrace);
1479 nextToken();
1480 addUnwrappedLine();
1481 break;
1482 }
1483 FormatTok->Finalized = true;
1484 nextToken();
1485 }
1486 }
1487 break;
1488 case tok::kw_namespace:
1489 parseNamespace();
1490 return;
1491 case tok::kw_if: {
1492 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1493 // field/method declaration.
1494 break;
1495 }
1496 FormatToken *Tok = parseIfThenElse(IfKind);
1497 if (IfLeftBrace)
1498 *IfLeftBrace = Tok;
1499 return;
1500 }
1501 case tok::kw_for:
1502 case tok::kw_while:
1503 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1504 // field/method declaration.
1505 break;
1506 }
1507 parseForOrWhileLoop();
1508 return;
1509 case tok::kw_do:
1510 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1511 // field/method declaration.
1512 break;
1513 }
1514 parseDoWhile();
1515 if (HasDoWhile)
1516 *HasDoWhile = true;
1517 return;
1518 case tok::kw_switch:
1519 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1520 // 'switch: string' field declaration.
1521 break;
1522 }
1523 parseSwitch(/*IsExpr=*/false);
1524 return;
1525 case tok::kw_default: {
1526 // In Verilog default along with other labels are handled in the next loop.
1527 if (Style.isVerilog())
1528 break;
1529 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1530 // 'default: string' field declaration.
1531 break;
1532 }
1533 auto *Default = FormatTok;
1534 nextToken();
1535 if (FormatTok->is(tok::colon)) {
1536 FormatTok->setFinalizedType(TT_CaseLabelColon);
1537 parseLabel();
1538 return;
1539 }
1540 if (FormatTok->is(tok::arrow)) {
1541 FormatTok->setFinalizedType(TT_CaseLabelArrow);
1542 Default->setFinalizedType(TT_SwitchExpressionLabel);
1543 parseLabel();
1544 return;
1545 }
1546 // e.g. "default void f() {}" in a Java interface.
1547 break;
1548 }
1549 case tok::kw_case:
1550 // Proto: there are no switch/case statements.
1551 if (Style.Language == FormatStyle::LK_Proto) {
1552 nextToken();
1553 return;
1554 }
1555 if (Style.isVerilog()) {
1556 parseBlock();
1557 addUnwrappedLine();
1558 return;
1559 }
1560 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1561 // 'case: string' field declaration.
1562 nextToken();
1563 break;
1564 }
1565 parseCaseLabel();
1566 return;
1567 case tok::kw_try:
1568 case tok::kw___try:
1569 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1570 // field/method declaration.
1571 break;
1572 }
1573 parseTryCatch();
1574 return;
1575 case tok::kw_extern:
1576 nextToken();
1577 if (Style.isVerilog()) {
1578 // In Verilog and extern module declaration looks like a start of module.
1579 // But there is no body and endmodule. So we handle it separately.
1580 if (Keywords.isVerilogHierarchy(*FormatTok)) {
1581 parseVerilogHierarchyHeader();
1582 return;
1583 }
1584 } else if (FormatTok->is(tok::string_literal)) {
1585 nextToken();
1586 if (FormatTok->is(tok::l_brace)) {
1588 addUnwrappedLine();
1589 // Either we indent or for backwards compatibility we follow the
1590 // AfterExternBlock style.
1591 unsigned AddLevels =
1594 Style.IndentExternBlock ==
1596 ? 1u
1597 : 0u;
1598 parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1599 addUnwrappedLine();
1600 return;
1601 }
1602 }
1603 break;
1604 case tok::kw_export:
1605 if (Style.isJavaScript()) {
1606 parseJavaScriptEs6ImportExport();
1607 return;
1608 }
1609 if (IsCpp) {
1610 nextToken();
1611 if (FormatTok->is(tok::kw_namespace)) {
1612 parseNamespace();
1613 return;
1614 }
1615 if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
1616 return;
1617 }
1618 break;
1619 case tok::kw_inline:
1620 nextToken();
1621 if (FormatTok->is(tok::kw_namespace)) {
1622 parseNamespace();
1623 return;
1624 }
1625 break;
1626 case tok::identifier:
1627 if (FormatTok->is(TT_ForEachMacro)) {
1628 parseForOrWhileLoop();
1629 return;
1630 }
1631 if (FormatTok->is(TT_MacroBlockBegin)) {
1632 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1633 /*MunchSemi=*/false);
1634 return;
1635 }
1636 if (FormatTok->is(Keywords.kw_import)) {
1637 if (Style.isJavaScript()) {
1638 parseJavaScriptEs6ImportExport();
1639 return;
1640 }
1641 if (Style.Language == FormatStyle::LK_Proto) {
1642 nextToken();
1643 if (FormatTok->is(tok::kw_public))
1644 nextToken();
1645 if (FormatTok->isNot(tok::string_literal))
1646 return;
1647 nextToken();
1648 if (FormatTok->is(tok::semi))
1649 nextToken();
1650 addUnwrappedLine();
1651 return;
1652 }
1653 if (IsCpp && parseModuleImport())
1654 return;
1655 }
1656 if (IsCpp && FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1657 Keywords.kw_slots, Keywords.kw_qslots)) {
1658 nextToken();
1659 if (FormatTok->is(tok::colon)) {
1660 nextToken();
1661 addUnwrappedLine();
1662 return;
1663 }
1664 }
1665 if (IsCpp && FormatTok->is(TT_StatementMacro)) {
1666 parseStatementMacro();
1667 return;
1668 }
1669 if (IsCpp && FormatTok->is(TT_NamespaceMacro)) {
1670 parseNamespace();
1671 return;
1672 }
1673 // In Verilog labels can be any expression, so we don't do them here.
1674 // JS doesn't have macros, and within classes colons indicate fields, not
1675 // labels.
1676 // TableGen doesn't have labels.
1677 if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() &&
1678 Tokens->peekNextToken()->is(tok::colon) && !Line->MustBeDeclaration) {
1679 nextToken();
1680 if (!Line->InMacroBody || CurrentLines->size() > 1)
1681 Line->Tokens.begin()->Tok->MustBreakBefore = true;
1682 FormatTok->setFinalizedType(TT_GotoLabelColon);
1683 parseLabel(!Style.IndentGotoLabels);
1684 if (HasLabel)
1685 *HasLabel = true;
1686 return;
1687 }
1688 // In all other cases, parse the declaration.
1689 break;
1690 default:
1691 break;
1692 }
1693
1694 for (const bool InRequiresExpression =
1695 OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
1696 !eof();) {
1697 if (IsCpp && FormatTok->isCppAlternativeOperatorKeyword()) {
1698 if (auto *Next = Tokens->peekNextToken(/*SkipComment=*/true);
1699 Next && Next->isBinaryOperator()) {
1700 FormatTok->Tok.setKind(tok::identifier);
1701 }
1702 }
1703 const FormatToken *Previous = FormatTok->Previous;
1704 switch (FormatTok->Tok.getKind()) {
1705 case tok::at:
1706 nextToken();
1707 if (FormatTok->is(tok::l_brace)) {
1708 nextToken();
1709 parseBracedList();
1710 break;
1711 } else if (Style.Language == FormatStyle::LK_Java &&
1712 FormatTok->is(Keywords.kw_interface)) {
1713 nextToken();
1714 break;
1715 }
1716 switch (FormatTok->Tok.getObjCKeywordID()) {
1717 case tok::objc_public:
1718 case tok::objc_protected:
1719 case tok::objc_package:
1720 case tok::objc_private:
1721 return parseAccessSpecifier();
1722 case tok::objc_interface:
1723 case tok::objc_implementation:
1724 return parseObjCInterfaceOrImplementation();
1725 case tok::objc_protocol:
1726 if (parseObjCProtocol())
1727 return;
1728 break;
1729 case tok::objc_end:
1730 return; // Handled by the caller.
1731 case tok::objc_optional:
1732 case tok::objc_required:
1733 nextToken();
1734 addUnwrappedLine();
1735 return;
1736 case tok::objc_autoreleasepool:
1737 nextToken();
1738 if (FormatTok->is(tok::l_brace)) {
1741 addUnwrappedLine();
1742 }
1743 parseBlock();
1744 }
1745 addUnwrappedLine();
1746 return;
1747 case tok::objc_synchronized:
1748 nextToken();
1749 if (FormatTok->is(tok::l_paren)) {
1750 // Skip synchronization object
1751 parseParens();
1752 }
1753 if (FormatTok->is(tok::l_brace)) {
1756 addUnwrappedLine();
1757 }
1758 parseBlock();
1759 }
1760 addUnwrappedLine();
1761 return;
1762 case tok::objc_try:
1763 // This branch isn't strictly necessary (the kw_try case below would
1764 // do this too after the tok::at is parsed above). But be explicit.
1765 parseTryCatch();
1766 return;
1767 default:
1768 break;
1769 }
1770 break;
1771 case tok::kw_requires: {
1772 if (IsCpp) {
1773 bool ParsedClause = parseRequires();
1774 if (ParsedClause)
1775 return;
1776 } else {
1777 nextToken();
1778 }
1779 break;
1780 }
1781 case tok::kw_enum:
1782 // Ignore if this is part of "template <enum ..." or "... -> enum" or
1783 // "template <..., enum ...>".
1784 if (Previous && Previous->isOneOf(tok::less, tok::arrow, tok::comma)) {
1785 nextToken();
1786 break;
1787 }
1788
1789 // parseEnum falls through and does not yet add an unwrapped line as an
1790 // enum definition can start a structural element.
1791 if (!parseEnum())
1792 break;
1793 // This only applies to C++ and Verilog.
1794 if (!IsCpp && !Style.isVerilog()) {
1795 addUnwrappedLine();
1796 return;
1797 }
1798 break;
1799 case tok::kw_typedef:
1800 nextToken();
1801 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1802 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1803 Keywords.kw_CF_CLOSED_ENUM,
1804 Keywords.kw_NS_CLOSED_ENUM)) {
1805 parseEnum();
1806 }
1807 break;
1808 case tok::kw_class:
1809 if (Style.isVerilog()) {
1810 parseBlock();
1811 addUnwrappedLine();
1812 return;
1813 }
1814 if (Style.isTableGen()) {
1815 // Do nothing special. In this case the l_brace becomes FunctionLBrace.
1816 // This is same as def and so on.
1817 nextToken();
1818 break;
1819 }
1820 [[fallthrough]];
1821 case tok::kw_struct:
1822 case tok::kw_union:
1823 if (parseStructLike())
1824 return;
1825 break;
1826 case tok::kw_decltype:
1827 nextToken();
1828 if (FormatTok->is(tok::l_paren)) {
1829 parseParens();
1830 assert(FormatTok->Previous);
1831 if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto,
1832 tok::l_paren)) {
1833 Line->SeenDecltypeAuto = true;
1834 }
1835 }
1836 break;
1837 case tok::period:
1838 nextToken();
1839 // In Java, classes have an implicit static member "class".
1840 if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1841 FormatTok->is(tok::kw_class)) {
1842 nextToken();
1843 }
1844 if (Style.isJavaScript() && FormatTok &&
1845 FormatTok->Tok.getIdentifierInfo()) {
1846 // JavaScript only has pseudo keywords, all keywords are allowed to
1847 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1848 nextToken();
1849 }
1850 break;
1851 case tok::semi:
1852 nextToken();
1853 addUnwrappedLine();
1854 return;
1855 case tok::r_brace:
1856 addUnwrappedLine();
1857 return;
1858 case tok::l_paren: {
1859 parseParens();
1860 // Break the unwrapped line if a K&R C function definition has a parameter
1861 // declaration.
1862 if (OpeningBrace || !IsCpp || !Previous || eof())
1863 break;
1864 if (isC78ParameterDecl(FormatTok,
1865 Tokens->peekNextToken(/*SkipComment=*/true),
1866 Previous)) {
1867 addUnwrappedLine();
1868 return;
1869 }
1870 break;
1871 }
1872 case tok::kw_operator:
1873 nextToken();
1874 if (FormatTok->isBinaryOperator())
1875 nextToken();
1876 break;
1877 case tok::caret:
1878 nextToken();
1879 // Block return type.
1880 if (FormatTok->Tok.isAnyIdentifier() || FormatTok->isTypeName(LangOpts)) {
1881 nextToken();
1882 // Return types: pointers are ok too.
1883 while (FormatTok->is(tok::star))
1884 nextToken();
1885 }
1886 // Block argument list.
1887 if (FormatTok->is(tok::l_paren))
1888 parseParens();
1889 // Block body.
1890 if (FormatTok->is(tok::l_brace))
1891 parseChildBlock();
1892 break;
1893 case tok::l_brace:
1894 if (InRequiresExpression)
1895 FormatTok->setFinalizedType(TT_BracedListLBrace);
1896 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1897 IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
1898 // A block outside of parentheses must be the last part of a
1899 // structural element.
1900 // FIXME: Figure out cases where this is not true, and add projections
1901 // for them (the one we know is missing are lambdas).
1902 if (Style.Language == FormatStyle::LK_Java &&
1903 Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1904 // If necessary, we could set the type to something different than
1905 // TT_FunctionLBrace.
1908 addUnwrappedLine();
1909 }
1910 } else if (Style.BraceWrapping.AfterFunction) {
1911 addUnwrappedLine();
1912 }
1913 if (!Previous || Previous->isNot(TT_TypeDeclarationParen))
1914 FormatTok->setFinalizedType(TT_FunctionLBrace);
1915 parseBlock();
1916 IsDecltypeAutoFunction = false;
1917 addUnwrappedLine();
1918 return;
1919 }
1920 // Otherwise this was a braced init list, and the structural
1921 // element continues.
1922 break;
1923 case tok::kw_try:
1924 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1925 // field/method declaration.
1926 nextToken();
1927 break;
1928 }
1929 // We arrive here when parsing function-try blocks.
1930 if (Style.BraceWrapping.AfterFunction)
1931 addUnwrappedLine();
1932 parseTryCatch();
1933 return;
1934 case tok::identifier: {
1935 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1936 Line->MustBeDeclaration) {
1937 addUnwrappedLine();
1938 parseCSharpGenericTypeConstraint();
1939 break;
1940 }
1941 if (FormatTok->is(TT_MacroBlockEnd)) {
1942 addUnwrappedLine();
1943 return;
1944 }
1945
1946 // Function declarations (as opposed to function expressions) are parsed
1947 // on their own unwrapped line by continuing this loop. Function
1948 // expressions (functions that are not on their own line) must not create
1949 // a new unwrapped line, so they are special cased below.
1950 size_t TokenCount = Line->Tokens.size();
1951 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1952 (TokenCount > 1 ||
1953 (TokenCount == 1 &&
1954 Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) {
1955 tryToParseJSFunction();
1956 break;
1957 }
1958 if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1959 FormatTok->is(Keywords.kw_interface)) {
1960 if (Style.isJavaScript()) {
1961 // In JavaScript/TypeScript, "interface" can be used as a standalone
1962 // identifier, e.g. in `var interface = 1;`. If "interface" is
1963 // followed by another identifier, it is very like to be an actual
1964 // interface declaration.
1965 unsigned StoredPosition = Tokens->getPosition();
1966 FormatToken *Next = Tokens->getNextToken();
1967 FormatTok = Tokens->setPosition(StoredPosition);
1968 if (!mustBeJSIdent(Keywords, Next)) {
1969 nextToken();
1970 break;
1971 }
1972 }
1973 parseRecord();
1974 addUnwrappedLine();
1975 return;
1976 }
1977
1978 if (Style.isVerilog()) {
1979 if (FormatTok->is(Keywords.kw_table)) {
1980 parseVerilogTable();
1981 return;
1982 }
1983 if (Keywords.isVerilogBegin(*FormatTok) ||
1984 Keywords.isVerilogHierarchy(*FormatTok)) {
1985 parseBlock();
1986 addUnwrappedLine();
1987 return;
1988 }
1989 }
1990
1991 if (!IsCpp && FormatTok->is(Keywords.kw_interface)) {
1992 if (parseStructLike())
1993 return;
1994 break;
1995 }
1996
1997 if (IsCpp && FormatTok->is(TT_StatementMacro)) {
1998 parseStatementMacro();
1999 return;
2000 }
2001
2002 // See if the following token should start a new unwrapped line.
2003 StringRef Text = FormatTok->TokenText;
2004
2005 FormatToken *PreviousToken = FormatTok;
2006 nextToken();
2007
2008 // JS doesn't have macros, and within classes colons indicate fields, not
2009 // labels.
2010 if (Style.isJavaScript())
2011 break;
2012
2013 auto OneTokenSoFar = [&]() {
2014 auto I = Line->Tokens.begin(), E = Line->Tokens.end();
2015 while (I != E && I->Tok->is(tok::comment))
2016 ++I;
2017 if (Style.isVerilog())
2018 while (I != E && I->Tok->is(tok::hash))
2019 ++I;
2020 return I != E && (++I == E);
2021 };
2022 if (OneTokenSoFar()) {
2023 // Recognize function-like macro usages without trailing semicolon as
2024 // well as free-standing macros like Q_OBJECT.
2025 bool FunctionLike = FormatTok->is(tok::l_paren);
2026 if (FunctionLike)
2027 parseParens();
2028
2029 bool FollowedByNewline =
2030 CommentsBeforeNextToken.empty()
2031 ? FormatTok->NewlinesBefore > 0
2032 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
2033
2034 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
2035 tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
2036 if (PreviousToken->isNot(TT_UntouchableMacroFunc))
2037 PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
2038 addUnwrappedLine();
2039 return;
2040 }
2041 }
2042 break;
2043 }
2044 case tok::equal:
2045 if ((Style.isJavaScript() || Style.isCSharp()) &&
2046 FormatTok->is(TT_FatArrow)) {
2047 tryToParseChildBlock();
2048 break;
2049 }
2050
2051 nextToken();
2052 if (FormatTok->is(tok::l_brace)) {
2053 // Block kind should probably be set to BK_BracedInit for any language.
2054 // C# needs this change to ensure that array initialisers and object
2055 // initialisers are indented the same way.
2056 if (Style.isCSharp())
2057 FormatTok->setBlockKind(BK_BracedInit);
2058 // TableGen's defset statement has syntax of the form,
2059 // `defset <type> <name> = { <statement>... }`
2060 if (Style.isTableGen() &&
2061 Line->Tokens.begin()->Tok->is(Keywords.kw_defset)) {
2062 FormatTok->setFinalizedType(TT_FunctionLBrace);
2063 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2064 /*MunchSemi=*/false);
2065 addUnwrappedLine();
2066 break;
2067 }
2068 nextToken();
2069 parseBracedList();
2070 } else if (Style.Language == FormatStyle::LK_Proto &&
2071 FormatTok->is(tok::less)) {
2072 nextToken();
2073 parseBracedList(/*IsAngleBracket=*/true);
2074 }
2075 break;
2076 case tok::l_square:
2077 parseSquare();
2078 break;
2079 case tok::kw_new:
2080 parseNew();
2081 break;
2082 case tok::kw_switch:
2083 if (Style.Language == FormatStyle::LK_Java)
2084 parseSwitch(/*IsExpr=*/true);
2085 nextToken();
2086 break;
2087 case tok::kw_case:
2088 // Proto: there are no switch/case statements.
2089 if (Style.Language == FormatStyle::LK_Proto) {
2090 nextToken();
2091 return;
2092 }
2093 // In Verilog switch is called case.
2094 if (Style.isVerilog()) {
2095 parseBlock();
2096 addUnwrappedLine();
2097 return;
2098 }
2099 if (Style.isJavaScript() && Line->MustBeDeclaration) {
2100 // 'case: string' field declaration.
2101 nextToken();
2102 break;
2103 }
2104 parseCaseLabel();
2105 break;
2106 case tok::kw_default:
2107 nextToken();
2108 if (Style.isVerilog()) {
2109 if (FormatTok->is(tok::colon)) {
2110 // The label will be handled in the next iteration.
2111 break;
2112 }
2113 if (FormatTok->is(Keywords.kw_clocking)) {
2114 // A default clocking block.
2115 parseBlock();
2116 addUnwrappedLine();
2117 return;
2118 }
2119 parseVerilogCaseLabel();
2120 return;
2121 }
2122 break;
2123 case tok::colon:
2124 nextToken();
2125 if (Style.isVerilog()) {
2126 parseVerilogCaseLabel();
2127 return;
2128 }
2129 break;
2130 default:
2131 nextToken();
2132 break;
2133 }
2134 }
2135}
2136
2137bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2138 assert(FormatTok->is(tok::l_brace));
2139 if (!Style.isCSharp())
2140 return false;
2141 // See if it's a property accessor.
2142 if (FormatTok->Previous->isNot(tok::identifier))
2143 return false;
2144
2145 // See if we are inside a property accessor.
2146 //
2147 // Record the current tokenPosition so that we can advance and
2148 // reset the current token. `Next` is not set yet so we need
2149 // another way to advance along the token stream.
2150 unsigned int StoredPosition = Tokens->getPosition();
2151 FormatToken *Tok = Tokens->getNextToken();
2152
2153 // A trivial property accessor is of the form:
2154 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2155 // Track these as they do not require line breaks to be introduced.
2156 bool HasSpecialAccessor = false;
2157 bool IsTrivialPropertyAccessor = true;
2158 while (!eof()) {
2159 if (Tok->isAccessSpecifierKeyword() ||
2160 Tok->isOneOf(tok::semi, Keywords.kw_internal, Keywords.kw_get,
2161 Keywords.kw_init, Keywords.kw_set)) {
2162 if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
2163 HasSpecialAccessor = true;
2164 Tok = Tokens->getNextToken();
2165 continue;
2166 }
2167 if (Tok->isNot(tok::r_brace))
2168 IsTrivialPropertyAccessor = false;
2169 break;
2170 }
2171
2172 if (!HasSpecialAccessor) {
2173 Tokens->setPosition(StoredPosition);
2174 return false;
2175 }
2176
2177 // Try to parse the property accessor:
2178 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2179 Tokens->setPosition(StoredPosition);
2180 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2181 addUnwrappedLine();
2182 nextToken();
2183 do {
2184 switch (FormatTok->Tok.getKind()) {
2185 case tok::r_brace:
2186 nextToken();
2187 if (FormatTok->is(tok::equal)) {
2188 while (!eof() && FormatTok->isNot(tok::semi))
2189 nextToken();
2190 nextToken();
2191 }
2192 addUnwrappedLine();
2193 return true;
2194 case tok::l_brace:
2195 ++Line->Level;
2196 parseBlock(/*MustBeDeclaration=*/true);
2197 addUnwrappedLine();
2198 --Line->Level;
2199 break;
2200 case tok::equal:
2201 if (FormatTok->is(TT_FatArrow)) {
2202 ++Line->Level;
2203 do {
2204 nextToken();
2205 } while (!eof() && FormatTok->isNot(tok::semi));
2206 nextToken();
2207 addUnwrappedLine();
2208 --Line->Level;
2209 break;
2210 }
2211 nextToken();
2212 break;
2213 default:
2214 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2215 Keywords.kw_set) &&
2216 !IsTrivialPropertyAccessor) {
2217 // Non-trivial get/set needs to be on its own line.
2218 addUnwrappedLine();
2219 }
2220 nextToken();
2221 }
2222 } while (!eof());
2223
2224 // Unreachable for well-formed code (paired '{' and '}').
2225 return true;
2226}
2227
2228bool UnwrappedLineParser::tryToParseLambda() {
2229 assert(FormatTok->is(tok::l_square));
2230 if (!IsCpp) {
2231 nextToken();
2232 return false;
2233 }
2234 FormatToken &LSquare = *FormatTok;
2235 if (!tryToParseLambdaIntroducer())
2236 return false;
2237
2238 bool SeenArrow = false;
2239 bool InTemplateParameterList = false;
2240
2241 while (FormatTok->isNot(tok::l_brace)) {
2242 if (FormatTok->isTypeName(LangOpts) || FormatTok->isAttribute()) {
2243 nextToken();
2244 continue;
2245 }
2246 switch (FormatTok->Tok.getKind()) {
2247 case tok::l_brace:
2248 break;
2249 case tok::l_paren:
2250 parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
2251 break;
2252 case tok::l_square:
2253 parseSquare();
2254 break;
2255 case tok::less:
2256 assert(FormatTok->Previous);
2257 if (FormatTok->Previous->is(tok::r_square))
2258 InTemplateParameterList = true;
2259 nextToken();
2260 break;
2261 case tok::kw_auto:
2262 case tok::kw_class:
2263 case tok::kw_struct:
2264 case tok::kw_union:
2265 case tok::kw_template:
2266 case tok::kw_typename:
2267 case tok::amp:
2268 case tok::star:
2269 case tok::kw_const:
2270 case tok::kw_constexpr:
2271 case tok::kw_consteval:
2272 case tok::comma:
2273 case tok::greater:
2274 case tok::identifier:
2275 case tok::numeric_constant:
2276 case tok::coloncolon:
2277 case tok::kw_mutable:
2278 case tok::kw_noexcept:
2279 case tok::kw_static:
2280 nextToken();
2281 break;
2282 // Specialization of a template with an integer parameter can contain
2283 // arithmetic, logical, comparison and ternary operators.
2284 //
2285 // FIXME: This also accepts sequences of operators that are not in the scope
2286 // of a template argument list.
2287 //
2288 // In a C++ lambda a template type can only occur after an arrow. We use
2289 // this as an heuristic to distinguish between Objective-C expressions
2290 // followed by an `a->b` expression, such as:
2291 // ([obj func:arg] + a->b)
2292 // Otherwise the code below would parse as a lambda.
2293 case tok::plus:
2294 case tok::minus:
2295 case tok::exclaim:
2296 case tok::tilde:
2297 case tok::slash:
2298 case tok::percent:
2299 case tok::lessless:
2300 case tok::pipe:
2301 case tok::pipepipe:
2302 case tok::ampamp:
2303 case tok::caret:
2304 case tok::equalequal:
2305 case tok::exclaimequal:
2306 case tok::greaterequal:
2307 case tok::lessequal:
2308 case tok::question:
2309 case tok::colon:
2310 case tok::ellipsis:
2311 case tok::kw_true:
2312 case tok::kw_false:
2313 if (SeenArrow || InTemplateParameterList) {
2314 nextToken();
2315 break;
2316 }
2317 return true;
2318 case tok::arrow:
2319 // This might or might not actually be a lambda arrow (this could be an
2320 // ObjC method invocation followed by a dereferencing arrow). We might
2321 // reset this back to TT_Unknown in TokenAnnotator.
2322 FormatTok->setFinalizedType(TT_TrailingReturnArrow);
2323 SeenArrow = true;
2324 nextToken();
2325 break;
2326 case tok::kw_requires: {
2327 auto *RequiresToken = FormatTok;
2328 nextToken();
2329 parseRequiresClause(RequiresToken);
2330 break;
2331 }
2332 case tok::equal:
2333 if (!InTemplateParameterList)
2334 return true;
2335 nextToken();
2336 break;
2337 default:
2338 return true;
2339 }
2340 }
2341
2342 FormatTok->setFinalizedType(TT_LambdaLBrace);
2343 LSquare.setFinalizedType(TT_LambdaLSquare);
2344
2345 NestedLambdas.push_back(Line->SeenDecltypeAuto);
2346 parseChildBlock();
2347 assert(!NestedLambdas.empty());
2348 NestedLambdas.pop_back();
2349
2350 return true;
2351}
2352
2353bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2354 const FormatToken *Previous = FormatTok->Previous;
2355 const FormatToken *LeftSquare = FormatTok;
2356 nextToken();
2357 if ((Previous && ((Previous->Tok.getIdentifierInfo() &&
2358 !Previous->isOneOf(tok::kw_return, tok::kw_co_await,
2359 tok::kw_co_yield, tok::kw_co_return)) ||
2360 Previous->closesScope())) ||
2361 LeftSquare->isCppStructuredBinding(IsCpp)) {
2362 return false;
2363 }
2364 if (FormatTok->is(tok::l_square) || tok::isLiteral(FormatTok->Tok.getKind()))
2365 return false;
2366 if (FormatTok->is(tok::r_square)) {
2367 const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2368 if (Next->is(tok::greater))
2369 return false;
2370 }
2371 parseSquare(/*LambdaIntroducer=*/true);
2372 return true;
2373}
2374
2375void UnwrappedLineParser::tryToParseJSFunction() {
2376 assert(FormatTok->is(Keywords.kw_function));
2377 if (FormatTok->is(Keywords.kw_async))
2378 nextToken();
2379 // Consume "function".
2380 nextToken();
2381
2382 // Consume * (generator function). Treat it like C++'s overloaded operators.
2383 if (FormatTok->is(tok::star)) {
2384 FormatTok->setFinalizedType(TT_OverloadedOperator);
2385 nextToken();
2386 }
2387
2388 // Consume function name.
2389 if (FormatTok->is(tok::identifier))
2390 nextToken();
2391
2392 if (FormatTok->isNot(tok::l_paren))
2393 return;
2394
2395 // Parse formal parameter list.
2396 parseParens();
2397
2398 if (FormatTok->is(tok::colon)) {
2399 // Parse a type definition.
2400 nextToken();
2401
2402 // Eat the type declaration. For braced inline object types, balance braces,
2403 // otherwise just parse until finding an l_brace for the function body.
2404 if (FormatTok->is(tok::l_brace))
2405 tryToParseBracedList();
2406 else
2407 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2408 nextToken();
2409 }
2410
2411 if (FormatTok->is(tok::semi))
2412 return;
2413
2414 parseChildBlock();
2415}
2416
2417bool UnwrappedLineParser::tryToParseBracedList() {
2418 if (FormatTok->is(BK_Unknown))
2419 calculateBraceTypes();
2420 assert(FormatTok->isNot(BK_Unknown));
2421 if (FormatTok->is(BK_Block))
2422 return false;
2423 nextToken();
2424 parseBracedList();
2425 return true;
2426}
2427
2428bool UnwrappedLineParser::tryToParseChildBlock() {
2429 assert(Style.isJavaScript() || Style.isCSharp());
2430 assert(FormatTok->is(TT_FatArrow));
2431 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2432 // They always start an expression or a child block if followed by a curly
2433 // brace.
2434 nextToken();
2435 if (FormatTok->isNot(tok::l_brace))
2436 return false;
2437 parseChildBlock();
2438 return true;
2439}
2440
2441bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) {
2442 assert(!IsAngleBracket || !IsEnum);
2443 bool HasError = false;
2444
2445 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2446 // replace this by using parseAssignmentExpression() inside.
2447 do {
2448 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2449 tryToParseChildBlock()) {
2450 continue;
2451 }
2452 if (Style.isJavaScript()) {
2453 if (FormatTok->is(Keywords.kw_function)) {
2454 tryToParseJSFunction();
2455 continue;
2456 }
2457 if (FormatTok->is(tok::l_brace)) {
2458 // Could be a method inside of a braced list `{a() { return 1; }}`.
2459 if (tryToParseBracedList())
2460 continue;
2461 parseChildBlock();
2462 }
2463 }
2464 if (FormatTok->is(IsAngleBracket ? tok::greater : tok::r_brace)) {
2465 if (IsEnum) {
2466 FormatTok->setBlockKind(BK_Block);
2468 addUnwrappedLine();
2469 }
2470 nextToken();
2471 return !HasError;
2472 }
2473 switch (FormatTok->Tok.getKind()) {
2474 case tok::l_square:
2475 if (Style.isCSharp())
2476 parseSquare();
2477 else
2478 tryToParseLambda();
2479 break;
2480 case tok::l_paren:
2481 parseParens();
2482 // JavaScript can just have free standing methods and getters/setters in
2483 // object literals. Detect them by a "{" following ")".
2484 if (Style.isJavaScript()) {
2485 if (FormatTok->is(tok::l_brace))
2486 parseChildBlock();
2487 break;
2488 }
2489 break;
2490 case tok::l_brace:
2491 // Assume there are no blocks inside a braced init list apart
2492 // from the ones we explicitly parse out (like lambdas).
2493 FormatTok->setBlockKind(BK_BracedInit);
2494 nextToken();
2495 parseBracedList();
2496 break;
2497 case tok::less:
2498 nextToken();
2499 if (IsAngleBracket)
2500 parseBracedList(/*IsAngleBracket=*/true);
2501 break;
2502 case tok::semi:
2503 // JavaScript (or more precisely TypeScript) can have semicolons in braced
2504 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2505 // used for error recovery if we have otherwise determined that this is
2506 // a braced list.
2507 if (Style.isJavaScript()) {
2508 nextToken();
2509 break;
2510 }
2511 HasError = true;
2512 if (!IsEnum)
2513 return false;
2514 nextToken();
2515 break;
2516 case tok::comma:
2517 nextToken();
2518 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2519 addUnwrappedLine();
2520 break;
2521 default:
2522 nextToken();
2523 break;
2524 }
2525 } while (!eof());
2526 return false;
2527}
2528
2529/// \brief Parses a pair of parentheses (and everything between them).
2530/// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2531/// double ampersands. This applies for all nested scopes as well.
2532///
2533/// Returns whether there is a `=` token between the parentheses.
2534bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2535 assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2536 auto *LeftParen = FormatTok;
2537 bool SeenEqual = false;
2538 bool MightBeFoldExpr = false;
2539 const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace);
2540 nextToken();
2541 do {
2542 switch (FormatTok->Tok.getKind()) {
2543 case tok::l_paren:
2544 if (parseParens(AmpAmpTokenType))
2545 SeenEqual = true;
2546 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2547 parseChildBlock();
2548 break;
2549 case tok::r_paren: {
2550 const auto *Prev = LeftParen->Previous;
2551 if (!MightBeStmtExpr && !MightBeFoldExpr && !Line->InMacroBody &&
2553 const auto *Next = Tokens->peekNextToken();
2554 const bool DoubleParens =
2555 Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren);
2556 const auto *PrevPrev = Prev ? Prev->getPreviousNonComment() : nullptr;
2557 const bool Blacklisted =
2558 PrevPrev &&
2559 (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) ||
2560 (SeenEqual &&
2561 (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) ||
2562 PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if))));
2563 const bool ReturnParens =
2565 ((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
2566 (!NestedLambdas.empty() && !NestedLambdas.back())) &&
2567 Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next &&
2568 Next->is(tok::semi);
2569 if ((DoubleParens && !Blacklisted) || ReturnParens) {
2570 LeftParen->Optional = true;
2571 FormatTok->Optional = true;
2572 }
2573 }
2574 if (Prev && Prev->is(TT_TypenameMacro)) {
2575 LeftParen->setFinalizedType(TT_TypeDeclarationParen);
2576 FormatTok->setFinalizedType(TT_TypeDeclarationParen);
2577 }
2578 nextToken();
2579 return SeenEqual;
2580 }
2581 case tok::r_brace:
2582 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2583 return SeenEqual;
2584 case tok::l_square:
2585 tryToParseLambda();
2586 break;
2587 case tok::l_brace:
2588 if (!tryToParseBracedList())
2589 parseChildBlock();
2590 break;
2591 case tok::at:
2592 nextToken();
2593 if (FormatTok->is(tok::l_brace)) {
2594 nextToken();
2595 parseBracedList();
2596 }
2597 break;
2598 case tok::ellipsis:
2599 MightBeFoldExpr = true;
2600 nextToken();
2601 break;
2602 case tok::equal:
2603 SeenEqual = true;
2604 if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2605 tryToParseChildBlock();
2606 else
2607 nextToken();
2608 break;
2609 case tok::kw_class:
2610 if (Style.isJavaScript())
2611 parseRecord(/*ParseAsExpr=*/true);
2612 else
2613 nextToken();
2614 break;
2615 case tok::identifier:
2616 if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function)))
2617 tryToParseJSFunction();
2618 else
2619 nextToken();
2620 break;
2621 case tok::kw_switch:
2622 parseSwitch(/*IsExpr=*/true);
2623 break;
2624 case tok::kw_requires: {
2625 auto RequiresToken = FormatTok;
2626 nextToken();
2627 parseRequiresExpression(RequiresToken);
2628 break;
2629 }
2630 case tok::ampamp:
2631 if (AmpAmpTokenType != TT_Unknown)
2632 FormatTok->setFinalizedType(AmpAmpTokenType);
2633 [[fallthrough]];
2634 default:
2635 nextToken();
2636 break;
2637 }
2638 } while (!eof());
2639 return SeenEqual;
2640}
2641
2642void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2643 if (!LambdaIntroducer) {
2644 assert(FormatTok->is(tok::l_square) && "'[' expected.");
2645 if (tryToParseLambda())
2646 return;
2647 }
2648 do {
2649 switch (FormatTok->Tok.getKind()) {
2650 case tok::l_paren:
2651 parseParens();
2652 break;
2653 case tok::r_square:
2654 nextToken();
2655 return;
2656 case tok::r_brace:
2657 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2658 return;
2659 case tok::l_square:
2660 parseSquare();
2661 break;
2662 case tok::l_brace: {
2663 if (!tryToParseBracedList())
2664 parseChildBlock();
2665 break;
2666 }
2667 case tok::at:
2668 nextToken();
2669 if (FormatTok->is(tok::l_brace)) {
2670 nextToken();
2671 parseBracedList();
2672 }
2673 break;
2674 default:
2675 nextToken();
2676 break;
2677 }
2678 } while (!eof());
2679}
2680
2681void UnwrappedLineParser::keepAncestorBraces() {
2682 if (!Style.RemoveBracesLLVM)
2683 return;
2684
2685 const int MaxNestingLevels = 2;
2686 const int Size = NestedTooDeep.size();
2687 if (Size >= MaxNestingLevels)
2688 NestedTooDeep[Size - MaxNestingLevels] = true;
2689 NestedTooDeep.push_back(false);
2690}
2691
2693 for (const auto &Token : llvm::reverse(Line.Tokens))
2694 if (Token.Tok->isNot(tok::comment))
2695 return Token.Tok;
2696
2697 return nullptr;
2698}
2699
2700void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2701 FormatToken *Tok = nullptr;
2702
2703 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2704 PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) {
2706 ? getLastNonComment(*Line)
2707 : Line->Tokens.back().Tok;
2708 assert(Tok);
2709 if (Tok->BraceCount < 0) {
2710 assert(Tok->BraceCount == -1);
2711 Tok = nullptr;
2712 } else {
2713 Tok->BraceCount = -1;
2714 }
2715 }
2716
2717 addUnwrappedLine();
2718 ++Line->Level;
2719 ++Line->UnbracedBodyLevel;
2720 parseStructuralElement();
2721 --Line->UnbracedBodyLevel;
2722
2723 if (Tok) {
2724 assert(!Line->InPPDirective);
2725 Tok = nullptr;
2726 for (const auto &L : llvm::reverse(*CurrentLines)) {
2727 if (!L.InPPDirective && getLastNonComment(L)) {
2728 Tok = L.Tokens.back().Tok;
2729 break;
2730 }
2731 }
2732 assert(Tok);
2733 ++Tok->BraceCount;
2734 }
2735
2736 if (CheckEOF && eof())
2737 addUnwrappedLine();
2738
2739 --Line->Level;
2740}
2741
2742static void markOptionalBraces(FormatToken *LeftBrace) {
2743 if (!LeftBrace)
2744 return;
2745
2746 assert(LeftBrace->is(tok::l_brace));
2747
2748 FormatToken *RightBrace = LeftBrace->MatchingParen;
2749 if (!RightBrace) {
2750 assert(!LeftBrace->Optional);
2751 return;
2752 }
2753
2754 assert(RightBrace->is(tok::r_brace));
2755 assert(RightBrace->MatchingParen == LeftBrace);
2756 assert(LeftBrace->Optional == RightBrace->Optional);
2757
2758 LeftBrace->Optional = true;
2759 RightBrace->Optional = true;
2760}
2761
2762void UnwrappedLineParser::handleAttributes() {
2763 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2764 if (FormatTok->isAttribute())
2765 nextToken();
2766 else if (FormatTok->is(tok::l_square))
2767 handleCppAttributes();
2768}
2769
2770bool UnwrappedLineParser::handleCppAttributes() {
2771 // Handle [[likely]] / [[unlikely]] attributes.
2772 assert(FormatTok->is(tok::l_square));
2773 if (!tryToParseSimpleAttribute())
2774 return false;
2775 parseSquare();
2776 return true;
2777}
2778
2779/// Returns whether \c Tok begins a block.
2780bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2781 // FIXME: rename the function or make
2782 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2783 return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2784 : Tok.is(tok::l_brace);
2785}
2786
2787FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2788 bool KeepBraces,
2789 bool IsVerilogAssert) {
2790 assert((FormatTok->is(tok::kw_if) ||
2791 (Style.isVerilog() &&
2792 FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
2793 Keywords.kw_assume, Keywords.kw_cover))) &&
2794 "'if' expected");
2795 nextToken();
2796
2797 if (IsVerilogAssert) {
2798 // Handle `assert #0` and `assert final`.
2799 if (FormatTok->is(Keywords.kw_verilogHash)) {
2800 nextToken();
2801 if (FormatTok->is(tok::numeric_constant))
2802 nextToken();
2803 } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
2804 Keywords.kw_sequence)) {
2805 nextToken();
2806 }
2807 }
2808
2809 // TableGen's if statement has the form of `if <cond> then { ... }`.
2810 if (Style.isTableGen()) {
2811 while (!eof() && FormatTok->isNot(Keywords.kw_then)) {
2812 // Simply skip until then. This range only contains a value.
2813 nextToken();
2814 }
2815 }
2816
2817 // Handle `if !consteval`.
2818 if (FormatTok->is(tok::exclaim))
2819 nextToken();
2820
2821 bool KeepIfBraces = true;
2822 if (FormatTok->is(tok::kw_consteval)) {
2823 nextToken();
2824 } else {
2825 KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
2826 if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
2827 nextToken();
2828 if (FormatTok->is(tok::l_paren)) {
2829 FormatTok->setFinalizedType(TT_ConditionLParen);
2830 parseParens();
2831 }
2832 }
2833 handleAttributes();
2834 // The then action is optional in Verilog assert statements.
2835 if (IsVerilogAssert && FormatTok->is(tok::semi)) {
2836 nextToken();
2837 addUnwrappedLine();
2838 return nullptr;
2839 }
2840
2841 bool NeedsUnwrappedLine = false;
2842 keepAncestorBraces();
2843
2844 FormatToken *IfLeftBrace = nullptr;
2845 IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2846
2847 if (isBlockBegin(*FormatTok)) {
2848 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2849 IfLeftBrace = FormatTok;
2850 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2851 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2852 /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
2853 setPreviousRBraceType(TT_ControlStatementRBrace);
2854 if (Style.BraceWrapping.BeforeElse)
2855 addUnwrappedLine();
2856 else
2857 NeedsUnwrappedLine = true;
2858 } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
2859 addUnwrappedLine();
2860 } else {
2861 parseUnbracedBody();
2862 }
2863
2864 if (Style.RemoveBracesLLVM) {
2865 assert(!NestedTooDeep.empty());
2866 KeepIfBraces = KeepIfBraces ||
2867 (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2868 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2869 IfBlockKind == IfStmtKind::IfElseIf;
2870 }
2871
2872 bool KeepElseBraces = KeepIfBraces;
2873 FormatToken *ElseLeftBrace = nullptr;
2874 IfStmtKind Kind = IfStmtKind::IfOnly;
2875
2876 if (FormatTok->is(tok::kw_else)) {
2877 if (Style.RemoveBracesLLVM) {
2878 NestedTooDeep.back() = false;
2879 Kind = IfStmtKind::IfElse;
2880 }
2881 nextToken();
2882 handleAttributes();
2883 if (isBlockBegin(*FormatTok)) {
2884 const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
2885 FormatTok->setFinalizedType(TT_ElseLBrace);
2886 ElseLeftBrace = FormatTok;
2887 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2888 IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
2889 FormatToken *IfLBrace =
2890 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2891 /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
2892 setPreviousRBraceType(TT_ElseRBrace);
2893 if (FormatTok->is(tok::kw_else)) {
2894 KeepElseBraces = KeepElseBraces ||
2895 ElseBlockKind == IfStmtKind::IfOnly ||
2896 ElseBlockKind == IfStmtKind::IfElseIf;
2897 } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
2898 KeepElseBraces = true;
2899 assert(ElseLeftBrace->MatchingParen);
2900 markOptionalBraces(ElseLeftBrace);
2901 }
2902 addUnwrappedLine();
2903 } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
2904 const FormatToken *Previous = Tokens->getPreviousToken();
2905 assert(Previous);
2906 const bool IsPrecededByComment = Previous->is(tok::comment);
2907 if (IsPrecededByComment) {
2908 addUnwrappedLine();
2909 ++Line->Level;
2910 }
2911 bool TooDeep = true;
2912 if (Style.RemoveBracesLLVM) {
2913 Kind = IfStmtKind::IfElseIf;
2914 TooDeep = NestedTooDeep.pop_back_val();
2915 }
2916 ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
2917 if (Style.RemoveBracesLLVM)
2918 NestedTooDeep.push_back(TooDeep);
2919 if (IsPrecededByComment)
2920 --Line->Level;
2921 } else {
2922 parseUnbracedBody(/*CheckEOF=*/true);
2923 }
2924 } else {
2925 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
2926 if (NeedsUnwrappedLine)
2927 addUnwrappedLine();
2928 }
2929
2930 if (!Style.RemoveBracesLLVM)
2931 return nullptr;
2932
2933 assert(!NestedTooDeep.empty());
2934 KeepElseBraces = KeepElseBraces ||
2935 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
2936 NestedTooDeep.back();
2937
2938 NestedTooDeep.pop_back();
2939
2940 if (!KeepIfBraces && !KeepElseBraces) {
2941 markOptionalBraces(IfLeftBrace);
2942 markOptionalBraces(ElseLeftBrace);
2943 } else if (IfLeftBrace) {
2944 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
2945 if (IfRightBrace) {
2946 assert(IfRightBrace->MatchingParen == IfLeftBrace);
2947 assert(!IfLeftBrace->Optional);
2948 assert(!IfRightBrace->Optional);
2949 IfLeftBrace->MatchingParen = nullptr;
2950 IfRightBrace->MatchingParen = nullptr;
2951 }
2952 }
2953
2954 if (IfKind)
2955 *IfKind = Kind;
2956
2957 return IfLeftBrace;
2958}
2959
2960void UnwrappedLineParser::parseTryCatch() {
2961 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2962 nextToken();
2963 bool NeedsUnwrappedLine = false;
2964 bool HasCtorInitializer = false;
2965 if (FormatTok->is(tok::colon)) {
2966 auto *Colon = FormatTok;
2967 // We are in a function try block, what comes is an initializer list.
2968 nextToken();
2969 if (FormatTok->is(tok::identifier)) {
2970 HasCtorInitializer = true;
2971 Colon->setFinalizedType(TT_CtorInitializerColon);
2972 }
2973
2974 // In case identifiers were removed by clang-tidy, what might follow is
2975 // multiple commas in sequence - before the first identifier.
2976 while (FormatTok->is(tok::comma))
2977 nextToken();
2978
2979 while (FormatTok->is(tok::identifier)) {
2980 nextToken();
2981 if (FormatTok->is(tok::l_paren)) {
2982 parseParens();
2983 } else if (FormatTok->is(tok::l_brace)) {
2984 nextToken();
2985 parseBracedList();
2986 }
2987
2988 // In case identifiers were removed by clang-tidy, what might follow is
2989 // multiple commas in sequence - after the first identifier.
2990 while (FormatTok->is(tok::comma))
2991 nextToken();
2992 }
2993 }
2994 // Parse try with resource.
2995 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2996 parseParens();
2997
2998 keepAncestorBraces();
2999
3000 if (FormatTok->is(tok::l_brace)) {
3001 if (HasCtorInitializer)
3002 FormatTok->setFinalizedType(TT_FunctionLBrace);
3003 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3004 parseBlock();
3005 if (Style.BraceWrapping.BeforeCatch)
3006 addUnwrappedLine();
3007 else
3008 NeedsUnwrappedLine = true;
3009 } else if (FormatTok->isNot(tok::kw_catch)) {
3010 // The C++ standard requires a compound-statement after a try.
3011 // If there's none, we try to assume there's a structuralElement
3012 // and try to continue.
3013 addUnwrappedLine();
3014 ++Line->Level;
3015 parseStructuralElement();
3016 --Line->Level;
3017 }
3018 while (true) {
3019 if (FormatTok->is(tok::at))
3020 nextToken();
3021 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
3022 tok::kw___finally) ||
3023 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3024 FormatTok->is(Keywords.kw_finally)) ||
3025 (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
3026 FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
3027 break;
3028 }
3029 nextToken();
3030 while (FormatTok->isNot(tok::l_brace)) {
3031 if (FormatTok->is(tok::l_paren)) {
3032 parseParens();
3033 continue;
3034 }
3035 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
3036 if (Style.RemoveBracesLLVM)
3037 NestedTooDeep.pop_back();
3038 return;
3039 }
3040 nextToken();
3041 }
3042 NeedsUnwrappedLine = false;
3043 Line->MustBeDeclaration = false;
3044 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3045 parseBlock();
3046 if (Style.BraceWrapping.BeforeCatch)
3047 addUnwrappedLine();
3048 else
3049 NeedsUnwrappedLine = true;
3050 }
3051
3052 if (Style.RemoveBracesLLVM)
3053 NestedTooDeep.pop_back();
3054
3055 if (NeedsUnwrappedLine)
3056 addUnwrappedLine();
3057}
3058
3059void UnwrappedLineParser::parseNamespace() {
3060 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
3061 "'namespace' expected");
3062
3063 const FormatToken &InitialToken = *FormatTok;
3064 nextToken();
3065 if (InitialToken.is(TT_NamespaceMacro)) {
3066 parseParens();
3067 } else {
3068 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
3069 tok::l_square, tok::period, tok::l_paren) ||
3070 (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
3071 if (FormatTok->is(tok::l_square))
3072 parseSquare();
3073 else if (FormatTok->is(tok::l_paren))
3074 parseParens();
3075 else
3076 nextToken();
3077 }
3078 }
3079 if (FormatTok->is(tok::l_brace)) {
3080 FormatTok->setFinalizedType(TT_NamespaceLBrace);
3081
3082 if (ShouldBreakBeforeBrace(Style, InitialToken))
3083 addUnwrappedLine();
3084
3085 unsigned AddLevels =
3088 DeclarationScopeStack.size() > 1)
3089 ? 1u
3090 : 0u;
3091 bool ManageWhitesmithsBraces =
3092 AddLevels == 0u &&
3094
3095 // If we're in Whitesmiths mode, indent the brace if we're not indenting
3096 // the whole block.
3097 if (ManageWhitesmithsBraces)
3098 ++Line->Level;
3099
3100 // Munch the semicolon after a namespace. This is more common than one would
3101 // think. Putting the semicolon into its own line is very ugly.
3102 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
3103 /*KeepBraces=*/true, /*IfKind=*/nullptr,
3104 ManageWhitesmithsBraces);
3105
3106 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
3107
3108 if (ManageWhitesmithsBraces)
3109 --Line->Level;
3110 }
3111 // FIXME: Add error handling.
3112}
3113
3114void UnwrappedLineParser::parseNew() {
3115 assert(FormatTok->is(tok::kw_new) && "'new' expected");
3116 nextToken();
3117
3118 if (Style.isCSharp()) {
3119 do {
3120 // Handle constructor invocation, e.g. `new(field: value)`.
3121 if (FormatTok->is(tok::l_paren))
3122 parseParens();
3123
3124 // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3125 if (FormatTok->is(tok::l_brace))
3126 parseBracedList();
3127
3128 if (FormatTok->isOneOf(tok::semi, tok::comma))
3129 return;
3130
3131 nextToken();
3132 } while (!eof());
3133 }
3134
3135 if (Style.Language != FormatStyle::LK_Java)
3136 return;
3137
3138 // In Java, we can parse everything up to the parens, which aren't optional.
3139 do {
3140 // There should not be a ;, { or } before the new's open paren.
3141 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
3142 return;
3143
3144 // Consume the parens.
3145 if (FormatTok->is(tok::l_paren)) {
3146 parseParens();
3147
3148 // If there is a class body of an anonymous class, consume that as child.
3149 if (FormatTok->is(tok::l_brace))
3150 parseChildBlock();
3151 return;
3152 }
3153 nextToken();
3154 } while (!eof());
3155}
3156
3157void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3158 keepAncestorBraces();
3159
3160 if (isBlockBegin(*FormatTok)) {
3161 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3162 FormatToken *LeftBrace = FormatTok;
3163 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3164 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3165 /*MunchSemi=*/true, KeepBraces);
3166 setPreviousRBraceType(TT_ControlStatementRBrace);
3167 if (!KeepBraces) {
3168 assert(!NestedTooDeep.empty());
3169 if (!NestedTooDeep.back())
3170 markOptionalBraces(LeftBrace);
3171 }
3172 if (WrapRightBrace)
3173 addUnwrappedLine();
3174 } else {
3175 parseUnbracedBody();
3176 }
3177
3178 if (!KeepBraces)
3179 NestedTooDeep.pop_back();
3180}
3181
3182void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3183 assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3184 (Style.isVerilog() &&
3185 FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3186 Keywords.kw_always_ff, Keywords.kw_always_latch,
3187 Keywords.kw_final, Keywords.kw_initial,
3188 Keywords.kw_foreach, Keywords.kw_forever,
3189 Keywords.kw_repeat))) &&
3190 "'for', 'while' or foreach macro expected");
3191 const bool KeepBraces = !Style.RemoveBracesLLVM ||
3192 !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
3193
3194 nextToken();
3195 // JS' for await ( ...
3196 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
3197 nextToken();
3198 if (IsCpp && FormatTok->is(tok::kw_co_await))
3199 nextToken();
3200 if (HasParens && FormatTok->is(tok::l_paren)) {
3201 // The type is only set for Verilog basically because we were afraid to
3202 // change the existing behavior for loops. See the discussion on D121756 for
3203 // details.
3204 if (Style.isVerilog())
3205 FormatTok->setFinalizedType(TT_ConditionLParen);
3206 parseParens();
3207 }
3208
3209 if (Style.isVerilog()) {
3210 // Event control.
3211 parseVerilogSensitivityList();
3212 } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(tok::semi) &&
3213 Tokens->getPreviousToken()->is(tok::r_paren)) {
3214 nextToken();
3215 addUnwrappedLine();
3216 return;
3217 }
3218
3219 handleAttributes();
3220 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3221}
3222
3223void UnwrappedLineParser::parseDoWhile() {
3224 assert(FormatTok->is(tok::kw_do) && "'do' expected");
3225 nextToken();
3226
3227 parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
3228
3229 // FIXME: Add error handling.
3230 if (FormatTok->isNot(tok::kw_while)) {
3231 addUnwrappedLine();
3232 return;
3233 }
3234
3235 FormatTok->setFinalizedType(TT_DoWhile);
3236
3237 // If in Whitesmiths mode, the line with the while() needs to be indented
3238 // to the same level as the block.
3240 ++Line->Level;
3241
3242 nextToken();
3243 parseStructuralElement();
3244}
3245
3246void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
3247 nextToken();
3248 unsigned OldLineLevel = Line->Level;
3249
3250 if (LeftAlignLabel)
3251 Line->Level = 0;
3252 else if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3253 --Line->Level;
3254
3255 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3256 FormatTok->is(tok::l_brace)) {
3257
3258 CompoundStatementIndenter Indenter(this, Line->Level,
3261 parseBlock();
3262 if (FormatTok->is(tok::kw_break)) {
3265 addUnwrappedLine();
3266 if (!Style.IndentCaseBlocks &&
3268 ++Line->Level;
3269 }
3270 }
3271 parseStructuralElement();
3272 }
3273 addUnwrappedLine();
3274 } else {
3275 if (FormatTok->is(tok::semi))
3276 nextToken();
3277 addUnwrappedLine();
3278 }
3279 Line->Level = OldLineLevel;
3280 if (FormatTok->isNot(tok::l_brace)) {
3281 parseStructuralElement();
3282 addUnwrappedLine();
3283 }
3284}
3285
3286void UnwrappedLineParser::parseCaseLabel() {
3287 assert(FormatTok->is(tok::kw_case) && "'case' expected");
3288 auto *Case = FormatTok;
3289
3290 // FIXME: fix handling of complex expressions here.
3291 do {
3292 nextToken();
3293 if (FormatTok->is(tok::colon)) {
3294 FormatTok->setFinalizedType(TT_CaseLabelColon);
3295 break;
3296 }
3297 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::arrow)) {
3298 FormatTok->setFinalizedType(TT_CaseLabelArrow);
3299 Case->setFinalizedType(TT_SwitchExpressionLabel);
3300 break;
3301 }
3302 } while (!eof());
3303 parseLabel();
3304}
3305
3306void UnwrappedLineParser::parseSwitch(bool IsExpr) {
3307 assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3308 nextToken();
3309 if (FormatTok->is(tok::l_paren))
3310 parseParens();
3311
3312 keepAncestorBraces();
3313
3314 if (FormatTok->is(tok::l_brace)) {
3315 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3316 FormatTok->setFinalizedType(IsExpr ? TT_SwitchExpressionLBrace
3317 : TT_ControlStatementLBrace);
3318 if (IsExpr)
3319 parseChildBlock();
3320 else
3321 parseBlock();
3322 setPreviousRBraceType(TT_ControlStatementRBrace);
3323 if (!IsExpr)
3324 addUnwrappedLine();
3325 } else {
3326 addUnwrappedLine();
3327 ++Line->Level;
3328 parseStructuralElement();
3329 --Line->Level;
3330 }
3331
3332 if (Style.RemoveBracesLLVM)
3333 NestedTooDeep.pop_back();
3334}
3335
3336// Operators that can follow a C variable.
3338 switch (Kind) {
3339 case tok::ampamp:
3340 case tok::ampequal:
3341 case tok::arrow:
3342 case tok::caret:
3343 case tok::caretequal:
3344 case tok::comma:
3345 case tok::ellipsis:
3346 case tok::equal:
3347 case tok::equalequal:
3348 case tok::exclaim:
3349 case tok::exclaimequal:
3350 case tok::greater:
3351 case tok::greaterequal:
3352 case tok::greatergreater:
3353 case tok::greatergreaterequal:
3354 case tok::l_paren:
3355 case tok::l_square:
3356 case tok::less:
3357 case tok::lessequal:
3358 case tok::lessless:
3359 case tok::lesslessequal:
3360 case tok::minus:
3361 case tok::minusequal:
3362 case tok::minusminus:
3363 case tok::percent:
3364 case tok::percentequal:
3365 case tok::period:
3366 case tok::pipe:
3367 case tok::pipeequal:
3368 case tok::pipepipe:
3369 case tok::plus:
3370 case tok::plusequal:
3371 case tok::plusplus:
3372 case tok::question:
3373 case tok::r_brace:
3374 case tok::r_paren:
3375 case tok::r_square:
3376 case tok::semi:
3377 case tok::slash:
3378 case tok::slashequal:
3379 case tok::star:
3380 case tok::starequal:
3381 return true;
3382 default:
3383 return false;
3384 }
3385}
3386
3387void UnwrappedLineParser::parseAccessSpecifier() {
3388 FormatToken *AccessSpecifierCandidate = FormatTok;
3389 nextToken();
3390 // Understand Qt's slots.
3391 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3392 nextToken();
3393 // Otherwise, we don't know what it is, and we'd better keep the next token.
3394 if (FormatTok->is(tok::colon)) {
3395 nextToken();
3396 addUnwrappedLine();
3397 } else if (FormatTok->isNot(tok::coloncolon) &&
3398 !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
3399 // Not a variable name nor namespace name.
3400 addUnwrappedLine();
3401 } else if (AccessSpecifierCandidate) {
3402 // Consider the access specifier to be a C identifier.
3403 AccessSpecifierCandidate->Tok.setKind(tok::identifier);
3404 }
3405}
3406
3407/// \brief Parses a requires, decides if it is a clause or an expression.
3408/// \pre The current token has to be the requires keyword.
3409/// \returns true if it parsed a clause.
3410bool UnwrappedLineParser::parseRequires() {
3411 assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3412 auto RequiresToken = FormatTok;
3413
3414 // We try to guess if it is a requires clause, or a requires expression. For
3415 // that we first consume the keyword and check the next token.
3416 nextToken();
3417
3418 switch (FormatTok->Tok.getKind()) {
3419 case tok::l_brace:
3420 // This can only be an expression, never a clause.
3421 parseRequiresExpression(RequiresToken);
3422 return false;
3423 case tok::l_paren:
3424 // Clauses and expression can start with a paren, it's unclear what we have.
3425 break;
3426 default:
3427 // All other tokens can only be a clause.
3428 parseRequiresClause(RequiresToken);
3429 return true;
3430 }
3431
3432 // Looking forward we would have to decide if there are function declaration
3433 // like arguments to the requires expression:
3434 // requires (T t) {
3435 // Or there is a constraint expression for the requires clause:
3436 // requires (C<T> && ...
3437
3438 // But first let's look behind.
3439 auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3440
3441 if (!PreviousNonComment ||
3442 PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
3443 // If there is no token, or an expression left brace, we are a requires
3444 // clause within a requires expression.
3445 parseRequiresClause(RequiresToken);
3446 return true;
3447 }
3448
3449 switch (PreviousNonComment->Tok.getKind()) {
3450 case tok::greater:
3451 case tok::r_paren:
3452 case tok::kw_noexcept:
3453 case tok::kw_const:
3454 // This is a requires clause.
3455 parseRequiresClause(RequiresToken);
3456 return true;
3457 case tok::amp:
3458 case tok::ampamp: {
3459 // This can be either:
3460 // if (... && requires (T t) ...)
3461 // Or
3462 // void member(...) && requires (C<T> ...
3463 // We check the one token before that for a const:
3464 // void member(...) const && requires (C<T> ...
3465 auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3466 if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3467 parseRequiresClause(RequiresToken);
3468 return true;
3469 }
3470 break;
3471 }
3472 default:
3473 if (PreviousNonComment->isTypeOrIdentifier(LangOpts)) {
3474 // This is a requires clause.
3475 parseRequiresClause(RequiresToken);
3476 return true;
3477 }
3478 // It's an expression.
3479 parseRequiresExpression(RequiresToken);
3480 return false;
3481 }
3482
3483 // Now we look forward and try to check if the paren content is a parameter
3484 // list. The parameters can be cv-qualified and contain references or
3485 // pointers.
3486 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3487 // of stuff: typename, const, *, &, &&, ::, identifiers.
3488
3489 unsigned StoredPosition = Tokens->getPosition();
3490 FormatToken *NextToken = Tokens->getNextToken();
3491 int Lookahead = 0;
3492 auto PeekNext = [&Lookahead, &NextToken, this] {
3493 ++Lookahead;
3494 NextToken = Tokens->getNextToken();
3495 };
3496
3497 bool FoundType = false;
3498 bool LastWasColonColon = false;
3499 int OpenAngles = 0;
3500
3501 for (; Lookahead < 50; PeekNext()) {
3502 switch (NextToken->Tok.getKind()) {
3503 case tok::kw_volatile:
3504 case tok::kw_const:
3505 case tok::comma:
3506 if (OpenAngles == 0) {
3507 FormatTok = Tokens->setPosition(StoredPosition);
3508 parseRequiresExpression(RequiresToken);
3509 return false;
3510 }
3511 break;
3512 case tok::eof:
3513 // Break out of the loop.
3514 Lookahead = 50;
3515 break;
3516 case tok::coloncolon:
3517 LastWasColonColon = true;
3518 break;
3519 case tok::kw_decltype:
3520 case tok::identifier:
3521 if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3522 FormatTok = Tokens->setPosition(StoredPosition);
3523 parseRequiresExpression(RequiresToken);
3524 return false;
3525 }
3526 FoundType = true;
3527 LastWasColonColon = false;
3528 break;
3529 case tok::less:
3530 ++OpenAngles;
3531 break;
3532 case tok::greater:
3533 --OpenAngles;
3534 break;
3535 default:
3536 if (NextToken->isTypeName(LangOpts)) {
3537 FormatTok = Tokens->setPosition(StoredPosition);
3538 parseRequiresExpression(RequiresToken);
3539 return false;
3540 }
3541 break;
3542 }
3543 }
3544 // This seems to be a complicated expression, just assume it's a clause.
3545 FormatTok = Tokens->setPosition(StoredPosition);
3546 parseRequiresClause(RequiresToken);
3547 return true;
3548}
3549
3550/// \brief Parses a requires clause.
3551/// \param RequiresToken The requires keyword token, which starts this clause.
3552/// \pre We need to be on the next token after the requires keyword.
3553/// \sa parseRequiresExpression
3554///
3555/// Returns if it either has finished parsing the clause, or it detects, that
3556/// the clause is incorrect.
3557void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3558 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3559 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3560
3561 // If there is no previous token, we are within a requires expression,
3562 // otherwise we will always have the template or function declaration in front
3563 // of it.
3564 bool InRequiresExpression =
3565 !RequiresToken->Previous ||
3566 RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3567
3568 RequiresToken->setFinalizedType(InRequiresExpression
3569 ? TT_RequiresClauseInARequiresExpression
3570 : TT_RequiresClause);
3571
3572 // NOTE: parseConstraintExpression is only ever called from this function.
3573 // It could be inlined into here.
3574 parseConstraintExpression();
3575
3576 if (!InRequiresExpression)
3577 FormatTok->Previous->ClosesRequiresClause = true;
3578}
3579
3580/// \brief Parses a requires expression.
3581/// \param RequiresToken The requires keyword token, which starts this clause.
3582/// \pre We need to be on the next token after the requires keyword.
3583/// \sa parseRequiresClause
3584///
3585/// Returns if it either has finished parsing the expression, or it detects,
3586/// that the expression is incorrect.
3587void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3588 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3589 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3590
3591 RequiresToken->setFinalizedType(TT_RequiresExpression);
3592
3593 if (FormatTok->is(tok::l_paren)) {
3594 FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3595 parseParens();
3596 }
3597
3598 if (FormatTok->is(tok::l_brace)) {
3599 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3600 parseChildBlock();
3601 }
3602}
3603
3604/// \brief Parses a constraint expression.
3605///
3606/// This is the body of a requires clause. It returns, when the parsing is
3607/// complete, or the expression is incorrect.
3608void UnwrappedLineParser::parseConstraintExpression() {
3609 // The special handling for lambdas is needed since tryToParseLambda() eats a
3610 // token and if a requires expression is the last part of a requires clause
3611 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3612 // not set on the correct token. Thus we need to be aware if we even expect a
3613 // lambda to be possible.
3614 // template <typename T> requires requires { ... } [[nodiscard]] ...;
3615 bool LambdaNextTimeAllowed = true;
3616
3617 // Within lambda declarations, it is permitted to put a requires clause after
3618 // its template parameter list, which would place the requires clause right
3619 // before the parentheses of the parameters of the lambda declaration. Thus,
3620 // we track if we expect to see grouping parentheses at all.
3621 // Without this check, `requires foo<T> (T t)` in the below example would be
3622 // seen as the whole requires clause, accidentally eating the parameters of
3623 // the lambda.
3624 // [&]<typename T> requires foo<T> (T t) { ... };
3625 bool TopLevelParensAllowed = true;
3626
3627 do {
3628 bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);
3629
3630 switch (FormatTok->Tok.getKind()) {
3631 case tok::kw_requires: {
3632 auto RequiresToken = FormatTok;
3633 nextToken();
3634 parseRequiresExpression(RequiresToken);
3635 break;
3636 }
3637
3638 case tok::l_paren:
3639 if (!TopLevelParensAllowed)
3640 return;
3641 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3642 TopLevelParensAllowed = false;
3643 break;
3644
3645 case tok::l_square:
3646 if (!LambdaThisTimeAllowed || !tryToParseLambda())
3647 return;
3648 break;
3649
3650 case tok::kw_const:
3651 case tok::semi:
3652 case tok::kw_class:
3653 case tok::kw_struct:
3654 case tok::kw_union:
3655 return;
3656
3657 case tok::l_brace:
3658 // Potential function body.
3659 return;
3660
3661 case tok::ampamp:
3662 case tok::pipepipe:
3663 FormatTok->setFinalizedType(TT_BinaryOperator);
3664 nextToken();
3665 LambdaNextTimeAllowed = true;
3666 TopLevelParensAllowed = true;
3667 break;
3668
3669 case tok::comma:
3670 case tok::comment:
3671 LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3672 nextToken();
3673 break;
3674
3675 case tok::kw_sizeof:
3676 case tok::greater:
3677 case tok::greaterequal:
3678 case tok::greatergreater:
3679 case tok::less:
3680 case tok::lessequal:
3681 case tok::lessless:
3682 case tok::equalequal:
3683 case tok::exclaim:
3684 case tok::exclaimequal:
3685 case tok::plus:
3686 case tok::minus:
3687 case tok::star:
3688 case tok::slash:
3689 LambdaNextTimeAllowed = true;
3690 TopLevelParensAllowed = true;
3691 // Just eat them.
3692 nextToken();
3693 break;
3694
3695 case tok::numeric_constant:
3696 case tok::coloncolon:
3697 case tok::kw_true:
3698 case tok::kw_false:
3699 TopLevelParensAllowed = false;
3700 // Just eat them.
3701 nextToken();
3702 break;
3703
3704 case tok::kw_static_cast:
3705 case tok::kw_const_cast:
3706 case tok::kw_reinterpret_cast:
3707 case tok::kw_dynamic_cast:
3708 nextToken();
3709 if (FormatTok->isNot(tok::less))
3710 return;
3711
3712 nextToken();
3713 parseBracedList(/*IsAngleBracket=*/true);
3714 break;
3715
3716 default:
3717 if (!FormatTok->Tok.getIdentifierInfo()) {
3718 // Identifiers are part of the default case, we check for more then
3719 // tok::identifier to handle builtin type traits.
3720 return;
3721 }
3722
3723 // We need to differentiate identifiers for a template deduction guide,
3724 // variables, or function return types (the constraint expression has
3725 // ended before that), and basically all other cases. But it's easier to
3726 // check the other way around.
3727 assert(FormatTok->Previous);
3728 switch (FormatTok->Previous->Tok.getKind()) {
3729 case tok::coloncolon: // Nested identifier.
3730 case tok::ampamp: // Start of a function or variable for the
3731 case tok::pipepipe: // constraint expression. (binary)
3732 case tok::exclaim: // The same as above, but unary.
3733 case tok::kw_requires: // Initial identifier of a requires clause.
3734 case tok::equal: // Initial identifier of a concept declaration.
3735 break;
3736 default:
3737 return;
3738 }
3739
3740 // Read identifier with optional template declaration.
3741 nextToken();
3742 if (FormatTok->is(tok::less)) {
3743 nextToken();
3744 parseBracedList(/*IsAngleBracket=*/true);
3745 }
3746 TopLevelParensAllowed = false;
3747 break;
3748 }
3749 } while (!eof());
3750}
3751
3752bool UnwrappedLineParser::parseEnum() {
3753 const FormatToken &InitialToken = *FormatTok;
3754
3755 // Won't be 'enum' for NS_ENUMs.
3756 if (FormatTok->is(tok::kw_enum))
3757 nextToken();
3758
3759 // In TypeScript, "enum" can also be used as property name, e.g. in interface
3760 // declarations. An "enum" keyword followed by a colon would be a syntax
3761 // error and thus assume it is just an identifier.
3762 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3763 return false;
3764
3765 // In protobuf, "enum" can be used as a field name.
3766 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3767 return false;
3768
3769 if (IsCpp) {
3770 // Eat up enum class ...
3771 if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3772 nextToken();
3773 while (FormatTok->is(tok::l_square))
3774 if (!handleCppAttributes())
3775 return false;
3776 }
3777
3778 while (FormatTok->Tok.getIdentifierInfo() ||
3779 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3780 tok::greater, tok::comma, tok::question,
3781 tok::l_square)) {
3782 if (Style.isVerilog()) {
3783 FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
3784 nextToken();
3785 // In Verilog the base type can have dimensions.
3786 while (FormatTok->is(tok::l_square))
3787 parseSquare();
3788 } else {
3789 nextToken();
3790 }
3791 // We can have macros or attributes in between 'enum' and the enum name.
3792 if (FormatTok->is(tok::l_paren))
3793 parseParens();
3794 if (FormatTok->is(tok::identifier)) {
3795 nextToken();
3796 // If there are two identifiers in a row, this is likely an elaborate
3797 // return type. In Java, this can be "implements", etc.
3798 if (IsCpp && FormatTok->is(tok::identifier))
3799 return false;
3800 }
3801 }
3802
3803 // Just a declaration or something is wrong.
3804 if (FormatTok->isNot(tok::l_brace))
3805 return true;
3806 FormatTok->setFinalizedType(TT_EnumLBrace);
3807 FormatTok->setBlockKind(BK_Block);
3808
3809 if (Style.Language == FormatStyle::LK_Java) {
3810 // Java enums are different.
3811 parseJavaEnumBody();
3812 return true;
3813 }
3814 if (Style.Language == FormatStyle::LK_Proto) {
3815 parseBlock(/*MustBeDeclaration=*/true);
3816 return true;
3817 }
3818
3819 if (!Style.AllowShortEnumsOnASingleLine &&
3820 ShouldBreakBeforeBrace(Style, InitialToken)) {
3821 addUnwrappedLine();
3822 }
3823 // Parse enum body.
3824 nextToken();
3825 if (!Style.AllowShortEnumsOnASingleLine) {
3826 addUnwrappedLine();
3827 Line->Level += 1;
3828 }
3829 bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true);
3831 Line->Level -= 1;
3832 if (HasError) {
3833 if (FormatTok->is(tok::semi))
3834 nextToken();
3835 addUnwrappedLine();
3836 }
3837 setPreviousRBraceType(TT_EnumRBrace);
3838 return true;
3839
3840 // There is no addUnwrappedLine() here so that we fall through to parsing a
3841 // structural element afterwards. Thus, in "enum A {} n, m;",
3842 // "} n, m;" will end up in one unwrapped line.
3843}
3844
3845bool UnwrappedLineParser::parseStructLike() {
3846 // parseRecord falls through and does not yet add an unwrapped line as a
3847 // record declaration or definition can start a structural element.
3848 parseRecord();
3849 // This does not apply to Java, JavaScript and C#.
3850 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3851 Style.isCSharp()) {
3852 if (FormatTok->is(tok::semi))
3853 nextToken();
3854 addUnwrappedLine();
3855 return true;
3856 }
3857 return false;
3858}
3859
3860namespace {
3861// A class used to set and restore the Token position when peeking
3862// ahead in the token source.
3863class ScopedTokenPosition {
3864 unsigned StoredPosition;
3865 FormatTokenSource *Tokens;
3866
3867public:
3868 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3869 assert(Tokens && "Tokens expected to not be null");
3870 StoredPosition = Tokens->getPosition();
3871 }
3872
3873 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3874};
3875} // namespace
3876
3877// Look to see if we have [[ by looking ahead, if
3878// its not then rewind to the original position.
3879bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3880 ScopedTokenPosition AutoPosition(Tokens);
3881 FormatToken *Tok = Tokens->getNextToken();
3882 // We already read the first [ check for the second.
3883 if (Tok->isNot(tok::l_square))
3884 return false;
3885 // Double check that the attribute is just something
3886 // fairly simple.
3887 while (Tok->isNot(tok::eof)) {
3888 if (Tok->is(tok::r_square))
3889 break;
3890 Tok = Tokens->getNextToken();
3891 }
3892 if (Tok->is(tok::eof))
3893 return false;
3894 Tok = Tokens->getNextToken();
3895 if (Tok->isNot(tok::r_square))
3896 return false;
3897 Tok = Tokens->getNextToken();
3898 if (Tok->is(tok::semi))
3899 return false;
3900 return true;
3901}
3902
3903void UnwrappedLineParser::parseJavaEnumBody() {
3904 assert(FormatTok->is(tok::l_brace));
3905 const FormatToken *OpeningBrace = FormatTok;
3906
3907 // Determine whether the enum is simple, i.e. does not have a semicolon or
3908 // constants with class bodies. Simple enums can be formatted like braced
3909 // lists, contracted to a single line, etc.
3910 unsigned StoredPosition = Tokens->getPosition();
3911 bool IsSimple = true;
3912 FormatToken *Tok = Tokens->getNextToken();
3913 while (Tok->isNot(tok::eof)) {
3914 if (Tok->is(tok::r_brace))
3915 break;
3916 if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3917 IsSimple = false;
3918 break;
3919 }
3920 // FIXME: This will also mark enums with braces in the arguments to enum
3921 // constants as "not simple". This is probably fine in practice, though.
3922 Tok = Tokens->getNextToken();
3923 }
3924 FormatTok = Tokens->setPosition(StoredPosition);
3925
3926 if (IsSimple) {
3927 nextToken();
3928 parseBracedList();
3929 addUnwrappedLine();
3930 return;
3931 }
3932
3933 // Parse the body of a more complex enum.
3934 // First add a line for everything up to the "{".
3935 nextToken();
3936 addUnwrappedLine();
3937 ++Line->Level;
3938
3939 // Parse the enum constants.
3940 while (!eof()) {
3941 if (FormatTok->is(tok::l_brace)) {
3942 // Parse the constant's class body.
3943 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3944 /*MunchSemi=*/false);
3945 } else if (FormatTok->is(tok::l_paren)) {
3946 parseParens();
3947 } else if (FormatTok->is(tok::comma)) {
3948 nextToken();
3949 addUnwrappedLine();
3950 } else if (FormatTok->is(tok::semi)) {
3951 nextToken();
3952 addUnwrappedLine();
3953 break;
3954 } else if (FormatTok->is(tok::r_brace)) {
3955 addUnwrappedLine();
3956 break;
3957 } else {
3958 nextToken();
3959 }
3960 }
3961
3962 // Parse the class body after the enum's ";" if any.
3963 parseLevel(OpeningBrace);
3964 nextToken();
3965 --Line->Level;
3966 addUnwrappedLine();
3967}
3968
3969void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
3970 const FormatToken &InitialToken = *FormatTok;
3971 nextToken();
3972
3973 const FormatToken *ClassName = nullptr;
3974 bool IsDerived = false;
3975 auto IsNonMacroIdentifier = [](const FormatToken *Tok) {
3976 return Tok->is(tok::identifier) && Tok->TokenText != Tok->TokenText.upper();
3977 };
3978 // The actual identifier can be a nested name specifier, and in macros
3979 // it is often token-pasted.
3980 // An [[attribute]] can be before the identifier.
3981 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
3982 tok::kw_alignas, tok::l_square) ||
3983 FormatTok->isAttribute() ||
3984 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3985 FormatTok->isOneOf(tok::period, tok::comma))) {
3986 if (Style.isJavaScript() &&
3987 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
3988 // JavaScript/TypeScript supports inline object types in
3989 // extends/implements positions:
3990 // class Foo implements {bar: number} { }
3991 nextToken();
3992 if (FormatTok->is(tok::l_brace)) {
3993 tryToParseBracedList();
3994 continue;
3995 }
3996 }
3997 if (FormatTok->is(tok::l_square) && handleCppAttributes())
3998 continue;
3999 const auto *Previous = FormatTok;
4000 nextToken();
4001 switch (FormatTok->Tok.getKind()) {
4002 case tok::l_paren:
4003 // We can have macros in between 'class' and the class name.
4004 if (!IsNonMacroIdentifier(Previous) ||
4005 // e.g. `struct macro(a) S { int i; };`
4006 Previous->Previous == &InitialToken) {
4007 parseParens();
4008 }
4009 break;
4010 case tok::coloncolon:
4011 break;
4012 default:
4013 if (!ClassName && Previous->is(tok::identifier) &&
4014 Previous->isNot(TT_AttributeMacro)) {
4015 ClassName = Previous;
4016 }
4017 }
4018 }
4019
4020 auto IsListInitialization = [&] {
4021 if (!ClassName || IsDerived)
4022 return false;
4023 assert(FormatTok->is(tok::l_brace));
4024 const auto *Prev = FormatTok->getPreviousNonComment();
4025 assert(Prev);
4026 return Prev != ClassName && Prev->is(tok::identifier) &&
4027 Prev->isNot(Keywords.kw_final) && tryToParseBracedList();
4028 };
4029
4030 if (FormatTok->isOneOf(tok::colon, tok::less)) {
4031 int AngleNestingLevel = 0;
4032 do {
4033 if (FormatTok->is(tok::less))
4034 ++AngleNestingLevel;
4035 else if (FormatTok->is(tok::greater))
4036 --AngleNestingLevel;
4037
4038 if (AngleNestingLevel == 0) {
4039 if (FormatTok->is(tok::colon)) {
4040 IsDerived = true;
4041 } else if (FormatTok->is(tok::identifier) &&
4042 FormatTok->Previous->is(tok::coloncolon)) {
4043 ClassName = FormatTok;
4044 } else if (FormatTok->is(tok::l_paren) &&
4045 IsNonMacroIdentifier(FormatTok->Previous)) {
4046 break;
4047 }
4048 }
4049 if (FormatTok->is(tok::l_brace)) {
4050 if (AngleNestingLevel == 0 && IsListInitialization())
4051 return;
4052 calculateBraceTypes(/*ExpectClassBody=*/true);
4053 if (!tryToParseBracedList())
4054 break;
4055 }
4056 if (FormatTok->is(tok::l_square)) {
4057 FormatToken *Previous = FormatTok->Previous;
4058 if (!Previous || (Previous->isNot(tok::r_paren) &&
4059 !Previous->isTypeOrIdentifier(LangOpts))) {
4060 // Don't try parsing a lambda if we had a closing parenthesis before,
4061 // it was probably a pointer to an array: int (*)[].
4062 if (!tryToParseLambda())
4063 continue;
4064 } else {
4065 parseSquare();
4066 continue;
4067 }
4068 }
4069 if (FormatTok->is(tok::semi))
4070 return;
4071 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
4072 addUnwrappedLine();
4073 nextToken();
4074 parseCSharpGenericTypeConstraint();
4075 break;
4076 }
4077 nextToken();
4078 } while (!eof());
4079 }
4080
4081 auto GetBraceTypes =
4082 [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> {
4083 switch (RecordTok.Tok.getKind()) {
4084 case tok::kw_class:
4085 return {TT_ClassLBrace, TT_ClassRBrace};
4086 case tok::kw_struct:
4087 return {TT_StructLBrace, TT_StructRBrace};
4088 case tok::kw_union:
4089 return {TT_UnionLBrace, TT_UnionRBrace};
4090 default:
4091 // Useful for e.g. interface.
4092 return {TT_RecordLBrace, TT_RecordRBrace};
4093 }
4094 };
4095 if (FormatTok->is(tok::l_brace)) {
4096 if (IsListInitialization())
4097 return;
4098 auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken);
4099 FormatTok->setFinalizedType(OpenBraceType);
4100 if (ParseAsExpr) {
4101 parseChildBlock();
4102 } else {
4103 if (ShouldBreakBeforeBrace(Style, InitialToken))
4104 addUnwrappedLine();
4105
4106 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
4107 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
4108 }
4109 setPreviousRBraceType(ClosingBraceType);
4110 }
4111 // There is no addUnwrappedLine() here so that we fall through to parsing a
4112 // structural element afterwards. Thus, in "class A {} n, m;",
4113 // "} n, m;" will end up in one unwrapped line.
4114}
4115
4116void UnwrappedLineParser::parseObjCMethod() {
4117 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
4118 "'(' or identifier expected.");
4119 do {
4120 if (FormatTok->is(tok::semi)) {
4121 nextToken();
4122 addUnwrappedLine();
4123 return;
4124 } else if (FormatTok->is(tok::l_brace)) {
4125 if (Style.BraceWrapping.AfterFunction)
4126 addUnwrappedLine();
4127 parseBlock();
4128 addUnwrappedLine();
4129 return;
4130 } else {
4131 nextToken();
4132 }
4133 } while (!eof());
4134}
4135
4136void UnwrappedLineParser::parseObjCProtocolList() {
4137 assert(FormatTok->is(tok::less) && "'<' expected.");
4138 do {
4139 nextToken();
4140 // Early exit in case someone forgot a close angle.
4141 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4142 FormatTok->isObjCAtKeyword(tok::objc_end)) {
4143 return;
4144 }
4145 } while (!eof() && FormatTok->isNot(tok::greater));
4146 nextToken(); // Skip '>'.
4147}
4148
4149void UnwrappedLineParser::parseObjCUntilAtEnd() {
4150 do {
4151 if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
4152 nextToken();
4153 addUnwrappedLine();
4154 break;
4155 }
4156 if (FormatTok->is(tok::l_brace)) {
4157 parseBlock();
4158 // In ObjC interfaces, nothing should be following the "}".
4159 addUnwrappedLine();
4160 } else if (FormatTok->is(tok::r_brace)) {
4161 // Ignore stray "}". parseStructuralElement doesn't consume them.
4162 nextToken();
4163 addUnwrappedLine();
4164 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
4165 nextToken();
4166 parseObjCMethod();
4167 } else {
4168 parseStructuralElement();
4169 }
4170 } while (!eof());
4171}
4172
4173void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4174 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
4175 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
4176 nextToken();
4177 nextToken(); // interface name
4178
4179 // @interface can be followed by a lightweight generic
4180 // specialization list, then either a base class or a category.
4181 if (FormatTok->is(tok::less))
4182 parseObjCLightweightGenerics();
4183 if (FormatTok->is(tok::colon)) {
4184 nextToken();
4185 nextToken(); // base class name
4186 // The base class can also have lightweight generics applied to it.
4187 if (FormatTok->is(tok::less))
4188 parseObjCLightweightGenerics();
4189 } else if (FormatTok->is(tok::l_paren)) {
4190 // Skip category, if present.
4191 parseParens();
4192 }
4193
4194 if (FormatTok->is(tok::less))
4195 parseObjCProtocolList();
4196
4197 if (FormatTok->is(tok::l_brace)) {
4199 addUnwrappedLine();
4200 parseBlock(/*MustBeDeclaration=*/true);
4201 }
4202
4203 // With instance variables, this puts '}' on its own line. Without instance
4204 // variables, this ends the @interface line.
4205 addUnwrappedLine();
4206
4207 parseObjCUntilAtEnd();
4208}
4209
4210void UnwrappedLineParser::parseObjCLightweightGenerics() {
4211 assert(FormatTok->is(tok::less));
4212 // Unlike protocol lists, generic parameterizations support
4213 // nested angles:
4214 //
4215 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4216 // NSObject <NSCopying, NSSecureCoding>
4217 //
4218 // so we need to count how many open angles we have left.
4219 unsigned NumOpenAngles = 1;
4220 do {
4221 nextToken();
4222 // Early exit in case someone forgot a close angle.
4223 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4224 FormatTok->isObjCAtKeyword(tok::objc_end)) {
4225 break;
4226 }
4227 if (FormatTok->is(tok::less)) {
4228 ++NumOpenAngles;
4229 } else if (FormatTok->is(tok::greater)) {
4230 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4231 --NumOpenAngles;
4232 }
4233 } while (!eof() && NumOpenAngles != 0);
4234 nextToken(); // Skip '>'.
4235}
4236
4237// Returns true for the declaration/definition form of @protocol,
4238// false for the expression form.
4239bool UnwrappedLineParser::parseObjCProtocol() {
4240 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
4241 nextToken();
4242
4243 if (FormatTok->is(tok::l_paren)) {
4244 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4245 return false;
4246 }
4247
4248 // The definition/declaration form,
4249 // @protocol Foo
4250 // - (int)someMethod;
4251 // @end
4252
4253 nextToken(); // protocol name
4254
4255 if (FormatTok->is(tok::less))
4256 parseObjCProtocolList();
4257
4258 // Check for protocol declaration.
4259 if (FormatTok->is(tok::semi)) {
4260 nextToken();
4261 addUnwrappedLine();
4262 return true;
4263 }
4264
4265 addUnwrappedLine();
4266 parseObjCUntilAtEnd();
4267 return true;
4268}
4269
4270void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4271 bool IsImport = FormatTok->is(Keywords.kw_import);
4272 assert(IsImport || FormatTok->is(tok::kw_export));
4273 nextToken();
4274
4275 // Consume the "default" in "export default class/function".
4276 if (FormatTok->is(tok::kw_default))
4277 nextToken();
4278
4279 // Consume "async function", "function" and "default function", so that these
4280 // get parsed as free-standing JS functions, i.e. do not require a trailing
4281 // semicolon.
4282 if (FormatTok->is(Keywords.kw_async))
4283 nextToken();
4284 if (FormatTok->is(Keywords.kw_function)) {
4285 nextToken();
4286 return;
4287 }
4288
4289 // For imports, `export *`, `export {...}`, consume the rest of the line up
4290 // to the terminating `;`. For everything else, just return and continue
4291 // parsing the structural element, i.e. the declaration or expression for
4292 // `export default`.
4293 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
4294 !FormatTok->isStringLiteral() &&
4295 !(FormatTok->is(Keywords.kw_type) &&
4296 Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) {
4297 return;
4298 }
4299
4300 while (!eof()) {
4301 if (FormatTok->is(tok::semi))
4302 return;
4303 if (Line->Tokens.empty()) {
4304 // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4305 // import statement should terminate.
4306 return;
4307 }
4308 if (FormatTok->is(tok::l_brace)) {
4309 FormatTok->setBlockKind(BK_Block);
4310 nextToken();
4311 parseBracedList();
4312 } else {
4313 nextToken();
4314 }
4315 }
4316}
4317
4318void UnwrappedLineParser::parseStatementMacro() {
4319 nextToken();
4320 if (FormatTok->is(tok::l_paren))
4321 parseParens();
4322 if (FormatTok->is(tok::semi))
4323 nextToken();
4324 addUnwrappedLine();
4325}
4326
4327void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4328 // consume things like a::`b.c[d:e] or a::*
4329 while (true) {
4330 if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar,
4331 tok::coloncolon, tok::hash) ||
4332 Keywords.isVerilogIdentifier(*FormatTok)) {
4333 nextToken();
4334 } else if (FormatTok->is(tok::l_square)) {
4335 parseSquare();
4336 } else {
4337 break;
4338 }
4339 }
4340}
4341
4342void UnwrappedLineParser::parseVerilogSensitivityList() {
4343 if (FormatTok->isNot(tok::at))
4344 return;
4345 nextToken();
4346 // A block event expression has 2 at signs.
4347 if (FormatTok->is(tok::at))
4348 nextToken();
4349 switch (FormatTok->Tok.getKind()) {
4350 case tok::star:
4351 nextToken();
4352 break;
4353 case tok::l_paren:
4354 parseParens();
4355 break;
4356 default:
4357 parseVerilogHierarchyIdentifier();
4358 break;
4359 }
4360}
4361
4362unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4363 unsigned AddLevels = 0;
4364
4365 if (FormatTok->is(Keywords.kw_clocking)) {
4366 nextToken();
4367 if (Keywords.isVerilogIdentifier(*FormatTok))
4368 nextToken();
4369 parseVerilogSensitivityList();
4370 if (FormatTok->is(tok::semi))
4371 nextToken();
4372 } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex,
4373 Keywords.kw_casez, Keywords.kw_randcase,
4374 Keywords.kw_randsequence)) {
4375 if (Style.IndentCaseLabels)
4376 AddLevels++;
4377 nextToken();
4378 if (FormatTok->is(tok::l_paren)) {
4379 FormatTok->setFinalizedType(TT_ConditionLParen);
4380 parseParens();
4381 }
4382 if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches))
4383 nextToken();
4384 // The case header has no semicolon.
4385 } else {
4386 // "module" etc.
4387 nextToken();
4388 // all the words like the name of the module and specifiers like
4389 // "automatic" and the width of function return type
4390 while (true) {
4391 if (FormatTok->is(tok::l_square)) {
4392 auto Prev = FormatTok->getPreviousNonComment();
4393 if (Prev && Keywords.isVerilogIdentifier(*Prev))
4394 Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
4395 parseSquare();
4396 } else if (Keywords.isVerilogIdentifier(*FormatTok) ||
4397 FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) {
4398 nextToken();
4399 } else {
4400 break;
4401 }
4402 }
4403
4404 auto NewLine = [this]() {
4405 addUnwrappedLine();
4406 Line->IsContinuation = true;
4407 };
4408
4409 // package imports
4410 while (FormatTok->is(Keywords.kw_import)) {
4411 NewLine();
4412 nextToken();
4413 parseVerilogHierarchyIdentifier();
4414 if (FormatTok->is(tok::semi))
4415 nextToken();
4416 }
4417
4418 // parameters and ports
4419 if (FormatTok->is(Keywords.kw_verilogHash)) {
4420 NewLine();
4421 nextToken();
4422 if (FormatTok->is(tok::l_paren)) {
4423 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4424 parseParens();
4425 }
4426 }
4427 if (FormatTok->is(tok::l_paren)) {
4428 NewLine();
4429 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4430 parseParens();
4431 }
4432
4433 // extends and implements
4434 if (FormatTok->is(Keywords.kw_extends)) {
4435 NewLine();
4436 nextToken();
4437 parseVerilogHierarchyIdentifier();
4438 if (FormatTok->is(tok::l_paren))
4439 parseParens();
4440 }
4441 if (FormatTok->is(Keywords.kw_implements)) {
4442 NewLine();
4443 do {
4444 nextToken();
4445 parseVerilogHierarchyIdentifier();
4446 } while (FormatTok->is(tok::comma));
4447 }
4448
4449 // Coverage event for cover groups.
4450 if (FormatTok->is(tok::at)) {
4451 NewLine();
4452 parseVerilogSensitivityList();
4453 }
4454
4455 if (FormatTok->is(tok::semi))
4456 nextToken(/*LevelDifference=*/1);
4457 addUnwrappedLine();
4458 }
4459
4460 return AddLevels;
4461}
4462
4463void UnwrappedLineParser::parseVerilogTable() {
4464 assert(FormatTok->is(Keywords.kw_table));
4465 nextToken(/*LevelDifference=*/1);
4466 addUnwrappedLine();
4467
4468 auto InitialLevel = Line->Level++;
4469 while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) {
4470 FormatToken *Tok = FormatTok;
4471 nextToken();
4472 if (Tok->is(tok::semi))
4473 addUnwrappedLine();
4474 else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus))
4475 Tok->setFinalizedType(TT_VerilogTableItem);
4476 }
4477 Line->Level = InitialLevel;
4478 nextToken(/*LevelDifference=*/-1);
4479 addUnwrappedLine();
4480}
4481
4482void UnwrappedLineParser::parseVerilogCaseLabel() {
4483 // The label will get unindented in AnnotatingParser. If there are no leading
4484 // spaces, indent the rest here so that things inside the block will be
4485 // indented relative to things outside. We don't use parseLabel because we
4486 // don't know whether this colon is a label or a ternary expression at this
4487 // point.
4488 auto OrigLevel = Line->Level;
4489 auto FirstLine = CurrentLines->size();
4490 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4491 ++Line->Level;
4492 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok))
4493 --Line->Level;
4494 parseStructuralElement();
4495 // Restore the indentation in both the new line and the line that has the
4496 // label.
4497 if (CurrentLines->size() > FirstLine)
4498 (*CurrentLines)[FirstLine].Level = OrigLevel;
4499 Line->Level = OrigLevel;
4500}
4501
4502bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4503 for (const auto &N : Line.Tokens) {
4504 if (N.Tok->MacroCtx)
4505 return true;
4506 for (const UnwrappedLine &Child : N.Children)
4507 if (containsExpansion(Child))
4508 return true;
4509 }
4510 return false;
4511}
4512
4513void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4514 if (Line->Tokens.empty())
4515 return;
4516 LLVM_DEBUG({
4517 if (!parsingPPDirective()) {
4518 llvm::dbgs() << "Adding unwrapped line:\n";
4519 printDebugInfo(*Line);
4520 }
4521 });
4522
4523 // If this line closes a block when in Whitesmiths mode, remember that
4524 // information so that the level can be decreased after the line is added.
4525 // This has to happen after the addition of the line since the line itself
4526 // needs to be indented.
4527 bool ClosesWhitesmithsBlock =
4528 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4530
4531 // If the current line was expanded from a macro call, we use it to
4532 // reconstruct an unwrapped line from the structure of the expanded unwrapped
4533 // line and the unexpanded token stream.
4534 if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) {
4535 if (!Reconstruct)
4536 Reconstruct.emplace(Line->Level, Unexpanded);
4537 Reconstruct->addLine(*Line);
4538
4539 // While the reconstructed unexpanded lines are stored in the normal
4540 // flow of lines, the expanded lines are stored on the side to be analyzed
4541 // in an extra step.
4542 CurrentExpandedLines.push_back(std::move(*Line));
4543
4544 if (Reconstruct->finished()) {
4545 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
4546 assert(!Reconstructed.Tokens.empty() &&
4547 "Reconstructed must at least contain the macro identifier.");
4548 assert(!parsingPPDirective());
4549 LLVM_DEBUG({
4550 llvm::dbgs() << "Adding unexpanded line:\n";
4551 printDebugInfo(Reconstructed);
4552 });
4553 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
4554 Lines.push_back(std::move(Reconstructed));
4555 CurrentExpandedLines.clear();
4556 Reconstruct.reset();
4557 }
4558 } else {
4559 // At the top level we only get here when no unexpansion is going on, or
4560 // when conditional formatting led to unfinished macro reconstructions.
4561 assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0);
4562 CurrentLines->push_back(std::move(*Line));
4563 }
4564 Line->Tokens.clear();
4565 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4566 Line->FirstStartColumn = 0;
4567 Line->IsContinuation = false;
4568 Line->SeenDecltypeAuto = false;
4569
4570 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4571 --Line->Level;
4572 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
4573 CurrentLines->append(
4574 std::make_move_iterator(PreprocessorDirectives.begin()),
4575 std::make_move_iterator(PreprocessorDirectives.end()));
4576 PreprocessorDirectives.clear();
4577 }
4578 // Disconnect the current token from the last token on the previous line.
4579 FormatTok->Previous = nullptr;
4580}
4581
4582bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
4583
4584bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4585 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4586 FormatTok.NewlinesBefore > 0;
4587}
4588
4589// Checks if \p FormatTok is a line comment that continues the line comment
4590// section on \p Line.
4591static bool
4593 const UnwrappedLine &Line,
4594 const llvm::Regex &CommentPragmasRegex) {
4595 if (Line.Tokens.empty())
4596 return false;
4597
4598 StringRef IndentContent = FormatTok.TokenText;
4599 if (FormatTok.TokenText.starts_with("//") ||
4600 FormatTok.TokenText.starts_with("/*")) {
4601 IndentContent = FormatTok.TokenText.substr(2);
4602 }
4603 if (CommentPragmasRegex.match(IndentContent))
4604 return false;
4605
4606 // If Line starts with a line comment, then FormatTok continues the comment
4607 // section if its original column is greater or equal to the original start
4608 // column of the line.
4609 //
4610 // Define the min column token of a line as follows: if a line ends in '{' or
4611 // contains a '{' followed by a line comment, then the min column token is
4612 // that '{'. Otherwise, the min column token of the line is the first token of
4613 // the line.
4614 //
4615 // If Line starts with a token other than a line comment, then FormatTok
4616 // continues the comment section if its original column is greater than the
4617 // original start column of the min column token of the line.
4618 //
4619 // For example, the second line comment continues the first in these cases:
4620 //
4621 // // first line
4622 // // second line
4623 //
4624 // and:
4625 //
4626 // // first line
4627 // // second line
4628 //
4629 // and:
4630 //
4631 // int i; // first line
4632 // // second line
4633 //
4634 // and:
4635 //
4636 // do { // first line
4637 // // second line
4638 // int i;
4639 // } while (true);
4640 //
4641 // and:
4642 //
4643 // enum {
4644 // a, // first line
4645 // // second line
4646 // b
4647 // };
4648 //
4649 // The second line comment doesn't continue the first in these cases:
4650 //
4651 // // first line
4652 // // second line
4653 //
4654 // and:
4655 //
4656 // int i; // first line
4657 // // second line
4658 //
4659 // and:
4660 //
4661 // do { // first line
4662 // // second line
4663 // int i;
4664 // } while (true);
4665 //
4666 // and:
4667 //
4668 // enum {
4669 // a, // first line
4670 // // second line
4671 // };
4672 const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4673
4674 // Scan for '{//'. If found, use the column of '{' as a min column for line
4675 // comment section continuation.
4676 const FormatToken *PreviousToken = nullptr;
4677 for (const UnwrappedLineNode &Node : Line.Tokens) {
4678 if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4679 isLineComment(*Node.Tok)) {
4680 MinColumnToken = PreviousToken;
4681 break;
4682 }
4683 PreviousToken = Node.Tok;
4684
4685 // Grab the last newline preceding a token in this unwrapped line.
4686 if (Node.Tok->NewlinesBefore > 0)
4687 MinColumnToken = Node.Tok;
4688 }
4689 if (PreviousToken && PreviousToken->is(tok::l_brace))
4690 MinColumnToken = PreviousToken;
4691
4692 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4693 MinColumnToken);
4694}
4695
4696void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4697 bool JustComments = Line->Tokens.empty();
4698 for (FormatToken *Tok : CommentsBeforeNextToken) {
4699 // Line comments that belong to the same line comment section are put on the
4700 // same line since later we might want to reflow content between them.
4701 // Additional fine-grained breaking of line comment sections is controlled
4702 // by the class BreakableLineCommentSection in case it is desirable to keep
4703 // several line comment sections in the same unwrapped line.
4704 //
4705 // FIXME: Consider putting separate line comment sections as children to the
4706 // unwrapped line instead.
4707 Tok->ContinuesLineCommentSection =
4708 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
4709 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4710 addUnwrappedLine();
4711 pushToken(Tok);
4712 }
4713 if (NewlineBeforeNext && JustComments)
4714 addUnwrappedLine();
4715 CommentsBeforeNextToken.clear();
4716}
4717
4718void UnwrappedLineParser::nextToken(int LevelDifference) {
4719 if (eof())
4720 return;
4721 flushComments(isOnNewLine(*FormatTok));
4722 pushToken(FormatTok);
4723 FormatToken *Previous = FormatTok;
4724 if (!Style.isJavaScript())
4725 readToken(LevelDifference);
4726 else
4727 readTokenWithJavaScriptASI();
4728 FormatTok->Previous = Previous;
4729 if (Style.isVerilog()) {
4730 // Blocks in Verilog can have `begin` and `end` instead of braces. For
4731 // keywords like `begin`, we can't treat them the same as left braces
4732 // because some contexts require one of them. For example structs use
4733 // braces and if blocks use keywords, and a left brace can occur in an if
4734 // statement, but it is not a block. For keywords like `end`, we simply
4735 // treat them the same as right braces.
4736 if (Keywords.isVerilogEnd(*FormatTok))
4737 FormatTok->Tok.setKind(tok::r_brace);
4738 }
4739}
4740
4741void UnwrappedLineParser::distributeComments(
4742 const SmallVectorImpl<FormatToken *> &Comments,
4743 const FormatToken *NextTok) {
4744 // Whether or not a line comment token continues a line is controlled by
4745 // the method continuesLineCommentSection, with the following caveat:
4746 //
4747 // Define a trail of Comments to be a nonempty proper postfix of Comments such
4748 // that each comment line from the trail is aligned with the next token, if
4749 // the next token exists. If a trail exists, the beginning of the maximal
4750 // trail is marked as a start of a new comment section.
4751 //
4752 // For example in this code:
4753 //
4754 // int a; // line about a
4755 // // line 1 about b
4756 // // line 2 about b
4757 // int b;
4758 //
4759 // the two lines about b form a maximal trail, so there are two sections, the
4760 // first one consisting of the single comment "// line about a" and the
4761 // second one consisting of the next two comments.
4762 if (Comments.empty())
4763 return;
4764 bool ShouldPushCommentsInCurrentLine = true;
4765 bool HasTrailAlignedWithNextToken = false;
4766 unsigned StartOfTrailAlignedWithNextToken = 0;
4767 if (NextTok) {
4768 // We are skipping the first element intentionally.
4769 for (unsigned i = Comments.size() - 1; i > 0; --i) {
4770 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4771 HasTrailAlignedWithNextToken = true;
4772 StartOfTrailAlignedWithNextToken = i;
4773 }
4774 }
4775 }
4776 for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4777 FormatToken *FormatTok = Comments[i];
4778 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4779 FormatTok->ContinuesLineCommentSection = false;
4780 } else {
4781 FormatTok->ContinuesLineCommentSection =
4782 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4783 }
4784 if (!FormatTok->ContinuesLineCommentSection &&
4785 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4786 ShouldPushCommentsInCurrentLine = false;
4787 }
4788 if (ShouldPushCommentsInCurrentLine)
4789 pushToken(FormatTok);
4790 else
4791 CommentsBeforeNextToken.push_back(FormatTok);
4792 }
4793}
4794
4795void UnwrappedLineParser::readToken(int LevelDifference) {
4796 SmallVector<FormatToken *, 1> Comments;
4797 bool PreviousWasComment = false;
4798 bool FirstNonCommentOnLine = false;
4799 do {
4800 FormatTok = Tokens->getNextToken();
4801 assert(FormatTok);
4802 while (FormatTok->isOneOf(TT_ConflictStart, TT_ConflictEnd,
4803 TT_ConflictAlternative)) {
4804 if (FormatTok->is(TT_ConflictStart))
4805 conditionalCompilationStart(/*Unreachable=*/false);
4806 else if (FormatTok->is(TT_ConflictAlternative))
4807 conditionalCompilationAlternative();
4808 else if (FormatTok->is(TT_ConflictEnd))
4809 conditionalCompilationEnd();
4810 FormatTok = Tokens->getNextToken();
4811 FormatTok->MustBreakBefore = true;
4812 FormatTok->MustBreakBeforeFinalized = true;
4813 }
4814
4815 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4816 const FormatToken &Tok,
4817 bool PreviousWasComment) {
4818 auto IsFirstOnLine = [](const FormatToken &Tok) {
4819 return Tok.HasUnescapedNewline || Tok.IsFirst;
4820 };
4821
4822 // Consider preprocessor directives preceded by block comments as first
4823 // on line.
4824 if (PreviousWasComment)
4825 return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4826 return IsFirstOnLine(Tok);
4827 };
4828
4829 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4830 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4831 PreviousWasComment = FormatTok->is(tok::comment);
4832
4833 while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4834 (!Style.isVerilog() ||
4835 Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
4836 FirstNonCommentOnLine) {
4837 distributeComments(Comments, FormatTok);
4838 Comments.clear();
4839 // If there is an unfinished unwrapped line, we flush the preprocessor
4840 // directives only after that unwrapped line was finished later.
4841 bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4842 ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4843 assert((LevelDifference >= 0 ||
4844 static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4845 "LevelDifference makes Line->Level negative");
4846 Line->Level += LevelDifference;
4847 // Comments stored before the preprocessor directive need to be output
4848 // before the preprocessor directive, at the same level as the
4849 // preprocessor directive, as we consider them to apply to the directive.
4851 PPBranchLevel > 0) {
4852 Line->Level += PPBranchLevel;
4853 }
4854 assert(Line->Level >= Line->UnbracedBodyLevel);
4855 Line->Level -= Line->UnbracedBodyLevel;
4856 flushComments(isOnNewLine(*FormatTok));
4857 parsePPDirective();
4858 PreviousWasComment = FormatTok->is(tok::comment);
4859 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4860 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4861 }
4862
4863 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4864 !Line->InPPDirective) {
4865 continue;
4866 }
4867
4868 if (FormatTok->is(tok::identifier) &&
4869 Macros.defined(FormatTok->TokenText) &&
4870 // FIXME: Allow expanding macros in preprocessor directives.
4871 !Line->InPPDirective) {
4872 FormatToken *ID = FormatTok;
4873 unsigned Position = Tokens->getPosition();
4874
4875 // To correctly parse the code, we need to replace the tokens of the macro
4876 // call with its expansion.
4877 auto PreCall = std::move(Line);
4878 Line.reset(new UnwrappedLine);
4879 bool OldInExpansion = InExpansion;
4880 InExpansion = true;
4881 // We parse the macro call into a new line.
4882 auto Args = parseMacroCall();
4883 InExpansion = OldInExpansion;
4884 assert(Line->Tokens.front().Tok == ID);
4885 // And remember the unexpanded macro call tokens.
4886 auto UnexpandedLine = std::move(Line);
4887 // Reset to the old line.
4888 Line = std::move(PreCall);
4889
4890 LLVM_DEBUG({
4891 llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
4892 if (Args) {
4893 llvm::dbgs() << "(";
4894 for (const auto &Arg : Args.value())
4895 for (const auto &T : Arg)
4896 llvm::dbgs() << T->TokenText << " ";
4897 llvm::dbgs() << ")";
4898 }
4899 llvm::dbgs() << "\n";
4900 });
4901 if (Macros.objectLike(ID->TokenText) && Args &&
4902 !Macros.hasArity(ID->TokenText, Args->size())) {
4903 // The macro is either
4904 // - object-like, but we got argumnets, or
4905 // - overloaded to be both object-like and function-like, but none of
4906 // the function-like arities match the number of arguments.
4907 // Thus, expand as object-like macro.
4908 LLVM_DEBUG(llvm::dbgs()
4909 << "Macro \"" << ID->TokenText
4910 << "\" not overloaded for arity " << Args->size()
4911 << "or not function-like, using object-like overload.");
4912 Args.reset();
4913 UnexpandedLine->Tokens.resize(1);
4914 Tokens->setPosition(Position);
4915 nextToken();
4916 assert(!Args && Macros.objectLike(ID->TokenText));
4917 }
4918 if ((!Args && Macros.objectLike(ID->TokenText)) ||
4919 (Args && Macros.hasArity(ID->TokenText, Args->size()))) {
4920 // Next, we insert the expanded tokens in the token stream at the
4921 // current position, and continue parsing.
4922 Unexpanded[ID] = std::move(UnexpandedLine);
4923 SmallVector<FormatToken *, 8> Expansion =
4924 Macros.expand(ID, std::move(Args));
4925 if (!Expansion.empty())
4926 FormatTok = Tokens->insertTokens(Expansion);
4927
4928 LLVM_DEBUG({
4929 llvm::dbgs() << "Expanded: ";
4930 for (const auto &T : Expansion)
4931 llvm::dbgs() << T->TokenText << " ";
4932 llvm::dbgs() << "\n";
4933 });
4934 } else {
4935 LLVM_DEBUG({
4936 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
4937 << "\", because it was used ";
4938 if (Args)
4939 llvm::dbgs() << "with " << Args->size();
4940 else
4941 llvm::dbgs() << "without";
4942 llvm::dbgs() << " arguments, which doesn't match any definition.\n";
4943 });
4944 Tokens->setPosition(Position);
4945 FormatTok = ID;
4946 }
4947 }
4948
4949 if (FormatTok->isNot(tok::comment)) {
4950 distributeComments(Comments, FormatTok);
4951 Comments.clear();
4952 return;
4953 }
4954
4955 Comments.push_back(FormatTok);
4956 } while (!eof());
4957
4958 distributeComments(Comments, nullptr);
4959 Comments.clear();
4960}
4961
4962namespace {
4963template <typename Iterator>
4964void pushTokens(Iterator Begin, Iterator End,
4966 for (auto I = Begin; I != End; ++I) {
4967 Into.push_back(I->Tok);
4968 for (const auto &Child : I->Children)
4969 pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
4970 }
4971}
4972} // namespace
4973
4974std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
4975UnwrappedLineParser::parseMacroCall() {
4976 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
4977 assert(Line->Tokens.empty());
4978 nextToken();
4979 if (FormatTok->isNot(tok::l_paren))
4980 return Args;
4981 unsigned Position = Tokens->getPosition();
4982 FormatToken *Tok = FormatTok;
4983 nextToken();
4984 Args.emplace();
4985 auto ArgStart = std::prev(Line->Tokens.end());
4986
4987 int Parens = 0;
4988 do {
4989 switch (FormatTok->Tok.getKind()) {
4990 case tok::l_paren:
4991 ++Parens;
4992 nextToken();
4993 break;
4994 case tok::r_paren: {
4995 if (Parens > 0) {
4996 --Parens;
4997 nextToken();
4998 break;
4999 }
5000 Args->push_back({});
5001 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
5002 nextToken();
5003 return Args;
5004 }
5005 case tok::comma: {
5006 if (Parens > 0) {
5007 nextToken();
5008 break;
5009 }
5010 Args->push_back({});
5011 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
5012 nextToken();
5013 ArgStart = std::prev(Line->Tokens.end());
5014 break;
5015 }
5016 default:
5017 nextToken();
5018 break;
5019 }
5020 } while (!eof());
5021 Line->Tokens.resize(1);
5022 Tokens->setPosition(Position);
5023 FormatTok = Tok;
5024 return {};
5025}
5026
5027void UnwrappedLineParser::pushToken(FormatToken *Tok) {
5028 Line->Tokens.push_back(UnwrappedLineNode(Tok));
5029 if (MustBreakBeforeNextToken) {
5030 Line->Tokens.back().Tok->MustBreakBefore = true;
5031 Line->Tokens.back().Tok->MustBreakBeforeFinalized = true;
5032 MustBreakBeforeNextToken = false;
5033 }
5034}
5035
5036} // end namespace format
5037} // end namespace clang
DynTypedNode Node
static char ID
Definition: Arena.cpp:183
Expr * E
enum clang::sema::@1651::IndirectLocalPathEntry::EntryKind Kind
This file contains FormatTokenLexer, which tokenizes a source file into a token stream suitable for C...
This file defines the FormatTokenSource interface, which provides a token stream as well as the abili...
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
StringRef Text
Definition: Format.cpp:2990
This file contains the main building blocks of macro support in clang-format.
This file implements a token annotator, i.e.
Defines the clang::TokenKind enum and support functions.
SourceLocation Begin
StateNode * Previous
ContinuationIndenter * Indenter
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
do v
Definition: arm_acle.h:91
tok::PPKeywordKind getPPKeywordID() const
Return the preprocessor keyword ID for this identifier.
Implements an efficient mapping from strings to IdentifierInfo nodes.
Parser - This implements a parser for the C family of languages.
Definition: Parser.h:58
This class handles loading and caching of source files into memory.
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:187
bool isAnyIdentifier() const
Return true if this is a raw identifier (when lexing in raw mode) or a non-keyword identifier (when l...
Definition: Token.h:110
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:116
void setKind(tok::TokenKind K)
Definition: Token.h:95
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
Definition: Lexer.cpp:70
tok::TokenKind getKind() const
Definition: Token.h:94
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const
Definition: Token.h:101
bool isNot(tok::TokenKind K) const
Definition: Token.h:100
void setIdentifierInfo(IdentifierInfo *II)
Definition: Token.h:196
CompoundStatementIndenter(UnwrappedLineParser *Parser, const FormatStyle &Style, unsigned &LineLevel)
CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, bool WrapBrace, bool IndentBrace)
virtual FormatToken * peekNextToken(bool SkipComment=false)=0
virtual unsigned getPosition()=0
virtual FormatToken * getPreviousToken()=0
virtual FormatToken * setPosition(unsigned Position)=0
virtual FormatToken * getNextToken()=0
bool objectLike(StringRef Name) const
Returns whether there is an object-like overload, i.e.
SmallVector< FormatToken *, 8 > expand(FormatToken *ID, std::optional< ArgsList > OptionalArgs) const
Returns the expanded stream of format tokens for ID, where each element in Args is a positional argum...
bool hasArity(StringRef Name, unsigned Arity) const
Returns whether macro Name provides an overload with the given arity.
bool defined(StringRef Name) const
Returns whether any macro Name is defined, regardless of overloads.
ScopedLineState(UnwrappedLineParser &Parser, bool SwitchToPreprocessorLines=false)
Interface for users of the UnwrappedLineParser to receive the parsed lines.
virtual void consumeUnwrappedLine(const UnwrappedLine &Line)=0
UnwrappedLineParser(SourceManager &SourceMgr, const FormatStyle &Style, const AdditionalKeywords &Keywords, unsigned FirstStartColumn, ArrayRef< FormatToken * > Tokens, UnwrappedLineConsumer &Callback, llvm::SpecificBumpPtrAllocator< FormatToken > &Allocator, IdentifierTable &IdentTable)
static bool isCOperatorFollowingVar(tok::TokenKind Kind)
static void hash_combine(std::size_t &seed, const T &v)
static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
std::ostream & operator<<(std::ostream &Stream, const UnwrappedLine &Line)
bool continuesLineComment(const FormatToken &FormatTok, const FormatToken *Previous, const FormatToken *MinColumnToken)
Definition: FormatToken.h:1963
static bool continuesLineCommentSection(const FormatToken &FormatTok, const UnwrappedLine &Line, const llvm::Regex &CommentPragmasRegex)
static bool tokenCanStartNewLine(const FormatToken &Tok)
static bool isC78Type(const FormatToken &Tok)
bool isLineComment(const FormatToken &FormatTok)
Definition: FormatToken.h:1956
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
static bool ShouldBreakBeforeBrace(const FormatStyle &Style, const FormatToken &InitialToken)
LangOptions getFormattingLangOpts(const FormatStyle &Style=getLLVMStyle())
Returns the LangOpts that the formatter expects you to set.
Definition: Format.cpp:3861
static void markOptionalBraces(FormatToken *LeftBrace)
static bool mustBeJSIdent(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
static bool isIIFE(const UnwrappedLine &Line, const AdditionalKeywords &Keywords)
static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, const FormatToken *FuncName)
static bool isGoogScope(const UnwrappedLine &Line)
static FormatToken * getLastNonComment(const UnwrappedLine &Line)
TokenType
Determines the semantic type of a syntactic token, e.g.
Definition: FormatToken.h:207
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
bool isLiteral(TokenKind K)
Return true if this is a "literal" kind, like a numeric constant, string, etc.
Definition: TokenKinds.h:97
The JSON file list parser is used to communicate input to InstallAPI.
const FunctionProtoType * T
@ Parens
New-expression has a C++98 paren-delimited initializer.
#define false
Definition: stdbool.h:26
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:1024
bool isVerilogEnd(const FormatToken &Tok) const
Returns whether Tok is a Verilog keyword that closes a block.
Definition: FormatToken.h:1857
bool isVerilogBegin(const FormatToken &Tok) const
Returns whether Tok is a Verilog keyword that opens a block.
Definition: FormatToken.h:1850
bool isVerilogStructuredProcedure(const FormatToken &Tok) const
Returns whether Tok is a Verilog keyword that starts a structured procedure like 'always'.
Definition: FormatToken.h:1895
bool isVerilogHierarchy(const FormatToken &Tok) const
Returns whether Tok is a Verilog keyword that opens a module, etc.
Definition: FormatToken.h:1869
bool isVerilogPPDirective(const FormatToken &Tok) const
Returns whether Tok is a Verilog preprocessor directive.
Definition: FormatToken.h:1823
IdentifierInfo * kw_internal_ident_after_define
Definition: FormatToken.h:1457
bool isVerilogIdentifier(const FormatToken &Tok) const
Definition: FormatToken.h:1787
bool AfterClass
Wrap class definitions.
Definition: Format.h:1339
bool AfterStruct
Wrap struct definitions.
Definition: Format.h:1406
bool AfterUnion
Wrap union definitions.
Definition: Format.h:1420
bool AfterEnum
Wrap enum definitions.
Definition: Format.h:1354
bool IndentBraces
Indent the wrapped braces themselves.
Definition: Format.h:1497
bool AfterObjCDeclaration
Wrap ObjC definitions (interfaces, implementations...).
Definition: Format.h:1392
bool AfterNamespace
Wrap namespace definitions.
Definition: Format.h:1386
BraceWrappingAfterControlStatementStyle AfterControlStatement
Wrap control statements (if/for/while/switch/..).
Definition: Format.h:1342
bool AfterFunction
Wrap function definitions.
Definition: Format.h:1370
bool AfterExternBlock
Wrap extern blocks.
Definition: Format.h:1434
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
bool isTableGen() const
Definition: Format.h:3217
@ LK_Java
Should be used for Java.
Definition: Format.h:3189
@ LK_TableGen
Should be used for TableGen code.
Definition: Format.h:3200
@ LK_Proto
Should be used for Protocol Buffers (https://developers.google.com/protocol-buffers/).
Definition: Format.h:3198
@ LK_TextProto
Should be used for Protocol Buffer messages in text format (https://developers.google....
Definition: Format.h:3203
unsigned IndentWidth
The number of columns to use for indentation.
Definition: Format.h:2859
bool IndentCaseLabels
Indent case labels one level from the switch statement.
Definition: Format.h:2731
PPDirectiveIndentStyle IndentPPDirectives
The preprocessor directive indenting style to use.
Definition: Format.h:2823
bool RemoveSemicolon
Remove semicolons after the closing braces of functions and constructors/destructors.
Definition: Format.h:3906
@ IEBS_AfterExternBlock
Backwards compatible with AfterExternBlock's indenting.
Definition: Format.h:2769
@ IEBS_Indent
Indents extern blocks.
Definition: Format.h:2783
bool IndentCaseBlocks
Indent case label blocks one level from the case label.
Definition: Format.h:2712
bool InsertBraces
Insert braces after control statements (if, else, for, do, and while) in C++ unless the control state...
Definition: Format.h:2905
RemoveParenthesesStyle RemoveParentheses
Remove redundant parentheses.
Definition: Format.h:3888
LanguageKind Language
Language, this format style is targeted at.
Definition: Format.h:3221
bool RemoveBracesLLVM
Remove optional braces of control statements (if, else, for, and while) in C++ according to the LLVM ...
Definition: Format.h:3852
@ PPDIS_BeforeHash
Indents directives before the hash.
Definition: Format.h:2818
@ PPDIS_None
Does not indent any directives.
Definition: Format.h:2800
bool AllowShortLoopsOnASingleLine
If true, while (true) continue; can be put on a single line.
Definition: Format.h:973
bool AllowShortEnumsOnASingleLine
Allow short enums on a single line.
Definition: Format.h:810
NamespaceIndentationKind NamespaceIndentation
The indentation used for namespaces.
Definition: Format.h:3362
BraceBreakingStyle BreakBeforeBraces
The brace breaking style to use.
Definition: Format.h:2164
bool isCSharp() const
Definition: Format.h:3210
@ BWACS_Always
Always wrap braces after a control statement.
Definition: Format.h:1303
@ BWACS_Never
Never wrap braces after a control statement.
Definition: Format.h:1282
@ BS_Whitesmiths
Like Allman but always indent braces and line up code with braces.
Definition: Format.h:2047
bool isVerilog() const
Definition: Format.h:3213
bool isJavaScript() const
Definition: Format.h:3212
bool IndentGotoLabels
Indent goto labels.
Definition: Format.h:2748
BraceWrappingFlags BraceWrapping
Control of individual brace wrapping cases.
Definition: Format.h:1551
@ RPS_Leave
Do not remove parentheses.
Definition: Format.h:3862
@ RPS_ReturnStatement
Also remove parentheses enclosing the expression in a return/co_return statement.
Definition: Format.h:3877
bool SkipMacroDefinitionBody
Do not format macro definition body.
Definition: Format.h:4097
@ NI_All
Indent in all namespaces.
Definition: Format.h:3357
@ NI_Inner
Indent only in inner namespaces (nested in other namespaces).
Definition: Format.h:3347
bool IndentAccessModifiers
Specify whether access modifiers should have their own indentation level.
Definition: Format.h:2689
IndentExternBlockStyle IndentExternBlock
IndentExternBlockStyle is the type of indenting of extern blocks.
Definition: Format.h:2788
unsigned ColumnLimit
The column limit.
Definition: Format.h:2337
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:292
bool Optional
Is optional and can be removed.
Definition: FormatToken.h:576
bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const
Definition: FormatToken.h:662
bool isTypeName(const LangOptions &LangOpts) const
Definition: FormatToken.cpp:44
bool isCppAlternativeOperatorKeyword() const
Definition: FormatToken.h:730
bool isNot(T Kind) const
Definition: FormatToken.h:623
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:312
FormatToken * getPreviousNonComment() const
Returns the previous token ignoring comments.
Definition: FormatToken.h:832
unsigned Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside),...
Definition: FormatToken.h:371
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:461
void setBlockKind(BraceBlockKind BBK)
Definition: FormatToken.h:387
bool isStringLiteral() const
Definition: FormatToken.h:656
bool isBinaryOperator() const
Definition: FormatToken.h:769
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:604
bool hasWhitespaceBefore() const
Returns true if the range of whitespace immediately preceding the Token is not empty.
Definition: FormatToken.h:820
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:616
unsigned ClosesRequiresClause
true if this is the last token within requires clause.
Definition: FormatToken.h:374
bool isAccessSpecifierKeyword() const
Definition: FormatToken.h:666
FormatToken * MatchingParen
If this is a bracket, this points to the matching one.
Definition: FormatToken.h:558
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:561
bool endsSequence(A K1, Ts... Tokens) const
true if this token ends a sequence with the given tokens in order, following the Previous pointers,...
Definition: FormatToken.h:652
void setFinalizedType(TokenType T)
Sets the type and also the finalized flag.
Definition: FormatToken.h:440
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...
static const size_t kInvalidIndex