clang 20.0.0git
UnwrappedLineParser.cpp
Go to the documentation of this file.
1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains the implementation of the UnwrappedLineParser,
11/// which turns a stream of tokens into UnwrappedLines.
12///
13//===----------------------------------------------------------------------===//
14
15#include "UnwrappedLineParser.h"
16#include "FormatToken.h"
17#include "FormatTokenLexer.h"
18#include "FormatTokenSource.h"
19#include "Macros.h"
20#include "TokenAnnotator.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/StringRef.h"
24#include "llvm/Support/Debug.h"
25#include "llvm/Support/raw_os_ostream.h"
26#include "llvm/Support/raw_ostream.h"
27
28#include <algorithm>
29#include <utility>
30
31#define DEBUG_TYPE "format-parser"
32
33namespace clang {
34namespace format {
35
36namespace {
37
38void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
39 StringRef Prefix = "", bool PrintText = false) {
40 OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
41 << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
42 bool NewLine = false;
43 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
44 E = Line.Tokens.end();
45 I != E; ++I) {
46 if (NewLine) {
47 OS << Prefix;
48 NewLine = false;
49 }
50 OS << I->Tok->Tok.getName() << "["
51 << "T=" << (unsigned)I->Tok->getType()
52 << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
53 << "\"] ";
54 for (const auto *CI = I->Children.begin(), *CE = I->Children.end();
55 CI != CE; ++CI) {
56 OS << "\n";
57 printLine(OS, *CI, (Prefix + " ").str());
58 NewLine = true;
59 }
60 }
61 if (!NewLine)
62 OS << "\n";
63}
64
65LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
66 printLine(llvm::dbgs(), Line);
67}
68
69class ScopedDeclarationState {
70public:
71 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
72 bool MustBeDeclaration)
73 : Line(Line), Stack(Stack) {
74 Line.MustBeDeclaration = MustBeDeclaration;
75 Stack.push_back(MustBeDeclaration);
76 }
77 ~ScopedDeclarationState() {
78 Stack.pop_back();
79 if (!Stack.empty())
80 Line.MustBeDeclaration = Stack.back();
81 else
82 Line.MustBeDeclaration = true;
83 }
84
85private:
86 UnwrappedLine &Line;
87 llvm::BitVector &Stack;
88};
89
90} // end anonymous namespace
91
92std::ostream &operator<<(std::ostream &Stream, const UnwrappedLine &Line) {
93 llvm::raw_os_ostream OS(Stream);
94 printLine(OS, Line);
95 return Stream;
96}
97
99public:
101 bool SwitchToPreprocessorLines = false)
102 : Parser(Parser), OriginalLines(Parser.CurrentLines) {
103 if (SwitchToPreprocessorLines)
104 Parser.CurrentLines = &Parser.PreprocessorDirectives;
105 else if (!Parser.Line->Tokens.empty())
106 Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
107 PreBlockLine = std::move(Parser.Line);
108 Parser.Line = std::make_unique<UnwrappedLine>();
109 Parser.Line->Level = PreBlockLine->Level;
110 Parser.Line->PPLevel = PreBlockLine->PPLevel;
111 Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
112 Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
113 Parser.Line->UnbracedBodyLevel = PreBlockLine->UnbracedBodyLevel;
114 }
115
117 if (!Parser.Line->Tokens.empty())
118 Parser.addUnwrappedLine();
119 assert(Parser.Line->Tokens.empty());
120 Parser.Line = std::move(PreBlockLine);
121 if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
122 Parser.MustBreakBeforeNextToken = true;
123 Parser.CurrentLines = OriginalLines;
124 }
125
126private:
128
129 std::unique_ptr<UnwrappedLine> PreBlockLine;
130 SmallVectorImpl<UnwrappedLine> *OriginalLines;
131};
132
134public:
136 const FormatStyle &Style, unsigned &LineLevel)
138 Style.BraceWrapping.AfterControlStatement,
139 Style.BraceWrapping.IndentBraces) {}
141 bool WrapBrace, bool IndentBrace)
142 : LineLevel(LineLevel), OldLineLevel(LineLevel) {
143 if (WrapBrace)
144 Parser->addUnwrappedLine();
145 if (IndentBrace)
146 ++LineLevel;
147 }
148 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
149
150private:
151 unsigned &LineLevel;
152 unsigned OldLineLevel;
153};
154
156 SourceManager &SourceMgr, const FormatStyle &Style,
157 const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
159 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
160 IdentifierTable &IdentTable)
161 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
162 CurrentLines(&Lines), Style(Style), IsCpp(Style.isCpp()),
163 LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords),
164 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
165 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
166 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
167 ? IG_Rejected
168 : IG_Inited),
169 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
170 Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {
171 assert(IsCpp == LangOpts.CXXOperatorNames);
172}
173
174void UnwrappedLineParser::reset() {
175 PPBranchLevel = -1;
176 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
177 ? IG_Rejected
178 : IG_Inited;
179 IncludeGuardToken = nullptr;
180 Line.reset(new UnwrappedLine);
181 CommentsBeforeNextToken.clear();
182 FormatTok = nullptr;
183 MustBreakBeforeNextToken = false;
184 IsDecltypeAutoFunction = false;
185 PreprocessorDirectives.clear();
186 CurrentLines = &Lines;
187 DeclarationScopeStack.clear();
188 NestedTooDeep.clear();
189 NestedLambdas.clear();
190 PPStack.clear();
191 Line->FirstStartColumn = FirstStartColumn;
192
193 if (!Unexpanded.empty())
194 for (FormatToken *Token : AllTokens)
195 Token->MacroCtx.reset();
196 CurrentExpandedLines.clear();
197 ExpandedLines.clear();
198 Unexpanded.clear();
199 InExpansion = false;
200 Reconstruct.reset();
201}
202
204 IndexedTokenSource TokenSource(AllTokens);
205 Line->FirstStartColumn = FirstStartColumn;
206 do {
207 LLVM_DEBUG(llvm::dbgs() << "----\n");
208 reset();
209 Tokens = &TokenSource;
210 TokenSource.reset();
211
212 readToken();
213 parseFile();
214
215 // If we found an include guard then all preprocessor directives (other than
216 // the guard) are over-indented by one.
217 if (IncludeGuard == IG_Found) {
218 for (auto &Line : Lines)
219 if (Line.InPPDirective && Line.Level > 0)
220 --Line.Level;
221 }
222
223 // Create line with eof token.
224 assert(eof());
225 pushToken(FormatTok);
226 addUnwrappedLine();
227
228 // In a first run, format everything with the lines containing macro calls
229 // replaced by the expansion.
230 if (!ExpandedLines.empty()) {
231 LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
232 for (const auto &Line : Lines) {
233 if (!Line.Tokens.empty()) {
234 auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
235 if (it != ExpandedLines.end()) {
236 for (const auto &Expanded : it->second) {
237 LLVM_DEBUG(printDebugInfo(Expanded));
238 Callback.consumeUnwrappedLine(Expanded);
239 }
240 continue;
241 }
242 }
243 LLVM_DEBUG(printDebugInfo(Line));
244 Callback.consumeUnwrappedLine(Line);
245 }
246 Callback.finishRun();
247 }
248
249 LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
250 for (const UnwrappedLine &Line : Lines) {
251 LLVM_DEBUG(printDebugInfo(Line));
252 Callback.consumeUnwrappedLine(Line);
253 }
254 Callback.finishRun();
255 Lines.clear();
256 while (!PPLevelBranchIndex.empty() &&
257 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
258 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
259 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
260 }
261 if (!PPLevelBranchIndex.empty()) {
262 ++PPLevelBranchIndex.back();
263 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
264 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
265 }
266 } while (!PPLevelBranchIndex.empty());
267}
268
269void UnwrappedLineParser::parseFile() {
270 // The top-level context in a file always has declarations, except for pre-
271 // processor directives and JavaScript files.
272 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
273 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
274 MustBeDeclaration);
276 parseBracedList();
277 else
278 parseLevel();
279 // Make sure to format the remaining tokens.
280 //
281 // LK_TextProto is special since its top-level is parsed as the body of a
282 // braced list, which does not necessarily have natural line separators such
283 // as a semicolon. Comments after the last entry that have been determined to
284 // not belong to that line, as in:
285 // key: value
286 // // endfile comment
287 // do not have a chance to be put on a line of their own until this point.
288 // Here we add this newline before end-of-file comments.
289 if (Style.Language == FormatStyle::LK_TextProto &&
290 !CommentsBeforeNextToken.empty()) {
291 addUnwrappedLine();
292 }
293 flushComments(true);
294 addUnwrappedLine();
295}
296
297void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
298 do {
299 switch (FormatTok->Tok.getKind()) {
300 case tok::l_brace:
301 return;
302 default:
303 if (FormatTok->is(Keywords.kw_where)) {
304 addUnwrappedLine();
305 nextToken();
306 parseCSharpGenericTypeConstraint();
307 break;
308 }
309 nextToken();
310 break;
311 }
312 } while (!eof());
313}
314
315void UnwrappedLineParser::parseCSharpAttribute() {
316 int UnpairedSquareBrackets = 1;
317 do {
318 switch (FormatTok->Tok.getKind()) {
319 case tok::r_square:
320 nextToken();
321 --UnpairedSquareBrackets;
322 if (UnpairedSquareBrackets == 0) {
323 addUnwrappedLine();
324 return;
325 }
326 break;
327 case tok::l_square:
328 ++UnpairedSquareBrackets;
329 nextToken();
330 break;
331 default:
332 nextToken();
333 break;
334 }
335 } while (!eof());
336}
337
338bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
339 if (!Lines.empty() && Lines.back().InPPDirective)
340 return true;
341
342 const FormatToken *Previous = Tokens->getPreviousToken();
343 return Previous && Previous->is(tok::comment) &&
344 (Previous->IsMultiline || Previous->NewlinesBefore > 0);
345}
346
347/// \brief Parses a level, that is ???.
348/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
349/// \param IfKind The \p if statement kind in the level.
350/// \param IfLeftBrace The left brace of the \p if block in the level.
351/// \returns true if a simple block of if/else/for/while, or false otherwise.
352/// (A simple block has a single statement.)
353bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
354 IfStmtKind *IfKind,
355 FormatToken **IfLeftBrace) {
356 const bool InRequiresExpression =
357 OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
358 const bool IsPrecededByCommentOrPPDirective =
359 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
360 FormatToken *IfLBrace = nullptr;
361 bool HasDoWhile = false;
362 bool HasLabel = false;
363 unsigned StatementCount = 0;
364 bool SwitchLabelEncountered = false;
365
366 do {
367 if (FormatTok->isAttribute()) {
368 nextToken();
369 if (FormatTok->is(tok::l_paren))
370 parseParens();
371 continue;
372 }
373 tok::TokenKind Kind = FormatTok->Tok.getKind();
374 if (FormatTok->is(TT_MacroBlockBegin))
375 Kind = tok::l_brace;
376 else if (FormatTok->is(TT_MacroBlockEnd))
377 Kind = tok::r_brace;
378
379 auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
380 &HasLabel, &StatementCount] {
381 parseStructuralElement(OpeningBrace, IfKind, &IfLBrace,
382 HasDoWhile ? nullptr : &HasDoWhile,
383 HasLabel ? nullptr : &HasLabel);
384 ++StatementCount;
385 assert(StatementCount > 0 && "StatementCount overflow!");
386 };
387
388 switch (Kind) {
389 case tok::comment:
390 nextToken();
391 addUnwrappedLine();
392 break;
393 case tok::l_brace:
394 if (InRequiresExpression) {
395 FormatTok->setFinalizedType(TT_CompoundRequirementLBrace);
396 } else if (FormatTok->Previous &&
397 FormatTok->Previous->ClosesRequiresClause) {
398 // We need the 'default' case here to correctly parse a function
399 // l_brace.
400 ParseDefault();
401 continue;
402 }
403 if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin)) {
404 if (tryToParseBracedList())
405 continue;
406 FormatTok->setFinalizedType(TT_BlockLBrace);
407 }
408 parseBlock();
409 ++StatementCount;
410 assert(StatementCount > 0 && "StatementCount overflow!");
411 addUnwrappedLine();
412 break;
413 case tok::r_brace:
414 if (OpeningBrace) {
415 if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
416 !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
417 return false;
418 }
419 if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
420 HasDoWhile || IsPrecededByCommentOrPPDirective ||
421 precededByCommentOrPPDirective()) {
422 return false;
423 }
424 const FormatToken *Next = Tokens->peekNextToken();
425 if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
426 return false;
427 if (IfLeftBrace)
428 *IfLeftBrace = IfLBrace;
429 return true;
430 }
431 nextToken();
432 addUnwrappedLine();
433 break;
434 case tok::kw_default: {
435 unsigned StoredPosition = Tokens->getPosition();
436 auto *Next = Tokens->getNextNonComment();
437 FormatTok = Tokens->setPosition(StoredPosition);
438 if (!Next->isOneOf(tok::colon, tok::arrow)) {
439 // default not followed by `:` or `->` is not a case label; treat it
440 // like an identifier.
441 parseStructuralElement();
442 break;
443 }
444 // Else, if it is 'default:', fall through to the case handling.
445 [[fallthrough]];
446 }
447 case tok::kw_case:
448 if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() ||
449 (Style.isJavaScript() && Line->MustBeDeclaration)) {
450 // Proto: there are no switch/case statements
451 // Verilog: Case labels don't have this word. We handle case
452 // labels including default in TokenAnnotator.
453 // JavaScript: A 'case: string' style field declaration.
454 ParseDefault();
455 break;
456 }
457 if (!SwitchLabelEncountered &&
458 (Style.IndentCaseLabels ||
459 (OpeningBrace && OpeningBrace->is(TT_SwitchExpressionLBrace)) ||
460 (Line->InPPDirective && Line->Level == 1))) {
461 ++Line->Level;
462 }
463 SwitchLabelEncountered = true;
464 parseStructuralElement();
465 break;
466 case tok::l_square:
467 if (Style.isCSharp()) {
468 nextToken();
469 parseCSharpAttribute();
470 break;
471 }
472 if (handleCppAttributes())
473 break;
474 [[fallthrough]];
475 default:
476 ParseDefault();
477 break;
478 }
479 } while (!eof());
480
481 return false;
482}
483
484void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
485 // We'll parse forward through the tokens until we hit
486 // a closing brace or eof - note that getNextToken() will
487 // parse macros, so this will magically work inside macro
488 // definitions, too.
489 unsigned StoredPosition = Tokens->getPosition();
490 FormatToken *Tok = FormatTok;
491 const FormatToken *PrevTok = Tok->Previous;
492 // Keep a stack of positions of lbrace tokens. We will
493 // update information about whether an lbrace starts a
494 // braced init list or a different block during the loop.
495 struct StackEntry {
496 FormatToken *Tok;
497 const FormatToken *PrevTok;
498 };
499 SmallVector<StackEntry, 8> LBraceStack;
500 assert(Tok->is(tok::l_brace));
501
502 do {
503 auto *NextTok = Tokens->getNextNonComment();
504
505 if (!Line->InMacroBody && !Style.isTableGen()) {
506 // Skip PPDirective lines and comments.
507 while (NextTok->is(tok::hash)) {
508 NextTok = Tokens->getNextToken();
509 if (NextTok->is(tok::pp_not_keyword))
510 break;
511 do {
512 NextTok = Tokens->getNextToken();
513 } while (!NextTok->HasUnescapedNewline && NextTok->isNot(tok::eof));
514
515 while (NextTok->is(tok::comment))
516 NextTok = Tokens->getNextToken();
517 }
518 }
519
520 switch (Tok->Tok.getKind()) {
521 case tok::l_brace:
522 if (Style.isJavaScript() && PrevTok) {
523 if (PrevTok->isOneOf(tok::colon, tok::less)) {
524 // A ':' indicates this code is in a type, or a braced list
525 // following a label in an object literal ({a: {b: 1}}).
526 // A '<' could be an object used in a comparison, but that is nonsense
527 // code (can never return true), so more likely it is a generic type
528 // argument (`X<{a: string; b: number}>`).
529 // The code below could be confused by semicolons between the
530 // individual members in a type member list, which would normally
531 // trigger BK_Block. In both cases, this must be parsed as an inline
532 // braced init.
534 } else if (PrevTok->is(tok::r_paren)) {
535 // `) { }` can only occur in function or method declarations in JS.
536 Tok->setBlockKind(BK_Block);
537 }
538 } else {
539 Tok->setBlockKind(BK_Unknown);
540 }
541 LBraceStack.push_back({Tok, PrevTok});
542 break;
543 case tok::r_brace:
544 if (LBraceStack.empty())
545 break;
546 if (auto *LBrace = LBraceStack.back().Tok; LBrace->is(BK_Unknown)) {
547 bool ProbablyBracedList = false;
548 if (Style.Language == FormatStyle::LK_Proto) {
549 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
550 } else if (LBrace->isNot(TT_EnumLBrace)) {
551 // Using OriginalColumn to distinguish between ObjC methods and
552 // binary operators is a bit hacky.
553 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
554 NextTok->OriginalColumn == 0;
555
556 // Try to detect a braced list. Note that regardless how we mark inner
557 // braces here, we will overwrite the BlockKind later if we parse a
558 // braced list (where all blocks inside are by default braced lists),
559 // or when we explicitly detect blocks (for example while parsing
560 // lambdas).
561
562 // If we already marked the opening brace as braced list, the closing
563 // must also be part of it.
564 ProbablyBracedList = LBrace->is(TT_BracedListLBrace);
565
566 ProbablyBracedList = ProbablyBracedList ||
567 (Style.isJavaScript() &&
568 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
569 Keywords.kw_as));
570 ProbablyBracedList =
571 ProbablyBracedList ||
572 (IsCpp && (PrevTok->Tok.isLiteral() ||
573 NextTok->isOneOf(tok::l_paren, tok::arrow)));
574
575 // If there is a comma, semicolon or right paren after the closing
576 // brace, we assume this is a braced initializer list.
577 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
578 // braced list in JS.
579 ProbablyBracedList =
580 ProbablyBracedList ||
581 NextTok->isOneOf(tok::comma, tok::period, tok::colon,
582 tok::r_paren, tok::r_square, tok::ellipsis);
583
584 // Distinguish between braced list in a constructor initializer list
585 // followed by constructor body, or just adjacent blocks.
586 ProbablyBracedList =
587 ProbablyBracedList ||
588 (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
589 LBraceStack.back().PrevTok->isOneOf(tok::identifier,
590 tok::greater));
591
592 ProbablyBracedList =
593 ProbablyBracedList ||
594 (NextTok->is(tok::identifier) &&
595 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
596
597 ProbablyBracedList = ProbablyBracedList ||
598 (NextTok->is(tok::semi) &&
599 (!ExpectClassBody || LBraceStack.size() != 1));
600
601 ProbablyBracedList =
602 ProbablyBracedList ||
603 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
604
605 if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
606 // We can have an array subscript after a braced init
607 // list, but C++11 attributes are expected after blocks.
608 NextTok = Tokens->getNextToken();
609 ProbablyBracedList = NextTok->isNot(tok::l_square);
610 }
611
612 // Cpp macro definition body that is a nonempty braced list or block:
613 if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
614 !FormatTok->Previous && NextTok->is(tok::eof) &&
615 // A statement can end with only `;` (simple statement), a block
616 // closing brace (compound statement), or `:` (label statement).
617 // If PrevTok is a block opening brace, Tok ends an empty block.
618 !PrevTok->isOneOf(tok::semi, BK_Block, tok::colon)) {
619 ProbablyBracedList = true;
620 }
621 }
622 const auto BlockKind = ProbablyBracedList ? BK_BracedInit : BK_Block;
623 Tok->setBlockKind(BlockKind);
624 LBrace->setBlockKind(BlockKind);
625 }
626 LBraceStack.pop_back();
627 break;
628 case tok::identifier:
629 if (Tok->isNot(TT_StatementMacro))
630 break;
631 [[fallthrough]];
632 case tok::at:
633 case tok::semi:
634 case tok::kw_if:
635 case tok::kw_while:
636 case tok::kw_for:
637 case tok::kw_switch:
638 case tok::kw_try:
639 case tok::kw___try:
640 if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
641 LBraceStack.back().Tok->setBlockKind(BK_Block);
642 break;
643 default:
644 break;
645 }
646
647 PrevTok = Tok;
648 Tok = NextTok;
649 } while (Tok->isNot(tok::eof) && !LBraceStack.empty());
650
651 // Assume other blocks for all unclosed opening braces.
652 for (const auto &Entry : LBraceStack)
653 if (Entry.Tok->is(BK_Unknown))
654 Entry.Tok->setBlockKind(BK_Block);
655
656 FormatTok = Tokens->setPosition(StoredPosition);
657}
658
659// Sets the token type of the directly previous right brace.
660void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
661 if (auto Prev = FormatTok->getPreviousNonComment();
662 Prev && Prev->is(tok::r_brace)) {
663 Prev->setFinalizedType(Type);
664 }
665}
666
/// Folds the std::hash of \p v into the running hash value \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
672
673size_t UnwrappedLineParser::computePPHash() const {
674 size_t h = 0;
675 for (const auto &i : PPStack) {
676 hash_combine(h, size_t(i.Kind));
677 hash_combine(h, i.Line);
678 }
679 return h;
680}
681
682// Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
683// is not null, subtracts its length (plus the preceding space) when computing
684// the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
685// running the token annotator on it so that we can restore them afterward.
686bool UnwrappedLineParser::mightFitOnOneLine(
687 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
688 const auto ColumnLimit = Style.ColumnLimit;
689 if (ColumnLimit == 0)
690 return true;
691
692 auto &Tokens = ParsedLine.Tokens;
693 assert(!Tokens.empty());
694
695 const auto *LastToken = Tokens.back().Tok;
696 assert(LastToken);
697
698 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
699
700 int Index = 0;
701 for (const auto &Token : Tokens) {
702 assert(Token.Tok);
703 auto &SavedToken = SavedTokens[Index++];
704 SavedToken.Tok = new FormatToken;
705 SavedToken.Tok->copyFrom(*Token.Tok);
706 SavedToken.Children = std::move(Token.Children);
707 }
708
709 AnnotatedLine Line(ParsedLine);
710 assert(Line.Last == LastToken);
711
712 TokenAnnotator Annotator(Style, Keywords);
713 Annotator.annotate(Line);
714 Annotator.calculateFormattingInformation(Line);
715
716 auto Length = LastToken->TotalLength;
717 if (OpeningBrace) {
718 assert(OpeningBrace != Tokens.front().Tok);
719 if (auto Prev = OpeningBrace->Previous;
720 Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
721 Length -= ColumnLimit;
722 }
723 Length -= OpeningBrace->TokenText.size() + 1;
724 }
725
726 if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
727 assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
728 Length -= FirstToken->TokenText.size() + 1;
729 }
730
731 Index = 0;
732 for (auto &Token : Tokens) {
733 const auto &SavedToken = SavedTokens[Index++];
734 Token.Tok->copyFrom(*SavedToken.Tok);
735 Token.Children = std::move(SavedToken.Children);
736 delete SavedToken.Tok;
737 }
738
739 // If these change PPLevel needs to be used for get correct indentation.
740 assert(!Line.InMacroBody);
741 assert(!Line.InPPDirective);
742 return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
743}
744
745FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
746 unsigned AddLevels, bool MunchSemi,
747 bool KeepBraces,
748 IfStmtKind *IfKind,
749 bool UnindentWhitesmithsBraces) {
750 auto HandleVerilogBlockLabel = [this]() {
751 // ":" name
752 if (Style.isVerilog() && FormatTok->is(tok::colon)) {
753 nextToken();
754 if (Keywords.isVerilogIdentifier(*FormatTok))
755 nextToken();
756 }
757 };
758
759 // Whether this is a Verilog-specific block that has a special header like a
760 // module.
761 const bool VerilogHierarchy =
762 Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
763 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
764 (Style.isVerilog() &&
765 (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
766 "'{' or macro block token expected");
767 FormatToken *Tok = FormatTok;
768 const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
769 auto Index = CurrentLines->size();
770 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
771 FormatTok->setBlockKind(BK_Block);
772
773 // For Whitesmiths mode, jump to the next level prior to skipping over the
774 // braces.
775 if (!VerilogHierarchy && AddLevels > 0 &&
777 ++Line->Level;
778 }
779
780 size_t PPStartHash = computePPHash();
781
782 const unsigned InitialLevel = Line->Level;
783 if (VerilogHierarchy) {
784 AddLevels += parseVerilogHierarchyHeader();
785 } else {
786 nextToken(/*LevelDifference=*/AddLevels);
787 HandleVerilogBlockLabel();
788 }
789
790 // Bail out if there are too many levels. Otherwise, the stack might overflow.
791 if (Line->Level > 300)
792 return nullptr;
793
794 if (MacroBlock && FormatTok->is(tok::l_paren))
795 parseParens();
796
797 size_t NbPreprocessorDirectives =
798 !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
799 addUnwrappedLine();
800 size_t OpeningLineIndex =
801 CurrentLines->empty()
803 : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
804
805 // Whitesmiths is weird here. The brace needs to be indented for the namespace
806 // block, but the block itself may not be indented depending on the style
807 // settings. This allows the format to back up one level in those cases.
808 if (UnindentWhitesmithsBraces)
809 --Line->Level;
810
811 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
812 MustBeDeclaration);
813 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
814 Line->Level += AddLevels;
815
816 FormatToken *IfLBrace = nullptr;
817 const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace);
818
819 if (eof())
820 return IfLBrace;
821
822 if (MacroBlock ? FormatTok->isNot(TT_MacroBlockEnd)
823 : FormatTok->isNot(tok::r_brace)) {
824 Line->Level = InitialLevel;
825 FormatTok->setBlockKind(BK_Block);
826 return IfLBrace;
827 }
828
829 if (FormatTok->is(tok::r_brace)) {
830 FormatTok->setBlockKind(BK_Block);
831 if (Tok->is(TT_NamespaceLBrace))
832 FormatTok->setFinalizedType(TT_NamespaceRBrace);
833 }
834
835 const bool IsFunctionRBrace =
836 FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);
837
838 auto RemoveBraces = [=]() mutable {
839 if (!SimpleBlock)
840 return false;
841 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
842 assert(FormatTok->is(tok::r_brace));
843 const bool WrappedOpeningBrace = !Tok->Previous;
844 if (WrappedOpeningBrace && FollowedByComment)
845 return false;
846 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
847 if (KeepBraces && !HasRequiredIfBraces)
848 return false;
849 if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
850 const FormatToken *Previous = Tokens->getPreviousToken();
851 assert(Previous);
852 if (Previous->is(tok::r_brace) && !Previous->Optional)
853 return false;
854 }
855 assert(!CurrentLines->empty());
856 auto &LastLine = CurrentLines->back();
857 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
858 return false;
859 if (Tok->is(TT_ElseLBrace))
860 return true;
861 if (WrappedOpeningBrace) {
862 assert(Index > 0);
863 --Index; // The line above the wrapped l_brace.
864 Tok = nullptr;
865 }
866 return mightFitOnOneLine((*CurrentLines)[Index], Tok);
867 };
868 if (RemoveBraces()) {
869 Tok->MatchingParen = FormatTok;
870 FormatTok->MatchingParen = Tok;
871 }
872
873 size_t PPEndHash = computePPHash();
874
875 // Munch the closing brace.
876 nextToken(/*LevelDifference=*/-AddLevels);
877
878 // When this is a function block and there is an unnecessary semicolon
879 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
880 // it later).
881 if (Style.RemoveSemicolon && IsFunctionRBrace) {
882 while (FormatTok->is(tok::semi)) {
883 FormatTok->Optional = true;
884 nextToken();
885 }
886 }
887
888 HandleVerilogBlockLabel();
889
890 if (MacroBlock && FormatTok->is(tok::l_paren))
891 parseParens();
892
893 Line->Level = InitialLevel;
894
895 if (FormatTok->is(tok::kw_noexcept)) {
896 // A noexcept in a requires expression.
897 nextToken();
898 }
899
900 if (FormatTok->is(tok::arrow)) {
901 // Following the } or noexcept we can find a trailing return type arrow
902 // as part of an implicit conversion constraint.
903 nextToken();
904 parseStructuralElement();
905 }
906
907 if (MunchSemi && FormatTok->is(tok::semi))
908 nextToken();
909
910 if (PPStartHash == PPEndHash) {
911 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
912 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
913 // Update the opening line to add the forward reference as well
914 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
915 CurrentLines->size() - 1;
916 }
917 }
918
919 return IfLBrace;
920}
921
922static bool isGoogScope(const UnwrappedLine &Line) {
923 // FIXME: Closure-library specific stuff should not be hard-coded but be
924 // configurable.
925 if (Line.Tokens.size() < 4)
926 return false;
927 auto I = Line.Tokens.begin();
928 if (I->Tok->TokenText != "goog")
929 return false;
930 ++I;
931 if (I->Tok->isNot(tok::period))
932 return false;
933 ++I;
934 if (I->Tok->TokenText != "scope")
935 return false;
936 ++I;
937 return I->Tok->is(tok::l_paren);
938}
939
940static bool isIIFE(const UnwrappedLine &Line,
941 const AdditionalKeywords &Keywords) {
942 // Look for the start of an immediately invoked anonymous function.
943 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
944 // This is commonly done in JavaScript to create a new, anonymous scope.
945 // Example: (function() { ... })()
946 if (Line.Tokens.size() < 3)
947 return false;
948 auto I = Line.Tokens.begin();
949 if (I->Tok->isNot(tok::l_paren))
950 return false;
951 ++I;
952 if (I->Tok->isNot(Keywords.kw_function))
953 return false;
954 ++I;
955 return I->Tok->is(tok::l_paren);
956}
957
958static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
959 const FormatToken &InitialToken) {
960 tok::TokenKind Kind = InitialToken.Tok.getKind();
961 if (InitialToken.is(TT_NamespaceMacro))
962 Kind = tok::kw_namespace;
963
964 switch (Kind) {
965 case tok::kw_namespace:
966 return Style.BraceWrapping.AfterNamespace;
967 case tok::kw_class:
968 return Style.BraceWrapping.AfterClass;
969 case tok::kw_union:
970 return Style.BraceWrapping.AfterUnion;
971 case tok::kw_struct:
972 return Style.BraceWrapping.AfterStruct;
973 case tok::kw_enum:
974 return Style.BraceWrapping.AfterEnum;
975 default:
976 return false;
977 }
978}
979
980void UnwrappedLineParser::parseChildBlock() {
981 assert(FormatTok->is(tok::l_brace));
982 FormatTok->setBlockKind(BK_Block);
983 const FormatToken *OpeningBrace = FormatTok;
984 nextToken();
985 {
986 bool SkipIndent = (Style.isJavaScript() &&
987 (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
988 ScopedLineState LineState(*this);
989 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
990 /*MustBeDeclaration=*/false);
991 Line->Level += SkipIndent ? 0 : 1;
992 parseLevel(OpeningBrace);
993 flushComments(isOnNewLine(*FormatTok));
994 Line->Level -= SkipIndent ? 0 : 1;
995 }
996 nextToken();
997}
998
999void UnwrappedLineParser::parsePPDirective() {
1000 assert(FormatTok->is(tok::hash) && "'#' expected");
1001 ScopedMacroState MacroState(*Line, Tokens, FormatTok);
1002
1003 nextToken();
1004
1005 if (!FormatTok->Tok.getIdentifierInfo()) {
1006 parsePPUnknown();
1007 return;
1008 }
1009
1010 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
1011 case tok::pp_define:
1012 parsePPDefine();
1013 return;
1014 case tok::pp_if:
1015 parsePPIf(/*IfDef=*/false);
1016 break;
1017 case tok::pp_ifdef:
1018 case tok::pp_ifndef:
1019 parsePPIf(/*IfDef=*/true);
1020 break;
1021 case tok::pp_else:
1022 case tok::pp_elifdef:
1023 case tok::pp_elifndef:
1024 case tok::pp_elif:
1025 parsePPElse();
1026 break;
1027 case tok::pp_endif:
1028 parsePPEndIf();
1029 break;
1030 case tok::pp_pragma:
1031 parsePPPragma();
1032 break;
1033 case tok::pp_error:
1034 case tok::pp_warning:
1035 nextToken();
1036 if (!eof() && Style.isCpp())
1037 FormatTok->setFinalizedType(TT_AfterPPDirective);
1038 [[fallthrough]];
1039 default:
1040 parsePPUnknown();
1041 break;
1042 }
1043}
1044
1045void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1046 size_t Line = CurrentLines->size();
1047 if (CurrentLines == &PreprocessorDirectives)
1048 Line += Lines.size();
1049
1050 if (Unreachable ||
1051 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1052 PPStack.push_back({PP_Unreachable, Line});
1053 } else {
1054 PPStack.push_back({PP_Conditional, Line});
1055 }
1056}
1057
1058void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1059 ++PPBranchLevel;
1060 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1061 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1062 PPLevelBranchIndex.push_back(0);
1063 PPLevelBranchCount.push_back(0);
1064 }
1065 PPChainBranchIndex.push(Unreachable ? -1 : 0);
1066 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1067 conditionalCompilationCondition(Unreachable || Skip);
1068}
1069
1070void UnwrappedLineParser::conditionalCompilationAlternative() {
1071 if (!PPStack.empty())
1072 PPStack.pop_back();
1073 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1074 if (!PPChainBranchIndex.empty())
1075 ++PPChainBranchIndex.top();
1076 conditionalCompilationCondition(
1077 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1078 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1079}
1080
1081void UnwrappedLineParser::conditionalCompilationEnd() {
1082 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1083 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1084 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1085 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1086 }
1087 // Guard against #endif's without #if.
1088 if (PPBranchLevel > -1)
1089 --PPBranchLevel;
1090 if (!PPChainBranchIndex.empty())
1091 PPChainBranchIndex.pop();
1092 if (!PPStack.empty())
1093 PPStack.pop_back();
1094}
1095
1096void UnwrappedLineParser::parsePPIf(bool IfDef) {
1097 bool IfNDef = FormatTok->is(tok::pp_ifndef);
1098 nextToken();
1099 bool Unreachable = false;
1100 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1101 Unreachable = true;
1102 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1103 Unreachable = true;
1104 conditionalCompilationStart(Unreachable);
1105 FormatToken *IfCondition = FormatTok;
1106 // If there's a #ifndef on the first line, and the only lines before it are
1107 // comments, it could be an include guard.
1108 bool MaybeIncludeGuard = IfNDef;
1109 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1110 for (auto &Line : Lines) {
1111 if (Line.Tokens.front().Tok->isNot(tok::comment)) {
1112 MaybeIncludeGuard = false;
1113 IncludeGuard = IG_Rejected;
1114 break;
1115 }
1116 }
1117 }
1118 --PPBranchLevel;
1119 parsePPUnknown();
1120 ++PPBranchLevel;
1121 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1122 IncludeGuard = IG_IfNdefed;
1123 IncludeGuardToken = IfCondition;
1124 }
1125}
1126
1127void UnwrappedLineParser::parsePPElse() {
1128 // If a potential include guard has an #else, it's not an include guard.
1129 if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1130 IncludeGuard = IG_Rejected;
1131 // Don't crash when there is an #else without an #if.
1132 assert(PPBranchLevel >= -1);
1133 if (PPBranchLevel == -1)
1134 conditionalCompilationStart(/*Unreachable=*/true);
1135 conditionalCompilationAlternative();
1136 --PPBranchLevel;
1137 parsePPUnknown();
1138 ++PPBranchLevel;
1139}
1140
1141void UnwrappedLineParser::parsePPEndIf() {
1142 conditionalCompilationEnd();
1143 parsePPUnknown();
1144 // If the #endif of a potential include guard is the last thing in the file,
1145 // then we found an include guard.
1146 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1148 IncludeGuard = IG_Found;
1149 }
1150}
1151
1152void UnwrappedLineParser::parsePPDefine() {
1153 nextToken();
1154
1155 if (!FormatTok->Tok.getIdentifierInfo()) {
1156 IncludeGuard = IG_Rejected;
1157 IncludeGuardToken = nullptr;
1158 parsePPUnknown();
1159 return;
1160 }
1161
1162 if (IncludeGuard == IG_IfNdefed &&
1163 IncludeGuardToken->TokenText == FormatTok->TokenText) {
1164 IncludeGuard = IG_Defined;
1165 IncludeGuardToken = nullptr;
1166 for (auto &Line : Lines) {
1167 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1168 IncludeGuard = IG_Rejected;
1169 break;
1170 }
1171 }
1172 }
1173
1174 // In the context of a define, even keywords should be treated as normal
1175 // identifiers. Setting the kind to identifier is not enough, because we need
1176 // to treat additional keywords like __except as well, which are already
1177 // identifiers. Setting the identifier info to null interferes with include
1178 // guard processing above, and changes preprocessing nesting.
1179 FormatTok->Tok.setKind(tok::identifier);
1181 nextToken();
1182 if (FormatTok->Tok.getKind() == tok::l_paren &&
1183 !FormatTok->hasWhitespaceBefore()) {
1184 parseParens();
1185 }
1187 Line->Level += PPBranchLevel + 1;
1188 addUnwrappedLine();
1189 ++Line->Level;
1190
1191 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
1192 assert((int)Line->PPLevel >= 0);
1193 Line->InMacroBody = true;
1194
1195 if (Style.SkipMacroDefinitionBody) {
1196 while (!eof()) {
1197 FormatTok->Finalized = true;
1198 FormatTok = Tokens->getNextToken();
1199 }
1200 addUnwrappedLine();
1201 return;
1202 }
1203
1204 // Errors during a preprocessor directive can only affect the layout of the
1205 // preprocessor directive, and thus we ignore them. An alternative approach
1206 // would be to use the same approach we use on the file level (no
1207 // re-indentation if there was a structural error) within the macro
1208 // definition.
1209 parseFile();
1210}
1211
1212void UnwrappedLineParser::parsePPPragma() {
1213 Line->InPragmaDirective = true;
1214 parsePPUnknown();
1215}
1216
1217void UnwrappedLineParser::parsePPUnknown() {
1218 while (!eof())
1219 nextToken();
1221 Line->Level += PPBranchLevel + 1;
1222 addUnwrappedLine();
1223}
1224
1225// Here we exclude certain tokens that are not usually the first token in an
1226// unwrapped line. This is used in attempt to distinguish macro calls without
1227// trailing semicolons from other constructs split to several lines.
1228static bool tokenCanStartNewLine(const FormatToken &Tok) {
1229 // Semicolon can be a null-statement, l_square can be a start of a macro or
1230 // a C++11 attribute, but this doesn't seem to be common.
1231 return !Tok.isOneOf(tok::semi, tok::l_brace,
1232 // Tokens that can only be used as binary operators and a
1233 // part of overloaded operator names.
1234 tok::period, tok::periodstar, tok::arrow, tok::arrowstar,
1235 tok::less, tok::greater, tok::slash, tok::percent,
1236 tok::lessless, tok::greatergreater, tok::equal,
1237 tok::plusequal, tok::minusequal, tok::starequal,
1238 tok::slashequal, tok::percentequal, tok::ampequal,
1239 tok::pipeequal, tok::caretequal, tok::greatergreaterequal,
1240 tok::lesslessequal,
1241 // Colon is used in labels, base class lists, initializer
1242 // lists, range-based for loops, ternary operator, but
1243 // should never be the first token in an unwrapped line.
1244 tok::colon,
1245 // 'noexcept' is a trailing annotation.
1246 tok::kw_noexcept);
1247}
1248
1249static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1250 const FormatToken *FormatTok) {
1251 // FIXME: This returns true for C/C++ keywords like 'struct'.
1252 return FormatTok->is(tok::identifier) &&
1253 (!FormatTok->Tok.getIdentifierInfo() ||
1254 !FormatTok->isOneOf(
1255 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1256 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1257 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1258 Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1259 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1260 Keywords.kw_instanceof, Keywords.kw_interface,
1261 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1262}
1263
1264static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1265 const FormatToken *FormatTok) {
1266 return FormatTok->Tok.isLiteral() ||
1267 FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1268 mustBeJSIdent(Keywords, FormatTok);
1269}
1270
1271// isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1272// when encountered after a value (see mustBeJSIdentOrValue).
1273static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1274 const FormatToken *FormatTok) {
1275 return FormatTok->isOneOf(
1276 tok::kw_return, Keywords.kw_yield,
1277 // conditionals
1278 tok::kw_if, tok::kw_else,
1279 // loops
1280 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1281 // switch/case
1282 tok::kw_switch, tok::kw_case,
1283 // exceptions
1284 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1285 // declaration
1286 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1287 Keywords.kw_async, Keywords.kw_function,
1288 // import/export
1289 Keywords.kw_import, tok::kw_export);
1290}
1291
1292// Checks whether a token is a type in K&R C (aka C78).
1293static bool isC78Type(const FormatToken &Tok) {
1294 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1295 tok::kw_unsigned, tok::kw_float, tok::kw_double,
1296 tok::identifier);
1297}
1298
1299// This function checks whether a token starts the first parameter declaration
1300// in a K&R C (aka C78) function definition, e.g.:
1301// int f(a, b)
1302// short a, b;
1303// {
1304// return a + b;
1305// }
1306static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1307 const FormatToken *FuncName) {
1308 assert(Tok);
1309 assert(Next);
1310 assert(FuncName);
1311
1312 if (FuncName->isNot(tok::identifier))
1313 return false;
1314
1315 const FormatToken *Prev = FuncName->Previous;
1316 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1317 return false;
1318
1319 if (!isC78Type(*Tok) &&
1320 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1321 return false;
1322 }
1323
1324 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1325 return false;
1326
1327 Tok = Tok->Previous;
1328 if (!Tok || Tok->isNot(tok::r_paren))
1329 return false;
1330
1331 Tok = Tok->Previous;
1332 if (!Tok || Tok->isNot(tok::identifier))
1333 return false;
1334
1335 return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1336}
1337
1338bool UnwrappedLineParser::parseModuleImport() {
1339 assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1340
1341 if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1342 !Token->Tok.getIdentifierInfo() &&
1343 !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
1344 return false;
1345 }
1346
1347 nextToken();
1348 while (!eof()) {
1349 if (FormatTok->is(tok::colon)) {
1350 FormatTok->setFinalizedType(TT_ModulePartitionColon);
1351 }
1352 // Handle import <foo/bar.h> as we would an include statement.
1353 else if (FormatTok->is(tok::less)) {
1354 nextToken();
1355 while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1356 // Mark tokens up to the trailing line comments as implicit string
1357 // literals.
1358 if (FormatTok->isNot(tok::comment) &&
1359 !FormatTok->TokenText.starts_with("//")) {
1360 FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1361 }
1362 nextToken();
1363 }
1364 }
1365 if (FormatTok->is(tok::semi)) {
1366 nextToken();
1367 break;
1368 }
1369 nextToken();
1370 }
1371
1372 addUnwrappedLine();
1373 return true;
1374}
1375
1376// readTokenWithJavaScriptASI reads the next token and terminates the current
1377// line if JavaScript Automatic Semicolon Insertion must
1378// happen between the current token and the next token.
1379//
1380// This method is conservative - it cannot cover all edge cases of JavaScript,
1381// but only aims to correctly handle certain well known cases. It *must not*
1382// return true in speculative cases.
1383void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1384 FormatToken *Previous = FormatTok;
1385 readToken();
1386 FormatToken *Next = FormatTok;
1387
1388 bool IsOnSameLine =
1389 CommentsBeforeNextToken.empty()
1390 ? Next->NewlinesBefore == 0
1391 : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1392 if (IsOnSameLine)
1393 return;
1394
1395 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1396 bool PreviousStartsTemplateExpr =
1397 Previous->is(TT_TemplateString) && Previous->TokenText.ends_with("${");
1398 if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1399 // If the line contains an '@' sign, the previous token might be an
1400 // annotation, which can precede another identifier/value.
1401 bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1402 return LineNode.Tok->is(tok::at);
1403 });
1404 if (HasAt)
1405 return;
1406 }
1407 if (Next->is(tok::exclaim) && PreviousMustBeValue)
1408 return addUnwrappedLine();
1409 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1410 bool NextEndsTemplateExpr =
1411 Next->is(TT_TemplateString) && Next->TokenText.starts_with("}");
1412 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1413 (PreviousMustBeValue ||
1414 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1415 tok::minusminus))) {
1416 return addUnwrappedLine();
1417 }
1418 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1419 isJSDeclOrStmt(Keywords, Next)) {
1420 return addUnwrappedLine();
1421 }
1422}
1423
1424void UnwrappedLineParser::parseStructuralElement(
1425 const FormatToken *OpeningBrace, IfStmtKind *IfKind,
1426 FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1427 if (Style.Language == FormatStyle::LK_TableGen &&
1428 FormatTok->is(tok::pp_include)) {
1429 nextToken();
1430 if (FormatTok->is(tok::string_literal))
1431 nextToken();
1432 addUnwrappedLine();
1433 return;
1434 }
1435
1436 if (IsCpp) {
1437 while (FormatTok->is(tok::l_square) && handleCppAttributes()) {
1438 }
1439 } else if (Style.isVerilog()) {
1440 if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
1441 parseForOrWhileLoop(/*HasParens=*/false);
1442 return;
1443 }
1444 if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
1445 parseForOrWhileLoop();
1446 return;
1447 }
1448 if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
1449 Keywords.kw_assume, Keywords.kw_cover)) {
1450 parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1451 return;
1452 }
1453
1454 // Skip things that can exist before keywords like 'if' and 'case'.
1455 while (true) {
1456 if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
1457 Keywords.kw_unique0)) {
1458 nextToken();
1459 } else if (FormatTok->is(tok::l_paren) &&
1460 Tokens->peekNextToken()->is(tok::star)) {
1461 parseParens();
1462 } else {
1463 break;
1464 }
1465 }
1466 }
1467
1468 // Tokens that only make sense at the beginning of a line.
1469 if (FormatTok->isAccessSpecifierKeyword()) {
1470 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1471 Style.isCSharp()) {
1472 nextToken();
1473 } else {
1474 parseAccessSpecifier();
1475 }
1476 return;
1477 }
1478 switch (FormatTok->Tok.getKind()) {
1479 case tok::kw_asm:
1480 nextToken();
1481 if (FormatTok->is(tok::l_brace)) {
1482 FormatTok->setFinalizedType(TT_InlineASMBrace);
1483 nextToken();
1484 while (FormatTok && !eof()) {
1485 if (FormatTok->is(tok::r_brace)) {
1486 FormatTok->setFinalizedType(TT_InlineASMBrace);
1487 nextToken();
1488 addUnwrappedLine();
1489 break;
1490 }
1491 FormatTok->Finalized = true;
1492 nextToken();
1493 }
1494 }
1495 break;
1496 case tok::kw_namespace:
1497 parseNamespace();
1498 return;
1499 case tok::kw_if: {
1500 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1501 // field/method declaration.
1502 break;
1503 }
1504 FormatToken *Tok = parseIfThenElse(IfKind);
1505 if (IfLeftBrace)
1506 *IfLeftBrace = Tok;
1507 return;
1508 }
1509 case tok::kw_for:
1510 case tok::kw_while:
1511 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1512 // field/method declaration.
1513 break;
1514 }
1515 parseForOrWhileLoop();
1516 return;
1517 case tok::kw_do:
1518 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1519 // field/method declaration.
1520 break;
1521 }
1522 parseDoWhile();
1523 if (HasDoWhile)
1524 *HasDoWhile = true;
1525 return;
1526 case tok::kw_switch:
1527 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1528 // 'switch: string' field declaration.
1529 break;
1530 }
1531 parseSwitch(/*IsExpr=*/false);
1532 return;
1533 case tok::kw_default: {
1534 // In Verilog default along with other labels are handled in the next loop.
1535 if (Style.isVerilog())
1536 break;
1537 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1538 // 'default: string' field declaration.
1539 break;
1540 }
1541 auto *Default = FormatTok;
1542 nextToken();
1543 if (FormatTok->is(tok::colon)) {
1544 FormatTok->setFinalizedType(TT_CaseLabelColon);
1545 parseLabel();
1546 return;
1547 }
1548 if (FormatTok->is(tok::arrow)) {
1549 FormatTok->setFinalizedType(TT_CaseLabelArrow);
1550 Default->setFinalizedType(TT_SwitchExpressionLabel);
1551 parseLabel();
1552 return;
1553 }
1554 // e.g. "default void f() {}" in a Java interface.
1555 break;
1556 }
1557 case tok::kw_case:
1558 // Proto: there are no switch/case statements.
1559 if (Style.Language == FormatStyle::LK_Proto) {
1560 nextToken();
1561 return;
1562 }
1563 if (Style.isVerilog()) {
1564 parseBlock();
1565 addUnwrappedLine();
1566 return;
1567 }
1568 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1569 // 'case: string' field declaration.
1570 nextToken();
1571 break;
1572 }
1573 parseCaseLabel();
1574 return;
1575 case tok::kw_goto:
1576 nextToken();
1577 if (FormatTok->is(tok::kw_case))
1578 nextToken();
1579 break;
1580 case tok::kw_try:
1581 case tok::kw___try:
1582 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1583 // field/method declaration.
1584 break;
1585 }
1586 parseTryCatch();
1587 return;
1588 case tok::kw_extern:
1589 nextToken();
1590 if (Style.isVerilog()) {
1591 // In Verilog and extern module declaration looks like a start of module.
1592 // But there is no body and endmodule. So we handle it separately.
1593 if (Keywords.isVerilogHierarchy(*FormatTok)) {
1594 parseVerilogHierarchyHeader();
1595 return;
1596 }
1597 } else if (FormatTok->is(tok::string_literal)) {
1598 nextToken();
1599 if (FormatTok->is(tok::l_brace)) {
1601 addUnwrappedLine();
1602 // Either we indent or for backwards compatibility we follow the
1603 // AfterExternBlock style.
1604 unsigned AddLevels =
1607 Style.IndentExternBlock ==
1609 ? 1u
1610 : 0u;
1611 parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1612 addUnwrappedLine();
1613 return;
1614 }
1615 }
1616 break;
1617 case tok::kw_export:
1618 if (Style.isJavaScript()) {
1619 parseJavaScriptEs6ImportExport();
1620 return;
1621 }
1622 if (IsCpp) {
1623 nextToken();
1624 if (FormatTok->is(tok::kw_namespace)) {
1625 parseNamespace();
1626 return;
1627 }
1628 if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
1629 return;
1630 }
1631 break;
1632 case tok::kw_inline:
1633 nextToken();
1634 if (FormatTok->is(tok::kw_namespace)) {
1635 parseNamespace();
1636 return;
1637 }
1638 break;
1639 case tok::identifier:
1640 if (FormatTok->is(TT_ForEachMacro)) {
1641 parseForOrWhileLoop();
1642 return;
1643 }
1644 if (FormatTok->is(TT_MacroBlockBegin)) {
1645 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1646 /*MunchSemi=*/false);
1647 return;
1648 }
1649 if (FormatTok->is(Keywords.kw_import)) {
1650 if (Style.isJavaScript()) {
1651 parseJavaScriptEs6ImportExport();
1652 return;
1653 }
1654 if (Style.Language == FormatStyle::LK_Proto) {
1655 nextToken();
1656 if (FormatTok->is(tok::kw_public))
1657 nextToken();
1658 if (FormatTok->isNot(tok::string_literal))
1659 return;
1660 nextToken();
1661 if (FormatTok->is(tok::semi))
1662 nextToken();
1663 addUnwrappedLine();
1664 return;
1665 }
1666 if (IsCpp && parseModuleImport())
1667 return;
1668 }
1669 if (IsCpp && FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1670 Keywords.kw_slots, Keywords.kw_qslots)) {
1671 nextToken();
1672 if (FormatTok->is(tok::colon)) {
1673 nextToken();
1674 addUnwrappedLine();
1675 return;
1676 }
1677 }
1678 if (IsCpp && FormatTok->is(TT_StatementMacro)) {
1679 parseStatementMacro();
1680 return;
1681 }
1682 if (IsCpp && FormatTok->is(TT_NamespaceMacro)) {
1683 parseNamespace();
1684 return;
1685 }
1686 // In Verilog labels can be any expression, so we don't do them here.
1687 // JS doesn't have macros, and within classes colons indicate fields, not
1688 // labels.
1689 // TableGen doesn't have labels.
1690 if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() &&
1691 Tokens->peekNextToken()->is(tok::colon) && !Line->MustBeDeclaration) {
1692 nextToken();
1693 if (!Line->InMacroBody || CurrentLines->size() > 1)
1694 Line->Tokens.begin()->Tok->MustBreakBefore = true;
1695 FormatTok->setFinalizedType(TT_GotoLabelColon);
1696 parseLabel(!Style.IndentGotoLabels);
1697 if (HasLabel)
1698 *HasLabel = true;
1699 return;
1700 }
1701 // In all other cases, parse the declaration.
1702 break;
1703 default:
1704 break;
1705 }
1706
1707 for (const bool InRequiresExpression =
1708 OpeningBrace && OpeningBrace->isOneOf(TT_RequiresExpressionLBrace,
1709 TT_CompoundRequirementLBrace);
1710 !eof();) {
1711 if (IsCpp && FormatTok->isCppAlternativeOperatorKeyword()) {
1712 if (auto *Next = Tokens->peekNextToken(/*SkipComment=*/true);
1713 Next && Next->isBinaryOperator()) {
1714 FormatTok->Tok.setKind(tok::identifier);
1715 }
1716 }
1717 const FormatToken *Previous = FormatTok->Previous;
1718 switch (FormatTok->Tok.getKind()) {
1719 case tok::at:
1720 nextToken();
1721 if (FormatTok->is(tok::l_brace)) {
1722 nextToken();
1723 parseBracedList();
1724 break;
1725 } else if (Style.Language == FormatStyle::LK_Java &&
1726 FormatTok->is(Keywords.kw_interface)) {
1727 nextToken();
1728 break;
1729 }
1730 switch (FormatTok->Tok.getObjCKeywordID()) {
1731 case tok::objc_public:
1732 case tok::objc_protected:
1733 case tok::objc_package:
1734 case tok::objc_private:
1735 return parseAccessSpecifier();
1736 case tok::objc_interface:
1737 case tok::objc_implementation:
1738 return parseObjCInterfaceOrImplementation();
1739 case tok::objc_protocol:
1740 if (parseObjCProtocol())
1741 return;
1742 break;
1743 case tok::objc_end:
1744 return; // Handled by the caller.
1745 case tok::objc_optional:
1746 case tok::objc_required:
1747 nextToken();
1748 addUnwrappedLine();
1749 return;
1750 case tok::objc_autoreleasepool:
1751 nextToken();
1752 if (FormatTok->is(tok::l_brace)) {
1755 addUnwrappedLine();
1756 }
1757 parseBlock();
1758 }
1759 addUnwrappedLine();
1760 return;
1761 case tok::objc_synchronized:
1762 nextToken();
1763 if (FormatTok->is(tok::l_paren)) {
1764 // Skip synchronization object
1765 parseParens();
1766 }
1767 if (FormatTok->is(tok::l_brace)) {
1770 addUnwrappedLine();
1771 }
1772 parseBlock();
1773 }
1774 addUnwrappedLine();
1775 return;
1776 case tok::objc_try:
1777 // This branch isn't strictly necessary (the kw_try case below would
1778 // do this too after the tok::at is parsed above). But be explicit.
1779 parseTryCatch();
1780 return;
1781 default:
1782 break;
1783 }
1784 break;
1785 case tok::kw_requires: {
1786 if (IsCpp) {
1787 bool ParsedClause = parseRequires();
1788 if (ParsedClause)
1789 return;
1790 } else {
1791 nextToken();
1792 }
1793 break;
1794 }
1795 case tok::kw_enum:
1796 // Ignore if this is part of "template <enum ..." or "... -> enum" or
1797 // "template <..., enum ...>".
1798 if (Previous && Previous->isOneOf(tok::less, tok::arrow, tok::comma)) {
1799 nextToken();
1800 break;
1801 }
1802
1803 // parseEnum falls through and does not yet add an unwrapped line as an
1804 // enum definition can start a structural element.
1805 if (!parseEnum())
1806 break;
1807 // This only applies to C++ and Verilog.
1808 if (!IsCpp && !Style.isVerilog()) {
1809 addUnwrappedLine();
1810 return;
1811 }
1812 break;
1813 case tok::kw_typedef:
1814 nextToken();
1815 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1816 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1817 Keywords.kw_CF_CLOSED_ENUM,
1818 Keywords.kw_NS_CLOSED_ENUM)) {
1819 parseEnum();
1820 }
1821 break;
1822 case tok::kw_class:
1823 if (Style.isVerilog()) {
1824 parseBlock();
1825 addUnwrappedLine();
1826 return;
1827 }
1828 if (Style.isTableGen()) {
1829 // Do nothing special. In this case the l_brace becomes FunctionLBrace.
1830 // This is same as def and so on.
1831 nextToken();
1832 break;
1833 }
1834 [[fallthrough]];
1835 case tok::kw_struct:
1836 case tok::kw_union:
1837 if (parseStructLike())
1838 return;
1839 break;
1840 case tok::kw_decltype:
1841 nextToken();
1842 if (FormatTok->is(tok::l_paren)) {
1843 parseParens();
1844 assert(FormatTok->Previous);
1845 if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto,
1846 tok::l_paren)) {
1847 Line->SeenDecltypeAuto = true;
1848 }
1849 }
1850 break;
1851 case tok::period:
1852 nextToken();
1853 // In Java, classes have an implicit static member "class".
1854 if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1855 FormatTok->is(tok::kw_class)) {
1856 nextToken();
1857 }
1858 if (Style.isJavaScript() && FormatTok &&
1859 FormatTok->Tok.getIdentifierInfo()) {
1860 // JavaScript only has pseudo keywords, all keywords are allowed to
1861 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1862 nextToken();
1863 }
1864 break;
1865 case tok::semi:
1866 nextToken();
1867 addUnwrappedLine();
1868 return;
1869 case tok::r_brace:
1870 addUnwrappedLine();
1871 return;
1872 case tok::l_paren: {
1873 parseParens();
1874 // Break the unwrapped line if a K&R C function definition has a parameter
1875 // declaration.
1876 if (OpeningBrace || !IsCpp || !Previous || eof())
1877 break;
1878 if (isC78ParameterDecl(FormatTok,
1879 Tokens->peekNextToken(/*SkipComment=*/true),
1880 Previous)) {
1881 addUnwrappedLine();
1882 return;
1883 }
1884 break;
1885 }
1886 case tok::kw_operator:
1887 nextToken();
1888 if (FormatTok->isBinaryOperator())
1889 nextToken();
1890 break;
1891 case tok::caret:
1892 nextToken();
1893 // Block return type.
1894 if (FormatTok->Tok.isAnyIdentifier() || FormatTok->isTypeName(LangOpts)) {
1895 nextToken();
1896 // Return types: pointers are ok too.
1897 while (FormatTok->is(tok::star))
1898 nextToken();
1899 }
1900 // Block argument list.
1901 if (FormatTok->is(tok::l_paren))
1902 parseParens();
1903 // Block body.
1904 if (FormatTok->is(tok::l_brace))
1905 parseChildBlock();
1906 break;
1907 case tok::l_brace:
1908 if (InRequiresExpression)
1909 FormatTok->setFinalizedType(TT_BracedListLBrace);
1910 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1911 IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
1912 // A block outside of parentheses must be the last part of a
1913 // structural element.
1914 // FIXME: Figure out cases where this is not true, and add projections
1915 // for them (the one we know is missing are lambdas).
1916 if (Style.Language == FormatStyle::LK_Java &&
1917 Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1918 // If necessary, we could set the type to something different than
1919 // TT_FunctionLBrace.
1922 addUnwrappedLine();
1923 }
1924 } else if (Style.BraceWrapping.AfterFunction) {
1925 addUnwrappedLine();
1926 }
1927 if (!Previous || Previous->isNot(TT_TypeDeclarationParen))
1928 FormatTok->setFinalizedType(TT_FunctionLBrace);
1929 parseBlock();
1930 IsDecltypeAutoFunction = false;
1931 addUnwrappedLine();
1932 return;
1933 }
1934 // Otherwise this was a braced init list, and the structural
1935 // element continues.
1936 break;
1937 case tok::kw_try:
1938 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1939 // field/method declaration.
1940 nextToken();
1941 break;
1942 }
1943 // We arrive here when parsing function-try blocks.
1944 if (Style.BraceWrapping.AfterFunction)
1945 addUnwrappedLine();
1946 parseTryCatch();
1947 return;
1948 case tok::identifier: {
1949 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1950 Line->MustBeDeclaration) {
1951 addUnwrappedLine();
1952 parseCSharpGenericTypeConstraint();
1953 break;
1954 }
1955 if (FormatTok->is(TT_MacroBlockEnd)) {
1956 addUnwrappedLine();
1957 return;
1958 }
1959
1960 // Function declarations (as opposed to function expressions) are parsed
1961 // on their own unwrapped line by continuing this loop. Function
1962 // expressions (functions that are not on their own line) must not create
1963 // a new unwrapped line, so they are special cased below.
1964 size_t TokenCount = Line->Tokens.size();
1965 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1966 (TokenCount > 1 ||
1967 (TokenCount == 1 &&
1968 Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) {
1969 tryToParseJSFunction();
1970 break;
1971 }
1972 if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1973 FormatTok->is(Keywords.kw_interface)) {
1974 if (Style.isJavaScript()) {
1975 // In JavaScript/TypeScript, "interface" can be used as a standalone
1976 // identifier, e.g. in `var interface = 1;`. If "interface" is
1977 // followed by another identifier, it is very like to be an actual
1978 // interface declaration.
1979 unsigned StoredPosition = Tokens->getPosition();
1980 FormatToken *Next = Tokens->getNextToken();
1981 FormatTok = Tokens->setPosition(StoredPosition);
1982 if (!mustBeJSIdent(Keywords, Next)) {
1983 nextToken();
1984 break;
1985 }
1986 }
1987 parseRecord();
1988 addUnwrappedLine();
1989 return;
1990 }
1991
1992 if (Style.isVerilog()) {
1993 if (FormatTok->is(Keywords.kw_table)) {
1994 parseVerilogTable();
1995 return;
1996 }
1997 if (Keywords.isVerilogBegin(*FormatTok) ||
1998 Keywords.isVerilogHierarchy(*FormatTok)) {
1999 parseBlock();
2000 addUnwrappedLine();
2001 return;
2002 }
2003 }
2004
2005 if (!IsCpp && FormatTok->is(Keywords.kw_interface)) {
2006 if (parseStructLike())
2007 return;
2008 break;
2009 }
2010
2011 if (IsCpp && FormatTok->is(TT_StatementMacro)) {
2012 parseStatementMacro();
2013 return;
2014 }
2015
2016 // See if the following token should start a new unwrapped line.
2017 StringRef Text = FormatTok->TokenText;
2018
2019 FormatToken *PreviousToken = FormatTok;
2020 nextToken();
2021
2022 // JS doesn't have macros, and within classes colons indicate fields, not
2023 // labels.
2024 if (Style.isJavaScript())
2025 break;
2026
2027 auto OneTokenSoFar = [&]() {
2028 auto I = Line->Tokens.begin(), E = Line->Tokens.end();
2029 while (I != E && I->Tok->is(tok::comment))
2030 ++I;
2031 if (Style.isVerilog())
2032 while (I != E && I->Tok->is(tok::hash))
2033 ++I;
2034 return I != E && (++I == E);
2035 };
2036 if (OneTokenSoFar()) {
2037 // Recognize function-like macro usages without trailing semicolon as
2038 // well as free-standing macros like Q_OBJECT.
2039 bool FunctionLike = FormatTok->is(tok::l_paren);
2040 if (FunctionLike)
2041 parseParens();
2042
2043 bool FollowedByNewline =
2044 CommentsBeforeNextToken.empty()
2045 ? FormatTok->NewlinesBefore > 0
2046 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
2047
2048 if (FollowedByNewline &&
2049 (Text.size() >= 5 ||
2050 (FunctionLike && FormatTok->isNot(tok::l_paren))) &&
2051 tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
2052 if (PreviousToken->isNot(TT_UntouchableMacroFunc))
2053 PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
2054 addUnwrappedLine();
2055 return;
2056 }
2057 }
2058 break;
2059 }
2060 case tok::equal:
2061 if ((Style.isJavaScript() || Style.isCSharp()) &&
2062 FormatTok->is(TT_FatArrow)) {
2063 tryToParseChildBlock();
2064 break;
2065 }
2066
2067 nextToken();
2068 if (FormatTok->is(tok::l_brace)) {
2069 // Block kind should probably be set to BK_BracedInit for any language.
2070 // C# needs this change to ensure that array initialisers and object
2071 // initialisers are indented the same way.
2072 if (Style.isCSharp())
2073 FormatTok->setBlockKind(BK_BracedInit);
2074 // TableGen's defset statement has syntax of the form,
2075 // `defset <type> <name> = { <statement>... }`
2076 if (Style.isTableGen() &&
2077 Line->Tokens.begin()->Tok->is(Keywords.kw_defset)) {
2078 FormatTok->setFinalizedType(TT_FunctionLBrace);
2079 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2080 /*MunchSemi=*/false);
2081 addUnwrappedLine();
2082 break;
2083 }
2084 nextToken();
2085 parseBracedList();
2086 } else if (Style.Language == FormatStyle::LK_Proto &&
2087 FormatTok->is(tok::less)) {
2088 nextToken();
2089 parseBracedList(/*IsAngleBracket=*/true);
2090 }
2091 break;
2092 case tok::l_square:
2093 parseSquare();
2094 break;
2095 case tok::kw_new:
2096 parseNew();
2097 break;
2098 case tok::kw_switch:
2099 if (Style.Language == FormatStyle::LK_Java)
2100 parseSwitch(/*IsExpr=*/true);
2101 else
2102 nextToken();
2103 break;
2104 case tok::kw_case:
2105 // Proto: there are no switch/case statements.
2106 if (Style.Language == FormatStyle::LK_Proto) {
2107 nextToken();
2108 return;
2109 }
2110 // In Verilog switch is called case.
2111 if (Style.isVerilog()) {
2112 parseBlock();
2113 addUnwrappedLine();
2114 return;
2115 }
2116 if (Style.isJavaScript() && Line->MustBeDeclaration) {
2117 // 'case: string' field declaration.
2118 nextToken();
2119 break;
2120 }
2121 parseCaseLabel();
2122 break;
2123 case tok::kw_default:
2124 nextToken();
2125 if (Style.isVerilog()) {
2126 if (FormatTok->is(tok::colon)) {
2127 // The label will be handled in the next iteration.
2128 break;
2129 }
2130 if (FormatTok->is(Keywords.kw_clocking)) {
2131 // A default clocking block.
2132 parseBlock();
2133 addUnwrappedLine();
2134 return;
2135 }
2136 parseVerilogCaseLabel();
2137 return;
2138 }
2139 break;
2140 case tok::colon:
2141 nextToken();
2142 if (Style.isVerilog()) {
2143 parseVerilogCaseLabel();
2144 return;
2145 }
2146 break;
2147 case tok::greater:
2148 nextToken();
2149 if (FormatTok->is(tok::l_brace))
2150 FormatTok->Previous->setFinalizedType(TT_TemplateCloser);
2151 break;
2152 default:
2153 nextToken();
2154 break;
2155 }
2156 }
2157}
2158
2159bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2160 assert(FormatTok->is(tok::l_brace));
2161 if (!Style.isCSharp())
2162 return false;
2163 // See if it's a property accessor.
2164 if (!FormatTok->Previous || FormatTok->Previous->isNot(tok::identifier))
2165 return false;
2166
2167 // See if we are inside a property accessor.
2168 //
2169 // Record the current tokenPosition so that we can advance and
2170 // reset the current token. `Next` is not set yet so we need
2171 // another way to advance along the token stream.
2172 unsigned int StoredPosition = Tokens->getPosition();
2173 FormatToken *Tok = Tokens->getNextToken();
2174
2175 // A trivial property accessor is of the form:
2176 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2177 // Track these as they do not require line breaks to be introduced.
2178 bool HasSpecialAccessor = false;
2179 bool IsTrivialPropertyAccessor = true;
2180 bool HasAttribute = false;
2181 while (!eof()) {
2182 if (const bool IsAccessorKeyword =
2183 Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set);
2184 IsAccessorKeyword || Tok->isAccessSpecifierKeyword() ||
2185 Tok->isOneOf(tok::l_square, tok::semi, Keywords.kw_internal)) {
2186 if (IsAccessorKeyword)
2187 HasSpecialAccessor = true;
2188 else if (Tok->is(tok::l_square))
2189 HasAttribute = true;
2190 Tok = Tokens->getNextToken();
2191 continue;
2192 }
2193 if (Tok->isNot(tok::r_brace))
2194 IsTrivialPropertyAccessor = false;
2195 break;
2196 }
2197
2198 if (!HasSpecialAccessor || HasAttribute) {
2199 Tokens->setPosition(StoredPosition);
2200 return false;
2201 }
2202
2203 // Try to parse the property accessor:
2204 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2205 Tokens->setPosition(StoredPosition);
2206 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2207 addUnwrappedLine();
2208 nextToken();
2209 do {
2210 switch (FormatTok->Tok.getKind()) {
2211 case tok::r_brace:
2212 nextToken();
2213 if (FormatTok->is(tok::equal)) {
2214 while (!eof() && FormatTok->isNot(tok::semi))
2215 nextToken();
2216 nextToken();
2217 }
2218 addUnwrappedLine();
2219 return true;
2220 case tok::l_brace:
2221 ++Line->Level;
2222 parseBlock(/*MustBeDeclaration=*/true);
2223 addUnwrappedLine();
2224 --Line->Level;
2225 break;
2226 case tok::equal:
2227 if (FormatTok->is(TT_FatArrow)) {
2228 ++Line->Level;
2229 do {
2230 nextToken();
2231 } while (!eof() && FormatTok->isNot(tok::semi));
2232 nextToken();
2233 addUnwrappedLine();
2234 --Line->Level;
2235 break;
2236 }
2237 nextToken();
2238 break;
2239 default:
2240 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2241 Keywords.kw_set) &&
2242 !IsTrivialPropertyAccessor) {
2243 // Non-trivial get/set needs to be on its own line.
2244 addUnwrappedLine();
2245 }
2246 nextToken();
2247 }
2248 } while (!eof());
2249
2250 // Unreachable for well-formed code (paired '{' and '}').
2251 return true;
2252}
2253
2254bool UnwrappedLineParser::tryToParseLambda() {
2255 assert(FormatTok->is(tok::l_square));
2256 if (!IsCpp) {
2257 nextToken();
2258 return false;
2259 }
2260 FormatToken &LSquare = *FormatTok;
2261 if (!tryToParseLambdaIntroducer())
2262 return false;
2263
2264 bool SeenArrow = false;
2265 bool InTemplateParameterList = false;
2266
2267 while (FormatTok->isNot(tok::l_brace)) {
2268 if (FormatTok->isTypeName(LangOpts) || FormatTok->isAttribute()) {
2269 nextToken();
2270 continue;
2271 }
2272 switch (FormatTok->Tok.getKind()) {
2273 case tok::l_brace:
2274 break;
2275 case tok::l_paren:
2276 parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
2277 break;
2278 case tok::l_square:
2279 parseSquare();
2280 break;
2281 case tok::less:
2282 assert(FormatTok->Previous);
2283 if (FormatTok->Previous->is(tok::r_square))
2284 InTemplateParameterList = true;
2285 nextToken();
2286 break;
2287 case tok::kw_auto:
2288 case tok::kw_class:
2289 case tok::kw_struct:
2290 case tok::kw_union:
2291 case tok::kw_template:
2292 case tok::kw_typename:
2293 case tok::amp:
2294 case tok::star:
2295 case tok::kw_const:
2296 case tok::kw_constexpr:
2297 case tok::kw_consteval:
2298 case tok::comma:
2299 case tok::greater:
2300 case tok::identifier:
2301 case tok::numeric_constant:
2302 case tok::coloncolon:
2303 case tok::kw_mutable:
2304 case tok::kw_noexcept:
2305 case tok::kw_static:
2306 nextToken();
2307 break;
2308 // Specialization of a template with an integer parameter can contain
2309 // arithmetic, logical, comparison and ternary operators.
2310 //
2311 // FIXME: This also accepts sequences of operators that are not in the scope
2312 // of a template argument list.
2313 //
2314 // In a C++ lambda a template type can only occur after an arrow. We use
2315 // this as an heuristic to distinguish between Objective-C expressions
2316 // followed by an `a->b` expression, such as:
2317 // ([obj func:arg] + a->b)
2318 // Otherwise the code below would parse as a lambda.
2319 case tok::plus:
2320 case tok::minus:
2321 case tok::exclaim:
2322 case tok::tilde:
2323 case tok::slash:
2324 case tok::percent:
2325 case tok::lessless:
2326 case tok::pipe:
2327 case tok::pipepipe:
2328 case tok::ampamp:
2329 case tok::caret:
2330 case tok::equalequal:
2331 case tok::exclaimequal:
2332 case tok::greaterequal:
2333 case tok::lessequal:
2334 case tok::question:
2335 case tok::colon:
2336 case tok::ellipsis:
2337 case tok::kw_true:
2338 case tok::kw_false:
2339 if (SeenArrow || InTemplateParameterList) {
2340 nextToken();
2341 break;
2342 }
2343 return true;
2344 case tok::arrow:
2345 // This might or might not actually be a lambda arrow (this could be an
2346 // ObjC method invocation followed by a dereferencing arrow). We might
2347 // reset this back to TT_Unknown in TokenAnnotator.
2348 FormatTok->setFinalizedType(TT_LambdaArrow);
2349 SeenArrow = true;
2350 nextToken();
2351 break;
2352 case tok::kw_requires: {
2353 auto *RequiresToken = FormatTok;
2354 nextToken();
2355 parseRequiresClause(RequiresToken);
2356 break;
2357 }
2358 case tok::equal:
2359 if (!InTemplateParameterList)
2360 return true;
2361 nextToken();
2362 break;
2363 default:
2364 return true;
2365 }
2366 }
2367
2368 FormatTok->setFinalizedType(TT_LambdaLBrace);
2369 LSquare.setFinalizedType(TT_LambdaLSquare);
2370
2371 NestedLambdas.push_back(Line->SeenDecltypeAuto);
2372 parseChildBlock();
2373 assert(!NestedLambdas.empty());
2374 NestedLambdas.pop_back();
2375
2376 return true;
2377}
2378
2379bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2380 const FormatToken *Previous = FormatTok->Previous;
2381 const FormatToken *LeftSquare = FormatTok;
2382 nextToken();
2383 if ((Previous && ((Previous->Tok.getIdentifierInfo() &&
2384 !Previous->isOneOf(tok::kw_return, tok::kw_co_await,
2385 tok::kw_co_yield, tok::kw_co_return)) ||
2386 Previous->closesScope())) ||
2387 LeftSquare->isCppStructuredBinding(IsCpp)) {
2388 return false;
2389 }
2390 if (FormatTok->is(tok::l_square) || tok::isLiteral(FormatTok->Tok.getKind()))
2391 return false;
2392 if (FormatTok->is(tok::r_square)) {
2393 const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2394 if (Next->is(tok::greater))
2395 return false;
2396 }
2397 parseSquare(/*LambdaIntroducer=*/true);
2398 return true;
2399}
2400
2401void UnwrappedLineParser::tryToParseJSFunction() {
2402 assert(FormatTok->is(Keywords.kw_function));
2403 if (FormatTok->is(Keywords.kw_async))
2404 nextToken();
2405 // Consume "function".
2406 nextToken();
2407
2408 // Consume * (generator function). Treat it like C++'s overloaded operators.
2409 if (FormatTok->is(tok::star)) {
2410 FormatTok->setFinalizedType(TT_OverloadedOperator);
2411 nextToken();
2412 }
2413
2414 // Consume function name.
2415 if (FormatTok->is(tok::identifier))
2416 nextToken();
2417
2418 if (FormatTok->isNot(tok::l_paren))
2419 return;
2420
2421 // Parse formal parameter list.
2422 parseParens();
2423
2424 if (FormatTok->is(tok::colon)) {
2425 // Parse a type definition.
2426 nextToken();
2427
2428 // Eat the type declaration. For braced inline object types, balance braces,
2429 // otherwise just parse until finding an l_brace for the function body.
2430 if (FormatTok->is(tok::l_brace))
2431 tryToParseBracedList();
2432 else
2433 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2434 nextToken();
2435 }
2436
2437 if (FormatTok->is(tok::semi))
2438 return;
2439
2440 parseChildBlock();
2441}
2442
2443bool UnwrappedLineParser::tryToParseBracedList() {
2444 if (FormatTok->is(BK_Unknown))
2445 calculateBraceTypes();
2446 assert(FormatTok->isNot(BK_Unknown));
2447 if (FormatTok->is(BK_Block))
2448 return false;
2449 nextToken();
2450 parseBracedList();
2451 return true;
2452}
2453
2454bool UnwrappedLineParser::tryToParseChildBlock() {
2455 assert(Style.isJavaScript() || Style.isCSharp());
2456 assert(FormatTok->is(TT_FatArrow));
2457 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2458 // They always start an expression or a child block if followed by a curly
2459 // brace.
2460 nextToken();
2461 if (FormatTok->isNot(tok::l_brace))
2462 return false;
2463 parseChildBlock();
2464 return true;
2465}
2466
2467bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) {
2468 assert(!IsAngleBracket || !IsEnum);
2469 bool HasError = false;
2470
2471 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2472 // replace this by using parseAssignmentExpression() inside.
2473 do {
2474 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2475 tryToParseChildBlock()) {
2476 continue;
2477 }
2478 if (Style.isJavaScript()) {
2479 if (FormatTok->is(Keywords.kw_function)) {
2480 tryToParseJSFunction();
2481 continue;
2482 }
2483 if (FormatTok->is(tok::l_brace)) {
2484 // Could be a method inside of a braced list `{a() { return 1; }}`.
2485 if (tryToParseBracedList())
2486 continue;
2487 parseChildBlock();
2488 }
2489 }
2490 if (FormatTok->is(IsAngleBracket ? tok::greater : tok::r_brace)) {
2491 if (IsEnum) {
2492 FormatTok->setBlockKind(BK_Block);
2494 addUnwrappedLine();
2495 }
2496 nextToken();
2497 return !HasError;
2498 }
2499 switch (FormatTok->Tok.getKind()) {
2500 case tok::l_square:
2501 if (Style.isCSharp())
2502 parseSquare();
2503 else
2504 tryToParseLambda();
2505 break;
2506 case tok::l_paren:
2507 parseParens();
2508 // JavaScript can just have free standing methods and getters/setters in
2509 // object literals. Detect them by a "{" following ")".
2510 if (Style.isJavaScript()) {
2511 if (FormatTok->is(tok::l_brace))
2512 parseChildBlock();
2513 break;
2514 }
2515 break;
2516 case tok::l_brace:
2517 // Assume there are no blocks inside a braced init list apart
2518 // from the ones we explicitly parse out (like lambdas).
2519 FormatTok->setBlockKind(BK_BracedInit);
2520 if (!IsAngleBracket) {
2521 auto *Prev = FormatTok->Previous;
2522 if (Prev && Prev->is(tok::greater))
2523 Prev->setFinalizedType(TT_TemplateCloser);
2524 }
2525 nextToken();
2526 parseBracedList();
2527 break;
2528 case tok::less:
2529 nextToken();
2530 if (IsAngleBracket)
2531 parseBracedList(/*IsAngleBracket=*/true);
2532 break;
2533 case tok::semi:
2534 // JavaScript (or more precisely TypeScript) can have semicolons in braced
2535 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2536 // used for error recovery if we have otherwise determined that this is
2537 // a braced list.
2538 if (Style.isJavaScript()) {
2539 nextToken();
2540 break;
2541 }
2542 HasError = true;
2543 if (!IsEnum)
2544 return false;
2545 nextToken();
2546 break;
2547 case tok::comma:
2548 nextToken();
2549 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2550 addUnwrappedLine();
2551 break;
2552 default:
2553 nextToken();
2554 break;
2555 }
2556 } while (!eof());
2557 return false;
2558}
2559
2560/// \brief Parses a pair of parentheses (and everything between them).
2561/// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2562/// double ampersands. This applies for all nested scopes as well.
2563///
2564/// Returns whether there is a `=` token between the parentheses.
2565bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2566 assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2567 auto *LeftParen = FormatTok;
2568 bool SeenComma = false;
2569 bool SeenEqual = false;
2570 bool MightBeFoldExpr = false;
2571 const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace);
2572 nextToken();
2573 do {
2574 switch (FormatTok->Tok.getKind()) {
2575 case tok::l_paren:
2576 if (parseParens(AmpAmpTokenType))
2577 SeenEqual = true;
2578 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2579 parseChildBlock();
2580 break;
2581 case tok::r_paren: {
2582 auto *Prev = LeftParen->Previous;
2583 if (!MightBeStmtExpr && !MightBeFoldExpr && !Line->InMacroBody &&
2585 const auto *Next = Tokens->peekNextToken();
2586 const bool DoubleParens =
2587 Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren);
2588 const bool CommaSeparated =
2589 !DoubleParens && Prev && Prev->isOneOf(tok::l_paren, tok::comma) &&
2590 Next && Next->isOneOf(tok::comma, tok::r_paren);
2591 const auto *PrevPrev = Prev ? Prev->getPreviousNonComment() : nullptr;
2592 const bool Excluded =
2593 PrevPrev &&
2594 (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) ||
2595 SeenComma ||
2596 (SeenEqual &&
2597 (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) ||
2598 PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if))));
2599 const bool ReturnParens =
2601 ((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
2602 (!NestedLambdas.empty() && !NestedLambdas.back())) &&
2603 Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next &&
2604 Next->is(tok::semi);
2605 if ((DoubleParens && !Excluded) || (CommaSeparated && !SeenComma) ||
2606 ReturnParens) {
2607 LeftParen->Optional = true;
2608 FormatTok->Optional = true;
2609 }
2610 }
2611 if (Prev) {
2612 if (Prev->is(TT_TypenameMacro)) {
2613 LeftParen->setFinalizedType(TT_TypeDeclarationParen);
2614 FormatTok->setFinalizedType(TT_TypeDeclarationParen);
2615 } else if (Prev->is(tok::greater) && FormatTok->Previous == LeftParen) {
2616 Prev->setFinalizedType(TT_TemplateCloser);
2617 }
2618 }
2619 nextToken();
2620 return SeenEqual;
2621 }
2622 case tok::r_brace:
2623 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2624 return SeenEqual;
2625 case tok::l_square:
2626 tryToParseLambda();
2627 break;
2628 case tok::l_brace:
2629 if (!tryToParseBracedList())
2630 parseChildBlock();
2631 break;
2632 case tok::at:
2633 nextToken();
2634 if (FormatTok->is(tok::l_brace)) {
2635 nextToken();
2636 parseBracedList();
2637 }
2638 break;
2639 case tok::comma:
2640 SeenComma = true;
2641 nextToken();
2642 break;
2643 case tok::ellipsis:
2644 MightBeFoldExpr = true;
2645 nextToken();
2646 break;
2647 case tok::equal:
2648 SeenEqual = true;
2649 if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2650 tryToParseChildBlock();
2651 else
2652 nextToken();
2653 break;
2654 case tok::kw_class:
2655 if (Style.isJavaScript())
2656 parseRecord(/*ParseAsExpr=*/true);
2657 else
2658 nextToken();
2659 break;
2660 case tok::identifier:
2661 if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function)))
2662 tryToParseJSFunction();
2663 else
2664 nextToken();
2665 break;
2666 case tok::kw_switch:
2667 if (Style.Language == FormatStyle::LK_Java)
2668 parseSwitch(/*IsExpr=*/true);
2669 else
2670 nextToken();
2671 break;
2672 case tok::kw_requires: {
2673 auto RequiresToken = FormatTok;
2674 nextToken();
2675 parseRequiresExpression(RequiresToken);
2676 break;
2677 }
2678 case tok::ampamp:
2679 if (AmpAmpTokenType != TT_Unknown)
2680 FormatTok->setFinalizedType(AmpAmpTokenType);
2681 [[fallthrough]];
2682 default:
2683 nextToken();
2684 break;
2685 }
2686 } while (!eof());
2687 return SeenEqual;
2688}
2689
2690void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2691 if (!LambdaIntroducer) {
2692 assert(FormatTok->is(tok::l_square) && "'[' expected.");
2693 if (tryToParseLambda())
2694 return;
2695 }
2696 do {
2697 switch (FormatTok->Tok.getKind()) {
2698 case tok::l_paren:
2699 parseParens();
2700 break;
2701 case tok::r_square:
2702 nextToken();
2703 return;
2704 case tok::r_brace:
2705 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2706 return;
2707 case tok::l_square:
2708 parseSquare();
2709 break;
2710 case tok::l_brace: {
2711 if (!tryToParseBracedList())
2712 parseChildBlock();
2713 break;
2714 }
2715 case tok::at:
2716 case tok::colon:
2717 nextToken();
2718 if (FormatTok->is(tok::l_brace)) {
2719 nextToken();
2720 parseBracedList();
2721 }
2722 break;
2723 default:
2724 nextToken();
2725 break;
2726 }
2727 } while (!eof());
2728}
2729
2730void UnwrappedLineParser::keepAncestorBraces() {
2731 if (!Style.RemoveBracesLLVM)
2732 return;
2733
2734 const int MaxNestingLevels = 2;
2735 const int Size = NestedTooDeep.size();
2736 if (Size >= MaxNestingLevels)
2737 NestedTooDeep[Size - MaxNestingLevels] = true;
2738 NestedTooDeep.push_back(false);
2739}
2740
2742 for (const auto &Token : llvm::reverse(Line.Tokens))
2743 if (Token.Tok->isNot(tok::comment))
2744 return Token.Tok;
2745
2746 return nullptr;
2747}
2748
2749void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2750 FormatToken *Tok = nullptr;
2751
2752 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2753 PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) {
2755 ? getLastNonComment(*Line)
2756 : Line->Tokens.back().Tok;
2757 assert(Tok);
2758 if (Tok->BraceCount < 0) {
2759 assert(Tok->BraceCount == -1);
2760 Tok = nullptr;
2761 } else {
2762 Tok->BraceCount = -1;
2763 }
2764 }
2765
2766 addUnwrappedLine();
2767 ++Line->Level;
2768 ++Line->UnbracedBodyLevel;
2769 parseStructuralElement();
2770 --Line->UnbracedBodyLevel;
2771
2772 if (Tok) {
2773 assert(!Line->InPPDirective);
2774 Tok = nullptr;
2775 for (const auto &L : llvm::reverse(*CurrentLines)) {
2776 if (!L.InPPDirective && getLastNonComment(L)) {
2777 Tok = L.Tokens.back().Tok;
2778 break;
2779 }
2780 }
2781 assert(Tok);
2782 ++Tok->BraceCount;
2783 }
2784
2785 if (CheckEOF && eof())
2786 addUnwrappedLine();
2787
2788 --Line->Level;
2789}
2790
2791static void markOptionalBraces(FormatToken *LeftBrace) {
2792 if (!LeftBrace)
2793 return;
2794
2795 assert(LeftBrace->is(tok::l_brace));
2796
2797 FormatToken *RightBrace = LeftBrace->MatchingParen;
2798 if (!RightBrace) {
2799 assert(!LeftBrace->Optional);
2800 return;
2801 }
2802
2803 assert(RightBrace->is(tok::r_brace));
2804 assert(RightBrace->MatchingParen == LeftBrace);
2805 assert(LeftBrace->Optional == RightBrace->Optional);
2806
2807 LeftBrace->Optional = true;
2808 RightBrace->Optional = true;
2809}
2810
2811void UnwrappedLineParser::handleAttributes() {
2812 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2813 if (FormatTok->isAttribute())
2814 nextToken();
2815 else if (FormatTok->is(tok::l_square))
2816 handleCppAttributes();
2817}
2818
2819bool UnwrappedLineParser::handleCppAttributes() {
2820 // Handle [[likely]] / [[unlikely]] attributes.
2821 assert(FormatTok->is(tok::l_square));
2822 if (!tryToParseSimpleAttribute())
2823 return false;
2824 parseSquare();
2825 return true;
2826}
2827
2828/// Returns whether \c Tok begins a block.
2829bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2830 // FIXME: rename the function or make
2831 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2832 return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2833 : Tok.is(tok::l_brace);
2834}
2835
2836FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2837 bool KeepBraces,
2838 bool IsVerilogAssert) {
2839 assert((FormatTok->is(tok::kw_if) ||
2840 (Style.isVerilog() &&
2841 FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
2842 Keywords.kw_assume, Keywords.kw_cover))) &&
2843 "'if' expected");
2844 nextToken();
2845
2846 if (IsVerilogAssert) {
2847 // Handle `assert #0` and `assert final`.
2848 if (FormatTok->is(Keywords.kw_verilogHash)) {
2849 nextToken();
2850 if (FormatTok->is(tok::numeric_constant))
2851 nextToken();
2852 } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
2853 Keywords.kw_sequence)) {
2854 nextToken();
2855 }
2856 }
2857
2858 // TableGen's if statement has the form of `if <cond> then { ... }`.
2859 if (Style.isTableGen()) {
2860 while (!eof() && FormatTok->isNot(Keywords.kw_then)) {
2861 // Simply skip until then. This range only contains a value.
2862 nextToken();
2863 }
2864 }
2865
2866 // Handle `if !consteval`.
2867 if (FormatTok->is(tok::exclaim))
2868 nextToken();
2869
2870 bool KeepIfBraces = true;
2871 if (FormatTok->is(tok::kw_consteval)) {
2872 nextToken();
2873 } else {
2874 KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
2875 if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
2876 nextToken();
2877 if (FormatTok->is(tok::l_paren)) {
2878 FormatTok->setFinalizedType(TT_ConditionLParen);
2879 parseParens();
2880 }
2881 }
2882 handleAttributes();
2883 // The then action is optional in Verilog assert statements.
2884 if (IsVerilogAssert && FormatTok->is(tok::semi)) {
2885 nextToken();
2886 addUnwrappedLine();
2887 return nullptr;
2888 }
2889
2890 bool NeedsUnwrappedLine = false;
2891 keepAncestorBraces();
2892
2893 FormatToken *IfLeftBrace = nullptr;
2894 IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2895
2896 if (isBlockBegin(*FormatTok)) {
2897 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2898 IfLeftBrace = FormatTok;
2899 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2900 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2901 /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
2902 setPreviousRBraceType(TT_ControlStatementRBrace);
2903 if (Style.BraceWrapping.BeforeElse)
2904 addUnwrappedLine();
2905 else
2906 NeedsUnwrappedLine = true;
2907 } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
2908 addUnwrappedLine();
2909 } else {
2910 parseUnbracedBody();
2911 }
2912
2913 if (Style.RemoveBracesLLVM) {
2914 assert(!NestedTooDeep.empty());
2915 KeepIfBraces = KeepIfBraces ||
2916 (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2917 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2918 IfBlockKind == IfStmtKind::IfElseIf;
2919 }
2920
2921 bool KeepElseBraces = KeepIfBraces;
2922 FormatToken *ElseLeftBrace = nullptr;
2923 IfStmtKind Kind = IfStmtKind::IfOnly;
2924
2925 if (FormatTok->is(tok::kw_else)) {
2926 if (Style.RemoveBracesLLVM) {
2927 NestedTooDeep.back() = false;
2928 Kind = IfStmtKind::IfElse;
2929 }
2930 nextToken();
2931 handleAttributes();
2932 if (isBlockBegin(*FormatTok)) {
2933 const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
2934 FormatTok->setFinalizedType(TT_ElseLBrace);
2935 ElseLeftBrace = FormatTok;
2936 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2937 IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
2938 FormatToken *IfLBrace =
2939 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2940 /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
2941 setPreviousRBraceType(TT_ElseRBrace);
2942 if (FormatTok->is(tok::kw_else)) {
2943 KeepElseBraces = KeepElseBraces ||
2944 ElseBlockKind == IfStmtKind::IfOnly ||
2945 ElseBlockKind == IfStmtKind::IfElseIf;
2946 } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
2947 KeepElseBraces = true;
2948 assert(ElseLeftBrace->MatchingParen);
2949 markOptionalBraces(ElseLeftBrace);
2950 }
2951 addUnwrappedLine();
2952 } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
2953 const FormatToken *Previous = Tokens->getPreviousToken();
2954 assert(Previous);
2955 const bool IsPrecededByComment = Previous->is(tok::comment);
2956 if (IsPrecededByComment) {
2957 addUnwrappedLine();
2958 ++Line->Level;
2959 }
2960 bool TooDeep = true;
2961 if (Style.RemoveBracesLLVM) {
2962 Kind = IfStmtKind::IfElseIf;
2963 TooDeep = NestedTooDeep.pop_back_val();
2964 }
2965 ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
2966 if (Style.RemoveBracesLLVM)
2967 NestedTooDeep.push_back(TooDeep);
2968 if (IsPrecededByComment)
2969 --Line->Level;
2970 } else {
2971 parseUnbracedBody(/*CheckEOF=*/true);
2972 }
2973 } else {
2974 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
2975 if (NeedsUnwrappedLine)
2976 addUnwrappedLine();
2977 }
2978
2979 if (!Style.RemoveBracesLLVM)
2980 return nullptr;
2981
2982 assert(!NestedTooDeep.empty());
2983 KeepElseBraces = KeepElseBraces ||
2984 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
2985 NestedTooDeep.back();
2986
2987 NestedTooDeep.pop_back();
2988
2989 if (!KeepIfBraces && !KeepElseBraces) {
2990 markOptionalBraces(IfLeftBrace);
2991 markOptionalBraces(ElseLeftBrace);
2992 } else if (IfLeftBrace) {
2993 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
2994 if (IfRightBrace) {
2995 assert(IfRightBrace->MatchingParen == IfLeftBrace);
2996 assert(!IfLeftBrace->Optional);
2997 assert(!IfRightBrace->Optional);
2998 IfLeftBrace->MatchingParen = nullptr;
2999 IfRightBrace->MatchingParen = nullptr;
3000 }
3001 }
3002
3003 if (IfKind)
3004 *IfKind = Kind;
3005
3006 return IfLeftBrace;
3007}
3008
3009void UnwrappedLineParser::parseTryCatch() {
3010 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
3011 nextToken();
3012 bool NeedsUnwrappedLine = false;
3013 bool HasCtorInitializer = false;
3014 if (FormatTok->is(tok::colon)) {
3015 auto *Colon = FormatTok;
3016 // We are in a function try block, what comes is an initializer list.
3017 nextToken();
3018 if (FormatTok->is(tok::identifier)) {
3019 HasCtorInitializer = true;
3020 Colon->setFinalizedType(TT_CtorInitializerColon);
3021 }
3022
3023 // In case identifiers were removed by clang-tidy, what might follow is
3024 // multiple commas in sequence - before the first identifier.
3025 while (FormatTok->is(tok::comma))
3026 nextToken();
3027
3028 while (FormatTok->is(tok::identifier)) {
3029 nextToken();
3030 if (FormatTok->is(tok::l_paren)) {
3031 parseParens();
3032 } else if (FormatTok->is(tok::l_brace)) {
3033 nextToken();
3034 parseBracedList();
3035 }
3036
3037 // In case identifiers were removed by clang-tidy, what might follow is
3038 // multiple commas in sequence - after the first identifier.
3039 while (FormatTok->is(tok::comma))
3040 nextToken();
3041 }
3042 }
3043 // Parse try with resource.
3044 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
3045 parseParens();
3046
3047 keepAncestorBraces();
3048
3049 if (FormatTok->is(tok::l_brace)) {
3050 if (HasCtorInitializer)
3051 FormatTok->setFinalizedType(TT_FunctionLBrace);
3052 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3053 parseBlock();
3054 if (Style.BraceWrapping.BeforeCatch)
3055 addUnwrappedLine();
3056 else
3057 NeedsUnwrappedLine = true;
3058 } else if (FormatTok->isNot(tok::kw_catch)) {
3059 // The C++ standard requires a compound-statement after a try.
3060 // If there's none, we try to assume there's a structuralElement
3061 // and try to continue.
3062 addUnwrappedLine();
3063 ++Line->Level;
3064 parseStructuralElement();
3065 --Line->Level;
3066 }
3067 while (true) {
3068 if (FormatTok->is(tok::at))
3069 nextToken();
3070 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
3071 tok::kw___finally) ||
3072 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3073 FormatTok->is(Keywords.kw_finally)) ||
3074 (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
3075 FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
3076 break;
3077 }
3078 nextToken();
3079 while (FormatTok->isNot(tok::l_brace)) {
3080 if (FormatTok->is(tok::l_paren)) {
3081 parseParens();
3082 continue;
3083 }
3084 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
3085 if (Style.RemoveBracesLLVM)
3086 NestedTooDeep.pop_back();
3087 return;
3088 }
3089 nextToken();
3090 }
3091 NeedsUnwrappedLine = false;
3092 Line->MustBeDeclaration = false;
3093 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3094 parseBlock();
3095 if (Style.BraceWrapping.BeforeCatch)
3096 addUnwrappedLine();
3097 else
3098 NeedsUnwrappedLine = true;
3099 }
3100
3101 if (Style.RemoveBracesLLVM)
3102 NestedTooDeep.pop_back();
3103
3104 if (NeedsUnwrappedLine)
3105 addUnwrappedLine();
3106}
3107
3108void UnwrappedLineParser::parseNamespace() {
3109 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
3110 "'namespace' expected");
3111
3112 const FormatToken &InitialToken = *FormatTok;
3113 nextToken();
3114 if (InitialToken.is(TT_NamespaceMacro)) {
3115 parseParens();
3116 } else {
3117 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
3118 tok::l_square, tok::period, tok::l_paren) ||
3119 (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
3120 if (FormatTok->is(tok::l_square))
3121 parseSquare();
3122 else if (FormatTok->is(tok::l_paren))
3123 parseParens();
3124 else
3125 nextToken();
3126 }
3127 }
3128 if (FormatTok->is(tok::l_brace)) {
3129 FormatTok->setFinalizedType(TT_NamespaceLBrace);
3130
3131 if (ShouldBreakBeforeBrace(Style, InitialToken))
3132 addUnwrappedLine();
3133
3134 unsigned AddLevels =
3137 DeclarationScopeStack.size() > 1)
3138 ? 1u
3139 : 0u;
3140 bool ManageWhitesmithsBraces =
3141 AddLevels == 0u &&
3143
3144 // If we're in Whitesmiths mode, indent the brace if we're not indenting
3145 // the whole block.
3146 if (ManageWhitesmithsBraces)
3147 ++Line->Level;
3148
3149 // Munch the semicolon after a namespace. This is more common than one would
3150 // think. Putting the semicolon into its own line is very ugly.
3151 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
3152 /*KeepBraces=*/true, /*IfKind=*/nullptr,
3153 ManageWhitesmithsBraces);
3154
3155 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
3156
3157 if (ManageWhitesmithsBraces)
3158 --Line->Level;
3159 }
3160 // FIXME: Add error handling.
3161}
3162
3163void UnwrappedLineParser::parseNew() {
3164 assert(FormatTok->is(tok::kw_new) && "'new' expected");
3165 nextToken();
3166
3167 if (Style.isCSharp()) {
3168 do {
3169 // Handle constructor invocation, e.g. `new(field: value)`.
3170 if (FormatTok->is(tok::l_paren))
3171 parseParens();
3172
3173 // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3174 if (FormatTok->is(tok::l_brace))
3175 parseBracedList();
3176
3177 if (FormatTok->isOneOf(tok::semi, tok::comma))
3178 return;
3179
3180 nextToken();
3181 } while (!eof());
3182 }
3183
3184 if (Style.Language != FormatStyle::LK_Java)
3185 return;
3186
3187 // In Java, we can parse everything up to the parens, which aren't optional.
3188 do {
3189 // There should not be a ;, { or } before the new's open paren.
3190 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
3191 return;
3192
3193 // Consume the parens.
3194 if (FormatTok->is(tok::l_paren)) {
3195 parseParens();
3196
3197 // If there is a class body of an anonymous class, consume that as child.
3198 if (FormatTok->is(tok::l_brace))
3199 parseChildBlock();
3200 return;
3201 }
3202 nextToken();
3203 } while (!eof());
3204}
3205
3206void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3207 keepAncestorBraces();
3208
3209 if (isBlockBegin(*FormatTok)) {
3210 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3211 FormatToken *LeftBrace = FormatTok;
3212 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3213 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3214 /*MunchSemi=*/true, KeepBraces);
3215 setPreviousRBraceType(TT_ControlStatementRBrace);
3216 if (!KeepBraces) {
3217 assert(!NestedTooDeep.empty());
3218 if (!NestedTooDeep.back())
3219 markOptionalBraces(LeftBrace);
3220 }
3221 if (WrapRightBrace)
3222 addUnwrappedLine();
3223 } else {
3224 parseUnbracedBody();
3225 }
3226
3227 if (!KeepBraces)
3228 NestedTooDeep.pop_back();
3229}
3230
3231void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3232 assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3233 (Style.isVerilog() &&
3234 FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3235 Keywords.kw_always_ff, Keywords.kw_always_latch,
3236 Keywords.kw_final, Keywords.kw_initial,
3237 Keywords.kw_foreach, Keywords.kw_forever,
3238 Keywords.kw_repeat))) &&
3239 "'for', 'while' or foreach macro expected");
3240 const bool KeepBraces = !Style.RemoveBracesLLVM ||
3241 !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
3242
3243 nextToken();
3244 // JS' for await ( ...
3245 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
3246 nextToken();
3247 if (IsCpp && FormatTok->is(tok::kw_co_await))
3248 nextToken();
3249 if (HasParens && FormatTok->is(tok::l_paren)) {
3250 // The type is only set for Verilog basically because we were afraid to
3251 // change the existing behavior for loops. See the discussion on D121756 for
3252 // details.
3253 if (Style.isVerilog())
3254 FormatTok->setFinalizedType(TT_ConditionLParen);
3255 parseParens();
3256 }
3257
3258 if (Style.isVerilog()) {
3259 // Event control.
3260 parseVerilogSensitivityList();
3261 } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(tok::semi) &&
3262 Tokens->getPreviousToken()->is(tok::r_paren)) {
3263 nextToken();
3264 addUnwrappedLine();
3265 return;
3266 }
3267
3268 handleAttributes();
3269 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3270}
3271
3272void UnwrappedLineParser::parseDoWhile() {
3273 assert(FormatTok->is(tok::kw_do) && "'do' expected");
3274 nextToken();
3275
3276 parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
3277
3278 // FIXME: Add error handling.
3279 if (FormatTok->isNot(tok::kw_while)) {
3280 addUnwrappedLine();
3281 return;
3282 }
3283
3284 FormatTok->setFinalizedType(TT_DoWhile);
3285
3286 // If in Whitesmiths mode, the line with the while() needs to be indented
3287 // to the same level as the block.
3289 ++Line->Level;
3290
3291 nextToken();
3292 parseStructuralElement();
3293}
3294
3295void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
3296 nextToken();
3297 unsigned OldLineLevel = Line->Level;
3298
3299 if (LeftAlignLabel)
3300 Line->Level = 0;
3301 else if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3302 --Line->Level;
3303
3304 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3305 FormatTok->is(tok::l_brace)) {
3306
3307 CompoundStatementIndenter Indenter(this, Line->Level,
3310 parseBlock();
3311 if (FormatTok->is(tok::kw_break)) {
3314 addUnwrappedLine();
3315 if (!Style.IndentCaseBlocks &&
3317 ++Line->Level;
3318 }
3319 }
3320 parseStructuralElement();
3321 }
3322 addUnwrappedLine();
3323 } else {
3324 if (FormatTok->is(tok::semi))
3325 nextToken();
3326 addUnwrappedLine();
3327 }
3328 Line->Level = OldLineLevel;
3329 if (FormatTok->isNot(tok::l_brace)) {
3330 parseStructuralElement();
3331 addUnwrappedLine();
3332 }
3333}
3334
3335void UnwrappedLineParser::parseCaseLabel() {
3336 assert(FormatTok->is(tok::kw_case) && "'case' expected");
3337 auto *Case = FormatTok;
3338
3339 // FIXME: fix handling of complex expressions here.
3340 do {
3341 nextToken();
3342 if (FormatTok->is(tok::colon)) {
3343 FormatTok->setFinalizedType(TT_CaseLabelColon);
3344 break;
3345 }
3346 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::arrow)) {
3347 FormatTok->setFinalizedType(TT_CaseLabelArrow);
3348 Case->setFinalizedType(TT_SwitchExpressionLabel);
3349 break;
3350 }
3351 } while (!eof());
3352 parseLabel();
3353}
3354
3355void UnwrappedLineParser::parseSwitch(bool IsExpr) {
3356 assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3357 nextToken();
3358 if (FormatTok->is(tok::l_paren))
3359 parseParens();
3360
3361 keepAncestorBraces();
3362
3363 if (FormatTok->is(tok::l_brace)) {
3364 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3365 FormatTok->setFinalizedType(IsExpr ? TT_SwitchExpressionLBrace
3366 : TT_ControlStatementLBrace);
3367 if (IsExpr)
3368 parseChildBlock();
3369 else
3370 parseBlock();
3371 setPreviousRBraceType(TT_ControlStatementRBrace);
3372 if (!IsExpr)
3373 addUnwrappedLine();
3374 } else {
3375 addUnwrappedLine();
3376 ++Line->Level;
3377 parseStructuralElement();
3378 --Line->Level;
3379 }
3380
3381 if (Style.RemoveBracesLLVM)
3382 NestedTooDeep.pop_back();
3383}
3384
3385// Operators that can follow a C variable.
3387 switch (Kind) {
3388 case tok::ampamp:
3389 case tok::ampequal:
3390 case tok::arrow:
3391 case tok::caret:
3392 case tok::caretequal:
3393 case tok::comma:
3394 case tok::ellipsis:
3395 case tok::equal:
3396 case tok::equalequal:
3397 case tok::exclaim:
3398 case tok::exclaimequal:
3399 case tok::greater:
3400 case tok::greaterequal:
3401 case tok::greatergreater:
3402 case tok::greatergreaterequal:
3403 case tok::l_paren:
3404 case tok::l_square:
3405 case tok::less:
3406 case tok::lessequal:
3407 case tok::lessless:
3408 case tok::lesslessequal:
3409 case tok::minus:
3410 case tok::minusequal:
3411 case tok::minusminus:
3412 case tok::percent:
3413 case tok::percentequal:
3414 case tok::period:
3415 case tok::pipe:
3416 case tok::pipeequal:
3417 case tok::pipepipe:
3418 case tok::plus:
3419 case tok::plusequal:
3420 case tok::plusplus:
3421 case tok::question:
3422 case tok::r_brace:
3423 case tok::r_paren:
3424 case tok::r_square:
3425 case tok::semi:
3426 case tok::slash:
3427 case tok::slashequal:
3428 case tok::star:
3429 case tok::starequal:
3430 return true;
3431 default:
3432 return false;
3433 }
3434}
3435
3436void UnwrappedLineParser::parseAccessSpecifier() {
3437 FormatToken *AccessSpecifierCandidate = FormatTok;
3438 nextToken();
3439 // Understand Qt's slots.
3440 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3441 nextToken();
3442 // Otherwise, we don't know what it is, and we'd better keep the next token.
3443 if (FormatTok->is(tok::colon)) {
3444 nextToken();
3445 addUnwrappedLine();
3446 } else if (FormatTok->isNot(tok::coloncolon) &&
3447 !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
3448 // Not a variable name nor namespace name.
3449 addUnwrappedLine();
3450 } else if (AccessSpecifierCandidate) {
3451 // Consider the access specifier to be a C identifier.
3452 AccessSpecifierCandidate->Tok.setKind(tok::identifier);
3453 }
3454}
3455
3456/// \brief Parses a requires, decides if it is a clause or an expression.
3457/// \pre The current token has to be the requires keyword.
3458/// \returns true if it parsed a clause.
3459bool UnwrappedLineParser::parseRequires() {
3460 assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3461 auto RequiresToken = FormatTok;
3462
3463 // We try to guess if it is a requires clause, or a requires expression. For
3464 // that we first consume the keyword and check the next token.
3465 nextToken();
3466
3467 switch (FormatTok->Tok.getKind()) {
3468 case tok::l_brace:
3469 // This can only be an expression, never a clause.
3470 parseRequiresExpression(RequiresToken);
3471 return false;
3472 case tok::l_paren:
3473 // Clauses and expression can start with a paren, it's unclear what we have.
3474 break;
3475 default:
3476 // All other tokens can only be a clause.
3477 parseRequiresClause(RequiresToken);
3478 return true;
3479 }
3480
3481 // Looking forward we would have to decide if there are function declaration
3482 // like arguments to the requires expression:
3483 // requires (T t) {
3484 // Or there is a constraint expression for the requires clause:
3485 // requires (C<T> && ...
3486
3487 // But first let's look behind.
3488 auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3489
3490 if (!PreviousNonComment ||
3491 PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
3492 // If there is no token, or an expression left brace, we are a requires
3493 // clause within a requires expression.
3494 parseRequiresClause(RequiresToken);
3495 return true;
3496 }
3497
3498 switch (PreviousNonComment->Tok.getKind()) {
3499 case tok::greater:
3500 case tok::r_paren:
3501 case tok::kw_noexcept:
3502 case tok::kw_const:
3503 case tok::amp:
3504 // This is a requires clause.
3505 parseRequiresClause(RequiresToken);
3506 return true;
3507 case tok::ampamp: {
3508 // This can be either:
3509 // if (... && requires (T t) ...)
3510 // Or
3511 // void member(...) && requires (C<T> ...
3512 // We check the one token before that for a const:
3513 // void member(...) const && requires (C<T> ...
3514 auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3515 if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3516 parseRequiresClause(RequiresToken);
3517 return true;
3518 }
3519 break;
3520 }
3521 default:
3522 if (PreviousNonComment->isTypeOrIdentifier(LangOpts)) {
3523 // This is a requires clause.
3524 parseRequiresClause(RequiresToken);
3525 return true;
3526 }
3527 // It's an expression.
3528 parseRequiresExpression(RequiresToken);
3529 return false;
3530 }
3531
3532 // Now we look forward and try to check if the paren content is a parameter
3533 // list. The parameters can be cv-qualified and contain references or
3534 // pointers.
3535 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3536 // of stuff: typename, const, *, &, &&, ::, identifiers.
3537
3538 unsigned StoredPosition = Tokens->getPosition();
3539 FormatToken *NextToken = Tokens->getNextToken();
3540 int Lookahead = 0;
3541 auto PeekNext = [&Lookahead, &NextToken, this] {
3542 ++Lookahead;
3543 NextToken = Tokens->getNextToken();
3544 };
3545
3546 bool FoundType = false;
3547 bool LastWasColonColon = false;
3548 int OpenAngles = 0;
3549
3550 for (; Lookahead < 50; PeekNext()) {
3551 switch (NextToken->Tok.getKind()) {
3552 case tok::kw_volatile:
3553 case tok::kw_const:
3554 case tok::comma:
3555 if (OpenAngles == 0) {
3556 FormatTok = Tokens->setPosition(StoredPosition);
3557 parseRequiresExpression(RequiresToken);
3558 return false;
3559 }
3560 break;
3561 case tok::eof:
3562 // Break out of the loop.
3563 Lookahead = 50;
3564 break;
3565 case tok::coloncolon:
3566 LastWasColonColon = true;
3567 break;
3568 case tok::kw_decltype:
3569 case tok::identifier:
3570 if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3571 FormatTok = Tokens->setPosition(StoredPosition);
3572 parseRequiresExpression(RequiresToken);
3573 return false;
3574 }
3575 FoundType = true;
3576 LastWasColonColon = false;
3577 break;
3578 case tok::less:
3579 ++OpenAngles;
3580 break;
3581 case tok::greater:
3582 --OpenAngles;
3583 break;
3584 default:
3585 if (NextToken->isTypeName(LangOpts)) {
3586 FormatTok = Tokens->setPosition(StoredPosition);
3587 parseRequiresExpression(RequiresToken);
3588 return false;
3589 }
3590 break;
3591 }
3592 }
3593 // This seems to be a complicated expression, just assume it's a clause.
3594 FormatTok = Tokens->setPosition(StoredPosition);
3595 parseRequiresClause(RequiresToken);
3596 return true;
3597}
3598
3599/// \brief Parses a requires clause.
3600/// \param RequiresToken The requires keyword token, which starts this clause.
3601/// \pre We need to be on the next token after the requires keyword.
3602/// \sa parseRequiresExpression
3603///
3604/// Returns if it either has finished parsing the clause, or it detects, that
3605/// the clause is incorrect.
3606void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3607 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3608 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3609
3610 // If there is no previous token, we are within a requires expression,
3611 // otherwise we will always have the template or function declaration in front
3612 // of it.
3613 bool InRequiresExpression =
3614 !RequiresToken->Previous ||
3615 RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3616
3617 RequiresToken->setFinalizedType(InRequiresExpression
3618 ? TT_RequiresClauseInARequiresExpression
3619 : TT_RequiresClause);
3620
3621 // NOTE: parseConstraintExpression is only ever called from this function.
3622 // It could be inlined into here.
3623 parseConstraintExpression();
3624
3625 if (!InRequiresExpression)
3626 FormatTok->Previous->ClosesRequiresClause = true;
3627}
3628
3629/// \brief Parses a requires expression.
3630/// \param RequiresToken The requires keyword token, which starts this clause.
3631/// \pre We need to be on the next token after the requires keyword.
3632/// \sa parseRequiresClause
3633///
3634/// Returns if it either has finished parsing the expression, or it detects,
3635/// that the expression is incorrect.
3636void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3637 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3638 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3639
3640 RequiresToken->setFinalizedType(TT_RequiresExpression);
3641
3642 if (FormatTok->is(tok::l_paren)) {
3643 FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3644 parseParens();
3645 }
3646
3647 if (FormatTok->is(tok::l_brace)) {
3648 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3649 parseChildBlock();
3650 }
3651}
3652
3653/// \brief Parses a constraint expression.
3654///
3655/// This is the body of a requires clause. It returns, when the parsing is
3656/// complete, or the expression is incorrect.
3657void UnwrappedLineParser::parseConstraintExpression() {
3658 // The special handling for lambdas is needed since tryToParseLambda() eats a
3659 // token and if a requires expression is the last part of a requires clause
3660 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3661 // not set on the correct token. Thus we need to be aware if we even expect a
3662 // lambda to be possible.
3663 // template <typename T> requires requires { ... } [[nodiscard]] ...;
3664 bool LambdaNextTimeAllowed = true;
3665
3666 // Within lambda declarations, it is permitted to put a requires clause after
3667 // its template parameter list, which would place the requires clause right
3668 // before the parentheses of the parameters of the lambda declaration. Thus,
3669 // we track if we expect to see grouping parentheses at all.
3670 // Without this check, `requires foo<T> (T t)` in the below example would be
3671 // seen as the whole requires clause, accidentally eating the parameters of
3672 // the lambda.
3673 // [&]<typename T> requires foo<T> (T t) { ... };
3674 bool TopLevelParensAllowed = true;
3675
3676 do {
3677 bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);
3678
3679 switch (FormatTok->Tok.getKind()) {
3680 case tok::kw_requires: {
3681 auto RequiresToken = FormatTok;
3682 nextToken();
3683 parseRequiresExpression(RequiresToken);
3684 break;
3685 }
3686
3687 case tok::l_paren:
3688 if (!TopLevelParensAllowed)
3689 return;
3690 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3691 TopLevelParensAllowed = false;
3692 break;
3693
3694 case tok::l_square:
3695 if (!LambdaThisTimeAllowed || !tryToParseLambda())
3696 return;
3697 break;
3698
3699 case tok::kw_const:
3700 case tok::semi:
3701 case tok::kw_class:
3702 case tok::kw_struct:
3703 case tok::kw_union:
3704 return;
3705
3706 case tok::l_brace:
3707 // Potential function body.
3708 return;
3709
3710 case tok::ampamp:
3711 case tok::pipepipe:
3712 FormatTok->setFinalizedType(TT_BinaryOperator);
3713 nextToken();
3714 LambdaNextTimeAllowed = true;
3715 TopLevelParensAllowed = true;
3716 break;
3717
3718 case tok::comma:
3719 case tok::comment:
3720 LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3721 nextToken();
3722 break;
3723
3724 case tok::kw_sizeof:
3725 case tok::greater:
3726 case tok::greaterequal:
3727 case tok::greatergreater:
3728 case tok::less:
3729 case tok::lessequal:
3730 case tok::lessless:
3731 case tok::equalequal:
3732 case tok::exclaim:
3733 case tok::exclaimequal:
3734 case tok::plus:
3735 case tok::minus:
3736 case tok::star:
3737 case tok::slash:
3738 LambdaNextTimeAllowed = true;
3739 TopLevelParensAllowed = true;
3740 // Just eat them.
3741 nextToken();
3742 break;
3743
3744 case tok::numeric_constant:
3745 case tok::coloncolon:
3746 case tok::kw_true:
3747 case tok::kw_false:
3748 TopLevelParensAllowed = false;
3749 // Just eat them.
3750 nextToken();
3751 break;
3752
3753 case tok::kw_static_cast:
3754 case tok::kw_const_cast:
3755 case tok::kw_reinterpret_cast:
3756 case tok::kw_dynamic_cast:
3757 nextToken();
3758 if (FormatTok->isNot(tok::less))
3759 return;
3760
3761 nextToken();
3762 parseBracedList(/*IsAngleBracket=*/true);
3763 break;
3764
3765 default:
3766 if (!FormatTok->Tok.getIdentifierInfo()) {
3767 // Identifiers are part of the default case, we check for more then
3768 // tok::identifier to handle builtin type traits.
3769 return;
3770 }
3771
3772 // We need to differentiate identifiers for a template deduction guide,
3773 // variables, or function return types (the constraint expression has
3774 // ended before that), and basically all other cases. But it's easier to
3775 // check the other way around.
3776 assert(FormatTok->Previous);
3777 switch (FormatTok->Previous->Tok.getKind()) {
3778 case tok::coloncolon: // Nested identifier.
3779 case tok::ampamp: // Start of a function or variable for the
3780 case tok::pipepipe: // constraint expression. (binary)
3781 case tok::exclaim: // The same as above, but unary.
3782 case tok::kw_requires: // Initial identifier of a requires clause.
3783 case tok::equal: // Initial identifier of a concept declaration.
3784 break;
3785 default:
3786 return;
3787 }
3788
3789 // Read identifier with optional template declaration.
3790 nextToken();
3791 if (FormatTok->is(tok::less)) {
3792 nextToken();
3793 parseBracedList(/*IsAngleBracket=*/true);
3794 }
3795 TopLevelParensAllowed = false;
3796 break;
3797 }
3798 } while (!eof());
3799}
3800
3801bool UnwrappedLineParser::parseEnum() {
3802 const FormatToken &InitialToken = *FormatTok;
3803
3804 // Won't be 'enum' for NS_ENUMs.
3805 if (FormatTok->is(tok::kw_enum))
3806 nextToken();
3807
3808 // In TypeScript, "enum" can also be used as property name, e.g. in interface
3809 // declarations. An "enum" keyword followed by a colon would be a syntax
3810 // error and thus assume it is just an identifier.
3811 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3812 return false;
3813
3814 // In protobuf, "enum" can be used as a field name.
3815 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3816 return false;
3817
3818 if (IsCpp) {
3819 // Eat up enum class ...
3820 if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3821 nextToken();
3822 while (FormatTok->is(tok::l_square))
3823 if (!handleCppAttributes())
3824 return false;
3825 }
3826
3827 while (FormatTok->Tok.getIdentifierInfo() ||
3828 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3829 tok::greater, tok::comma, tok::question,
3830 tok::l_square)) {
3831 if (Style.isVerilog()) {
3832 FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
3833 nextToken();
3834 // In Verilog the base type can have dimensions.
3835 while (FormatTok->is(tok::l_square))
3836 parseSquare();
3837 } else {
3838 nextToken();
3839 }
3840 // We can have macros or attributes in between 'enum' and the enum name.
3841 if (FormatTok->is(tok::l_paren))
3842 parseParens();
3843 if (FormatTok->is(tok::identifier)) {
3844 nextToken();
3845 // If there are two identifiers in a row, this is likely an elaborate
3846 // return type. In Java, this can be "implements", etc.
3847 if (IsCpp && FormatTok->is(tok::identifier))
3848 return false;
3849 }
3850 }
3851
3852 // Just a declaration or something is wrong.
3853 if (FormatTok->isNot(tok::l_brace))
3854 return true;
3855 FormatTok->setFinalizedType(TT_EnumLBrace);
3856 FormatTok->setBlockKind(BK_Block);
3857
3858 if (Style.Language == FormatStyle::LK_Java) {
3859 // Java enums are different.
3860 parseJavaEnumBody();
3861 return true;
3862 }
3863 if (Style.Language == FormatStyle::LK_Proto) {
3864 parseBlock(/*MustBeDeclaration=*/true);
3865 return true;
3866 }
3867
3868 if (!Style.AllowShortEnumsOnASingleLine &&
3869 ShouldBreakBeforeBrace(Style, InitialToken)) {
3870 addUnwrappedLine();
3871 }
3872 // Parse enum body.
3873 nextToken();
3874 if (!Style.AllowShortEnumsOnASingleLine) {
3875 addUnwrappedLine();
3876 Line->Level += 1;
3877 }
3878 bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true);
3880 Line->Level -= 1;
3881 if (HasError) {
3882 if (FormatTok->is(tok::semi))
3883 nextToken();
3884 addUnwrappedLine();
3885 }
3886 setPreviousRBraceType(TT_EnumRBrace);
3887 return true;
3888
3889 // There is no addUnwrappedLine() here so that we fall through to parsing a
3890 // structural element afterwards. Thus, in "enum A {} n, m;",
3891 // "} n, m;" will end up in one unwrapped line.
3892}
3893
3894bool UnwrappedLineParser::parseStructLike() {
3895 // parseRecord falls through and does not yet add an unwrapped line as a
3896 // record declaration or definition can start a structural element.
3897 parseRecord();
3898 // This does not apply to Java, JavaScript and C#.
3899 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3900 Style.isCSharp()) {
3901 if (FormatTok->is(tok::semi))
3902 nextToken();
3903 addUnwrappedLine();
3904 return true;
3905 }
3906 return false;
3907}
3908
3909namespace {
3910// A class used to set and restore the Token position when peeking
3911// ahead in the token source.
3912class ScopedTokenPosition {
3913 unsigned StoredPosition;
3914 FormatTokenSource *Tokens;
3915
3916public:
3917 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3918 assert(Tokens && "Tokens expected to not be null");
3919 StoredPosition = Tokens->getPosition();
3920 }
3921
3922 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3923};
3924} // namespace
3925
3926// Look to see if we have [[ by looking ahead, if
3927// its not then rewind to the original position.
3928bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3929 ScopedTokenPosition AutoPosition(Tokens);
3930 FormatToken *Tok = Tokens->getNextToken();
3931 // We already read the first [ check for the second.
3932 if (Tok->isNot(tok::l_square))
3933 return false;
3934 // Double check that the attribute is just something
3935 // fairly simple.
3936 while (Tok->isNot(tok::eof)) {
3937 if (Tok->is(tok::r_square))
3938 break;
3939 Tok = Tokens->getNextToken();
3940 }
3941 if (Tok->is(tok::eof))
3942 return false;
3943 Tok = Tokens->getNextToken();
3944 if (Tok->isNot(tok::r_square))
3945 return false;
3946 Tok = Tokens->getNextToken();
3947 if (Tok->is(tok::semi))
3948 return false;
3949 return true;
3950}
3951
3952void UnwrappedLineParser::parseJavaEnumBody() {
3953 assert(FormatTok->is(tok::l_brace));
3954 const FormatToken *OpeningBrace = FormatTok;
3955
3956 // Determine whether the enum is simple, i.e. does not have a semicolon or
3957 // constants with class bodies. Simple enums can be formatted like braced
3958 // lists, contracted to a single line, etc.
3959 unsigned StoredPosition = Tokens->getPosition();
3960 bool IsSimple = true;
3961 FormatToken *Tok = Tokens->getNextToken();
3962 while (Tok->isNot(tok::eof)) {
3963 if (Tok->is(tok::r_brace))
3964 break;
3965 if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3966 IsSimple = false;
3967 break;
3968 }
3969 // FIXME: This will also mark enums with braces in the arguments to enum
3970 // constants as "not simple". This is probably fine in practice, though.
3971 Tok = Tokens->getNextToken();
3972 }
3973 FormatTok = Tokens->setPosition(StoredPosition);
3974
3975 if (IsSimple) {
3976 nextToken();
3977 parseBracedList();
3978 addUnwrappedLine();
3979 return;
3980 }
3981
3982 // Parse the body of a more complex enum.
3983 // First add a line for everything up to the "{".
3984 nextToken();
3985 addUnwrappedLine();
3986 ++Line->Level;
3987
3988 // Parse the enum constants.
3989 while (!eof()) {
3990 if (FormatTok->is(tok::l_brace)) {
3991 // Parse the constant's class body.
3992 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3993 /*MunchSemi=*/false);
3994 } else if (FormatTok->is(tok::l_paren)) {
3995 parseParens();
3996 } else if (FormatTok->is(tok::comma)) {
3997 nextToken();
3998 addUnwrappedLine();
3999 } else if (FormatTok->is(tok::semi)) {
4000 nextToken();
4001 addUnwrappedLine();
4002 break;
4003 } else if (FormatTok->is(tok::r_brace)) {
4004 addUnwrappedLine();
4005 break;
4006 } else {
4007 nextToken();
4008 }
4009 }
4010
4011 // Parse the class body after the enum's ";" if any.
4012 parseLevel(OpeningBrace);
4013 nextToken();
4014 --Line->Level;
4015 addUnwrappedLine();
4016}
4017
4018void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
4019 const FormatToken &InitialToken = *FormatTok;
4020 nextToken();
4021
4022 const FormatToken *ClassName = nullptr;
4023 bool IsDerived = false;
4024 auto IsNonMacroIdentifier = [](const FormatToken *Tok) {
4025 return Tok->is(tok::identifier) && Tok->TokenText != Tok->TokenText.upper();
4026 };
4027 // JavaScript/TypeScript supports anonymous classes like:
4028 // a = class extends foo { }
4029 bool JSPastExtendsOrImplements = false;
4030 // The actual identifier can be a nested name specifier, and in macros
4031 // it is often token-pasted.
4032 // An [[attribute]] can be before the identifier.
4033 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
4034 tok::kw_alignas, tok::l_square) ||
4035 FormatTok->isAttribute() ||
4036 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
4037 FormatTok->isOneOf(tok::period, tok::comma))) {
4038 if (Style.isJavaScript() &&
4039 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
4040 JSPastExtendsOrImplements = true;
4041 // JavaScript/TypeScript supports inline object types in
4042 // extends/implements positions:
4043 // class Foo implements {bar: number} { }
4044 nextToken();
4045 if (FormatTok->is(tok::l_brace)) {
4046 tryToParseBracedList();
4047 continue;
4048 }
4049 }
4050 if (FormatTok->is(tok::l_square) && handleCppAttributes())
4051 continue;
4052 const auto *Previous = FormatTok;
4053 nextToken();
4054 switch (FormatTok->Tok.getKind()) {
4055 case tok::l_paren:
4056 // We can have macros in between 'class' and the class name.
4057 if (!IsNonMacroIdentifier(Previous) ||
4058 // e.g. `struct macro(a) S { int i; };`
4059 Previous->Previous == &InitialToken) {
4060 parseParens();
4061 }
4062 break;
4063 case tok::coloncolon:
4064 case tok::hashhash:
4065 break;
4066 default:
4067 if (!JSPastExtendsOrImplements && !ClassName &&
4068 Previous->is(tok::identifier) && Previous->isNot(TT_AttributeMacro)) {
4069 ClassName = Previous;
4070 }
4071 }
4072 }
4073
4074 auto IsListInitialization = [&] {
4075 if (!ClassName || IsDerived || JSPastExtendsOrImplements)
4076 return false;
4077 assert(FormatTok->is(tok::l_brace));
4078 const auto *Prev = FormatTok->getPreviousNonComment();
4079 assert(Prev);
4080 return Prev != ClassName && Prev->is(tok::identifier) &&
4081 Prev->isNot(Keywords.kw_final) && tryToParseBracedList();
4082 };
4083
4084 if (FormatTok->isOneOf(tok::colon, tok::less)) {
4085 int AngleNestingLevel = 0;
4086 do {
4087 if (FormatTok->is(tok::less))
4088 ++AngleNestingLevel;
4089 else if (FormatTok->is(tok::greater))
4090 --AngleNestingLevel;
4091
4092 if (AngleNestingLevel == 0) {
4093 if (FormatTok->is(tok::colon)) {
4094 IsDerived = true;
4095 } else if (FormatTok->is(tok::identifier) &&
4096 FormatTok->Previous->is(tok::coloncolon)) {
4097 ClassName = FormatTok;
4098 } else if (FormatTok->is(tok::l_paren) &&
4099 IsNonMacroIdentifier(FormatTok->Previous)) {
4100 break;
4101 }
4102 }
4103 if (FormatTok->is(tok::l_brace)) {
4104 if (AngleNestingLevel == 0 && IsListInitialization())
4105 return;
4106 calculateBraceTypes(/*ExpectClassBody=*/true);
4107 if (!tryToParseBracedList())
4108 break;
4109 }
4110 if (FormatTok->is(tok::l_square)) {
4111 FormatToken *Previous = FormatTok->Previous;
4112 if (!Previous || (Previous->isNot(tok::r_paren) &&
4113 !Previous->isTypeOrIdentifier(LangOpts))) {
4114 // Don't try parsing a lambda if we had a closing parenthesis before,
4115 // it was probably a pointer to an array: int (*)[].
4116 if (!tryToParseLambda())
4117 continue;
4118 } else {
4119 parseSquare();
4120 continue;
4121 }
4122 }
4123 if (FormatTok->is(tok::semi))
4124 return;
4125 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
4126 addUnwrappedLine();
4127 nextToken();
4128 parseCSharpGenericTypeConstraint();
4129 break;
4130 }
4131 nextToken();
4132 } while (!eof());
4133 }
4134
4135 auto GetBraceTypes =
4136 [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> {
4137 switch (RecordTok.Tok.getKind()) {
4138 case tok::kw_class:
4139 return {TT_ClassLBrace, TT_ClassRBrace};
4140 case tok::kw_struct:
4141 return {TT_StructLBrace, TT_StructRBrace};
4142 case tok::kw_union:
4143 return {TT_UnionLBrace, TT_UnionRBrace};
4144 default:
4145 // Useful for e.g. interface.
4146 return {TT_RecordLBrace, TT_RecordRBrace};
4147 }
4148 };
4149 if (FormatTok->is(tok::l_brace)) {
4150 if (IsListInitialization())
4151 return;
4152 auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken);
4153 FormatTok->setFinalizedType(OpenBraceType);
4154 if (ParseAsExpr) {
4155 parseChildBlock();
4156 } else {
4157 if (ShouldBreakBeforeBrace(Style, InitialToken))
4158 addUnwrappedLine();
4159
4160 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
4161 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
4162 }
4163 setPreviousRBraceType(ClosingBraceType);
4164 }
4165 // There is no addUnwrappedLine() here so that we fall through to parsing a
4166 // structural element afterwards. Thus, in "class A {} n, m;",
4167 // "} n, m;" will end up in one unwrapped line.
4168}
4169
4170void UnwrappedLineParser::parseObjCMethod() {
4171 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
4172 "'(' or identifier expected.");
4173 do {
4174 if (FormatTok->is(tok::semi)) {
4175 nextToken();
4176 addUnwrappedLine();
4177 return;
4178 } else if (FormatTok->is(tok::l_brace)) {
4179 if (Style.BraceWrapping.AfterFunction)
4180 addUnwrappedLine();
4181 parseBlock();
4182 addUnwrappedLine();
4183 return;
4184 } else {
4185 nextToken();
4186 }
4187 } while (!eof());
4188}
4189
4190void UnwrappedLineParser::parseObjCProtocolList() {
4191 assert(FormatTok->is(tok::less) && "'<' expected.");
4192 do {
4193 nextToken();
4194 // Early exit in case someone forgot a close angle.
4195 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4196 FormatTok->isObjCAtKeyword(tok::objc_end)) {
4197 return;
4198 }
4199 } while (!eof() && FormatTok->isNot(tok::greater));
4200 nextToken(); // Skip '>'.
4201}
4202
4203void UnwrappedLineParser::parseObjCUntilAtEnd() {
4204 do {
4205 if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
4206 nextToken();
4207 addUnwrappedLine();
4208 break;
4209 }
4210 if (FormatTok->is(tok::l_brace)) {
4211 parseBlock();
4212 // In ObjC interfaces, nothing should be following the "}".
4213 addUnwrappedLine();
4214 } else if (FormatTok->is(tok::r_brace)) {
4215 // Ignore stray "}". parseStructuralElement doesn't consume them.
4216 nextToken();
4217 addUnwrappedLine();
4218 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
4219 nextToken();
4220 parseObjCMethod();
4221 } else {
4222 parseStructuralElement();
4223 }
4224 } while (!eof());
4225}
4226
4227void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4228 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
4229 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
4230 nextToken();
4231 nextToken(); // interface name
4232
4233 // @interface can be followed by a lightweight generic
4234 // specialization list, then either a base class or a category.
4235 if (FormatTok->is(tok::less))
4236 parseObjCLightweightGenerics();
4237 if (FormatTok->is(tok::colon)) {
4238 nextToken();
4239 nextToken(); // base class name
4240 // The base class can also have lightweight generics applied to it.
4241 if (FormatTok->is(tok::less))
4242 parseObjCLightweightGenerics();
4243 } else if (FormatTok->is(tok::l_paren)) {
4244 // Skip category, if present.
4245 parseParens();
4246 }
4247
4248 if (FormatTok->is(tok::less))
4249 parseObjCProtocolList();
4250
4251 if (FormatTok->is(tok::l_brace)) {
4253 addUnwrappedLine();
4254 parseBlock(/*MustBeDeclaration=*/true);
4255 }
4256
4257 // With instance variables, this puts '}' on its own line. Without instance
4258 // variables, this ends the @interface line.
4259 addUnwrappedLine();
4260
4261 parseObjCUntilAtEnd();
4262}
4263
4264void UnwrappedLineParser::parseObjCLightweightGenerics() {
4265 assert(FormatTok->is(tok::less));
4266 // Unlike protocol lists, generic parameterizations support
4267 // nested angles:
4268 //
4269 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4270 // NSObject <NSCopying, NSSecureCoding>
4271 //
4272 // so we need to count how many open angles we have left.
4273 unsigned NumOpenAngles = 1;
4274 do {
4275 nextToken();
4276 // Early exit in case someone forgot a close angle.
4277 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4278 FormatTok->isObjCAtKeyword(tok::objc_end)) {
4279 break;
4280 }
4281 if (FormatTok->is(tok::less)) {
4282 ++NumOpenAngles;
4283 } else if (FormatTok->is(tok::greater)) {
4284 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4285 --NumOpenAngles;
4286 }
4287 } while (!eof() && NumOpenAngles != 0);
4288 nextToken(); // Skip '>'.
4289}
4290
4291// Returns true for the declaration/definition form of @protocol,
4292// false for the expression form.
4293bool UnwrappedLineParser::parseObjCProtocol() {
4294 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
4295 nextToken();
4296
4297 if (FormatTok->is(tok::l_paren)) {
4298 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4299 return false;
4300 }
4301
4302 // The definition/declaration form,
4303 // @protocol Foo
4304 // - (int)someMethod;
4305 // @end
4306
4307 nextToken(); // protocol name
4308
4309 if (FormatTok->is(tok::less))
4310 parseObjCProtocolList();
4311
4312 // Check for protocol declaration.
4313 if (FormatTok->is(tok::semi)) {
4314 nextToken();
4315 addUnwrappedLine();
4316 return true;
4317 }
4318
4319 addUnwrappedLine();
4320 parseObjCUntilAtEnd();
4321 return true;
4322}
4323
4324void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4325 bool IsImport = FormatTok->is(Keywords.kw_import);
4326 assert(IsImport || FormatTok->is(tok::kw_export));
4327 nextToken();
4328
4329 // Consume the "default" in "export default class/function".
4330 if (FormatTok->is(tok::kw_default))
4331 nextToken();
4332
4333 // Consume "async function", "function" and "default function", so that these
4334 // get parsed as free-standing JS functions, i.e. do not require a trailing
4335 // semicolon.
4336 if (FormatTok->is(Keywords.kw_async))
4337 nextToken();
4338 if (FormatTok->is(Keywords.kw_function)) {
4339 nextToken();
4340 return;
4341 }
4342
4343 // For imports, `export *`, `export {...}`, consume the rest of the line up
4344 // to the terminating `;`. For everything else, just return and continue
4345 // parsing the structural element, i.e. the declaration or expression for
4346 // `export default`.
4347 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
4348 !FormatTok->isStringLiteral() &&
4349 !(FormatTok->is(Keywords.kw_type) &&
4350 Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) {
4351 return;
4352 }
4353
4354 while (!eof()) {
4355 if (FormatTok->is(tok::semi))
4356 return;
4357 if (Line->Tokens.empty()) {
4358 // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4359 // import statement should terminate.
4360 return;
4361 }
4362 if (FormatTok->is(tok::l_brace)) {
4363 FormatTok->setBlockKind(BK_Block);
4364 nextToken();
4365 parseBracedList();
4366 } else {
4367 nextToken();
4368 }
4369 }
4370}
4371
4372void UnwrappedLineParser::parseStatementMacro() {
4373 nextToken();
4374 if (FormatTok->is(tok::l_paren))
4375 parseParens();
4376 if (FormatTok->is(tok::semi))
4377 nextToken();
4378 addUnwrappedLine();
4379}
4380
4381void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4382 // consume things like a::`b.c[d:e] or a::*
4383 while (true) {
4384 if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar,
4385 tok::coloncolon, tok::hash) ||
4386 Keywords.isVerilogIdentifier(*FormatTok)) {
4387 nextToken();
4388 } else if (FormatTok->is(tok::l_square)) {
4389 parseSquare();
4390 } else {
4391 break;
4392 }
4393 }
4394}
4395
4396void UnwrappedLineParser::parseVerilogSensitivityList() {
4397 if (FormatTok->isNot(tok::at))
4398 return;
4399 nextToken();
4400 // A block event expression has 2 at signs.
4401 if (FormatTok->is(tok::at))
4402 nextToken();
4403 switch (FormatTok->Tok.getKind()) {
4404 case tok::star:
4405 nextToken();
4406 break;
4407 case tok::l_paren:
4408 parseParens();
4409 break;
4410 default:
4411 parseVerilogHierarchyIdentifier();
4412 break;
4413 }
4414}
4415
4416unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4417 unsigned AddLevels = 0;
4418
4419 if (FormatTok->is(Keywords.kw_clocking)) {
4420 nextToken();
4421 if (Keywords.isVerilogIdentifier(*FormatTok))
4422 nextToken();
4423 parseVerilogSensitivityList();
4424 if (FormatTok->is(tok::semi))
4425 nextToken();
4426 } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex,
4427 Keywords.kw_casez, Keywords.kw_randcase,
4428 Keywords.kw_randsequence)) {
4429 if (Style.IndentCaseLabels)
4430 AddLevels++;
4431 nextToken();
4432 if (FormatTok->is(tok::l_paren)) {
4433 FormatTok->setFinalizedType(TT_ConditionLParen);
4434 parseParens();
4435 }
4436 if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches))
4437 nextToken();
4438 // The case header has no semicolon.
4439 } else {
4440 // "module" etc.
4441 nextToken();
4442 // all the words like the name of the module and specifiers like
4443 // "automatic" and the width of function return type
4444 while (true) {
4445 if (FormatTok->is(tok::l_square)) {
4446 auto Prev = FormatTok->getPreviousNonComment();
4447 if (Prev && Keywords.isVerilogIdentifier(*Prev))
4448 Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
4449 parseSquare();
4450 } else if (Keywords.isVerilogIdentifier(*FormatTok) ||
4451 FormatTok->isOneOf(tok::hash, tok::hashhash, tok::coloncolon,
4452 Keywords.kw_automatic, tok::kw_static)) {
4453 nextToken();
4454 } else {
4455 break;
4456 }
4457 }
4458
4459 auto NewLine = [this]() {
4460 addUnwrappedLine();
4461 Line->IsContinuation = true;
4462 };
4463
4464 // package imports
4465 while (FormatTok->is(Keywords.kw_import)) {
4466 NewLine();
4467 nextToken();
4468 parseVerilogHierarchyIdentifier();
4469 if (FormatTok->is(tok::semi))
4470 nextToken();
4471 }
4472
4473 // parameters and ports
4474 if (FormatTok->is(Keywords.kw_verilogHash)) {
4475 NewLine();
4476 nextToken();
4477 if (FormatTok->is(tok::l_paren)) {
4478 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4479 parseParens();
4480 }
4481 }
4482 if (FormatTok->is(tok::l_paren)) {
4483 NewLine();
4484 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4485 parseParens();
4486 }
4487
4488 // extends and implements
4489 if (FormatTok->is(Keywords.kw_extends)) {
4490 NewLine();
4491 nextToken();
4492 parseVerilogHierarchyIdentifier();
4493 if (FormatTok->is(tok::l_paren))
4494 parseParens();
4495 }
4496 if (FormatTok->is(Keywords.kw_implements)) {
4497 NewLine();
4498 do {
4499 nextToken();
4500 parseVerilogHierarchyIdentifier();
4501 } while (FormatTok->is(tok::comma));
4502 }
4503
4504 // Coverage event for cover groups.
4505 if (FormatTok->is(tok::at)) {
4506 NewLine();
4507 parseVerilogSensitivityList();
4508 }
4509
4510 if (FormatTok->is(tok::semi))
4511 nextToken(/*LevelDifference=*/1);
4512 addUnwrappedLine();
4513 }
4514
4515 return AddLevels;
4516}
4517
4518void UnwrappedLineParser::parseVerilogTable() {
4519 assert(FormatTok->is(Keywords.kw_table));
4520 nextToken(/*LevelDifference=*/1);
4521 addUnwrappedLine();
4522
4523 auto InitialLevel = Line->Level++;
4524 while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) {
4525 FormatToken *Tok = FormatTok;
4526 nextToken();
4527 if (Tok->is(tok::semi))
4528 addUnwrappedLine();
4529 else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus))
4530 Tok->setFinalizedType(TT_VerilogTableItem);
4531 }
4532 Line->Level = InitialLevel;
4533 nextToken(/*LevelDifference=*/-1);
4534 addUnwrappedLine();
4535}
4536
4537void UnwrappedLineParser::parseVerilogCaseLabel() {
4538 // The label will get unindented in AnnotatingParser. If there are no leading
4539 // spaces, indent the rest here so that things inside the block will be
4540 // indented relative to things outside. We don't use parseLabel because we
4541 // don't know whether this colon is a label or a ternary expression at this
4542 // point.
4543 auto OrigLevel = Line->Level;
4544 auto FirstLine = CurrentLines->size();
4545 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4546 ++Line->Level;
4547 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok))
4548 --Line->Level;
4549 parseStructuralElement();
4550 // Restore the indentation in both the new line and the line that has the
4551 // label.
4552 if (CurrentLines->size() > FirstLine)
4553 (*CurrentLines)[FirstLine].Level = OrigLevel;
4554 Line->Level = OrigLevel;
4555}
4556
4557bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4558 for (const auto &N : Line.Tokens) {
4559 if (N.Tok->MacroCtx)
4560 return true;
4561 for (const UnwrappedLine &Child : N.Children)
4562 if (containsExpansion(Child))
4563 return true;
4564 }
4565 return false;
4566}
4567
4568void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4569 if (Line->Tokens.empty())
4570 return;
4571 LLVM_DEBUG({
4572 if (!parsingPPDirective()) {
4573 llvm::dbgs() << "Adding unwrapped line:\n";
4574 printDebugInfo(*Line);
4575 }
4576 });
4577
4578 // If this line closes a block when in Whitesmiths mode, remember that
4579 // information so that the level can be decreased after the line is added.
4580 // This has to happen after the addition of the line since the line itself
4581 // needs to be indented.
4582 bool ClosesWhitesmithsBlock =
4583 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4585
4586 // If the current line was expanded from a macro call, we use it to
4587 // reconstruct an unwrapped line from the structure of the expanded unwrapped
4588 // line and the unexpanded token stream.
4589 if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) {
4590 if (!Reconstruct)
4591 Reconstruct.emplace(Line->Level, Unexpanded);
4592 Reconstruct->addLine(*Line);
4593
4594 // While the reconstructed unexpanded lines are stored in the normal
4595 // flow of lines, the expanded lines are stored on the side to be analyzed
4596 // in an extra step.
4597 CurrentExpandedLines.push_back(std::move(*Line));
4598
4599 if (Reconstruct->finished()) {
4600 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
4601 assert(!Reconstructed.Tokens.empty() &&
4602 "Reconstructed must at least contain the macro identifier.");
4603 assert(!parsingPPDirective());
4604 LLVM_DEBUG({
4605 llvm::dbgs() << "Adding unexpanded line:\n";
4606 printDebugInfo(Reconstructed);
4607 });
4608 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
4609 Lines.push_back(std::move(Reconstructed));
4610 CurrentExpandedLines.clear();
4611 Reconstruct.reset();
4612 }
4613 } else {
4614 // At the top level we only get here when no unexpansion is going on, or
4615 // when conditional formatting led to unfinished macro reconstructions.
4616 assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0);
4617 CurrentLines->push_back(std::move(*Line));
4618 }
4619 Line->Tokens.clear();
4620 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4621 Line->FirstStartColumn = 0;
4622 Line->IsContinuation = false;
4623 Line->SeenDecltypeAuto = false;
4624
4625 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4626 --Line->Level;
4627 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
4628 CurrentLines->append(
4629 std::make_move_iterator(PreprocessorDirectives.begin()),
4630 std::make_move_iterator(PreprocessorDirectives.end()));
4631 PreprocessorDirectives.clear();
4632 }
4633 // Disconnect the current token from the last token on the previous line.
4634 FormatTok->Previous = nullptr;
4635}
4636
4637bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
4638
4639bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4640 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4641 FormatTok.NewlinesBefore > 0;
4642}
4643
4644// Checks if \p FormatTok is a line comment that continues the line comment
4645// section on \p Line.
4646static bool
4648 const UnwrappedLine &Line, const FormatStyle &Style,
4649 const llvm::Regex &CommentPragmasRegex) {
4650 if (Line.Tokens.empty() || Style.ReflowComments != FormatStyle::RCS_Always)
4651 return false;
4652
4653 StringRef IndentContent = FormatTok.TokenText;
4654 if (FormatTok.TokenText.starts_with("//") ||
4655 FormatTok.TokenText.starts_with("/*")) {
4656 IndentContent = FormatTok.TokenText.substr(2);
4657 }
4658 if (CommentPragmasRegex.match(IndentContent))
4659 return false;
4660
4661 // If Line starts with a line comment, then FormatTok continues the comment
4662 // section if its original column is greater or equal to the original start
4663 // column of the line.
4664 //
4665 // Define the min column token of a line as follows: if a line ends in '{' or
4666 // contains a '{' followed by a line comment, then the min column token is
4667 // that '{'. Otherwise, the min column token of the line is the first token of
4668 // the line.
4669 //
4670 // If Line starts with a token other than a line comment, then FormatTok
4671 // continues the comment section if its original column is greater than the
4672 // original start column of the min column token of the line.
4673 //
4674 // For example, the second line comment continues the first in these cases:
4675 //
4676 // // first line
4677 // // second line
4678 //
4679 // and:
4680 //
4681 // // first line
4682 // // second line
4683 //
4684 // and:
4685 //
4686 // int i; // first line
4687 // // second line
4688 //
4689 // and:
4690 //
4691 // do { // first line
4692 // // second line
4693 // int i;
4694 // } while (true);
4695 //
4696 // and:
4697 //
4698 // enum {
4699 // a, // first line
4700 // // second line
4701 // b
4702 // };
4703 //
4704 // The second line comment doesn't continue the first in these cases:
4705 //
4706 // // first line
4707 // // second line
4708 //
4709 // and:
4710 //
4711 // int i; // first line
4712 // // second line
4713 //
4714 // and:
4715 //
4716 // do { // first line
4717 // // second line
4718 // int i;
4719 // } while (true);
4720 //
4721 // and:
4722 //
4723 // enum {
4724 // a, // first line
4725 // // second line
4726 // };
4727 const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4728
4729 // Scan for '{//'. If found, use the column of '{' as a min column for line
4730 // comment section continuation.
4731 const FormatToken *PreviousToken = nullptr;
4732 for (const UnwrappedLineNode &Node : Line.Tokens) {
4733 if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4734 isLineComment(*Node.Tok)) {
4735 MinColumnToken = PreviousToken;
4736 break;
4737 }
4738 PreviousToken = Node.Tok;
4739
4740 // Grab the last newline preceding a token in this unwrapped line.
4741 if (Node.Tok->NewlinesBefore > 0)
4742 MinColumnToken = Node.Tok;
4743 }
4744 if (PreviousToken && PreviousToken->is(tok::l_brace))
4745 MinColumnToken = PreviousToken;
4746
4747 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4748 MinColumnToken);
4749}
4750
4751void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4752 bool JustComments = Line->Tokens.empty();
4753 for (FormatToken *Tok : CommentsBeforeNextToken) {
4754 // Line comments that belong to the same line comment section are put on the
4755 // same line since later we might want to reflow content between them.
4756 // Additional fine-grained breaking of line comment sections is controlled
4757 // by the class BreakableLineCommentSection in case it is desirable to keep
4758 // several line comment sections in the same unwrapped line.
4759 //
4760 // FIXME: Consider putting separate line comment sections as children to the
4761 // unwrapped line instead.
4762 Tok->ContinuesLineCommentSection =
4763 continuesLineCommentSection(*Tok, *Line, Style, CommentPragmasRegex);
4764 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4765 addUnwrappedLine();
4766 pushToken(Tok);
4767 }
4768 if (NewlineBeforeNext && JustComments)
4769 addUnwrappedLine();
4770 CommentsBeforeNextToken.clear();
4771}
4772
4773void UnwrappedLineParser::nextToken(int LevelDifference) {
4774 if (eof())
4775 return;
4776 flushComments(isOnNewLine(*FormatTok));
4777 pushToken(FormatTok);
4778 FormatToken *Previous = FormatTok;
4779 if (!Style.isJavaScript())
4780 readToken(LevelDifference);
4781 else
4782 readTokenWithJavaScriptASI();
4783 FormatTok->Previous = Previous;
4784 if (Style.isVerilog()) {
4785 // Blocks in Verilog can have `begin` and `end` instead of braces. For
4786 // keywords like `begin`, we can't treat them the same as left braces
4787 // because some contexts require one of them. For example structs use
4788 // braces and if blocks use keywords, and a left brace can occur in an if
4789 // statement, but it is not a block. For keywords like `end`, we simply
4790 // treat them the same as right braces.
4791 if (Keywords.isVerilogEnd(*FormatTok))
4792 FormatTok->Tok.setKind(tok::r_brace);
4793 }
4794}
4795
4796void UnwrappedLineParser::distributeComments(
4797 const ArrayRef<FormatToken *> &Comments, const FormatToken *NextTok) {
4798 // Whether or not a line comment token continues a line is controlled by
4799 // the method continuesLineCommentSection, with the following caveat:
4800 //
4801 // Define a trail of Comments to be a nonempty proper postfix of Comments such
4802 // that each comment line from the trail is aligned with the next token, if
4803 // the next token exists. If a trail exists, the beginning of the maximal
4804 // trail is marked as a start of a new comment section.
4805 //
4806 // For example in this code:
4807 //
4808 // int a; // line about a
4809 // // line 1 about b
4810 // // line 2 about b
4811 // int b;
4812 //
4813 // the two lines about b form a maximal trail, so there are two sections, the
4814 // first one consisting of the single comment "// line about a" and the
4815 // second one consisting of the next two comments.
4816 if (Comments.empty())
4817 return;
4818 bool ShouldPushCommentsInCurrentLine = true;
4819 bool HasTrailAlignedWithNextToken = false;
4820 unsigned StartOfTrailAlignedWithNextToken = 0;
4821 if (NextTok) {
4822 // We are skipping the first element intentionally.
4823 for (unsigned i = Comments.size() - 1; i > 0; --i) {
4824 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4825 HasTrailAlignedWithNextToken = true;
4826 StartOfTrailAlignedWithNextToken = i;
4827 }
4828 }
4829 }
4830 for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4831 FormatToken *FormatTok = Comments[i];
4832 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4833 FormatTok->ContinuesLineCommentSection = false;
4834 } else {
4835 FormatTok->ContinuesLineCommentSection = continuesLineCommentSection(
4836 *FormatTok, *Line, Style, CommentPragmasRegex);
4837 }
4838 if (!FormatTok->ContinuesLineCommentSection &&
4839 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4840 ShouldPushCommentsInCurrentLine = false;
4841 }
4842 if (ShouldPushCommentsInCurrentLine)
4843 pushToken(FormatTok);
4844 else
4845 CommentsBeforeNextToken.push_back(FormatTok);
4846 }
4847}
4848
4849void UnwrappedLineParser::readToken(int LevelDifference) {
4850 SmallVector<FormatToken *, 1> Comments;
4851 bool PreviousWasComment = false;
4852 bool FirstNonCommentOnLine = false;
4853 do {
4854 FormatTok = Tokens->getNextToken();
4855 assert(FormatTok);
4856 while (FormatTok->isOneOf(TT_ConflictStart, TT_ConflictEnd,
4857 TT_ConflictAlternative)) {
4858 if (FormatTok->is(TT_ConflictStart))
4859 conditionalCompilationStart(/*Unreachable=*/false);
4860 else if (FormatTok->is(TT_ConflictAlternative))
4861 conditionalCompilationAlternative();
4862 else if (FormatTok->is(TT_ConflictEnd))
4863 conditionalCompilationEnd();
4864 FormatTok = Tokens->getNextToken();
4865 FormatTok->MustBreakBefore = true;
4866 FormatTok->MustBreakBeforeFinalized = true;
4867 }
4868
4869 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4870 const FormatToken &Tok,
4871 bool PreviousWasComment) {
4872 auto IsFirstOnLine = [](const FormatToken &Tok) {
4873 return Tok.HasUnescapedNewline || Tok.IsFirst;
4874 };
4875
4876 // Consider preprocessor directives preceded by block comments as first
4877 // on line.
4878 if (PreviousWasComment)
4879 return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4880 return IsFirstOnLine(Tok);
4881 };
4882
4883 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4884 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4885 PreviousWasComment = FormatTok->is(tok::comment);
4886
4887 while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4888 (!Style.isVerilog() ||
4889 Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
4890 FirstNonCommentOnLine) {
4891 distributeComments(Comments, FormatTok);
4892 Comments.clear();
4893 // If there is an unfinished unwrapped line, we flush the preprocessor
4894 // directives only after that unwrapped line was finished later.
4895 bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4896 ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4897 assert((LevelDifference >= 0 ||
4898 static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4899 "LevelDifference makes Line->Level negative");
4900 Line->Level += LevelDifference;
4901 // Comments stored before the preprocessor directive need to be output
4902 // before the preprocessor directive, at the same level as the
4903 // preprocessor directive, as we consider them to apply to the directive.
4905 PPBranchLevel > 0) {
4906 Line->Level += PPBranchLevel;
4907 }
4908 assert(Line->Level >= Line->UnbracedBodyLevel);
4909 Line->Level -= Line->UnbracedBodyLevel;
4910 flushComments(isOnNewLine(*FormatTok));
4911 parsePPDirective();
4912 PreviousWasComment = FormatTok->is(tok::comment);
4913 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4914 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4915 }
4916
4917 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4918 !Line->InPPDirective) {
4919 continue;
4920 }
4921
4922 if (FormatTok->is(tok::identifier) &&
4923 Macros.defined(FormatTok->TokenText) &&
4924 // FIXME: Allow expanding macros in preprocessor directives.
4925 !Line->InPPDirective) {
4926 FormatToken *ID = FormatTok;
4927 unsigned Position = Tokens->getPosition();
4928
4929 // To correctly parse the code, we need to replace the tokens of the macro
4930 // call with its expansion.
4931 auto PreCall = std::move(Line);
4932 Line.reset(new UnwrappedLine);
4933 bool OldInExpansion = InExpansion;
4934 InExpansion = true;
4935 // We parse the macro call into a new line.
4936 auto Args = parseMacroCall();
4937 InExpansion = OldInExpansion;
4938 assert(Line->Tokens.front().Tok == ID);
4939 // And remember the unexpanded macro call tokens.
4940 auto UnexpandedLine = std::move(Line);
4941 // Reset to the old line.
4942 Line = std::move(PreCall);
4943
4944 LLVM_DEBUG({
4945 llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
4946 if (Args) {
4947 llvm::dbgs() << "(";
4948 for (const auto &Arg : Args.value())
4949 for (const auto &T : Arg)
4950 llvm::dbgs() << T->TokenText << " ";
4951 llvm::dbgs() << ")";
4952 }
4953 llvm::dbgs() << "\n";
4954 });
4955 if (Macros.objectLike(ID->TokenText) && Args &&
4956 !Macros.hasArity(ID->TokenText, Args->size())) {
4957 // The macro is either
4958 // - object-like, but we got argumnets, or
4959 // - overloaded to be both object-like and function-like, but none of
4960 // the function-like arities match the number of arguments.
4961 // Thus, expand as object-like macro.
4962 LLVM_DEBUG(llvm::dbgs()
4963 << "Macro \"" << ID->TokenText
4964 << "\" not overloaded for arity " << Args->size()
4965 << "or not function-like, using object-like overload.");
4966 Args.reset();
4967 UnexpandedLine->Tokens.resize(1);
4968 Tokens->setPosition(Position);
4969 nextToken();
4970 assert(!Args && Macros.objectLike(ID->TokenText));
4971 }
4972 if ((!Args && Macros.objectLike(ID->TokenText)) ||
4973 (Args && Macros.hasArity(ID->TokenText, Args->size()))) {
4974 // Next, we insert the expanded tokens in the token stream at the
4975 // current position, and continue parsing.
4976 Unexpanded[ID] = std::move(UnexpandedLine);
4977 SmallVector<FormatToken *, 8> Expansion =
4978 Macros.expand(ID, std::move(Args));
4979 if (!Expansion.empty())
4980 FormatTok = Tokens->insertTokens(Expansion);
4981
4982 LLVM_DEBUG({
4983 llvm::dbgs() << "Expanded: ";
4984 for (const auto &T : Expansion)
4985 llvm::dbgs() << T->TokenText << " ";
4986 llvm::dbgs() << "\n";
4987 });
4988 } else {
4989 LLVM_DEBUG({
4990 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
4991 << "\", because it was used ";
4992 if (Args)
4993 llvm::dbgs() << "with " << Args->size();
4994 else
4995 llvm::dbgs() << "without";
4996 llvm::dbgs() << " arguments, which doesn't match any definition.\n";
4997 });
4998 Tokens->setPosition(Position);
4999 FormatTok = ID;
5000 }
5001 }
5002
5003 if (FormatTok->isNot(tok::comment)) {
5004 distributeComments(Comments, FormatTok);
5005 Comments.clear();
5006 return;
5007 }
5008
5009 Comments.push_back(FormatTok);
5010 } while (!eof());
5011
5012 distributeComments(Comments, nullptr);
5013 Comments.clear();
5014}
5015
5016namespace {
5017template <typename Iterator>
5018void pushTokens(Iterator Begin, Iterator End,
5019 SmallVectorImpl<FormatToken *> &Into) {
5020 for (auto I = Begin; I != End; ++I) {
5021 Into.push_back(I->Tok);
5022 for (const auto &Child : I->Children)
5023 pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
5024 }
5025}
5026} // namespace
5027
5028std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
5029UnwrappedLineParser::parseMacroCall() {
5030 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
5031 assert(Line->Tokens.empty());
5032 nextToken();
5033 if (FormatTok->isNot(tok::l_paren))
5034 return Args;
5035 unsigned Position = Tokens->getPosition();
5036 FormatToken *Tok = FormatTok;
5037 nextToken();
5038 Args.emplace();
5039 auto ArgStart = std::prev(Line->Tokens.end());
5040
5041 int Parens = 0;
5042 do {
5043 switch (FormatTok->Tok.getKind()) {
5044 case tok::l_paren:
5045 ++Parens;
5046 nextToken();
5047 break;
5048 case tok::r_paren: {
5049 if (Parens > 0) {
5050 --Parens;
5051 nextToken();
5052 break;
5053 }
5054 Args->push_back({});
5055 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
5056 nextToken();
5057 return Args;
5058 }
5059 case tok::comma: {
5060 if (Parens > 0) {
5061 nextToken();
5062 break;
5063 }
5064 Args->push_back({});
5065 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
5066 nextToken();
5067 ArgStart = std::prev(Line->Tokens.end());
5068 break;
5069 }
5070 default:
5071 nextToken();
5072 break;
5073 }
5074 } while (!eof());
5075 Line->Tokens.resize(1);
5076 Tokens->setPosition(Position);
5077 FormatTok = Tok;
5078 return {};
5079}
5080
5081void UnwrappedLineParser::pushToken(FormatToken *Tok) {
5082 Line->Tokens.push_back(UnwrappedLineNode(Tok));
5083 if (MustBreakBeforeNextToken) {
5084 Line->Tokens.back().Tok->MustBreakBefore = true;
5085 Line->Tokens.back().Tok->MustBreakBeforeFinalized = true;
5086 MustBreakBeforeNextToken = false;
5087 }
5088}
5089
5090} // end namespace format
5091} // end namespace clang
DynTypedNode Node
static char ID
Definition: Arena.cpp:183
enum clang::sema::@1724::IndirectLocalPathEntry::EntryKind Kind
Expr * E
This file contains FormatTokenLexer, which tokenizes a source file into a token stream suitable for C...
This file defines the FormatTokenSource interface, which provides a token stream as well as the abili...
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
StringRef Text
Definition: Format.cpp:3052
This file contains the main building blocks of macro support in clang-format.
This file implements a token annotator, i.e.
Defines the clang::TokenKind enum and support functions.
SourceLocation Begin
StateNode * Previous
ContinuationIndenter * Indenter
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
do v
Definition: arm_acle.h:91
tok::PPKeywordKind getPPKeywordID() const
Return the preprocessor keyword ID for this identifier.
Implements an efficient mapping from strings to IdentifierInfo nodes.
Parser - This implements a parser for the C family of languages.
Definition: Parser.h:58
This class handles loading and caching of source files into memory.
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:187
bool isAnyIdentifier() const
Return true if this is a raw identifier (when lexing in raw mode) or a non-keyword identifier (when l...
Definition: Token.h:110
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:116
void setKind(tok::TokenKind K)
Definition: Token.h:95
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
Definition: Lexer.cpp:69
tok::TokenKind getKind() const
Definition: Token.h:94
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const
Definition: Token.h:101
bool isNot(tok::TokenKind K) const
Definition: Token.h:100
void setIdentifierInfo(IdentifierInfo *II)
Definition: Token.h:196
CompoundStatementIndenter(UnwrappedLineParser *Parser, const FormatStyle &Style, unsigned &LineLevel)
CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, bool WrapBrace, bool IndentBrace)
virtual FormatToken * peekNextToken(bool SkipComment=false)=0
virtual unsigned getPosition()=0
virtual FormatToken * getPreviousToken()=0
virtual FormatToken * setPosition(unsigned Position)=0
virtual FormatToken * getNextToken()=0
bool objectLike(StringRef Name) const
Returns whether there is an object-like overload, i.e.
SmallVector< FormatToken *, 8 > expand(FormatToken *ID, std::optional< ArgsList > OptionalArgs) const
Returns the expanded stream of format tokens for ID, where each element in Args is a positional argum...
bool hasArity(StringRef Name, unsigned Arity) const
Returns whether macro Name provides an overload with the given arity.
bool defined(StringRef Name) const
Returns whether any macro Name is defined, regardless of overloads.
ScopedLineState(UnwrappedLineParser &Parser, bool SwitchToPreprocessorLines=false)
Interface for users of the UnwrappedLineParser to receive the parsed lines.
virtual void consumeUnwrappedLine(const UnwrappedLine &Line)=0
UnwrappedLineParser(SourceManager &SourceMgr, const FormatStyle &Style, const AdditionalKeywords &Keywords, unsigned FirstStartColumn, ArrayRef< FormatToken * > Tokens, UnwrappedLineConsumer &Callback, llvm::SpecificBumpPtrAllocator< FormatToken > &Allocator, IdentifierTable &IdentTable)
static bool isCOperatorFollowingVar(tok::TokenKind Kind)
static void hash_combine(std::size_t &seed, const T &v)
static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
std::ostream & operator<<(std::ostream &Stream, const UnwrappedLine &Line)
bool continuesLineComment(const FormatToken &FormatTok, const FormatToken *Previous, const FormatToken *MinColumnToken)
Definition: FormatToken.h:1971
static bool tokenCanStartNewLine(const FormatToken &Tok)
static bool continuesLineCommentSection(const FormatToken &FormatTok, const UnwrappedLine &Line, const FormatStyle &Style, const llvm::Regex &CommentPragmasRegex)
static bool isC78Type(const FormatToken &Tok)
bool isLineComment(const FormatToken &FormatTok)
Definition: FormatToken.h:1964
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
static bool ShouldBreakBeforeBrace(const FormatStyle &Style, const FormatToken &InitialToken)
LangOptions getFormattingLangOpts(const FormatStyle &Style=getLLVMStyle())
Returns the LangOpts that the formatter expects you to set.
Definition: Format.cpp:3930
static void markOptionalBraces(FormatToken *LeftBrace)
static bool mustBeJSIdent(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
static bool isIIFE(const UnwrappedLine &Line, const AdditionalKeywords &Keywords)
static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, const FormatToken *FuncName)
static bool isGoogScope(const UnwrappedLine &Line)
static FormatToken * getLastNonComment(const UnwrappedLine &Line)
TokenType
Determines the semantic type of a syntactic token, e.g.
Definition: FormatToken.h:212
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
bool isLiteral(TokenKind K)
Return true if this is a "literal" kind, like a numeric constant, string, etc.
Definition: TokenKinds.h:97
The JSON file list parser is used to communicate input to InstallAPI.
const FunctionProtoType * T
@ Parens
New-expression has a C++98 paren-delimited initializer.
#define false
Definition: stdbool.h:26
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:1032
bool isVerilogEnd(const FormatToken &Tok) const
Returns whether Tok is a Verilog keyword that closes a block.
Definition: FormatToken.h:1865
bool isVerilogBegin(const FormatToken &Tok) const
Returns whether Tok is a Verilog keyword that opens a block.
Definition: FormatToken.h:1858
bool isVerilogStructuredProcedure(const FormatToken &Tok) const
Returns whether Tok is a Verilog keyword that starts a structured procedure like 'always'.
Definition: FormatToken.h:1903
bool isVerilogHierarchy(const FormatToken &Tok) const
Returns whether Tok is a Verilog keyword that opens a module, etc.
Definition: FormatToken.h:1877
bool isVerilogPPDirective(const FormatToken &Tok) const
Returns whether Tok is a Verilog preprocessor directive.
Definition: FormatToken.h:1831
IdentifierInfo * kw_internal_ident_after_define
Definition: FormatToken.h:1465
bool isVerilogIdentifier(const FormatToken &Tok) const
Definition: FormatToken.h:1795
bool AfterClass
Wrap class definitions.
Definition: Format.h:1375
bool AfterStruct
Wrap struct definitions.
Definition: Format.h:1442
bool AfterUnion
Wrap union definitions.
Definition: Format.h:1456
bool AfterEnum
Wrap enum definitions.
Definition: Format.h:1390
bool IndentBraces
Indent the wrapped braces themselves.
Definition: Format.h:1533
bool AfterObjCDeclaration
Wrap ObjC definitions (interfaces, implementations...).
Definition: Format.h:1428
bool AfterNamespace
Wrap namespace definitions.
Definition: Format.h:1422
BraceWrappingAfterControlStatementStyle AfterControlStatement
Wrap control statements (if/for/while/switch/..).
Definition: Format.h:1378
bool AfterFunction
Wrap function definitions.
Definition: Format.h:1406
bool AfterExternBlock
Wrap extern blocks.
Definition: Format.h:1470
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
bool isTableGen() const
Definition: Format.h:3297
@ LK_Java
Should be used for Java.
Definition: Format.h:3269
@ LK_TableGen
Should be used for TableGen code.
Definition: Format.h:3280
@ LK_Proto
Should be used for Protocol Buffers (https://developers.google.com/protocol-buffers/).
Definition: Format.h:3278
@ LK_TextProto
Should be used for Protocol Buffer messages in text format (https://developers.google....
Definition: Format.h:3283
unsigned IndentWidth
The number of columns to use for indentation.
Definition: Format.h:2931
bool IndentCaseLabels
Indent case labels one level from the switch statement.
Definition: Format.h:2802
PPDirectiveIndentStyle IndentPPDirectives
The preprocessor directive indenting style to use.
Definition: Format.h:2894
bool RemoveSemicolon
Remove semicolons after the closing braces of functions and constructors/destructors.
Definition: Format.h:4028
@ RCS_Always
Apply indentation rules and reflow long comments into new lines, trying to obey the ColumnLimit.
Definition: Format.h:3891
@ IEBS_AfterExternBlock
Backwards compatible with AfterExternBlock's indenting.
Definition: Format.h:2840
@ IEBS_Indent
Indents extern blocks.
Definition: Format.h:2854
bool IndentCaseBlocks
Indent case label blocks one level from the case label.
Definition: Format.h:2783
bool InsertBraces
Insert braces after control statements (if, else, for, do, and while) in C++ unless the control state...
Definition: Format.h:2977
RemoveParenthesesStyle RemoveParentheses
Remove redundant parentheses.
Definition: Format.h:4010
LanguageKind Language
Language, this format style is targeted at.
Definition: Format.h:3301
bool RemoveBracesLLVM
Remove optional braces of control statements (if, else, for, and while) in C++ according to the LLVM ...
Definition: Format.h:3951
@ PPDIS_BeforeHash
Indents directives before the hash.
Definition: Format.h:2889
@ PPDIS_None
Does not indent any directives.
Definition: Format.h:2871
bool AllowShortLoopsOnASingleLine
If true, while (true) continue; can be put on a single line.
Definition: Format.h:989
bool AllowShortEnumsOnASingleLine
Allow short enums on a single line.
Definition: Format.h:826
NamespaceIndentationKind NamespaceIndentation
The indentation used for namespaces.
Definition: Format.h:3442
BraceBreakingStyle BreakBeforeBraces
The brace breaking style to use.
Definition: Format.h:2200
bool isCSharp() const
Definition: Format.h:3290
@ BWACS_Always
Always wrap braces after a control statement.
Definition: Format.h:1339
@ BWACS_Never
Never wrap braces after a control statement.
Definition: Format.h:1318
@ BS_Whitesmiths
Like Allman but always indent braces and line up code with braces.
Definition: Format.h:2083
ReflowCommentsStyle ReflowComments
Comment reformatting style.
Definition: Format.h:3897
bool isVerilog() const
Definition: Format.h:3293
bool isJavaScript() const
Definition: Format.h:3292
bool IndentGotoLabels
Indent goto labels.
Definition: Format.h:2819
BraceWrappingFlags BraceWrapping
Control of individual brace wrapping cases.
Definition: Format.h:1587
@ RPS_Leave
Do not remove parentheses.
Definition: Format.h:3984
@ RPS_ReturnStatement
Also remove parentheses enclosing the expression in a return/co_return statement.
Definition: Format.h:3999
bool SkipMacroDefinitionBody
Do not format macro definition body.
Definition: Format.h:4242
@ NI_All
Indent in all namespaces.
Definition: Format.h:3437
@ NI_Inner
Indent only in inner namespaces (nested in other namespaces).
Definition: Format.h:3427
bool IndentAccessModifiers
Specify whether access modifiers should have their own indentation level.
Definition: Format.h:2760
IndentExternBlockStyle IndentExternBlock
IndentExternBlockStyle is the type of indenting of extern blocks.
Definition: Format.h:2859
unsigned ColumnLimit
The column limit.
Definition: Format.h:2408
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:297
bool Optional
Is optional and can be removed.
Definition: FormatToken.h:581
bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const
Definition: FormatToken.h:670
bool isTypeName(const LangOptions &LangOpts) const
Definition: FormatToken.cpp:44
bool isCppAlternativeOperatorKeyword() const
Definition: FormatToken.h:738
bool isNot(T Kind) const
Definition: FormatToken.h:631
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:317
FormatToken * getPreviousNonComment() const
Returns the previous token ignoring comments.
Definition: FormatToken.h:840
unsigned Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside),...
Definition: FormatToken.h:376
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:466
void setBlockKind(BraceBlockKind BBK)
Definition: FormatToken.h:392
bool isStringLiteral() const
Definition: FormatToken.h:664
bool isBinaryOperator() const
Definition: FormatToken.h:777
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:612
bool hasWhitespaceBefore() const
Returns true if the range of whitespace immediately preceding the Token is not empty.
Definition: FormatToken.h:828
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:624
unsigned ClosesRequiresClause
true if this is the last token within requires clause.
Definition: FormatToken.h:379
bool isAccessSpecifierKeyword() const
Definition: FormatToken.h:674
FormatToken * MatchingParen
If this is a bracket, this points to the matching one.
Definition: FormatToken.h:563
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:566
bool endsSequence(A K1, Ts... Tokens) const
true if this token ends a sequence with the given tokens in order, following the Previous pointers,...
Definition: FormatToken.h:660
void setFinalizedType(TokenType T)
Sets the type and also the finalized flag.
Definition: FormatToken.h:445
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...
static const size_t kInvalidIndex