clang 23.0.0git
UnwrappedLineParser.cpp
Go to the documentation of this file.
1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains the implementation of the UnwrappedLineParser,
11/// which turns a stream of tokens into UnwrappedLines.
12///
13//===----------------------------------------------------------------------===//
14
15#include "UnwrappedLineParser.h"
16#include "FormatToken.h"
17#include "FormatTokenSource.h"
18#include "Macros.h"
19#include "TokenAnnotator.h"
21#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/raw_os_ostream.h"
25#include "llvm/Support/raw_ostream.h"
26
27#include <utility>
28
29#define DEBUG_TYPE "format-parser"
30
31namespace clang {
32namespace format {
33
34namespace {
35
36void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
37 StringRef Prefix = "", bool PrintText = false) {
38 OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
39 << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
40 bool NewLine = false;
41 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
42 E = Line.Tokens.end();
43 I != E; ++I) {
44 if (NewLine) {
45 OS << Prefix;
46 NewLine = false;
47 }
48 OS << I->Tok->Tok.getName() << "["
49 << "T=" << (unsigned)I->Tok->getType()
50 << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
51 << "\"] ";
52 for (const auto *CI = I->Children.begin(), *CE = I->Children.end();
53 CI != CE; ++CI) {
54 OS << "\n";
55 printLine(OS, *CI, (Prefix + " ").str());
56 NewLine = true;
57 }
58 }
59 if (!NewLine)
60 OS << "\n";
61}
62
63[[maybe_unused]] static void printDebugInfo(const UnwrappedLine &Line) {
64 printLine(llvm::dbgs(), Line);
65}
66
67class ScopedDeclarationState {
68public:
69 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
70 bool MustBeDeclaration)
71 : Line(Line), Stack(Stack) {
72 Line.MustBeDeclaration = MustBeDeclaration;
73 Stack.push_back(MustBeDeclaration);
74 }
75 ~ScopedDeclarationState() {
76 Stack.pop_back();
77 if (!Stack.empty())
78 Line.MustBeDeclaration = Stack.back();
79 else
80 Line.MustBeDeclaration = true;
81 }
82
83private:
84 UnwrappedLine &Line;
85 llvm::BitVector &Stack;
86};
87
88} // end anonymous namespace
89
90std::ostream &operator<<(std::ostream &Stream, const UnwrappedLine &Line) {
91 llvm::raw_os_ostream OS(Stream);
92 printLine(OS, Line);
93 return Stream;
94}
95
97public:
99 bool SwitchToPreprocessorLines = false)
100 : Parser(Parser), OriginalLines(Parser.CurrentLines) {
101 if (SwitchToPreprocessorLines)
102 Parser.CurrentLines = &Parser.PreprocessorDirectives;
103 else if (!Parser.Line->Tokens.empty())
104 Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
105 PreBlockLine = std::move(Parser.Line);
106 Parser.Line = std::make_unique<UnwrappedLine>();
107 Parser.Line->Level = PreBlockLine->Level;
108 Parser.Line->PPLevel = PreBlockLine->PPLevel;
109 Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
110 Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
111 Parser.Line->UnbracedBodyLevel = PreBlockLine->UnbracedBodyLevel;
112 }
113
115 if (!Parser.Line->Tokens.empty())
116 Parser.addUnwrappedLine();
117 assert(Parser.Line->Tokens.empty());
118 Parser.Line = std::move(PreBlockLine);
119 if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
120 Parser.AtEndOfPPLine = true;
121 Parser.CurrentLines = OriginalLines;
122 }
123
124private:
126
127 std::unique_ptr<UnwrappedLine> PreBlockLine;
128 SmallVectorImpl<UnwrappedLine> *OriginalLines;
129};
130
132public:
134 const FormatStyle &Style, unsigned &LineLevel)
136 Style.BraceWrapping.AfterControlStatement ==
137 FormatStyle::BWACS_Always,
138 Style.BraceWrapping.IndentBraces) {}
140 bool WrapBrace, bool IndentBrace)
141 : LineLevel(LineLevel), OldLineLevel(LineLevel) {
142 if (WrapBrace)
143 Parser->addUnwrappedLine();
144 if (IndentBrace)
145 ++LineLevel;
146 }
147 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
148
149private:
150 unsigned &LineLevel;
151 unsigned OldLineLevel;
152};
153
155 SourceManager &SourceMgr, const FormatStyle &Style,
156 const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
158 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
159 IdentifierTable &IdentTable)
160 : Line(new UnwrappedLine), AtEndOfPPLine(false), CurrentLines(&Lines),
161 Style(Style), IsCpp(Style.isCpp()),
162 LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords),
163 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
164 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
165 IncludeGuard(getIncludeGuardState(Style.IndentPPDirectives)),
166 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
167 Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}
168
169void UnwrappedLineParser::reset() {
170 PPBranchLevel = -1;
171 IncludeGuard = getIncludeGuardState(Style.IndentPPDirectives);
172 IncludeGuardToken = nullptr;
173 Line.reset(new UnwrappedLine);
174 CommentsBeforeNextToken.clear();
175 FormatTok = nullptr;
176 AtEndOfPPLine = false;
177 IsDecltypeAutoFunction = false;
178 PreprocessorDirectives.clear();
179 CurrentLines = &Lines;
180 DeclarationScopeStack.clear();
181 NestedTooDeep.clear();
182 NestedLambdas.clear();
183 PPStack.clear();
184 Line->FirstStartColumn = FirstStartColumn;
185
186 if (!Unexpanded.empty())
187 for (FormatToken *Token : AllTokens)
188 Token->MacroCtx.reset();
189 CurrentExpandedLines.clear();
190 ExpandedLines.clear();
191 Unexpanded.clear();
192 InExpansion = false;
193 Reconstruct.reset();
194}
195
197 IndexedTokenSource TokenSource(AllTokens);
198 Line->FirstStartColumn = FirstStartColumn;
199 do {
200 LLVM_DEBUG(llvm::dbgs() << "----\n");
201 reset();
202 Tokens = &TokenSource;
203 TokenSource.reset();
204
205 readToken();
206 parseFile();
207
208 // If we found an include guard then all preprocessor directives (other than
209 // the guard) are over-indented by one.
210 if (IncludeGuard == IG_Found) {
211 for (auto &Line : Lines)
212 if (Line.InPPDirective && Line.Level > 0)
213 --Line.Level;
214 }
215
216 // Create line with eof token.
217 assert(eof());
218 pushToken(FormatTok);
219 addUnwrappedLine();
220
221 // In a first run, format everything with the lines containing macro calls
222 // replaced by the expansion.
223 if (!ExpandedLines.empty()) {
224 LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
225 for (const auto &Line : Lines) {
226 if (!Line.Tokens.empty()) {
227 auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
228 if (it != ExpandedLines.end()) {
229 for (const auto &Expanded : it->second) {
230 LLVM_DEBUG(printDebugInfo(Expanded));
231 Callback.consumeUnwrappedLine(Expanded);
232 }
233 continue;
234 }
235 }
236 LLVM_DEBUG(printDebugInfo(Line));
237 Callback.consumeUnwrappedLine(Line);
238 }
239 Callback.finishRun();
240 }
241
242 LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
243 for (const UnwrappedLine &Line : Lines) {
244 LLVM_DEBUG(printDebugInfo(Line));
245 Callback.consumeUnwrappedLine(Line);
246 }
247 Callback.finishRun();
248 Lines.clear();
249 while (!PPLevelBranchIndex.empty() &&
250 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
251 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
252 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
253 }
254 if (!PPLevelBranchIndex.empty()) {
255 ++PPLevelBranchIndex.back();
256 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
257 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
258 }
259 } while (!PPLevelBranchIndex.empty());
260}
261
262void UnwrappedLineParser::parseFile() {
263 // The top-level context in a file always has declarations, except for pre-
264 // processor directives and JavaScript files.
265 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
266 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
267 MustBeDeclaration);
268 if (Style.isTextProto() || (Style.isJson() && FormatTok->IsFirst))
269 parseBracedList();
270 else
271 parseLevel();
272 // Make sure to format the remaining tokens.
273 //
274 // LK_TextProto is special since its top-level is parsed as the body of a
275 // braced list, which does not necessarily have natural line separators such
276 // as a semicolon. Comments after the last entry that have been determined to
277 // not belong to that line, as in:
278 // key: value
279 // // endfile comment
280 // do not have a chance to be put on a line of their own until this point.
281 // Here we add this newline before end-of-file comments.
282 if (Style.isTextProto() && !CommentsBeforeNextToken.empty())
283 addUnwrappedLine();
284 flushComments(true);
285 addUnwrappedLine();
286}
287
288void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
289 do {
290 switch (FormatTok->Tok.getKind()) {
291 case tok::l_brace:
292 case tok::semi:
293 return;
294 default:
295 if (FormatTok->is(Keywords.kw_where)) {
296 addUnwrappedLine();
297 nextToken();
298 parseCSharpGenericTypeConstraint();
299 break;
300 }
301 nextToken();
302 break;
303 }
304 } while (!eof());
305}
306
307void UnwrappedLineParser::parseCSharpAttribute() {
308 int UnpairedSquareBrackets = 1;
309 do {
310 switch (FormatTok->Tok.getKind()) {
311 case tok::r_square:
312 nextToken();
313 --UnpairedSquareBrackets;
314 if (UnpairedSquareBrackets == 0) {
315 addUnwrappedLine();
316 return;
317 }
318 break;
319 case tok::l_square:
320 ++UnpairedSquareBrackets;
321 nextToken();
322 break;
323 default:
324 nextToken();
325 break;
326 }
327 } while (!eof());
328}
329
330bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
331 if (!Lines.empty() && Lines.back().InPPDirective)
332 return true;
333
334 const FormatToken *Previous = Tokens->getPreviousToken();
335 return Previous && Previous->is(tok::comment) &&
336 (Previous->IsMultiline || Previous->NewlinesBefore > 0);
337}
338
339/// Parses a level, that is, the run of comments, blocks and structural
/// elements that is read token by token until the end of input or, when
/// \p OpeningBrace is given, until a token handled as its closing brace is
/// reached (that token is left unconsumed).
340/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
341/// \param IfKind The \p if statement kind in the level.
342/// \param IfLeftBrace The left brace of the \p if block in the level.
343/// \returns true if a simple block of if/else/for/while, or false otherwise.
344/// (A simple block has a single statement.)
345bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
346 IfStmtKind *IfKind,
347 FormatToken **IfLeftBrace) {
348 const bool InRequiresExpression =
349 OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
// Sampled once on entry. It is also true whenever RemoveBracesLLVM is off,
// which makes the "simple block" result below always false in that case.
350 const bool IsPrecededByCommentOrPPDirective =
351 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
352 FormatToken *IfLBrace = nullptr;
353 bool HasDoWhile = false;
354 bool HasLabel = false;
355 unsigned StatementCount = 0;
356 bool SwitchLabelEncountered = false;
357
358 do {
// Skip an attribute token and, if present, its parenthesized arguments.
359 if (FormatTok->isAttribute()) {
360 nextToken();
361 if (FormatTok->is(tok::l_paren))
362 parseParens();
363 continue;
364 }
// Block-begin/block-end macros are dispatched like '{' and '}'.
365 tok::TokenKind Kind = FormatTok->Tok.getKind();
366 if (FormatTok->is(TT_MacroBlockBegin))
367 Kind = tok::l_brace;
368 else if (FormatTok->is(TT_MacroBlockEnd))
369 Kind = tok::r_brace;
370
// Parses one structural element and counts it as a statement. The
// HasDoWhile/HasLabel out-pointers are only handed over while the
// respective flag is still false, so a flag that was set stays set.
371 auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
372 &HasLabel, &StatementCount] {
373 parseStructuralElement(OpeningBrace, IfKind, &IfLBrace,
374 HasDoWhile ? nullptr : &HasDoWhile,
375 HasLabel ? nullptr : &HasLabel);
376 ++StatementCount;
377 assert(StatementCount > 0 && "StatementCount overflow!");
378 };
379
380 switch (Kind) {
381 case tok::comment:
382 nextToken();
383 addUnwrappedLine();
384 break;
385 case tok::l_brace:
386 if (InRequiresExpression) {
387 FormatTok->setFinalizedType(TT_CompoundRequirementLBrace);
388 } else if (FormatTok->Previous &&
389 FormatTok->Previous->ClosesRequiresClause) {
390 // We need the 'default' case here to correctly parse a function
391 // l_brace.
392 ParseDefault();
393 continue;
394 }
395 if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin)) {
396 if (tryToParseBracedList())
397 continue;
398 FormatTok->setFinalizedType(TT_BlockLBrace);
399 }
400 parseBlock();
401 ++StatementCount;
402 assert(StatementCount > 0 && "StatementCount overflow!");
403 addUnwrappedLine();
404 break;
405 case tok::r_brace:
406 if (OpeningBrace) {
// End of this level. Only blocks of control statements / else are
// candidates for brace removal, and only with RemoveBracesLLVM on and
// outside of preprocessor directives.
407 if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
408 OpeningBrace->isNoneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
409 return false;
410 }
// Kind may have been remapped from a block-end macro above, hence the
// explicit check that the token really is '}'.
411 if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
412 HasDoWhile || IsPrecededByCommentOrPPDirective ||
413 precededByCommentOrPPDirective()) {
414 return false;
415 }
// A comment on the same line after the closing brace also disqualifies
// the block.
416 const FormatToken *Next = Tokens->peekNextToken();
417 if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
418 return false;
419 if (IfLeftBrace)
420 *IfLeftBrace = IfLBrace;
421 return true;
422 }
// No opening brace belongs to this level: consume the closing brace and
// end the current line.
423 nextToken();
424 addUnwrappedLine();
425 break;
426 case tok::kw_default: {
// Peek at the next non-comment token, then restore the token position.
427 unsigned StoredPosition = Tokens->getPosition();
428 auto *Next = Tokens->getNextNonComment();
429 FormatTok = Tokens->setPosition(StoredPosition);
430 if (Next->isNoneOf(tok::colon, tok::arrow)) {
431 // default not followed by `:` or `->` is not a case label; treat it
432 // like an identifier.
433 parseStructuralElement();
434 break;
435 }
436 // Else, if it is 'default:', fall through to the case handling.
437 [[fallthrough]];
438 }
439 case tok::kw_case:
440 if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() ||
441 (Style.isJavaScript() && Line->MustBeDeclaration)) {
442 // Proto: there are no switch/case statements
443 // Verilog: Case labels don't have this word. We handle case
444 // labels including default in TokenAnnotator.
445 // JavaScript: A 'case: string' style field declaration.
446 ParseDefault();
447 break;
448 }
// The level is bumped at most once per parseLevel call, at the first label.
449 if (!SwitchLabelEncountered &&
450 (Style.IndentCaseLabels ||
451 (OpeningBrace && OpeningBrace->is(TT_SwitchExpressionLBrace)) ||
452 (Line->InPPDirective && Line->Level == 1))) {
453 ++Line->Level;
454 }
455 SwitchLabelEncountered = true;
456 parseStructuralElement();
457 break;
458 case tok::l_square:
459 if (Style.isCSharp()) {
460 nextToken();
461 parseCSharpAttribute();
462 break;
463 }
464 if (handleCppAttributes())
465 break;
466 [[fallthrough]];
467 default:
468 ParseDefault();
469 break;
470 }
471 } while (!eof());
472
473 return false;
474}
475
/// Scans forward from the current token (asserted to be an l_brace) and
/// assigns a block kind (BK_Block or BK_BracedInit) to the braces it
/// encounters, using the tokens around each brace as heuristics. The scan ends
/// when every brace opened during the scan is closed or at eof; braces still
/// open and undecided are marked BK_Block. The token position is restored
/// before returning.
/// \param ExpectClassBody Only consulted for a `}` followed by `;`: such a
/// pair counts as a braced list unless this is true and the `}` closes the
/// outermost brace of the scan.
476void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
477 // We'll parse forward through the tokens until we hit
478 // a closing brace or eof - note that getNextToken() will
479 // parse macros, so this will magically work inside macro
480 // definitions, too.
481 unsigned StoredPosition = Tokens->getPosition();
482 FormatToken *Tok = FormatTok;
483 const FormatToken *PrevTok = Tok->Previous;
484 // Keep a stack of positions of lbrace tokens. We will
485 // update information about whether an lbrace starts a
486 // braced init list or a different block during the loop.
// NOTE(review): this listing skips source line 488. The uses below (`.Tok`,
// `{Tok, PrevTok}`) suggest the struct's first member `Tok` is declared
// there; confirm against the original file.
487 struct StackEntry {
489 const FormatToken *PrevTok;
490 };
491 SmallVector<StackEntry, 8> LBraceStack;
492 assert(Tok->is(tok::l_brace));
493
// Each iteration classifies `Tok`, with `PrevTok` and `NextTok` as its
// neighbors, then advances all three.
494 do {
495 auto *NextTok = Tokens->getNextNonComment();
496
497 if (!Line->InMacroBody && !Style.isTableGen()) {
498 // Skip PPDirective lines (except macro definitions) and comments.
499 while (NextTok->is(tok::hash)) {
500 NextTok = Tokens->getNextToken();
501 if (NextTok->isOneOf(tok::pp_not_keyword, tok::pp_define))
502 break;
503 do {
504 NextTok = Tokens->getNextToken();
505 } while (!NextTok->HasUnescapedNewline && NextTok->isNot(tok::eof));
506
507 while (NextTok->is(tok::comment))
508 NextTok = Tokens->getNextToken();
509 }
510 }
511
512 switch (Tok->Tok.getKind()) {
513 case tok::l_brace:
514 if (Style.isJavaScript() && PrevTok) {
515 if (PrevTok->isOneOf(tok::colon, tok::less)) {
516 // A ':' indicates this code is in a type, or a braced list
517 // following a label in an object literal ({a: {b: 1}}).
518 // A '<' could be an object used in a comparison, but that is nonsense
519 // code (can never return true), so more likely it is a generic type
520 // argument (`X<{a: string; b: number}>`).
521 // The code below could be confused by semicolons between the
522 // individual members in a type member list, which would normally
523 // trigger BK_Block. In both cases, this must be parsed as an inline
524 // braced init.
525 Tok->setBlockKind(BK_BracedInit);
526 } else if (PrevTok->is(tok::r_paren)) {
527 // `) { }` can only occur in function or method declarations in JS.
528 Tok->setBlockKind(BK_Block);
529 }
530 } else if (Style.isJava() && PrevTok && PrevTok->is(tok::arrow)) {
531 Tok->setBlockKind(BK_Block);
532 } else {
533 Tok->setBlockKind(BK_Unknown);
534 }
// Remember the brace together with the token in front of it; the kind is
// decided (if still unknown) when the matching '}' is seen.
535 LBraceStack.push_back({Tok, PrevTok});
536 break;
537 case tok::r_brace:
538 if (LBraceStack.empty())
539 break;
// Only decide when the matching opening brace has no kind yet. The
// heuristics below are OR-ed together, except for the `[` check, which
// overwrites the result.
540 if (auto *LBrace = LBraceStack.back().Tok; LBrace->is(BK_Unknown)) {
541 bool ProbablyBracedList = false;
542 if (Style.Language == FormatStyle::LK_Proto) {
543 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
544 } else if (LBrace->isNot(TT_EnumLBrace)) {
545 // Using OriginalColumn to distinguish between ObjC methods and
546 // binary operators is a bit hacky.
547 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
548 NextTok->OriginalColumn == 0;
549
550 // Try to detect a braced list. Note that regardless how we mark inner
551 // braces here, we will overwrite the BlockKind later if we parse a
552 // braced list (where all blocks inside are by default braced lists),
553 // or when we explicitly detect blocks (for example while parsing
554 // lambdas).
555
556 // If we already marked the opening brace as braced list, the closing
557 // must also be part of it.
558 ProbablyBracedList = LBrace->is(TT_BracedListLBrace);
559
560 ProbablyBracedList = ProbablyBracedList ||
561 (Style.isJavaScript() &&
562 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
563 Keywords.kw_as));
564 ProbablyBracedList =
565 ProbablyBracedList ||
566 (IsCpp && (PrevTok->Tok.isLiteral() ||
567 NextTok->isOneOf(tok::l_paren, tok::arrow)));
568
569 // If there is a comma, or right paren after the closing brace, we
570 // assume this is a braced initializer list.
571 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
572 // braced list in JS.
573 ProbablyBracedList =
574 ProbablyBracedList ||
575 NextTok->isOneOf(tok::comma, tok::period, tok::colon,
576 tok::r_paren, tok::r_square, tok::ellipsis);
577
578 // Distinguish between braced list in a constructor initializer list
579 // followed by constructor body, or just adjacent blocks.
580 ProbablyBracedList =
581 ProbablyBracedList ||
582 (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
583 LBraceStack.back().PrevTok->isOneOf(tok::identifier,
584 tok::greater));
585
586 ProbablyBracedList =
587 ProbablyBracedList ||
588 (NextTok->is(tok::identifier) &&
589 PrevTok->isNoneOf(tok::semi, tok::r_brace, tok::l_brace));
590
591 ProbablyBracedList = ProbablyBracedList ||
592 (NextTok->is(tok::semi) &&
593 (!ExpectClassBody || LBraceStack.size() != 1));
594
595 ProbablyBracedList =
596 ProbablyBracedList ||
597 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
598
599 if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
600 // We can have an array subscript after a braced init
601 // list, but C++11 attributes are expected after blocks.
// Note: this advances NextTok past the first '[' and replaces (rather
// than ORs into) the result of the heuristics above.
602 NextTok = Tokens->getNextToken();
603 ProbablyBracedList = NextTok->isNot(tok::l_square);
604 }
605
606 // Cpp macro definition body that is a nonempty braced list or block:
607 if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
608 !FormatTok->Previous && NextTok->is(tok::eof) &&
609 // A statement can end with only `;` (simple statement), a block
610 // closing brace (compound statement), or `:` (label statement).
611 // If PrevTok is a block opening brace, Tok ends an empty block.
612 PrevTok->isNoneOf(tok::semi, BK_Block, tok::colon)) {
613 ProbablyBracedList = true;
614 }
615 }
// Both braces of the pair get the same kind.
616 const auto BlockKind = ProbablyBracedList ? BK_BracedInit : BK_Block;
617 Tok->setBlockKind(BlockKind);
618 LBrace->setBlockKind(BlockKind);
619 }
620 LBraceStack.pop_back();
621 break;
622 case tok::identifier:
// Only identifiers typed as statement macros count as statement evidence.
623 if (Tok->isNot(TT_StatementMacro))
624 break;
625 [[fallthrough]];
626 case tok::at:
627 case tok::semi:
628 case tok::kw_if:
629 case tok::kw_while:
630 case tok::kw_for:
631 case tok::kw_switch:
632 case tok::kw_try:
633 case tok::kw___try:
// Any of these tokens inside a still-undecided brace makes it a block.
634 if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
635 LBraceStack.back().Tok->setBlockKind(BK_Block);
636 break;
637 default:
638 break;
639 }
640
641 PrevTok = Tok;
642 Tok = NextTok;
643 } while (Tok->isNot(tok::eof) && !LBraceStack.empty());
644
645 // Assume other blocks for all unclosed opening braces.
646 for (const auto &Entry : LBraceStack)
647 if (Entry.Tok->is(BK_Unknown))
648 Entry.Tok->setBlockKind(BK_Block);
649
// Rewind the token source to where the scan started.
650 FormatTok = Tokens->setPosition(StoredPosition);
651}
652
653// Sets the token type of the directly previous right brace.
654void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
655 if (auto Prev = FormatTok->getPreviousNonComment();
656 Prev && Prev->is(tok::r_brace)) {
657 Prev->setFinalizedType(Type);
658 }
659}
660
/// Folds the std::hash of \p v into the running hash value \p seed.
///
/// \param seed In/out accumulator; updated in place.
/// \param v    Value whose hash is mixed in; \c std::hash<T> must exist.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t ValueHash = std::hash<T>{}(v);
  // The mix is computed from the old seed before the seed is overwritten.
  const std::size_t Mixed = ValueHash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
666
667size_t UnwrappedLineParser::computePPHash() const {
668 size_t h = 0;
669 for (const auto &i : PPStack) {
670 hash_combine(h, size_t(i.Kind));
671 hash_combine(h, i.Line);
672 }
673 return h;
674}
675
676// Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
677// is not null, subtracts its length (plus the preceding space) when computing
678// the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
679// running the token annotator on it so that we can restore them afterward.
// Always returns true when the style's ColumnLimit is 0.
680bool UnwrappedLineParser::mightFitOnOneLine(
681 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
682 const auto ColumnLimit = Style.ColumnLimit;
683 if (ColumnLimit == 0)
684 return true;
685
686 auto &Tokens = ParsedLine.Tokens;
687 assert(!Tokens.empty());
688
689 const auto *LastToken = Tokens.back().Tok;
690 assert(LastToken);
691
692 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
693
// Snapshot every token into a heap-allocated clone; the clones are freed in
// the restore loop further down.
// NOTE(review): `Token` is a const reference here, so the std::move below
// presumably selects a copy rather than a move, leaving the line's children
// in place for the annotator — confirm before "fixing" it.
694 int Index = 0;
695 for (const auto &Token : Tokens) {
696 assert(Token.Tok);
697 auto &SavedToken = SavedTokens[Index++];
698 SavedToken.Tok = new FormatToken;
699 SavedToken.Tok->copyFrom(*Token.Tok);
700 SavedToken.Children = std::move(Token.Children);
701 }
702
// This local `Line` is an AnnotatedLine and shadows the parser's member of
// the same name for the rest of the function.
703 AnnotatedLine Line(ParsedLine);
704 assert(Line.Last == LastToken);
705
706 TokenAnnotator Annotator(Style, Keywords);
707 Annotator.annotate(Line);
708 Annotator.calculateFormattingInformation(Line);
709
// Length of the whole line as computed by the annotator.
710 auto Length = LastToken->TotalLength;
711 if (OpeningBrace) {
712 assert(OpeningBrace != Tokens.front().Tok);
// If the lengths of the brace and its predecessor differ by exactly
// ColumnLimit, that amount is taken out of the length again.
713 if (auto Prev = OpeningBrace->Previous;
714 Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
715 Length -= ColumnLimit;
716 }
717 Length -= OpeningBrace->TokenText.size() + 1;
718 }
719
// A leading closing brace (plus one column) is not counted either.
720 if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
721 assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
722 Length -= FirstToken->TokenText.size() + 1;
723 }
724
// Restore the tokens from the snapshot and release the clones.
// NOTE(review): `SavedToken` is a const reference, so this std::move is
// presumably a copy as well.
725 Index = 0;
726 for (auto &Token : Tokens) {
727 const auto &SavedToken = SavedTokens[Index++];
728 Token.Tok->copyFrom(*SavedToken.Tok);
729 Token.Children = std::move(SavedToken.Children);
730 delete SavedToken.Tok;
731 }
732
733 // If these change PPLevel needs to be used for get correct indentation.
734 assert(!Line.InMacroBody);
735 assert(!Line.InPPDirective);
736 return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
737}
738
/// Parses a block that starts at the current token: a '{', a block-begin
/// macro, or (Verilog) a begin keyword / hierarchy header, as asserted below.
/// \param MustBeDeclaration Declaration flag installed for the block body.
/// \param AddLevels Number of indentation levels added for the block body.
/// \param MunchSemi If true, a ';' directly after the block is consumed.
/// \param KeepBraces If true, the braces are only considered removable when
/// the nested \p if block reported by parseLevel has non-optional braces.
/// \param IfKind Forwarded to parseLevel.
/// \param UnindentWhitesmithsBraces If true, the level is decremented by one
/// after the opening line has been added.
/// \returns the left brace of the \p if block reported by parseLevel, or
/// \p nullptr if the nesting level exceeds 300 after the opening token.
739FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
740 unsigned AddLevels, bool MunchSemi,
741 bool KeepBraces,
742 IfStmtKind *IfKind,
743 bool UnindentWhitesmithsBraces) {
// Verilog only: consumes a ':' and the identifier following it, if present.
744 auto HandleVerilogBlockLabel = [this]() {
745 // ":" name
746 if (Style.isVerilog() && FormatTok->is(tok::colon)) {
747 nextToken();
748 if (Keywords.isVerilogIdentifier(*FormatTok))
749 nextToken();
750 }
751 };
752
753 // Whether this is a Verilog-specific block that has a special header like a
754 // module.
755 const bool VerilogHierarchy =
756 Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
757 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
758 (Style.isVerilog() &&
759 (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
760 "'{' or macro block token expected");
// `Tok` is the opening token; `Index` is the number of lines emitted so far,
// i.e. the index the line containing the opening token will get.
761 FormatToken *Tok = FormatTok;
762 const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
763 auto Index = CurrentLines->size();
764 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
765 FormatTok->setBlockKind(BK_Block);
766
767 const bool IsWhitesmiths =
768 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
769
770 // For Whitesmiths mode, jump to the next level prior to skipping over the
771 // braces.
772 if (!VerilogHierarchy && AddLevels > 0 && IsWhitesmiths)
773 ++Line->Level;
774
// Hash of the preprocessor stack at the opening token; compared with the
// hash at the closing token before the matching line indices are recorded.
775 size_t PPStartHash = computePPHash();
776
777 const unsigned InitialLevel = Line->Level;
778 if (VerilogHierarchy) {
779 AddLevels += parseVerilogHierarchyHeader();
780 } else {
781 nextToken(/*LevelDifference=*/AddLevels);
782 HandleVerilogBlockLabel();
783 }
784
785 // Bail out if there are too many levels. Otherwise, the stack might overflow.
786 if (Line->Level > 300)
787 return nullptr;
788
789 if (MacroBlock && FormatTok->is(tok::l_paren))
790 parseParens();
791
792 size_t NbPreprocessorDirectives =
793 !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
794 addUnwrappedLine();
// NOTE(review): this listing skips source line 797, the '?' arm of the
// conditional below; the later comparison against
// UnwrappedLine::kInvalidIndex suggests that is the value used for an empty
// CurrentLines — confirm against the original file.
795 size_t OpeningLineIndex =
796 CurrentLines->empty()
798 : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
799
800 // Whitesmiths is weird here. The brace needs to be indented for the namespace
801 // block, but the block itself may not be indented depending on the style
802 // settings. This allows the format to back up one level in those cases.
803 if (UnindentWhitesmithsBraces)
804 --Line->Level;
805
806 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
807 MustBeDeclaration);
808
809 // Whitesmiths logic has already added a level by this point, so avoid
810 // adding it twice.
811 if (AddLevels > 0u)
812 Line->Level += AddLevels - (IsWhitesmiths ? 1 : 0);
813
// Parse the block body up to (not including) the closing token.
814 FormatToken *IfLBrace = nullptr;
815 const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace);
816
817 if (eof())
818 return IfLBrace;
819
// The body did not end at the expected closing token: restore the level,
// mark the current token as a block and give up on this block.
820 if (MacroBlock ? FormatTok->isNot(TT_MacroBlockEnd)
821 : FormatTok->isNot(tok::r_brace)) {
822 Line->Level = InitialLevel;
823 FormatTok->setBlockKind(BK_Block);
824 return IfLBrace;
825 }
826
827 if (FormatTok->is(tok::r_brace)) {
828 FormatTok->setBlockKind(BK_Block);
829 if (Tok->is(TT_NamespaceLBrace))
830 FormatTok->setFinalizedType(TT_NamespaceRBrace);
831 }
832
833 const bool IsFunctionRBrace =
834 FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);
835
// Decides whether the braces of a simple control-statement block can go.
// The lambda captures by copy and is `mutable`, so the changes it makes to
// `Index` and `Tok` stay inside the lambda; the outer `Tok` used right after
// the call is unaffected.
836 auto RemoveBraces = [=]() mutable {
837 if (!SimpleBlock)
838 return false;
839 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
840 assert(FormatTok->is(tok::r_brace));
841 const bool WrappedOpeningBrace = !Tok->Previous;
842 if (WrappedOpeningBrace && FollowedByComment)
843 return false;
844 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
845 if (KeepBraces && !HasRequiredIfBraces)
846 return false;
847 if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
848 const FormatToken *Previous = Tokens->getPreviousToken();
849 assert(Previous);
850 if (Previous->is(tok::r_brace) && !Previous->Optional)
851 return false;
852 }
853 assert(!CurrentLines->empty());
854 auto &LastLine = CurrentLines->back();
855 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
856 return false;
857 if (Tok->is(TT_ElseLBrace))
858 return true;
859 if (WrappedOpeningBrace) {
860 assert(Index > 0);
861 --Index; // The line above the wrapped l_brace.
862 Tok = nullptr;
863 }
864 return mightFitOnOneLine((*CurrentLines)[Index], Tok);
865 };
// When the braces qualify, the pair is recorded by linking the two braces
// through MatchingParen (presumably consumed by a later brace-removal step —
// not visible here).
866 if (RemoveBraces()) {
867 Tok->MatchingParen = FormatTok;
868 FormatTok->MatchingParen = Tok;
869 }
870
871 size_t PPEndHash = computePPHash();
872
873 // Munch the closing brace.
874 nextToken(/*LevelDifference=*/-AddLevels);
875
876 // When this is a function block and there is an unnecessary semicolon
877 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
878 // it later).
879 if (Style.RemoveSemicolon && IsFunctionRBrace) {
880 while (FormatTok->is(tok::semi)) {
881 FormatTok->Optional = true;
882 nextToken();
883 }
884 }
885
886 HandleVerilogBlockLabel();
887
888 if (MacroBlock && FormatTok->is(tok::l_paren))
889 parseParens();
890
891 Line->Level = InitialLevel;
892
893 if (FormatTok->is(tok::kw_noexcept)) {
894 // A noexcept in a requires expression.
895 nextToken();
896 }
897
898 if (FormatTok->is(tok::arrow)) {
899 // Following the } or noexcept we can find a trailing return type arrow
900 // as part of an implicit conversion constraint.
901 nextToken();
902 parseStructuralElement();
903 }
904
905 if (MunchSemi && FormatTok->is(tok::semi))
906 nextToken();
907
// Cross-link the opening and closing lines only if the preprocessor stack
// hashes at the opening and closing tokens are equal.
908 if (PPStartHash == PPEndHash) {
909 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
910 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
911 // Update the opening line to add the forward reference as well
912 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
913 CurrentLines->size() - 1;
914 }
915 }
916
917 return IfLBrace;
918}
919
920static bool isGoogScope(const UnwrappedLine &Line) {
921 // FIXME: Closure-library specific stuff should not be hard-coded but be
922 // configurable.
923 if (Line.Tokens.size() < 4)
924 return false;
925 auto I = Line.Tokens.begin();
926 if (I->Tok->TokenText != "goog")
927 return false;
928 ++I;
929 if (I->Tok->isNot(tok::period))
930 return false;
931 ++I;
932 if (I->Tok->TokenText != "scope")
933 return false;
934 ++I;
935 return I->Tok->is(tok::l_paren);
936}
937
938static bool isIIFE(const UnwrappedLine &Line,
939 const AdditionalKeywords &Keywords) {
940 // Look for the start of an immediately invoked anonymous function.
941 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
942 // This is commonly done in JavaScript to create a new, anonymous scope.
943 // Example: (function() { ... })()
944 if (Line.Tokens.size() < 3)
945 return false;
946 auto I = Line.Tokens.begin();
947 if (I->Tok->isNot(tok::l_paren))
948 return false;
949 ++I;
950 if (I->Tok->isNot(Keywords.kw_function))
951 return false;
952 ++I;
953 return I->Tok->is(tok::l_paren);
954}
955
956static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
957 const FormatToken &InitialToken,
958 bool IsEmptyBlock,
959 bool IsJavaRecord = false) {
960 if (IsJavaRecord)
961 return Style.BraceWrapping.AfterClass;
962
963 tok::TokenKind Kind = InitialToken.Tok.getKind();
964 if (InitialToken.is(TT_NamespaceMacro))
965 Kind = tok::kw_namespace;
966
967 const bool WrapRecordAllowed =
968 !IsEmptyBlock ||
969 Style.AllowShortRecordOnASingleLine < FormatStyle::SRS_Empty ||
970 Style.BraceWrapping.SplitEmptyRecord;
971
972 switch (Kind) {
973 case tok::kw_namespace:
974 return Style.BraceWrapping.AfterNamespace;
975 case tok::kw_class:
976 return Style.BraceWrapping.AfterClass && WrapRecordAllowed;
977 case tok::kw_union:
978 return Style.BraceWrapping.AfterUnion && WrapRecordAllowed;
979 case tok::kw_struct:
980 return Style.BraceWrapping.AfterStruct && WrapRecordAllowed;
981 case tok::kw_enum:
982 return Style.BraceWrapping.AfterEnum;
983 default:
984 return false;
985 }
986}
987
988void UnwrappedLineParser::parseChildBlock() {
989 assert(FormatTok->is(tok::l_brace));
990 FormatTok->setBlockKind(BK_Block);
991 const FormatToken *OpeningBrace = FormatTok;
992 nextToken();
993 {
994 bool SkipIndent = (Style.isJavaScript() &&
995 (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
996 ScopedLineState LineState(*this);
997 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
998 /*MustBeDeclaration=*/false);
999 Line->Level += SkipIndent ? 0 : 1;
1000 parseLevel(OpeningBrace);
1001 flushComments(isOnNewLine(*FormatTok));
1002 Line->Level -= SkipIndent ? 0 : 1;
1003 }
1004 nextToken();
1005}
1006
1007void UnwrappedLineParser::parsePPDirective() {
1008 assert(FormatTok->is(tok::hash) && "'#' expected");
1009 ScopedMacroState MacroState(*Line, Tokens, FormatTok);
1010
1011 nextToken();
1012
1013 if (!FormatTok->Tok.getIdentifierInfo()) {
1014 parsePPUnknown();
1015 return;
1016 }
1017
1018 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
1019 case tok::pp_define:
1020 parsePPDefine();
1021 return;
1022 case tok::pp_if:
1023 parsePPIf(/*IfDef=*/false);
1024 break;
1025 case tok::pp_ifdef:
1026 case tok::pp_ifndef:
1027 parsePPIf(/*IfDef=*/true);
1028 break;
1029 case tok::pp_else:
1030 case tok::pp_elifdef:
1031 case tok::pp_elifndef:
1032 case tok::pp_elif:
1033 parsePPElse();
1034 break;
1035 case tok::pp_endif:
1036 parsePPEndIf();
1037 break;
1038 case tok::pp_pragma:
1039 parsePPPragma();
1040 break;
1041 case tok::pp_error:
1042 case tok::pp_warning:
1043 nextToken();
1044 if (!eof() && Style.isCpp())
1045 FormatTok->setFinalizedType(TT_AfterPPDirective);
1046 [[fallthrough]];
1047 default:
1048 parsePPUnknown();
1049 break;
1050 }
1051}
1052
1053void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1054 size_t Line = CurrentLines->size();
1055 if (CurrentLines == &PreprocessorDirectives)
1056 Line += Lines.size();
1057
1058 if (Unreachable ||
1059 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1060 PPStack.push_back({PP_Unreachable, Line});
1061 } else {
1062 PPStack.push_back({PP_Conditional, Line});
1063 }
1064}
1065
1066void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1067 ++PPBranchLevel;
1068 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1069 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1070 PPLevelBranchIndex.push_back(0);
1071 PPLevelBranchCount.push_back(0);
1072 }
1073 PPChainBranchIndex.push(Unreachable ? -1 : 0);
1074 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1075 conditionalCompilationCondition(Unreachable || Skip);
1076}
1077
1078void UnwrappedLineParser::conditionalCompilationAlternative() {
1079 if (!PPStack.empty())
1080 PPStack.pop_back();
1081 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1082 if (!PPChainBranchIndex.empty())
1083 ++PPChainBranchIndex.top();
1084 conditionalCompilationCondition(
1085 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1086 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1087}
1088
1089void UnwrappedLineParser::conditionalCompilationEnd() {
1090 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1091 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1092 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1093 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1094 }
1095 // Guard against #endif's without #if.
1096 if (PPBranchLevel > -1)
1097 --PPBranchLevel;
1098 if (!PPChainBranchIndex.empty())
1099 PPChainBranchIndex.pop();
1100 if (!PPStack.empty())
1101 PPStack.pop_back();
1102}
1103
1104void UnwrappedLineParser::parsePPIf(bool IfDef) {
1105 bool IfNDef = FormatTok->is(tok::pp_ifndef);
1106 nextToken();
1107 bool Unreachable = false;
1108 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1109 Unreachable = true;
1110 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1111 Unreachable = true;
1112 conditionalCompilationStart(Unreachable);
1113 FormatToken *IfCondition = FormatTok;
1114 // If there's a #ifndef on the first line, and the only lines before it are
1115 // comments, it could be an include guard.
1116 bool MaybeIncludeGuard = IfNDef;
1117 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1118 for (auto &Line : Lines) {
1119 if (Line.Tokens.front().Tok->isNot(tok::comment)) {
1120 MaybeIncludeGuard = false;
1121 IncludeGuard = IG_Rejected;
1122 break;
1123 }
1124 }
1125 }
1126 --PPBranchLevel;
1127 parsePPUnknown();
1128 ++PPBranchLevel;
1129 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1130 IncludeGuard = IG_IfNdefed;
1131 IncludeGuardToken = IfCondition;
1132 }
1133}
1134
1135void UnwrappedLineParser::parsePPElse() {
1136 // If a potential include guard has an #else, it's not an include guard.
1137 if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1138 IncludeGuard = IG_Rejected;
1139 // Don't crash when there is an #else without an #if.
1140 assert(PPBranchLevel >= -1);
1141 if (PPBranchLevel == -1)
1142 conditionalCompilationStart(/*Unreachable=*/true);
1143 conditionalCompilationAlternative();
1144 --PPBranchLevel;
1145 parsePPUnknown();
1146 ++PPBranchLevel;
1147}
1148
1149void UnwrappedLineParser::parsePPEndIf() {
1150 conditionalCompilationEnd();
1151 parsePPUnknown();
1152}
1153
1154void UnwrappedLineParser::parsePPDefine() {
1155 nextToken();
1156
1157 if (!FormatTok->Tok.getIdentifierInfo()) {
1158 IncludeGuard = IG_Rejected;
1159 IncludeGuardToken = nullptr;
1160 parsePPUnknown();
1161 return;
1162 }
1163
1164 bool MaybeIncludeGuard = false;
1165 if (IncludeGuard == IG_IfNdefed &&
1166 IncludeGuardToken->TokenText == FormatTok->TokenText) {
1167 IncludeGuard = IG_Defined;
1168 IncludeGuardToken = nullptr;
1169 for (auto &Line : Lines) {
1170 if (Line.Tokens.front().Tok->isNoneOf(tok::comment, tok::hash)) {
1171 IncludeGuard = IG_Rejected;
1172 break;
1173 }
1174 }
1175 MaybeIncludeGuard = IncludeGuard == IG_Defined;
1176 }
1177
1178 // In the context of a define, even keywords should be treated as normal
1179 // identifiers. Setting the kind to identifier is not enough, because we need
1180 // to treat additional keywords like __except as well, which are already
1181 // identifiers. Setting the identifier info to null interferes with include
1182 // guard processing above, and changes preprocessing nesting.
1183 FormatTok->Tok.setKind(tok::identifier);
1184 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1185 nextToken();
1186
1187 // IncludeGuard can't have a non-empty macro definition.
1188 if (MaybeIncludeGuard && !eof())
1189 IncludeGuard = IG_Rejected;
1190
1191 if (FormatTok->is(tok::l_paren) && !FormatTok->hasWhitespaceBefore())
1192 parseParens();
1193 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1194 Line->Level += PPBranchLevel + 1;
1195 addUnwrappedLine();
1196 ++Line->Level;
1197
1198 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
1199 assert((int)Line->PPLevel >= 0);
1200
1201 if (eof())
1202 return;
1203
1204 Line->InMacroBody = true;
1205
1206 if (!Style.SkipMacroDefinitionBody) {
1207 // Errors during a preprocessor directive can only affect the layout of the
1208 // preprocessor directive, and thus we ignore them. An alternative approach
1209 // would be to use the same approach we use on the file level (no
1210 // re-indentation if there was a structural error) within the macro
1211 // definition.
1212 parseFile();
1213 return;
1214 }
1215
1216 for (auto *Comment : CommentsBeforeNextToken)
1217 Comment->Finalized = true;
1218
1219 do {
1220 FormatTok->Finalized = true;
1221 FormatTok = Tokens->getNextToken();
1222 } while (!eof());
1223
1224 addUnwrappedLine();
1225}
1226
1227void UnwrappedLineParser::parsePPPragma() {
1228 Line->InPragmaDirective = true;
1229 parsePPUnknown();
1230}
1231
1232void UnwrappedLineParser::parsePPUnknown() {
1233 while (!eof())
1234 nextToken();
1235 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1236 Line->Level += PPBranchLevel + 1;
1237 addUnwrappedLine();
1238}
1239
1240// Here we exclude certain tokens that are not usually the first token in an
1241// unwrapped line. This is used in attempt to distinguish macro calls without
1242// trailing semicolons from other constructs split to several lines.
1244 // Semicolon can be a null-statement, l_square can be a start of a macro or
1245 // a C++11 attribute, but this doesn't seem to be common.
1246 return Tok.isNoneOf(tok::semi, tok::l_brace,
1247 // Tokens that can only be used as binary operators and a
1248 // part of overloaded operator names.
1249 tok::period, tok::periodstar, tok::arrow, tok::arrowstar,
1250 tok::less, tok::greater, tok::slash, tok::percent,
1251 tok::lessless, tok::greatergreater, tok::equal,
1252 tok::plusequal, tok::minusequal, tok::starequal,
1253 tok::slashequal, tok::percentequal, tok::ampequal,
1254 tok::pipeequal, tok::caretequal, tok::greatergreaterequal,
1255 tok::lesslessequal,
1256 // Colon is used in labels, base class lists, initializer
1257 // lists, range-based for loops, ternary operator, but
1258 // should never be the first token in an unwrapped line.
1259 tok::colon,
1260 // 'noexcept' is a trailing annotation.
1261 tok::kw_noexcept);
1262}
1263
1264static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1265 const FormatToken *FormatTok) {
1266 // FIXME: This returns true for C/C++ keywords like 'struct'.
1267 return FormatTok->is(tok::identifier) &&
1268 (!FormatTok->Tok.getIdentifierInfo() ||
1269 FormatTok->isNoneOf(
1270 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1271 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1272 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1273 Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1274 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1275 Keywords.kw_instanceof, Keywords.kw_interface,
1276 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1277}
1278
1279static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1280 const FormatToken *FormatTok) {
1281 return FormatTok->Tok.isLiteral() ||
1282 FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1283 mustBeJSIdent(Keywords, FormatTok);
1284}
1285
1286// isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1287// when encountered after a value (see mustBeJSIdentOrValue).
1288static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1289 const FormatToken *FormatTok) {
1290 return FormatTok->isOneOf(
1291 tok::kw_return, Keywords.kw_yield,
1292 // conditionals
1293 tok::kw_if, tok::kw_else,
1294 // loops
1295 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1296 // switch/case
1297 tok::kw_switch, tok::kw_case,
1298 // exceptions
1299 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1300 // declaration
1301 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1302 Keywords.kw_async, Keywords.kw_function,
1303 // import/export
1304 Keywords.kw_import, tok::kw_export);
1305}
1306
1307// Checks whether a token is a type in K&R C (aka C78).
1308static bool isC78Type(const FormatToken &Tok) {
1309 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1310 tok::kw_unsigned, tok::kw_float, tok::kw_double,
1311 tok::identifier);
1312}
1313
1314// This function checks whether a token starts the first parameter declaration
1315// in a K&R C (aka C78) function definition, e.g.:
1316// int f(a, b)
1317// short a, b;
1318// {
1319// return a + b;
1320// }
1322 const FormatToken *FuncName) {
1323 assert(Tok);
1324 assert(Next);
1325 assert(FuncName);
1326
1327 if (FuncName->isNot(tok::identifier))
1328 return false;
1329
1330 const FormatToken *Prev = FuncName->Previous;
1331 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1332 return false;
1333
1334 if (!isC78Type(*Tok) &&
1335 Tok->isNoneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1336 return false;
1337 }
1338
1339 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1340 return false;
1341
1342 Tok = Tok->Previous;
1343 if (!Tok || Tok->isNot(tok::r_paren))
1344 return false;
1345
1346 Tok = Tok->Previous;
1347 if (!Tok || Tok->isNot(tok::identifier))
1348 return false;
1349
1350 return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1351}
1352
1353bool UnwrappedLineParser::parseModuleDecl() {
1354 assert(IsCpp);
1355 assert(FormatTok->is(Keywords.kw_module));
1356
1357 if (Style.Language == FormatStyle::LK_C ||
1358 Style.Standard < FormatStyle::LS_Cpp20) {
1359 return false;
1360 }
1361
1362 nextToken();
1363 if (FormatTok->isNot(tok::identifier))
1364 return false;
1365
1366 for (nextToken(); FormatTok->isNoneOf(tok::semi, tok::eof); nextToken())
1367 if (FormatTok->is(tok::colon))
1368 FormatTok->setFinalizedType(TT_ModulePartitionColon);
1369
1370 nextToken();
1371 Line->IsModuleOrImportDecl = true;
1372 addUnwrappedLine();
1373 return true;
1374}
1375
1376bool UnwrappedLineParser::parseImportDecl() {
1377 assert(IsCpp);
1378 assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1379
1380 if (Style.Language == FormatStyle::LK_C ||
1381 Style.Standard < FormatStyle::LS_Cpp20) {
1382 return false;
1383 }
1384
1385 nextToken();
1386 if (FormatTok->is(tok::colon)) {
1387 FormatTok->setFinalizedType(TT_ModulePartitionColon);
1388 nextToken();
1389 }
1390 if (FormatTok->isNoneOf(tok::identifier, tok::less, tok::string_literal))
1391 return false;
1392
1393 for (; FormatTok->isNoneOf(tok::semi, tok::eof); nextToken()) {
1394 // Handle import <foo/bar.h> as we would an include statement.
1395 if (FormatTok->is(tok::less)) {
1396 for (nextToken(); FormatTok->isNoneOf(tok::greater, tok::semi, tok::eof);
1397 nextToken()) {
1398 // Mark tokens as implicit string literals, so that import <A/Foo> will
1399 // neither be broken nor have a space added.
1400 FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1401 }
1402 }
1403 }
1404
1405 nextToken();
1406 Line->IsModuleOrImportDecl = true;
1407 addUnwrappedLine();
1408 return true;
1409}
1410
1411// readTokenWithJavaScriptASI reads the next token and terminates the current
1412// line if JavaScript Automatic Semicolon Insertion must
1413// happen between the current token and the next token.
1414//
1415// This method is conservative - it cannot cover all edge cases of JavaScript,
1416// but only aims to correctly handle certain well known cases. It *must not*
1417// return true in speculative cases.
1418void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1419 FormatToken *Previous = FormatTok;
1420 readToken();
1421 FormatToken *Next = FormatTok;
1422
1423 bool IsOnSameLine =
1424 CommentsBeforeNextToken.empty()
1425 ? Next->NewlinesBefore == 0
1426 : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1427 if (IsOnSameLine)
1428 return;
1429
1430 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1431 bool PreviousStartsTemplateExpr =
1432 Previous->is(TT_TemplateString) && Previous->TokenText.ends_with("${");
1433 if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1434 // If the line contains an '@' sign, the previous token might be an
1435 // annotation, which can precede another identifier/value.
1436 bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1437 return LineNode.Tok->is(tok::at);
1438 });
1439 if (HasAt)
1440 return;
1441 }
1442 if (Next->is(tok::exclaim) && PreviousMustBeValue)
1443 return addUnwrappedLine();
1444 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1445 bool NextEndsTemplateExpr =
1446 Next->is(TT_TemplateString) && Next->TokenText.starts_with("}");
1447 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1448 (PreviousMustBeValue ||
1449 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1450 tok::minusminus))) {
1451 return addUnwrappedLine();
1452 }
1453 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1454 isJSDeclOrStmt(Keywords, Next)) {
1455 return addUnwrappedLine();
1456 }
1457}
1458
1459void UnwrappedLineParser::parseStructuralElement(
1460 const FormatToken *OpeningBrace, IfStmtKind *IfKind,
1461 FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1462 if (Style.isTableGen() && FormatTok->is(tok::pp_include)) {
1463 nextToken();
1464 if (FormatTok->is(tok::string_literal))
1465 nextToken();
1466 addUnwrappedLine();
1467 return;
1468 }
1469
1470 if (IsCpp) {
1471 while (FormatTok->is(tok::l_square) && handleCppAttributes()) {
1472 }
1473 } else if (Style.isVerilog()) {
1474 // Skip attributes.
1475 while (FormatTok->is(tok::l_paren) &&
1476 Tokens->peekNextToken()->is(tok::star)) {
1477 parseParens();
1478 }
1479 skipVerilogQualifiers();
1480 // Skip things that can exist before keywords like 'if' and 'case'.
1481 if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
1482 Keywords.kw_unique0)) {
1483 nextToken();
1484 }
1485
1486 if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
1487 parseForOrWhileLoop(/*HasParens=*/false);
1488 return;
1489 }
1490 if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
1491 parseForOrWhileLoop();
1492 return;
1493 }
1494 if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
1495 Keywords.kw_assume, Keywords.kw_cover)) {
1496 parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1497 return;
1498 }
1499 }
1500
1501 // Tokens that only make sense at the beginning of a line.
1502 if (FormatTok->isAccessSpecifierKeyword()) {
1503 if (Style.isJava() || Style.isJavaScript() || Style.isCSharp())
1504 nextToken();
1505 else
1506 parseAccessSpecifier();
1507 return;
1508 }
1509 switch (FormatTok->Tok.getKind()) {
1510 case tok::kw_asm: {
1511 // Track whether to skip formatting inline asm by finalizing the tokens
1512 // in the block. Formatting is skipped inside of braces by default.
1513 // A style option could be added to also skip formatting inside parens.
1514 bool DoNotFormat = false;
1515 tok::TokenKind OpenType;
1516 tok::TokenKind CloseType;
1517 nextToken();
1518 while (FormatTok &&
1519 FormatTok->isOneOf(tok::kw_volatile, tok::kw_inline, tok::kw_goto)) {
1520 nextToken();
1521 }
1522 if (!FormatTok)
1523 break;
1524 if (FormatTok->is(tok::l_brace)) {
1525 FormatTok->setFinalizedType(TT_InlineASMBrace);
1526 OpenType = tok::l_brace;
1527 CloseType = tok::r_brace;
1528 DoNotFormat = true;
1529 } else if (FormatTok->is(tok::l_paren)) {
1530 OpenType = tok::l_paren;
1531 CloseType = tok::r_paren;
1532 FormatTok->setFinalizedType(TT_InlineASMParen);
1533 } else {
1534 break;
1535 }
1536 if (DoNotFormat) {
1537 FormatToken *OpenTok = FormatTok;
1538 int NestLevel = 0;
1539 nextToken();
1540 while (FormatTok && !eof()) {
1541 if (FormatTok->is(OpenType)) {
1542 ++NestLevel;
1543 } else if (FormatTok->is(CloseType)) {
1544 --NestLevel;
1545 if (NestLevel < 1) {
1546 FormatTok->setFinalizedType(OpenTok->getType());
1547 nextToken();
1548 addUnwrappedLine();
1549 break;
1550 }
1551 }
1552 FormatTok->Finalized = true;
1553 nextToken();
1554 }
1555 }
1556 break;
1557 }
1558 case tok::kw_namespace:
1559 parseNamespace();
1560 return;
1561 case tok::kw_if: {
1562 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1563 // field/method declaration.
1564 break;
1565 }
1566 FormatToken *Tok = parseIfThenElse(IfKind);
1567 if (IfLeftBrace)
1568 *IfLeftBrace = Tok;
1569 return;
1570 }
1571 case tok::kw_for:
1572 case tok::kw_while:
1573 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1574 // field/method declaration.
1575 break;
1576 }
1577 parseForOrWhileLoop();
1578 return;
1579 case tok::kw_do:
1580 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1581 // field/method declaration.
1582 break;
1583 }
1584 parseDoWhile();
1585 if (HasDoWhile)
1586 *HasDoWhile = true;
1587 return;
1588 case tok::kw_switch:
1589 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1590 // 'switch: string' field declaration.
1591 break;
1592 }
1593 parseSwitch(/*IsExpr=*/false);
1594 return;
1595 case tok::kw_default: {
1596 // In Verilog default along with other labels are handled in the next loop.
1597 if (Style.isVerilog())
1598 break;
1599 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1600 // 'default: string' field declaration.
1601 break;
1602 }
1603 auto *Default = FormatTok;
1604 nextToken();
1605 if (FormatTok->is(tok::colon)) {
1606 FormatTok->setFinalizedType(TT_CaseLabelColon);
1607 parseLabel();
1608 return;
1609 }
1610 if (FormatTok->is(tok::arrow)) {
1611 FormatTok->setFinalizedType(TT_CaseLabelArrow);
1612 Default->setFinalizedType(TT_SwitchExpressionLabel);
1613 parseLabel();
1614 return;
1615 }
1616 // e.g. "default void f() {}" in a Java interface.
1617 break;
1618 }
1619 case tok::kw_case:
1620 // Proto: there are no switch/case statements.
1621 if (Style.Language == FormatStyle::LK_Proto) {
1622 nextToken();
1623 return;
1624 }
1625 if (Style.isVerilog()) {
1626 parseBlock();
1627 addUnwrappedLine();
1628 return;
1629 }
1630 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1631 // 'case: string' field declaration.
1632 nextToken();
1633 break;
1634 }
1635 parseCaseLabel();
1636 return;
1637 case tok::kw_goto:
1638 nextToken();
1639 if (FormatTok->is(tok::kw_case))
1640 nextToken();
1641 break;
1642 case tok::kw_try:
1643 case tok::kw___try:
1644 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1645 // field/method declaration.
1646 break;
1647 }
1648 parseTryCatch();
1649 return;
1650 case tok::kw_extern:
1651 if (Style.isVerilog()) {
1652 // In Verilog an extern module declaration looks like a start of module.
1653 // But there is no body and endmodule. So we handle it separately.
1654 parseVerilogExtern();
1655 return;
1656 }
1657 nextToken();
1658 if (FormatTok->is(tok::string_literal)) {
1659 nextToken();
1660 if (FormatTok->is(tok::l_brace)) {
1661 if (Style.BraceWrapping.AfterExternBlock)
1662 addUnwrappedLine();
1663 // Either we indent or for backwards compatibility we follow the
1664 // AfterExternBlock style.
1665 unsigned AddLevels =
1666 (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1667 (Style.BraceWrapping.AfterExternBlock &&
1668 Style.IndentExternBlock ==
1669 FormatStyle::IEBS_AfterExternBlock)
1670 ? 1u
1671 : 0u;
1672 parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1673 addUnwrappedLine();
1674 return;
1675 }
1676 }
1677 break;
1678 case tok::kw_export:
1679 if (IsCpp) {
1680 nextToken();
1681 if (FormatTok->is(tok::kw_namespace)) {
1682 parseNamespace();
1683 return;
1684 }
1685 if (FormatTok->is(tok::l_brace)) {
1686 parseCppExportBlock();
1687 return;
1688 }
1689 if (FormatTok->is(Keywords.kw_module) && parseModuleDecl())
1690 return;
1691 if (FormatTok->is(Keywords.kw_import) && parseImportDecl())
1692 return;
1693 break;
1694 }
1695 if (Style.isJavaScript()) {
1696 parseJavaScriptEs6ImportExport();
1697 return;
1698 }
1699 if (Style.isVerilog()) {
1700 parseVerilogExtern();
1701 return;
1702 }
1703 break;
1704 case tok::kw_inline:
1705 nextToken();
1706 if (FormatTok->is(tok::kw_namespace)) {
1707 parseNamespace();
1708 return;
1709 }
1710 break;
1711 case tok::identifier:
1712 if (FormatTok->is(TT_ForEachMacro)) {
1713 parseForOrWhileLoop();
1714 return;
1715 }
1716 if (FormatTok->is(TT_MacroBlockBegin)) {
1717 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1718 /*MunchSemi=*/false);
1719 return;
1720 }
1721 if (FormatTok->is(Keywords.kw_import)) {
1722 if (IsCpp && parseImportDecl())
1723 return;
1724 if (Style.isJavaScript()) {
1725 parseJavaScriptEs6ImportExport();
1726 return;
1727 }
1728 if (Style.Language == FormatStyle::LK_Proto) {
1729 nextToken();
1730 if (FormatTok->is(tok::kw_public))
1731 nextToken();
1732 if (FormatTok->isNot(tok::string_literal))
1733 return;
1734 nextToken();
1735 if (FormatTok->is(tok::semi))
1736 nextToken();
1737 addUnwrappedLine();
1738 return;
1739 }
1740 if (Style.isVerilog()) {
1741 parseVerilogExtern();
1742 return;
1743 }
1744 }
1745 if (IsCpp) {
1746 if (FormatTok->is(Keywords.kw_module) && parseModuleDecl())
1747 return;
1748 if (FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1749 Keywords.kw_slots, Keywords.kw_qslots)) {
1750 nextToken();
1751 if (FormatTok->is(tok::colon)) {
1752 nextToken();
1753 addUnwrappedLine();
1754 return;
1755 }
1756 }
1757 if (FormatTok->is(TT_StatementMacro)) {
1758 parseStatementMacro();
1759 return;
1760 }
1761 if (FormatTok->is(TT_NamespaceMacro)) {
1762 parseNamespace();
1763 return;
1764 }
1765 }
1766 // In Verilog labels can be any expression, so we don't do them here.
1767 // JS doesn't have macros, and within classes colons indicate fields, not
1768 // labels.
1769 // TableGen doesn't have labels.
1770 if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() &&
1771 Tokens->peekNextToken()->is(tok::colon) && !Line->MustBeDeclaration) {
1772 nextToken();
1773 if (!Line->InMacroBody || CurrentLines->size() > 1)
1774 Line->Tokens.begin()->Tok->MustBreakBefore = true;
1775 FormatTok->setFinalizedType(TT_GotoLabelColon);
1776 parseLabel(Style.IndentGotoLabels);
1777 if (HasLabel)
1778 *HasLabel = true;
1779 return;
1780 }
1781 if (Style.isJava() && FormatTok->is(Keywords.kw_record)) {
1782 parseRecord(/*ParseAsExpr=*/false, /*IsJavaRecord=*/true);
1783 addUnwrappedLine();
1784 return;
1785 }
1786 // In all other cases, parse the declaration.
1787 break;
1788 default:
1789 break;
1790 }
1791
1792 bool SeenEqual = false;
1793 for (const bool InRequiresExpression =
1794 OpeningBrace && OpeningBrace->isOneOf(TT_RequiresExpressionLBrace,
1795 TT_CompoundRequirementLBrace);
1796 !eof();) {
1797 const FormatToken *Previous = FormatTok->Previous;
1798 switch (FormatTok->Tok.getKind()) {
1799 case tok::at:
1800 nextToken();
1801 if (FormatTok->is(tok::l_brace)) {
1802 nextToken();
1803 parseBracedList();
1804 break;
1805 }
1806 if (Style.isJava() && FormatTok->is(Keywords.kw_interface)) {
1807 nextToken();
1808 break;
1809 }
1810 switch (bool IsAutoRelease = false; FormatTok->Tok.getObjCKeywordID()) {
1811 case tok::objc_public:
1812 case tok::objc_protected:
1813 case tok::objc_package:
1814 case tok::objc_private:
1815 return parseAccessSpecifier();
1816 case tok::objc_interface:
1817 case tok::objc_implementation:
1818 return parseObjCInterfaceOrImplementation();
1819 case tok::objc_protocol:
1820 if (parseObjCProtocol())
1821 return;
1822 break;
1823 case tok::objc_end:
1824 return; // Handled by the caller.
1825 case tok::objc_optional:
1826 case tok::objc_required:
1827 nextToken();
1828 addUnwrappedLine();
1829 return;
1830 case tok::objc_autoreleasepool:
1831 IsAutoRelease = true;
1832 [[fallthrough]];
1833 case tok::objc_synchronized:
1834 nextToken();
1835 if (!IsAutoRelease && FormatTok->is(tok::l_paren)) {
1836 // Skip synchronization object
1837 parseParens();
1838 }
1839 if (FormatTok->is(tok::l_brace)) {
1840 if (Style.BraceWrapping.AfterControlStatement ==
1841 FormatStyle::BWACS_Always) {
1842 addUnwrappedLine();
1843 }
1844 parseBlock();
1845 }
1846 addUnwrappedLine();
1847 return;
1848 case tok::objc_try:
1849 // This branch isn't strictly necessary (the kw_try case below would
1850 // do this too after the tok::at is parsed above). But be explicit.
1851 parseTryCatch();
1852 return;
1853 default:
1854 break;
1855 }
1856 break;
1857 case tok::kw_requires: {
1858 if (IsCpp) {
1859 bool ParsedClause = parseRequires(SeenEqual);
1860 if (ParsedClause)
1861 return;
1862 } else {
1863 nextToken();
1864 }
1865 break;
1866 }
1867 case tok::kw_enum:
1868 // Ignore if this is part of "template <enum ..." or "... -> enum" or
1869 // "template <..., enum ...>".
1870 if (Previous && Previous->isOneOf(tok::less, tok::arrow, tok::comma)) {
1871 nextToken();
1872 break;
1873 }
1874
1875 // parseEnum falls through and does not yet add an unwrapped line as an
1876 // enum definition can start a structural element.
1877 if (!parseEnum())
1878 break;
1879 // This only applies to C++ and Verilog.
1880 if (!IsCpp && !Style.isVerilog()) {
1881 addUnwrappedLine();
1882 return;
1883 }
1884 break;
1885 case tok::kw_typedef:
1886 nextToken();
1887 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1888 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1889 Keywords.kw_CF_CLOSED_ENUM,
1890 Keywords.kw_NS_CLOSED_ENUM)) {
1891 parseEnum();
1892 }
1893 break;
1894 case tok::kw_class:
1895 if (Style.isVerilog()) {
1896 parseBlock();
1897 addUnwrappedLine();
1898 return;
1899 }
1900 if (Style.isTableGen()) {
1901 // Do nothing special. In this case the l_brace becomes FunctionLBrace.
1902 // This is same as def and so on.
1903 nextToken();
1904 break;
1905 }
1906 [[fallthrough]];
1907 case tok::kw_struct:
1908 case tok::kw_union:
1909 if (parseStructLike())
1910 return;
1911 break;
1912 case tok::kw_decltype:
1913 nextToken();
1914 if (FormatTok->is(tok::l_paren)) {
1915 parseParens();
1916 if (FormatTok->Previous &&
1917 FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto,
1918 tok::l_paren)) {
1919 Line->SeenDecltypeAuto = true;
1920 }
1921 }
1922 break;
1923 case tok::period:
1924 nextToken();
1925 // In Java, classes have an implicit static member "class".
1926 if (Style.isJava() && FormatTok && FormatTok->is(tok::kw_class))
1927 nextToken();
1928 if (Style.isJavaScript() && FormatTok &&
1929 FormatTok->Tok.getIdentifierInfo()) {
1930 // JavaScript only has pseudo keywords, all keywords are allowed to
1931 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1932 nextToken();
1933 }
1934 break;
1935 case tok::semi:
1936 nextToken();
1937 addUnwrappedLine();
1938 return;
1939 case tok::r_brace:
1940 addUnwrappedLine();
1941 return;
1942 case tok::string_literal:
1943 if (Style.isVerilog() && FormatTok->is(TT_VerilogProtected)) {
1944 FormatTok->Finalized = true;
1945 nextToken();
1946 addUnwrappedLine();
1947 return;
1948 }
1949 nextToken();
1950 break;
1951 case tok::l_paren: {
1952 parseParens();
1953 // Break the unwrapped line if a K&R C function definition has a parameter
1954 // declaration.
1955 if (OpeningBrace || !IsCpp || !Previous || eof())
1956 break;
1957 if (isC78ParameterDecl(FormatTok,
1958 Tokens->peekNextToken(/*SkipComment=*/true),
1959 Previous)) {
1960 addUnwrappedLine();
1961 return;
1962 }
1963 break;
1964 }
1965 case tok::kw_operator:
1966 nextToken();
1967 if (FormatTok->isBinaryOperator())
1968 nextToken();
1969 break;
1970 case tok::caret: {
1971 const auto *Prev = FormatTok->getPreviousNonComment();
1972 nextToken();
1973 if (Prev && Prev->is(tok::identifier))
1974 break;
1975 // Block return type.
1976 if (FormatTok->Tok.isAnyIdentifier() || FormatTok->isTypeName(LangOpts)) {
1977 nextToken();
1978 // Return types: pointers are ok too.
1979 while (FormatTok->is(tok::star))
1980 nextToken();
1981 }
1982 // Block argument list.
1983 if (FormatTok->is(tok::l_paren))
1984 parseParens();
1985 // Block body.
1986 if (FormatTok->is(tok::l_brace))
1987 parseChildBlock();
1988 break;
1989 }
1990 case tok::l_brace:
1991 if (InRequiresExpression)
1992 FormatTok->setFinalizedType(TT_BracedListLBrace);
1993 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1994 IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
1995 // A block outside of parentheses must be the last part of a
1996 // structural element.
1997 // FIXME: Figure out cases where this is not true, and add projections
1998 // for them (the one we know is missing are lambdas).
1999 if (Style.isJava() &&
2000 Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
2001 // If necessary, we could set the type to something different than
2002 // TT_FunctionLBrace.
2003 if (Style.BraceWrapping.AfterControlStatement ==
2004 FormatStyle::BWACS_Always) {
2005 addUnwrappedLine();
2006 }
2007 } else if (Style.BraceWrapping.AfterFunction) {
2008 addUnwrappedLine();
2009 }
2010 if (!Previous || Previous->isNot(TT_TypeDeclarationParen))
2011 FormatTok->setFinalizedType(TT_FunctionLBrace);
2012 parseBlock();
2013 IsDecltypeAutoFunction = false;
2014 addUnwrappedLine();
2015 return;
2016 }
2017 // Otherwise this was a braced init list, and the structural
2018 // element continues.
2019 break;
2020 case tok::kw_try:
2021 if (Style.isJavaScript() && Line->MustBeDeclaration) {
2022 // field/method declaration.
2023 nextToken();
2024 break;
2025 }
2026 // We arrive here when parsing function-try blocks.
2027 if (Style.BraceWrapping.AfterFunction)
2028 addUnwrappedLine();
2029 parseTryCatch();
2030 return;
2031 case tok::identifier: {
2032 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
2033 Line->MustBeDeclaration) {
2034 addUnwrappedLine();
2035 parseCSharpGenericTypeConstraint();
2036 break;
2037 }
2038 if (FormatTok->is(TT_MacroBlockEnd)) {
2039 addUnwrappedLine();
2040 return;
2041 }
2042
2043 // Function declarations (as opposed to function expressions) are parsed
2044 // on their own unwrapped line by continuing this loop. Function
2045 // expressions (functions that are not on their own line) must not create
2046 // a new unwrapped line, so they are special cased below.
2047 size_t TokenCount = Line->Tokens.size();
2048 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
2049 (TokenCount > 1 ||
2050 (TokenCount == 1 &&
2051 Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) {
2052 tryToParseJSFunction();
2053 break;
2054 }
2055 if ((Style.isJavaScript() || Style.isJava()) &&
2056 FormatTok->is(Keywords.kw_interface)) {
2057 if (Style.isJavaScript()) {
2058 // In JavaScript/TypeScript, "interface" can be used as a standalone
2059 // identifier, e.g. in `var interface = 1;`. If "interface" is
2060 // followed by another identifier, it is very like to be an actual
2061 // interface declaration.
2062 unsigned StoredPosition = Tokens->getPosition();
2063 FormatToken *Next = Tokens->getNextToken();
2064 FormatTok = Tokens->setPosition(StoredPosition);
2065 if (!mustBeJSIdent(Keywords, Next)) {
2066 nextToken();
2067 break;
2068 }
2069 }
2070 parseRecord();
2071 addUnwrappedLine();
2072 return;
2073 }
2074
2075 if (Style.isVerilog()) {
2076 if (FormatTok->is(Keywords.kw_table)) {
2077 parseVerilogTable();
2078 return;
2079 }
2080 if (Keywords.isVerilogBegin(*FormatTok) ||
2081 Keywords.isVerilogHierarchy(*FormatTok)) {
2082 parseBlock();
2083 addUnwrappedLine();
2084 return;
2085 }
2086 }
2087
2088 if (!IsCpp && FormatTok->is(Keywords.kw_interface)) {
2089 if (parseStructLike())
2090 return;
2091 break;
2092 }
2093
2094 if (IsCpp && FormatTok->is(TT_StatementMacro)) {
2095 parseStatementMacro();
2096 return;
2097 }
2098
2099 // See if the following token should start a new unwrapped line.
2100 StringRef Text = FormatTok->TokenText;
2101
2102 FormatToken *PreviousToken = FormatTok;
2103 nextToken();
2104
2105 // JS doesn't have macros, and within classes colons indicate fields, not
2106 // labels.
2107 if (Style.isJavaScript())
2108 break;
2109
2110 auto OneTokenSoFar = [&]() {
2111 auto I = Line->Tokens.begin(), E = Line->Tokens.end();
2112 while (I != E && I->Tok->is(tok::comment))
2113 ++I;
2114 if (Style.isVerilog())
2115 while (I != E && I->Tok->is(tok::hash))
2116 ++I;
2117 return I != E && (++I == E);
2118 };
2119 if (OneTokenSoFar()) {
2120 // Recognize function-like macro usages without trailing semicolon as
2121 // well as free-standing macros like Q_OBJECT.
2122 bool FunctionLike = FormatTok->is(tok::l_paren);
2123 if (FunctionLike)
2124 parseParens();
2125
2126 bool FollowedByNewline =
2127 CommentsBeforeNextToken.empty()
2128 ? FormatTok->NewlinesBefore > 0
2129 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
2130
2131 if (FollowedByNewline &&
2132 (Text.size() >= 5 ||
2133 (FunctionLike && FormatTok->isNot(tok::l_paren))) &&
2134 tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
2135 if (PreviousToken->isNot(TT_UntouchableMacroFunc))
2136 PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
2137 addUnwrappedLine();
2138 return;
2139 }
2140 }
2141 break;
2142 }
2143 case tok::equal:
2144 if ((Style.isJavaScript() || Style.isCSharp()) &&
2145 FormatTok->is(TT_FatArrow)) {
2146 tryToParseChildBlock();
2147 break;
2148 }
2149
2150 SeenEqual = true;
2151 nextToken();
2152 if (FormatTok->is(tok::l_brace)) {
2153 // C# needs this change to ensure that array initialisers and object
2154 // initialisers are indented the same way. In TypeScript, the brace
2155 // can also be an object type definition.
2156 if (!Style.isJavaScript())
2157 FormatTok->setBlockKind(BK_BracedInit);
2158 // TableGen's defset statement has syntax of the form,
2159 // `defset <type> <name> = { <statement>... }`
2160 if (Style.isTableGen() &&
2161 Line->Tokens.begin()->Tok->is(Keywords.kw_defset)) {
2162 FormatTok->setFinalizedType(TT_FunctionLBrace);
2163 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2164 /*MunchSemi=*/false);
2165 addUnwrappedLine();
2166 break;
2167 }
2168 nextToken();
2169 parseBracedList();
2170 } else if (Style.Language == FormatStyle::LK_Proto &&
2171 FormatTok->is(tok::less)) {
2172 nextToken();
2173 parseBracedList(/*IsAngleBracket=*/true);
2174 }
2175 break;
2176 case tok::l_square:
2177 parseSquare();
2178 break;
2179 case tok::kw_new:
2180 if (Style.isCSharp() &&
2181 (Tokens->peekNextToken()->isAccessSpecifierKeyword() ||
2182 (Previous && Previous->isAccessSpecifierKeyword()))) {
2183 nextToken();
2184 } else {
2185 parseNew();
2186 }
2187 break;
2188 case tok::kw_switch:
2189 if (Style.isJava())
2190 parseSwitch(/*IsExpr=*/true);
2191 else
2192 nextToken();
2193 break;
2194 case tok::kw_case:
2195 // Proto: there are no switch/case statements.
2196 if (Style.Language == FormatStyle::LK_Proto) {
2197 nextToken();
2198 return;
2199 }
2200 // In Verilog switch is called case.
2201 if (Style.isVerilog()) {
2202 parseBlock();
2203 addUnwrappedLine();
2204 return;
2205 }
2206 if (Style.isJavaScript() && Line->MustBeDeclaration) {
2207 // 'case: string' field declaration.
2208 nextToken();
2209 break;
2210 }
2211 parseCaseLabel();
2212 break;
2213 case tok::kw_default:
2214 nextToken();
2215 if (Style.isVerilog()) {
2216 if (FormatTok->is(tok::colon)) {
2217 // The label will be handled in the next iteration.
2218 break;
2219 }
2220 if (FormatTok->is(Keywords.kw_clocking)) {
2221 // A default clocking block.
2222 parseBlock();
2223 addUnwrappedLine();
2224 return;
2225 }
2226 parseVerilogCaseLabel();
2227 return;
2228 }
2229 break;
2230 case tok::colon:
2231 nextToken();
2232 if (Style.isVerilog()) {
2233 parseVerilogCaseLabel();
2234 return;
2235 }
2236 break;
2237 case tok::greater:
2238 nextToken();
2239 if (FormatTok->is(tok::l_brace))
2240 FormatTok->Previous->setFinalizedType(TT_TemplateCloser);
2241 break;
2242 default:
2243 nextToken();
2244 break;
2245 }
2246 }
2247}
2248
2249bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2250 assert(FormatTok->is(tok::l_brace));
2251 if (!Style.isCSharp())
2252 return false;
2253 // See if it's a property accessor.
2254 if (!FormatTok->Previous || FormatTok->Previous->isNot(tok::identifier))
2255 return false;
2256
2257 // See if we are inside a property accessor.
2258 //
2259 // Record the current tokenPosition so that we can advance and
2260 // reset the current token. `Next` is not set yet so we need
2261 // another way to advance along the token stream.
2262 unsigned int StoredPosition = Tokens->getPosition();
2263 FormatToken *Tok = Tokens->getNextToken();
2264
2265 // A trivial property accessor is of the form:
2266 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2267 // Track these as they do not require line breaks to be introduced.
2268 bool HasSpecialAccessor = false;
2269 bool IsTrivialPropertyAccessor = true;
2270 bool HasAttribute = false;
2271 while (!eof()) {
2272 if (const bool IsAccessorKeyword =
2273 Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set);
2274 IsAccessorKeyword || Tok->isAccessSpecifierKeyword() ||
2275 Tok->isOneOf(tok::l_square, tok::semi, Keywords.kw_internal)) {
2276 if (IsAccessorKeyword)
2277 HasSpecialAccessor = true;
2278 else if (Tok->is(tok::l_square))
2279 HasAttribute = true;
2280 Tok = Tokens->getNextToken();
2281 continue;
2282 }
2283 if (Tok->isNot(tok::r_brace))
2284 IsTrivialPropertyAccessor = false;
2285 break;
2286 }
2287
2288 if (!HasSpecialAccessor || HasAttribute) {
2289 Tokens->setPosition(StoredPosition);
2290 return false;
2291 }
2292
2293 // Try to parse the property accessor:
2294 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2295 Tokens->setPosition(StoredPosition);
2296 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2297 addUnwrappedLine();
2298 nextToken();
2299 do {
2300 switch (FormatTok->Tok.getKind()) {
2301 case tok::r_brace:
2302 nextToken();
2303 if (FormatTok->is(tok::equal)) {
2304 while (!eof() && FormatTok->isNot(tok::semi))
2305 nextToken();
2306 nextToken();
2307 }
2308 addUnwrappedLine();
2309 return true;
2310 case tok::l_brace:
2311 ++Line->Level;
2312 parseBlock(/*MustBeDeclaration=*/true);
2313 addUnwrappedLine();
2314 --Line->Level;
2315 break;
2316 case tok::equal:
2317 if (FormatTok->is(TT_FatArrow)) {
2318 ++Line->Level;
2319 do {
2320 nextToken();
2321 } while (!eof() && FormatTok->isNot(tok::semi));
2322 nextToken();
2323 addUnwrappedLine();
2324 --Line->Level;
2325 break;
2326 }
2327 nextToken();
2328 break;
2329 default:
2330 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2331 Keywords.kw_set) &&
2332 !IsTrivialPropertyAccessor) {
2333 // Non-trivial get/set needs to be on its own line.
2334 addUnwrappedLine();
2335 }
2336 nextToken();
2337 }
2338 } while (!eof());
2339
2340 // Unreachable for well-formed code (paired '{' and '}').
2341 return true;
2342}
2343
2344bool UnwrappedLineParser::tryToParseLambda() {
2345 assert(FormatTok->is(tok::l_square));
2346 if (!IsCpp) {
2347 nextToken();
2348 return false;
2349 }
2350 FormatToken &LSquare = *FormatTok;
2351 if (!tryToParseLambdaIntroducer())
2352 return false;
2353
2354 FormatToken *Arrow = nullptr;
2355 bool InTemplateParameterList = false;
2356
2357 while (FormatTok->isNot(tok::l_brace)) {
2358 if (FormatTok->isTypeName(LangOpts) || FormatTok->isAttribute()) {
2359 nextToken();
2360 continue;
2361 }
2362 switch (FormatTok->Tok.getKind()) {
2363 case tok::l_brace:
2364 break;
2365 case tok::l_paren:
2366 parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
2367 break;
2368 case tok::l_square:
2369 parseSquare();
2370 break;
2371 case tok::less:
2372 assert(FormatTok->Previous);
2373 if (FormatTok->Previous->is(tok::r_square))
2374 InTemplateParameterList = true;
2375 nextToken();
2376 break;
2377 case tok::kw_auto:
2378 case tok::kw_class:
2379 case tok::kw_struct:
2380 case tok::kw_union:
2381 case tok::kw_template:
2382 case tok::kw_typename:
2383 case tok::amp:
2384 case tok::star:
2385 case tok::kw_const:
2386 case tok::kw_constexpr:
2387 case tok::kw_consteval:
2388 case tok::comma:
2389 case tok::greater:
2390 case tok::identifier:
2391 case tok::numeric_constant:
2392 case tok::coloncolon:
2393 case tok::kw_mutable:
2394 case tok::kw_noexcept:
2395 case tok::kw_static:
2396 nextToken();
2397 break;
2398 // Specialization of a template with an integer parameter can contain
2399 // arithmetic, logical, comparison and ternary operators.
2400 //
2401 // FIXME: This also accepts sequences of operators that are not in the scope
2402 // of a template argument list.
2403 //
2404 // In a C++ lambda a template type can only occur after an arrow. We use
2405 // this as an heuristic to distinguish between Objective-C expressions
2406 // followed by an `a->b` expression, such as:
2407 // ([obj func:arg] + a->b)
2408 // Otherwise the code below would parse as a lambda.
2409 case tok::plus:
2410 case tok::minus:
2411 case tok::exclaim:
2412 case tok::tilde:
2413 case tok::slash:
2414 case tok::percent:
2415 case tok::lessless:
2416 case tok::pipe:
2417 case tok::pipepipe:
2418 case tok::ampamp:
2419 case tok::caret:
2420 case tok::equalequal:
2421 case tok::exclaimequal:
2422 case tok::greaterequal:
2423 case tok::lessequal:
2424 case tok::question:
2425 case tok::colon:
2426 case tok::ellipsis:
2427 case tok::kw_true:
2428 case tok::kw_false:
2429 if (Arrow || InTemplateParameterList) {
2430 nextToken();
2431 break;
2432 }
2433 return true;
2434 case tok::arrow:
2435 Arrow = FormatTok;
2436 nextToken();
2437 break;
2438 case tok::kw_requires:
2439 parseRequiresClause();
2440 break;
2441 case tok::equal:
2442 if (!InTemplateParameterList)
2443 return true;
2444 nextToken();
2445 break;
2446 default:
2447 return true;
2448 }
2449 }
2450
2451 FormatTok->setFinalizedType(TT_LambdaLBrace);
2452 LSquare.setFinalizedType(TT_LambdaLSquare);
2453
2454 if (Arrow)
2455 Arrow->setFinalizedType(TT_LambdaArrow);
2456
2457 NestedLambdas.push_back(Line->SeenDecltypeAuto);
2458 parseChildBlock();
2459 assert(!NestedLambdas.empty());
2460 NestedLambdas.pop_back();
2461
2462 return true;
2463}
2464
2465bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2466 const FormatToken *Previous = FormatTok->Previous;
2467 const FormatToken *LeftSquare = FormatTok;
2468 nextToken();
2469 if (Previous) {
2470 const auto *PrevPrev = Previous->getPreviousNonComment();
2471 if (Previous->is(tok::star) && PrevPrev && PrevPrev->isTypeName(LangOpts))
2472 return false;
2473 if (Previous->closesScope()) {
2474 // Not a potential C-style cast.
2475 if (Previous->isNot(tok::r_paren))
2476 return false;
2477 // Lambdas can be cast to function types only, e.g. `std::function<int()>`
2478 // and `int (*)()`.
2479 if (!PrevPrev || PrevPrev->isNoneOf(tok::greater, tok::r_paren))
2480 return false;
2481 }
2482 if (Previous && Previous->Tok.getIdentifierInfo() &&
2483 Previous->isNoneOf(tok::kw_return, tok::kw_co_await, tok::kw_co_yield,
2484 tok::kw_co_return)) {
2485 return false;
2486 }
2487 }
2488 if (LeftSquare->isCppStructuredBinding(IsCpp))
2489 return false;
2490 if (FormatTok->is(tok::l_square) || tok::isLiteral(FormatTok->Tok.getKind()))
2491 return false;
2492 if (FormatTok->is(tok::r_square)) {
2493 const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2494 if (Next->is(tok::greater))
2495 return false;
2496 }
2497 parseSquare(/*LambdaIntroducer=*/true);
2498 return true;
2499}
2500
2501void UnwrappedLineParser::tryToParseJSFunction() {
2502 assert(FormatTok->is(Keywords.kw_function));
2503 if (FormatTok->is(Keywords.kw_async))
2504 nextToken();
2505 // Consume "function".
2506 nextToken();
2507
2508 // Consume * (generator function). Treat it like C++'s overloaded operators.
2509 if (FormatTok->is(tok::star)) {
2510 FormatTok->setFinalizedType(TT_OverloadedOperator);
2511 nextToken();
2512 }
2513
2514 // Consume function name.
2515 if (FormatTok->is(tok::identifier))
2516 nextToken();
2517
2518 if (FormatTok->isNot(tok::l_paren))
2519 return;
2520
2521 // Parse formal parameter list.
2522 parseParens();
2523
2524 if (FormatTok->is(tok::colon)) {
2525 // Parse a type definition.
2526 nextToken();
2527
2528 // Eat the type declaration. For braced inline object types, balance braces,
2529 // otherwise just parse until finding an l_brace for the function body.
2530 if (FormatTok->is(tok::l_brace))
2531 tryToParseBracedList();
2532 else
2533 while (FormatTok->isNoneOf(tok::l_brace, tok::semi) && !eof())
2534 nextToken();
2535 }
2536
2537 if (FormatTok->is(tok::semi))
2538 return;
2539
2540 parseChildBlock();
2541}
2542
2543bool UnwrappedLineParser::tryToParseBracedList() {
2544 if (FormatTok->is(BK_Unknown))
2545 calculateBraceTypes();
2546 assert(FormatTok->isNot(BK_Unknown));
2547 if (FormatTok->is(BK_Block))
2548 return false;
2549 nextToken();
2550 parseBracedList();
2551 return true;
2552}
2553
2554bool UnwrappedLineParser::tryToParseChildBlock() {
2555 assert(Style.isJavaScript() || Style.isCSharp());
2556 assert(FormatTok->is(TT_FatArrow));
2557 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2558 // They always start an expression or a child block if followed by a curly
2559 // brace.
2560 nextToken();
2561 if (FormatTok->isNot(tok::l_brace))
2562 return false;
2563 parseChildBlock();
2564 return true;
2565}
2566
2567bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) {
2568 assert(!IsAngleBracket || !IsEnum);
2569 bool HasError = false;
2570
2571 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2572 // replace this by using parseAssignmentExpression() inside.
2573 do {
2574 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2575 tryToParseChildBlock()) {
2576 continue;
2577 }
2578 if (Style.isJavaScript()) {
2579 if (FormatTok->is(Keywords.kw_function)) {
2580 tryToParseJSFunction();
2581 continue;
2582 }
2583 if (FormatTok->is(tok::l_brace)) {
2584 // Could be a method inside of a braced list `{a() { return 1; }}`.
2585 if (tryToParseBracedList())
2586 continue;
2587 parseChildBlock();
2588 }
2589 }
2590 if (FormatTok->is(IsAngleBracket ? tok::greater : tok::r_brace)) {
2591 if (IsEnum) {
2592 FormatTok->setBlockKind(BK_Block);
2593 if (!Style.AllowShortEnumsOnASingleLine)
2594 addUnwrappedLine();
2595 }
2596 nextToken();
2597 return !HasError;
2598 }
2599 switch (FormatTok->Tok.getKind()) {
2600 case tok::l_square:
2601 if (Style.isCSharp())
2602 parseSquare();
2603 else
2604 tryToParseLambda();
2605 break;
2606 case tok::l_paren:
2607 parseParens();
2608 // JavaScript can just have free standing methods and getters/setters in
2609 // object literals. Detect them by a "{" following ")".
2610 if (Style.isJavaScript()) {
2611 if (FormatTok->is(tok::l_brace))
2612 parseChildBlock();
2613 break;
2614 }
2615 break;
2616 case tok::l_brace:
2617 // Assume there are no blocks inside a braced init list apart
2618 // from the ones we explicitly parse out (like lambdas).
2619 FormatTok->setBlockKind(BK_BracedInit);
2620 if (!IsAngleBracket) {
2621 auto *Prev = FormatTok->Previous;
2622 if (Prev && Prev->is(tok::greater))
2623 Prev->setFinalizedType(TT_TemplateCloser);
2624 }
2625 nextToken();
2626 parseBracedList();
2627 break;
2628 case tok::less:
2629 nextToken();
2630 if (IsAngleBracket)
2631 parseBracedList(/*IsAngleBracket=*/true);
2632 break;
2633 case tok::semi:
2634 // JavaScript (or more precisely TypeScript) can have semicolons in braced
2635 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2636 // used for error recovery if we have otherwise determined that this is
2637 // a braced list.
2638 if (Style.isJavaScript()) {
2639 nextToken();
2640 break;
2641 }
2642 HasError = true;
2643 if (!IsEnum)
2644 return false;
2645 nextToken();
2646 break;
2647 case tok::comma:
2648 nextToken();
2649 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2650 addUnwrappedLine();
2651 break;
2652 case tok::kw_requires:
2653 parseRequiresExpression();
2654 break;
2655 default:
2656 nextToken();
2657 break;
2658 }
2659 } while (!eof());
2660 return false;
2661}
2662
2663/// Parses a pair of parentheses (and everything between them).
2664/// \param StarAndAmpTokenType If different than TT_Unknown sets this type for
2665/// all (double) ampersands and stars. This applies for all nested scopes as
2666/// well.
2667///
2668/// Returns whether there is a `=` token between the parentheses.
2669bool UnwrappedLineParser::parseParens(TokenType StarAndAmpTokenType,
2670 bool InMacroCall) {
2671 assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2672 auto *LParen = FormatTok;
2673 auto *Prev = FormatTok->Previous;
2674 bool SeenComma = false;
2675 bool SeenEqual = false;
2676 bool MightBeFoldExpr = false;
2677 nextToken();
2678 const bool MightBeStmtExpr = FormatTok->is(tok::l_brace);
2679 if (!InMacroCall && Prev && Prev->is(TT_FunctionLikeMacro))
2680 InMacroCall = true;
2681 do {
2682 switch (FormatTok->Tok.getKind()) {
2683 case tok::l_paren:
2684 if (parseParens(StarAndAmpTokenType, InMacroCall))
2685 SeenEqual = true;
2686 if (Style.isJava() && FormatTok->is(tok::l_brace))
2687 parseChildBlock();
2688 break;
2689 case tok::r_paren: {
2690 auto *RParen = FormatTok;
2691 nextToken();
2692 if (Prev) {
2693 auto OptionalParens = [&] {
2694 if (Style.RemoveParentheses == FormatStyle::RPS_Leave ||
2695 MightBeStmtExpr || MightBeFoldExpr || SeenComma || InMacroCall ||
2696 Line->InMacroBody || RParen->getPreviousNonComment() == LParen) {
2697 return false;
2698 }
2699 const bool DoubleParens =
2700 Prev->is(tok::l_paren) && FormatTok->is(tok::r_paren);
2701 if (DoubleParens) {
2702 const auto *PrevPrev = Prev->getPreviousNonComment();
2703 const bool Excluded =
2704 PrevPrev &&
2705 (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) ||
2706 (SeenEqual &&
2707 (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) ||
2708 PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if))));
2709 if (!Excluded)
2710 return true;
2711 } else {
2712 const bool CommaSeparated =
2713 Prev->isOneOf(tok::l_paren, tok::comma) &&
2714 FormatTok->isOneOf(tok::comma, tok::r_paren);
2715 if (CommaSeparated &&
2716 // LParen is not preceded by ellipsis, comma.
2717 !Prev->endsSequence(tok::comma, tok::ellipsis) &&
2718 // RParen is not followed by comma, ellipsis.
2719 !(FormatTok->is(tok::comma) &&
2720 Tokens->peekNextToken()->is(tok::ellipsis))) {
2721 return true;
2722 }
2723 const bool ReturnParens =
2724 Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement &&
2725 ((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
2726 (!NestedLambdas.empty() && !NestedLambdas.back())) &&
2727 Prev->isOneOf(tok::kw_return, tok::kw_co_return) &&
2728 FormatTok->is(tok::semi);
2729 if (ReturnParens)
2730 return true;
2731 }
2732 return false;
2733 };
2734 if (OptionalParens()) {
2735 LParen->Optional = true;
2736 RParen->Optional = true;
2737 } else if (Prev->is(TT_TypenameMacro)) {
2738 LParen->setFinalizedType(TT_TypeDeclarationParen);
2739 RParen->setFinalizedType(TT_TypeDeclarationParen);
2740 } else if (Prev->is(tok::greater) && RParen->Previous == LParen) {
2741 Prev->setFinalizedType(TT_TemplateCloser);
2742 } else if (FormatTok->is(tok::l_brace) && Prev->is(tok::amp) &&
2743 !Prev->Previous) {
2744 FormatTok->setBlockKind(BK_BracedInit);
2745 }
2746 }
2747 return SeenEqual;
2748 }
2749 case tok::r_brace:
2750 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2751 return SeenEqual;
2752 case tok::l_square:
2753 tryToParseLambda();
2754 break;
2755 case tok::l_brace:
2756 if (!tryToParseBracedList())
2757 parseChildBlock();
2758 break;
2759 case tok::at:
2760 nextToken();
2761 if (FormatTok->is(tok::l_brace)) {
2762 nextToken();
2763 parseBracedList();
2764 }
2765 break;
2766 case tok::comma:
2767 SeenComma = true;
2768 nextToken();
2769 break;
2770 case tok::ellipsis:
2771 MightBeFoldExpr = true;
2772 nextToken();
2773 break;
2774 case tok::equal:
2775 SeenEqual = true;
2776 if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2777 tryToParseChildBlock();
2778 else
2779 nextToken();
2780 break;
2781 case tok::kw_class:
2782 if (Style.isJavaScript())
2783 parseRecord(/*ParseAsExpr=*/true);
2784 else
2785 nextToken();
2786 break;
2787 case tok::identifier:
2788 if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function)))
2789 tryToParseJSFunction();
2790 else
2791 nextToken();
2792 break;
2793 case tok::kw_switch:
2794 if (Style.isJava())
2795 parseSwitch(/*IsExpr=*/true);
2796 else
2797 nextToken();
2798 break;
2799 case tok::kw_requires:
2800 parseRequiresExpression();
2801 break;
2802 case tok::star:
2803 case tok::amp:
2804 case tok::ampamp:
2805 if (StarAndAmpTokenType != TT_Unknown)
2806 FormatTok->setFinalizedType(StarAndAmpTokenType);
2807 [[fallthrough]];
2808 default:
2809 nextToken();
2810 break;
2811 }
2812 } while (!eof());
2813 return SeenEqual;
2814}
2815
2816void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2817 if (!LambdaIntroducer) {
2818 assert(FormatTok->is(tok::l_square) && "'[' expected.");
2819 if (tryToParseLambda())
2820 return;
2821 }
2822 do {
2823 switch (FormatTok->Tok.getKind()) {
2824 case tok::l_paren:
2825 parseParens();
2826 break;
2827 case tok::r_square:
2828 nextToken();
2829 return;
2830 case tok::r_brace:
2831 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2832 return;
2833 case tok::l_square:
2834 parseSquare();
2835 break;
2836 case tok::l_brace: {
2837 if (!tryToParseBracedList())
2838 parseChildBlock();
2839 break;
2840 }
2841 case tok::at:
2842 case tok::colon:
2843 nextToken();
2844 if (FormatTok->is(tok::l_brace)) {
2845 nextToken();
2846 parseBracedList();
2847 }
2848 break;
2849 default:
2850 nextToken();
2851 break;
2852 }
2853 } while (!eof());
2854}
2855
2856void UnwrappedLineParser::keepAncestorBraces() {
2857 if (!Style.RemoveBracesLLVM)
2858 return;
2859
2860 const int MaxNestingLevels = 2;
2861 const int Size = NestedTooDeep.size();
2862 if (Size >= MaxNestingLevels)
2863 NestedTooDeep[Size - MaxNestingLevels] = true;
2864 NestedTooDeep.push_back(false);
2865}
2866
2868 for (const auto &Token : llvm::reverse(Line.Tokens))
2869 if (Token.Tok->isNot(tok::comment))
2870 return Token.Tok;
2871
2872 return nullptr;
2873}
2874
2875void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2876 FormatToken *Tok = nullptr;
2877
2878 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2879 PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) {
2880 Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
2881 ? getLastNonComment(*Line)
2882 : Line->Tokens.back().Tok;
2883 assert(Tok);
2884 if (Tok->BraceCount < 0) {
2885 assert(Tok->BraceCount == -1);
2886 Tok = nullptr;
2887 } else {
2888 Tok->BraceCount = -1;
2889 }
2890 }
2891
2892 addUnwrappedLine();
2893 ++Line->Level;
2894 ++Line->UnbracedBodyLevel;
2895 parseStructuralElement();
2896 --Line->UnbracedBodyLevel;
2897
2898 if (Tok) {
2899 assert(!Line->InPPDirective);
2900 Tok = nullptr;
2901 for (const auto &L : llvm::reverse(*CurrentLines)) {
2902 if (!L.InPPDirective && getLastNonComment(L)) {
2903 Tok = L.Tokens.back().Tok;
2904 break;
2905 }
2906 }
2907 assert(Tok);
2908 ++Tok->BraceCount;
2909 }
2910
2911 if (CheckEOF && eof())
2912 addUnwrappedLine();
2913
2914 --Line->Level;
2915}
2916
2917static void markOptionalBraces(FormatToken *LeftBrace) {
2918 if (!LeftBrace)
2919 return;
2920
2921 assert(LeftBrace->is(tok::l_brace));
2922
2923 FormatToken *RightBrace = LeftBrace->MatchingParen;
2924 if (!RightBrace) {
2925 assert(!LeftBrace->Optional);
2926 return;
2927 }
2928
2929 assert(RightBrace->is(tok::r_brace));
2930 assert(RightBrace->MatchingParen == LeftBrace);
2931 assert(LeftBrace->Optional == RightBrace->Optional);
2932
2933 LeftBrace->Optional = true;
2934 RightBrace->Optional = true;
2935}
2936
2937void UnwrappedLineParser::handleAttributes() {
2938 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2939 if (FormatTok->isAttribute())
2940 nextToken();
2941 else if (FormatTok->is(tok::l_square))
2942 handleCppAttributes();
2943}
2944
2945bool UnwrappedLineParser::handleCppAttributes() {
2946 // Handle [[likely]] / [[unlikely]] attributes.
2947 assert(FormatTok->is(tok::l_square));
2948 if (!tryToParseSimpleAttribute())
2949 return false;
2950 parseSquare();
2951 return true;
2952}
2953
2954/// Returns whether \c Tok begins a block.
2955bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2956 // FIXME: rename the function or make
2957 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2958 return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2959 : Tok.is(tok::l_brace);
2960}
2961
2962FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2963 bool KeepBraces,
2964 bool IsVerilogAssert) {
2965 assert((FormatTok->is(tok::kw_if) ||
2966 (Style.isVerilog() &&
2967 FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
2968 Keywords.kw_assume, Keywords.kw_cover))) &&
2969 "'if' expected");
2970 nextToken();
2971
2972 if (IsVerilogAssert) {
2973 // Handle `assert #0` and `assert final`.
2974 if (FormatTok->is(Keywords.kw_verilogHash)) {
2975 nextToken();
2976 if (FormatTok->is(tok::numeric_constant))
2977 nextToken();
2978 } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
2979 Keywords.kw_sequence)) {
2980 nextToken();
2981 }
2982 }
2983
2984 // TableGen's if statement has the form of `if <cond> then { ... }`.
2985 if (Style.isTableGen()) {
2986 while (!eof() && FormatTok->isNot(Keywords.kw_then)) {
2987 // Simply skip until then. This range only contains a value.
2988 nextToken();
2989 }
2990 }
2991
2992 // Handle `if !consteval`.
2993 if (FormatTok->is(tok::exclaim))
2994 nextToken();
2995
2996 bool KeepIfBraces = true;
2997 if (FormatTok->is(tok::kw_consteval)) {
2998 nextToken();
2999 } else {
3000 KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
3001 if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
3002 nextToken();
3003 if (FormatTok->is(tok::l_paren)) {
3004 FormatTok->setFinalizedType(TT_ConditionLParen);
3005 parseParens();
3006 }
3007 }
3008 handleAttributes();
3009 // The then action is optional in Verilog assert statements.
3010 if (IsVerilogAssert && FormatTok->is(tok::semi)) {
3011 nextToken();
3012 addUnwrappedLine();
3013 return nullptr;
3014 }
3015
3016 bool NeedsUnwrappedLine = false;
3017 keepAncestorBraces();
3018
3019 FormatToken *IfLeftBrace = nullptr;
3020 IfStmtKind IfBlockKind = IfStmtKind::NotIf;
3021
3022 if (isBlockBegin(*FormatTok)) {
3023 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3024 IfLeftBrace = FormatTok;
3025 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3026 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3027 /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
3028 setPreviousRBraceType(TT_ControlStatementRBrace);
3029 if (Style.BraceWrapping.BeforeElse)
3030 addUnwrappedLine();
3031 else
3032 NeedsUnwrappedLine = true;
3033 } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
3034 addUnwrappedLine();
3035 } else {
3036 parseUnbracedBody();
3037 }
3038
3039 if (Style.RemoveBracesLLVM) {
3040 assert(!NestedTooDeep.empty());
3041 KeepIfBraces = KeepIfBraces ||
3042 (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
3043 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
3044 IfBlockKind == IfStmtKind::IfElseIf;
3045 }
3046
3047 bool KeepElseBraces = KeepIfBraces;
3048 FormatToken *ElseLeftBrace = nullptr;
3049 IfStmtKind Kind = IfStmtKind::IfOnly;
3050
3051 if (FormatTok->is(tok::kw_else)) {
3052 if (Style.RemoveBracesLLVM) {
3053 NestedTooDeep.back() = false;
3054 Kind = IfStmtKind::IfElse;
3055 }
3056 nextToken();
3057 handleAttributes();
3058 if (isBlockBegin(*FormatTok)) {
3059 const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
3060 FormatTok->setFinalizedType(TT_ElseLBrace);
3061 ElseLeftBrace = FormatTok;
3062 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3063 IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
3064 FormatToken *IfLBrace =
3065 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3066 /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
3067 setPreviousRBraceType(TT_ElseRBrace);
3068 if (FormatTok->is(tok::kw_else)) {
3069 KeepElseBraces = KeepElseBraces ||
3070 ElseBlockKind == IfStmtKind::IfOnly ||
3071 ElseBlockKind == IfStmtKind::IfElseIf;
3072 } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
3073 KeepElseBraces = true;
3074 assert(ElseLeftBrace->MatchingParen);
3075 markOptionalBraces(ElseLeftBrace);
3076 }
3077 addUnwrappedLine();
3078 } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
3079 const FormatToken *Previous = Tokens->getPreviousToken();
3080 assert(Previous);
3081 const bool IsPrecededByComment = Previous->is(tok::comment);
3082 if (IsPrecededByComment) {
3083 addUnwrappedLine();
3084 ++Line->Level;
3085 }
3086 bool TooDeep = true;
3087 if (Style.RemoveBracesLLVM) {
3088 Kind = IfStmtKind::IfElseIf;
3089 TooDeep = NestedTooDeep.pop_back_val();
3090 }
3091 ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
3092 if (Style.RemoveBracesLLVM)
3093 NestedTooDeep.push_back(TooDeep);
3094 if (IsPrecededByComment)
3095 --Line->Level;
3096 } else {
3097 parseUnbracedBody(/*CheckEOF=*/true);
3098 }
3099 } else {
3100 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
3101 if (NeedsUnwrappedLine)
3102 addUnwrappedLine();
3103 }
3104
3105 if (!Style.RemoveBracesLLVM)
3106 return nullptr;
3107
3108 assert(!NestedTooDeep.empty());
3109 KeepElseBraces = KeepElseBraces ||
3110 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
3111 NestedTooDeep.back();
3112
3113 NestedTooDeep.pop_back();
3114
3115 if (!KeepIfBraces && !KeepElseBraces) {
3116 markOptionalBraces(IfLeftBrace);
3117 markOptionalBraces(ElseLeftBrace);
3118 } else if (IfLeftBrace) {
3119 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
3120 if (IfRightBrace) {
3121 assert(IfRightBrace->MatchingParen == IfLeftBrace);
3122 assert(!IfLeftBrace->Optional);
3123 assert(!IfRightBrace->Optional);
3124 IfLeftBrace->MatchingParen = nullptr;
3125 IfRightBrace->MatchingParen = nullptr;
3126 }
3127 }
3128
3129 if (IfKind)
3130 *IfKind = Kind;
3131
3132 return IfLeftBrace;
3133}
3134
3135void UnwrappedLineParser::parseTryCatch() {
3136 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
3137 nextToken();
3138 bool NeedsUnwrappedLine = false;
3139 bool HasCtorInitializer = false;
3140 if (FormatTok->is(tok::colon)) {
3141 auto *Colon = FormatTok;
3142 // We are in a function try block, what comes is an initializer list.
3143 nextToken();
3144 if (FormatTok->is(tok::identifier)) {
3145 HasCtorInitializer = true;
3146 Colon->setFinalizedType(TT_CtorInitializerColon);
3147 }
3148
3149 // In case identifiers were removed by clang-tidy, what might follow is
3150 // multiple commas in sequence - before the first identifier.
3151 while (FormatTok->is(tok::comma))
3152 nextToken();
3153
3154 while (FormatTok->is(tok::identifier)) {
3155 nextToken();
3156 if (FormatTok->is(tok::l_paren)) {
3157 parseParens();
3158 } else if (FormatTok->is(tok::l_brace)) {
3159 nextToken();
3160 parseBracedList();
3161 }
3162
3163 // In case identifiers were removed by clang-tidy, what might follow is
3164 // multiple commas in sequence - after the first identifier.
3165 while (FormatTok->is(tok::comma))
3166 nextToken();
3167 }
3168 }
3169 // Parse try with resource.
3170 if (Style.isJava() && FormatTok->is(tok::l_paren))
3171 parseParens();
3172
3173 keepAncestorBraces();
3174
3175 if (FormatTok->is(tok::l_brace)) {
3176 if (HasCtorInitializer)
3177 FormatTok->setFinalizedType(TT_FunctionLBrace);
3178 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3179 parseBlock();
3180 if (Style.BraceWrapping.BeforeCatch)
3181 addUnwrappedLine();
3182 else
3183 NeedsUnwrappedLine = true;
3184 } else if (FormatTok->isNot(tok::kw_catch)) {
3185 // The C++ standard requires a compound-statement after a try.
3186 // If there's none, we try to assume there's a structuralElement
3187 // and try to continue.
3188 addUnwrappedLine();
3189 ++Line->Level;
3190 parseStructuralElement();
3191 --Line->Level;
3192 }
3193 for (bool SeenCatch = false;;) {
3194 if (FormatTok->is(tok::at))
3195 nextToken();
3196 if (FormatTok->isNoneOf(tok::kw_catch, Keywords.kw___except,
3197 tok::kw___finally, tok::objc_catch,
3198 tok::objc_finally) &&
3199 !((Style.isJava() || Style.isJavaScript()) &&
3200 FormatTok->is(Keywords.kw_finally))) {
3201 break;
3202 }
3203 if (FormatTok->is(tok::kw_catch))
3204 SeenCatch = true;
3205 nextToken();
3206 while (FormatTok->isNot(tok::l_brace)) {
3207 if (FormatTok->is(tok::l_paren)) {
3208 parseParens();
3209 continue;
3210 }
3211 if (FormatTok->isOneOf(tok::semi, tok::r_brace) || eof()) {
3212 if (Style.RemoveBracesLLVM)
3213 NestedTooDeep.pop_back();
3214 return;
3215 }
3216 nextToken();
3217 }
3218 if (SeenCatch) {
3219 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3220 SeenCatch = false;
3221 }
3222 NeedsUnwrappedLine = false;
3223 Line->MustBeDeclaration = false;
3224 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3225 parseBlock();
3226 if (Style.BraceWrapping.BeforeCatch)
3227 addUnwrappedLine();
3228 else
3229 NeedsUnwrappedLine = true;
3230 }
3231
3232 if (Style.RemoveBracesLLVM)
3233 NestedTooDeep.pop_back();
3234
3235 if (NeedsUnwrappedLine)
3236 addUnwrappedLine();
3237}
3238
3239void UnwrappedLineParser::parseNamespaceOrExportBlock(unsigned AddLevels) {
3240 bool ManageWhitesmithsBraces =
3241 AddLevels == 0u && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3242
3243 // If we're in Whitesmiths mode, indent the brace if we're not indenting
3244 // the whole block.
3245 if (ManageWhitesmithsBraces)
3246 ++Line->Level;
3247
3248 // Munch the semicolon after the block. This is more common than one would
3249 // think. Putting the semicolon into its own line is very ugly.
3250 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
3251 /*KeepBraces=*/true, /*IfKind=*/nullptr, ManageWhitesmithsBraces);
3252
3253 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
3254
3255 if (ManageWhitesmithsBraces)
3256 --Line->Level;
3257}
3258
3259void UnwrappedLineParser::parseNamespace() {
3260 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
3261 "'namespace' expected");
3262
3263 const FormatToken &InitialToken = *FormatTok;
3264 nextToken();
3265 if (InitialToken.is(TT_NamespaceMacro)) {
3266 parseParens();
3267 } else {
3268 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
3269 tok::l_square, tok::period, tok::l_paren) ||
3270 (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
3271 if (FormatTok->is(tok::l_square))
3272 parseSquare();
3273 else if (FormatTok->is(tok::l_paren))
3274 parseParens();
3275 else
3276 nextToken();
3277 }
3278 }
3279 if (FormatTok->is(tok::l_brace)) {
3280 FormatTok->setFinalizedType(TT_NamespaceLBrace);
3281
3282 if (ShouldBreakBeforeBrace(Style, InitialToken,
3283 Tokens->peekNextToken()->is(tok::r_brace))) {
3284 addUnwrappedLine();
3285 }
3286
3287 unsigned AddLevels =
3288 Style.NamespaceIndentation == FormatStyle::NI_All ||
3289 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
3290 DeclarationScopeStack.size() > 1)
3291 ? 1u
3292 : 0u;
3293 parseNamespaceOrExportBlock(AddLevels);
3294 }
3295 // FIXME: Add error handling.
3296}
3297
3298void UnwrappedLineParser::parseCppExportBlock() {
3299 parseNamespaceOrExportBlock(/*AddLevels=*/Style.IndentExportBlock ? 1 : 0);
3300}
3301
3302void UnwrappedLineParser::parseNew() {
3303 assert(FormatTok->is(tok::kw_new) && "'new' expected");
3304 nextToken();
3305
3306 if (Style.isCSharp()) {
3307 do {
3308 // Handle constructor invocation, e.g. `new(field: value)`.
3309 if (FormatTok->is(tok::l_paren))
3310 parseParens();
3311
3312 // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3313 if (FormatTok->is(tok::l_brace))
3314 parseBracedList();
3315
3316 if (FormatTok->isOneOf(tok::semi, tok::comma))
3317 return;
3318
3319 nextToken();
3320 } while (!eof());
3321 }
3322
3323 if (!Style.isJava())
3324 return;
3325
3326 // In Java, we can parse everything up to the parens, which aren't optional.
3327 do {
3328 // There should not be a ;, { or } before the new's open paren.
3329 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
3330 return;
3331
3332 // Consume the parens.
3333 if (FormatTok->is(tok::l_paren)) {
3334 parseParens();
3335
3336 // If there is a class body of an anonymous class, consume that as child.
3337 if (FormatTok->is(tok::l_brace))
3338 parseChildBlock();
3339 return;
3340 }
3341 nextToken();
3342 } while (!eof());
3343}
3344
3345void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3346 keepAncestorBraces();
3347
3348 if (isBlockBegin(*FormatTok)) {
3349 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3350 FormatToken *LeftBrace = FormatTok;
3351 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3352 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3353 /*MunchSemi=*/true, KeepBraces);
3354 setPreviousRBraceType(TT_ControlStatementRBrace);
3355 if (!KeepBraces) {
3356 assert(!NestedTooDeep.empty());
3357 if (!NestedTooDeep.back())
3358 markOptionalBraces(LeftBrace);
3359 }
3360 if (WrapRightBrace)
3361 addUnwrappedLine();
3362 } else {
3363 parseUnbracedBody();
3364 }
3365
3366 if (!KeepBraces)
3367 NestedTooDeep.pop_back();
3368}
3369
3370void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3371 assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3372 (Style.isVerilog() &&
3373 FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3374 Keywords.kw_always_ff, Keywords.kw_always_latch,
3375 Keywords.kw_final, Keywords.kw_initial,
3376 Keywords.kw_foreach, Keywords.kw_forever,
3377 Keywords.kw_repeat))) &&
3378 "'for', 'while' or foreach macro expected");
3379 const bool KeepBraces = !Style.RemoveBracesLLVM ||
3380 FormatTok->isNoneOf(tok::kw_for, tok::kw_while);
3381
3382 nextToken();
3383 // JS' for await ( ...
3384 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
3385 nextToken();
3386 if (IsCpp && FormatTok->is(tok::kw_co_await))
3387 nextToken();
3388 if (HasParens && FormatTok->is(tok::l_paren)) {
3389 // The type is only set for Verilog basically because we were afraid to
3390 // change the existing behavior for loops. See the discussion on D121756 for
3391 // details.
3392 if (Style.isVerilog())
3393 FormatTok->setFinalizedType(TT_ConditionLParen);
3394 parseParens();
3395 }
3396
3397 if (Style.isVerilog()) {
3398 // Event control.
3399 parseVerilogSensitivityList();
3400 } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(tok::semi) &&
3401 Tokens->getPreviousToken()->is(tok::r_paren)) {
3402 nextToken();
3403 addUnwrappedLine();
3404 return;
3405 }
3406
3407 handleAttributes();
3408 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3409}
3410
3411void UnwrappedLineParser::parseDoWhile() {
3412 assert(FormatTok->is(tok::kw_do) && "'do' expected");
3413 nextToken();
3414
3415 parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
3416
3417 // FIXME: Add error handling.
3418 if (FormatTok->isNot(tok::kw_while)) {
3419 addUnwrappedLine();
3420 return;
3421 }
3422
3423 FormatTok->setFinalizedType(TT_DoWhile);
3424
3425 // If in Whitesmiths mode, the line with the while() needs to be indented
3426 // to the same level as the block.
3427 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
3428 ++Line->Level;
3429
3430 nextToken();
3431 parseStructuralElement();
3432}
3433
3434void UnwrappedLineParser::parseLabel(
3435 FormatStyle::IndentGotoLabelStyle IndentGotoLabels) {
3436 const bool IsGotoLabel = FormatTok->is(TT_GotoLabelColon);
3437 nextToken();
3438 unsigned OldLineLevel = Line->Level;
3439
3440 switch (IndentGotoLabels) {
3441 case FormatStyle::IGLS_NoIndent:
3442 Line->Level = 0;
3443 break;
3444 case FormatStyle::IGLS_OuterIndent:
3445 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3446 --Line->Level;
3447 break;
3448 case FormatStyle::IGLS_HalfIndent:
3449 case FormatStyle::IGLS_InnerIndent:
3450 break;
3451 }
3452
3453 if (!IsGotoLabel && !Style.IndentCaseBlocks &&
3454 CommentsBeforeNextToken.empty() && FormatTok->is(tok::l_brace)) {
3455 CompoundStatementIndenter Indenter(this, Line->Level,
3456 Style.BraceWrapping.AfterCaseLabel,
3457 Style.BraceWrapping.IndentBraces);
3458 parseBlock();
3459 if (FormatTok->is(tok::kw_break)) {
3460 if (Style.BraceWrapping.AfterControlStatement ==
3461 FormatStyle::BWACS_Always) {
3462 addUnwrappedLine();
3463 if (!Style.IndentCaseBlocks &&
3464 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
3465 ++Line->Level;
3466 }
3467 }
3468 parseStructuralElement();
3469 }
3470 addUnwrappedLine();
3471 } else {
3472 if (FormatTok->is(tok::semi))
3473 nextToken();
3474 addUnwrappedLine();
3475 }
3476 Line->Level = OldLineLevel;
3477 if (FormatTok->isNot(tok::l_brace)) {
3478 parseStructuralElement();
3479 addUnwrappedLine();
3480 }
3481}
3482
3483void UnwrappedLineParser::parseCaseLabel() {
3484 assert(FormatTok->is(tok::kw_case) && "'case' expected");
3485 auto *Case = FormatTok;
3486
3487 // FIXME: fix handling of complex expressions here.
3488 do {
3489 nextToken();
3490 if (FormatTok->is(tok::colon)) {
3491 FormatTok->setFinalizedType(TT_CaseLabelColon);
3492 break;
3493 }
3494 if (Style.isJava() && FormatTok->is(tok::arrow)) {
3495 FormatTok->setFinalizedType(TT_CaseLabelArrow);
3496 Case->setFinalizedType(TT_SwitchExpressionLabel);
3497 break;
3498 }
3499 } while (!eof());
3500 parseLabel();
3501}
3502
3503void UnwrappedLineParser::parseSwitch(bool IsExpr) {
3504 assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3505 nextToken();
3506 if (FormatTok->is(tok::l_paren))
3507 parseParens();
3508
3509 keepAncestorBraces();
3510
3511 if (FormatTok->is(tok::l_brace)) {
3512 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3513 FormatTok->setFinalizedType(IsExpr ? TT_SwitchExpressionLBrace
3514 : TT_ControlStatementLBrace);
3515 if (IsExpr)
3516 parseChildBlock();
3517 else
3518 parseBlock();
3519 setPreviousRBraceType(TT_ControlStatementRBrace);
3520 if (!IsExpr)
3521 addUnwrappedLine();
3522 } else {
3523 addUnwrappedLine();
3524 ++Line->Level;
3525 parseStructuralElement();
3526 --Line->Level;
3527 }
3528
3529 if (Style.RemoveBracesLLVM)
3530 NestedTooDeep.pop_back();
3531}
3532
3533void UnwrappedLineParser::parseAccessSpecifier() {
3534 nextToken();
3535 // Understand Qt's slots.
3536 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3537 nextToken();
3538 // Otherwise, we don't know what it is, and we'd better keep the next token.
3539 if (FormatTok->is(tok::colon))
3540 nextToken();
3541 addUnwrappedLine();
3542}
3543
3544/// Parses a requires, decides if it is a clause or an expression.
3545/// \pre The current token has to be the requires keyword.
/// \param SeenEqual Only consulted when the previous non-comment token is
/// `&&` and the token before that is not `const`: if false, the requires is
/// parsed as a clause right away; if true, the decision is left to the token
/// lookahead further down.
3546/// \returns true if it parsed a clause.
3547bool UnwrappedLineParser::parseRequires(bool SeenEqual) {
3548 assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3549
3550 // We try to guess if it is a requires clause, or a requires expression. For
3551 // that we first check the next token.
3552 switch (Tokens->peekNextToken(/*SkipComment=*/true)->Tok.getKind()) {
3553 case tok::l_brace:
3554 // This can only be an expression, never a clause.
3555 parseRequiresExpression();
3556 return false;
3557 case tok::l_paren:
3558 // Clauses and expression can start with a paren, it's unclear what we have.
3559 break;
3560 default:
3561 // All other tokens can only be a clause.
3562 parseRequiresClause();
3563 return true;
3564 }
3565
3566 // Looking forward we would have to decide if there are function declaration
3567 // like arguments to the requires expression:
3568 // requires (T t) {
3569 // Or there is a constraint expression for the requires clause:
3570 // requires (C<T> && ...
3571
3572 // But first let's look behind.
3573 auto *PreviousNonComment = FormatTok->getPreviousNonComment();
3574
3575 if (!PreviousNonComment ||
3576 PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
3577 // If there is no token, or an expression left brace, we are a requires
3578 // clause within a requires expression.
3579 parseRequiresClause();
3580 return true;
3581 }
3582
// Every case below either returns or, for `&&` only, breaks out of the switch
// to reach the lookahead.
3583 switch (PreviousNonComment->Tok.getKind()) {
3584 case tok::greater:
3585 case tok::r_paren:
3586 case tok::kw_noexcept:
3587 case tok::kw_const:
3588 case tok::star:
3589 case tok::amp:
3590 // This is a requires clause.
3591 parseRequiresClause();
3592 return true;
3593 case tok::ampamp: {
3594 // This can be either:
3595 // if (... && requires (T t) ...)
3596 // Or
3597 // void member(...) && requires (C<T> ...
3598 // We check the one token before that for a const:
3599 // void member(...) const && requires (C<T> ...
3600 auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3601 if ((PrevPrev && PrevPrev->is(tok::kw_const)) || !SeenEqual) {
3602 parseRequiresClause();
3603 return true;
3604 }
3605 break;
3606 }
3607 default:
3608 if (PreviousNonComment->isTypeOrIdentifier(LangOpts)) {
3609 // This is a requires clause.
3610 parseRequiresClause();
3611 return true;
3612 }
3613 // It's an expression.
3614 parseRequiresExpression();
3615 return false;
3616 }
3617
3618 // Now we look forward and try to check if the paren content is a parameter
3619 // list. The parameters can be cv-qualified and contain references or
3620 // pointers.
3621 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3622 // of stuff: typename, const, *, &, &&, ::, identifiers.
3623
// The token source position is saved here and restored with setPosition()
// on every exit path below, before the actual parse function is called.
3624 unsigned StoredPosition = Tokens->getPosition();
3625 FormatToken *NextToken = Tokens->getNextToken();
3626 int Lookahead = 0;
// Advances the lookahead by one token and counts it against the limit used
// by the loop below.
3627 auto PeekNext = [&Lookahead, &NextToken, this] {
3628 ++Lookahead;
3629 NextToken = Tokens->getNextToken();
3630 };
3631
3632 bool FoundType = false;
3633 bool LastWasColonColon = false;
// Incremented on '<' and decremented on '>'; the checks below only fire
// while it is zero.
3634 int OpenAngles = 0;
3635
// Inspect at most 50 tokens.
3636 for (; Lookahead < 50; PeekNext()) {
3637 switch (NextToken->Tok.getKind()) {
3638 case tok::kw_volatile:
3639 case tok::kw_const:
3640 case tok::comma:
// With no angle bracket open, any of these makes this a requires expression.
3641 if (OpenAngles == 0) {
3642 FormatTok = Tokens->setPosition(StoredPosition);
3643 parseRequiresExpression();
3644 return false;
3645 }
3646 break;
3647 case tok::eof:
3648 // Break out of the loop.
3649 Lookahead = 50;
3650 break;
3651 case tok::coloncolon:
3652 LastWasColonColon = true;
3653 break;
3654 case tok::kw_decltype:
3655 case tok::identifier:
// A second type-like token that is not joined to the previous one by '::' and
// is outside of angle brackets matches the TYPE NAME pattern.
3656 if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3657 FormatTok = Tokens->setPosition(StoredPosition);
3658 parseRequiresExpression();
3659 return false;
3660 }
3661 FoundType = true;
3662 LastWasColonColon = false;
3663 break;
3664 case tok::less:
3665 ++OpenAngles;
3666 break;
3667 case tok::greater:
3668 --OpenAngles;
3669 break;
3670 default:
// Any other token that isTypeName() accepts also selects the expression.
3671 if (NextToken->isTypeName(LangOpts)) {
3672 FormatTok = Tokens->setPosition(StoredPosition);
3673 parseRequiresExpression();
3674 return false;
3675 }
3676 break;
3677 }
3678 }
3679 // This seems to be a complicated expression, just assume it's a clause.
3680 FormatTok = Tokens->setPosition(StoredPosition);
3681 parseRequiresClause();
3682 return true;
3683}
3684
3685/// Parses a requires clause.
3686/// \sa parseRequiresExpression
3687///
3688/// Returns if it either has finished parsing the clause, or it detects, that
3689/// the clause is incorrect.
3690void UnwrappedLineParser::parseRequiresClause() {
3691 assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3692
3693 // If there is no previous token, we are within a requires expression,
3694 // otherwise we will always have the template or function declaration in front
3695 // of it.
3696 bool InRequiresExpression =
3697 !FormatTok->Previous ||
3698 FormatTok->Previous->is(TT_RequiresExpressionLBrace);
3699
3700 FormatTok->setFinalizedType(InRequiresExpression
3701 ? TT_RequiresClauseInARequiresExpression
3702 : TT_RequiresClause);
3703 nextToken();
3704
3705 // NOTE: parseConstraintExpression is only ever called from this function.
3706 // It could be inlined into here.
3707 parseConstraintExpression();
3708
3709 if (!InRequiresExpression && FormatTok->Previous)
3710 FormatTok->Previous->ClosesRequiresClause = true;
3711}
3712
3713/// Parses a requires expression.
3714/// \sa parseRequiresClause
3715///
3716/// Returns if it either has finished parsing the expression, or it detects,
3717/// that the expression is incorrect.
3718void UnwrappedLineParser::parseRequiresExpression() {
3719 assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3720
3721 FormatTok->setFinalizedType(TT_RequiresExpression);
3722 nextToken();
3723
3724 if (FormatTok->is(tok::l_paren)) {
3725 FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3726 parseParens();
3727 }
3728
3729 if (FormatTok->is(tok::l_brace)) {
3730 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3731 parseChildBlock();
3732 }
3733}
3734
3735/// Parses a constraint expression.
3736///
3737/// This is the body of a requires clause. It returns, when the parsing is
3738/// complete, or the expression is incorrect.
///
/// Tokens are consumed one construct per loop iteration. Every `return`
/// inside the loop leaves the current token unconsumed.
3739void UnwrappedLineParser::parseConstraintExpression() {
3740 // The special handling for lambdas is needed since tryToParseLambda() eats a
3741 // token and if a requires expression is the last part of a requires clause
3742 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3743 // not set on the correct token. Thus we need to be aware if we even expect a
3744 // lambda to be possible.
3745 // template <typename T> requires requires { ... } [[nodiscard]] ...;
3746 bool LambdaNextTimeAllowed = true;
3747
3748 // Within lambda declarations, it is permitted to put a requires clause after
3749 // its template parameter list, which would place the requires clause right
3750 // before the parentheses of the parameters of the lambda declaration. Thus,
3751 // we track if we expect to see grouping parentheses at all.
3752 // Without this check, `requires foo<T> (T t)` in the below example would be
3753 // seen as the whole requires clause, accidentally eating the parameters of
3754 // the lambda.
3755 // [&]<typename T> requires foo<T> (T t) { ... };
3756 bool TopLevelParensAllowed = true;
3757
3758 do {
// The permission is reset on every iteration; only the cases below that set
// LambdaNextTimeAllowed again keep a lambda possible for the next token.
3759 bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);
3760
3761 switch (FormatTok->Tok.getKind()) {
3762 case tok::kw_requires:
3763 parseRequiresExpression();
3764 break;
3765
3766 case tok::l_paren:
// Parentheses that are not allowed here end the constraint expression.
3767 if (!TopLevelParensAllowed)
3768 return;
3769 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3770 TopLevelParensAllowed = false;
3771 break;
3772
3773 case tok::l_square:
// A '[' continues the expression only if a lambda is allowed at this point
// and tryToParseLambda() succeeds.
3774 if (!LambdaThisTimeAllowed || !tryToParseLambda())
3775 return;
3776 break;
3777
// These tokens end the constraint expression.
3778 case tok::kw_const:
3779 case tok::semi:
3780 case tok::kw_class:
3781 case tok::kw_struct:
3782 case tok::kw_union:
3783 return;
3784
3785 case tok::l_brace:
3786 // Potential function body.
3787 return;
3788
3789 case tok::ampamp:
3790 case tok::pipepipe:
3791 FormatTok->setFinalizedType(TT_BinaryOperator);
3792 nextToken();
3793 LambdaNextTimeAllowed = true;
3794 TopLevelParensAllowed = true;
3795 break;
3796
3797 case tok::comma:
3798 case tok::comment:
// Carry the lambda permission of this iteration over to the next one.
3799 LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3800 nextToken();
3801 break;
3802
3803 case tok::kw_sizeof:
3804 case tok::greater:
3805 case tok::greaterequal:
3806 case tok::greatergreater:
3807 case tok::less:
3808 case tok::lessequal:
3809 case tok::lessless:
3810 case tok::equalequal:
3811 case tok::exclaim:
3812 case tok::exclaimequal:
3813 case tok::plus:
3814 case tok::minus:
3815 case tok::star:
3816 case tok::slash:
3817 LambdaNextTimeAllowed = true;
3818 TopLevelParensAllowed = true;
3819 // Just eat them.
3820 nextToken();
3821 break;
3822
3823 case tok::numeric_constant:
3824 case tok::coloncolon:
3825 case tok::kw_true:
3826 case tok::kw_false:
3827 TopLevelParensAllowed = false;
3828 // Just eat them.
3829 nextToken();
3830 break;
3831
3832 case tok::kw_static_cast:
3833 case tok::kw_const_cast:
3834 case tok::kw_reinterpret_cast:
3835 case tok::kw_dynamic_cast:
// A cast keyword has to be followed by '<'; what follows the '<' is parsed as
// an angle-bracketed list.
3836 nextToken();
3837 if (FormatTok->isNot(tok::less))
3838 return;
3839
3840 nextToken();
3841 parseBracedList(/*IsAngleBracket=*/true);
3842 break;
3843
3844 default:
3845 if (!FormatTok->Tok.getIdentifierInfo()) {
3846 // Identifiers are part of the default case, we check for more then
3847 // tok::identifier to handle builtin type traits.
3848 return;
3849 }
3850
3851 // We need to differentiate identifiers for a template deduction guide,
3852 // variables, or function return types (the constraint expression has
3853 // ended before that), and basically all other cases. But it's easier to
3854 // check the other way around.
3855 assert(FormatTok->Previous);
3856 switch (FormatTok->Previous->Tok.getKind()) {
3857 case tok::coloncolon: // Nested identifier.
3858 case tok::ampamp: // Start of a function or variable for the
3859 case tok::pipepipe: // constraint expression. (binary)
3860 case tok::exclaim: // The same as above, but unary.
3861 case tok::kw_requires: // Initial identifier of a requires clause.
3862 case tok::equal: // Initial identifier of a concept declaration.
3863 break;
3864 default:
3865 return;
3866 }
3867
3868 // Read identifier with optional template declaration.
3869 nextToken();
3870 if (FormatTok->is(tok::less)) {
3871 nextToken();
3872 parseBracedList(/*IsAngleBracket=*/true);
3873 }
3874 TopLevelParensAllowed = false;
3875 break;
3876 }
3877 } while (!eof());
3878}
3879
/// Parses an enum: the optional `enum` keyword, everything up to the opening
/// brace, and the enumerator list.
///
/// \returns false when the tokens are judged not to start an enum: `enum`
/// followed by ':' or '?' in JavaScript, by '=' in protobuf,
/// handleCppAttributes() failing, or two identifiers in a row in C++.
/// Returns true in all other cases, including when no '{' follows.
3880bool UnwrappedLineParser::parseEnum() {
3881 const FormatToken &InitialToken = *FormatTok;
3882
3883 // Won't be 'enum' for NS_ENUMs.
3884 if (FormatTok->is(tok::kw_enum))
3885 nextToken();
3886
3887 // In TypeScript, "enum" can also be used as property name, e.g. in interface
3888 // declarations. An "enum" keyword followed by a colon would be a syntax
3889 // error and thus assume it is just an identifier.
3890 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3891 return false;
3892
3893 // In protobuf, "enum" can be used as a field name.
3894 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3895 return false;
3896
3897 if (IsCpp) {
3898 // Eat up enum class ...
3899 if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3900 nextToken();
3901 while (FormatTok->is(tok::l_square))
3902 if (!handleCppAttributes())
3903 return false;
3904 }
3905
// Consume the enum name and an optional underlying type, i.e. everything that
// may appear before the opening brace.
3906 while (FormatTok->Tok.getIdentifierInfo() ||
3907 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3908 tok::greater, tok::comma, tok::question,
3909 tok::l_square)) {
3910 if (FormatTok->is(tok::colon))
3911 FormatTok->setFinalizedType(TT_EnumUnderlyingTypeColon);
3912 if (Style.isVerilog()) {
3913 FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
3914 nextToken();
3915 // In Verilog the base type can have dimensions.
3916 while (FormatTok->is(tok::l_square))
3917 parseSquare();
3918 } else {
3919 nextToken();
3920 }
3921 // We can have macros or attributes in between 'enum' and the enum name.
3922 if (FormatTok->is(tok::l_paren))
3923 parseParens();
3924 if (FormatTok->is(tok::identifier)) {
3925 nextToken();
3926 // If there are two identifiers in a row, this is likely an elaborate
3927 // return type. In Java, this can be "implements", etc.
3928 if (IsCpp && FormatTok->is(tok::identifier))
3929 return false;
3930 }
3931 }
3932
3933 // Just a declaration or something is wrong.
3934 if (FormatTok->isNot(tok::l_brace))
3935 return true;
3936 FormatTok->setFinalizedType(TT_EnumLBrace);
3937 FormatTok->setBlockKind(BK_Block);
3938
3939 if (Style.isJava()) {
3940 // Java enums are different.
3941 parseJavaEnumBody();
3942 return true;
3943 }
3944 if (Style.Language == FormatStyle::LK_Proto) {
3945 parseBlock(/*MustBeDeclaration=*/true);
3946 return true;
3947 }
3948
3949 const bool ManageWhitesmithsBraces =
3950 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3951
3952 if (!Style.AllowShortEnumsOnASingleLine &&
3953 ShouldBreakBeforeBrace(Style, InitialToken,
3954 Tokens->peekNextToken()->is(tok::r_brace))) {
3955 addUnwrappedLine();
3956
3957 // If we're in Whitesmiths mode, indent the brace if we're not indenting
3958 // the whole block.
3959 if (ManageWhitesmithsBraces)
3960 ++Line->Level;
3961 }
3962 // Parse enum body.
3963 nextToken();
3964 if (!Style.AllowShortEnumsOnASingleLine) {
3965 addUnwrappedLine();
// Outside of Whitesmiths mode the enumerators go one level deeper; the level
// is restored after parseBracedList() below.
3966 if (!ManageWhitesmithsBraces)
3967 ++Line->Level;
3968 }
// Index of the most recently added line, or kInvalidIndex if there is none
// yet; only used in Whitesmiths mode further down.
3969 const auto OpeningLineIndex = CurrentLines->empty()
3970 ? UnwrappedLine::kInvalidIndex
3971 : CurrentLines->size() - 1;
3972 bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true);
3973 if (!Style.AllowShortEnumsOnASingleLine && !ManageWhitesmithsBraces)
3974 --Line->Level;
// If the list could not be parsed, finish the line here, including a
// following semicolon.
3975 if (HasError) {
3976 if (FormatTok->is(tok::semi))
3977 nextToken();
3978 addUnwrappedLine();
3979 }
3980 setPreviousRBraceType(TT_EnumRBrace);
3981 if (ManageWhitesmithsBraces)
3982 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
3983 return true;
3984
3985 // There is no addUnwrappedLine() here so that we fall through to parsing a
3986 // structural element afterwards. Thus, in "enum A {} n, m;",
3987 // "} n, m;" will end up in one unwrapped line.
3988}
3989
3990bool UnwrappedLineParser::parseStructLike() {
3991 // parseRecord falls through and does not yet add an unwrapped line as a
3992 // record declaration or definition can start a structural element.
3993 parseRecord();
3994 // This does not apply to Java, JavaScript and C#.
3995 if (Style.isJava() || Style.isJavaScript() || Style.isCSharp()) {
3996 if (FormatTok->is(tok::semi))
3997 nextToken();
3998 addUnwrappedLine();
3999 return true;
4000 }
4001 return false;
4002}
4003
4004namespace {
4005// A class used to set and restore the Token position when peeking
4006// ahead in the token source.
4007class ScopedTokenPosition {
4008 unsigned StoredPosition;
4009 FormatTokenSource *Tokens;
4010
4011public:
4012 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
4013 assert(Tokens && "Tokens expected to not be null");
4014 StoredPosition = Tokens->getPosition();
4015 }
4016
4017 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
4018};
4019} // namespace
4020
4021// Look to see if we have [[ by looking ahead, if
4022// its not then rewind to the original position.
4023bool UnwrappedLineParser::tryToParseSimpleAttribute() {
4024 ScopedTokenPosition AutoPosition(Tokens);
4025 FormatToken *Tok = Tokens->getNextToken();
4026 // We already read the first [ check for the second.
4027 if (Tok->isNot(tok::l_square))
4028 return false;
4029 // Double check that the attribute is just something
4030 // fairly simple.
4031 while (Tok->isNot(tok::eof)) {
4032 if (Tok->is(tok::r_square))
4033 break;
4034 Tok = Tokens->getNextToken();
4035 }
4036 if (Tok->is(tok::eof))
4037 return false;
4038 Tok = Tokens->getNextToken();
4039 if (Tok->isNot(tok::r_square))
4040 return false;
4041 Tok = Tokens->getNextToken();
4042 if (Tok->is(tok::semi))
4043 return false;
4044 return true;
4045}
4046
4047void UnwrappedLineParser::parseJavaEnumBody() {
4048 assert(FormatTok->is(tok::l_brace));
4049 const FormatToken *OpeningBrace = FormatTok;
4050
4051 // Determine whether the enum is simple, i.e. does not have a semicolon or
4052 // constants with class bodies. Simple enums can be formatted like braced
4053 // lists, contracted to a single line, etc.
4054 unsigned StoredPosition = Tokens->getPosition();
4055 bool IsSimple = true;
4056 FormatToken *Tok = Tokens->getNextToken();
4057 while (Tok->isNot(tok::eof)) {
4058 if (Tok->is(tok::r_brace))
4059 break;
4060 if (Tok->isOneOf(tok::l_brace, tok::semi)) {
4061 IsSimple = false;
4062 break;
4063 }
4064 // FIXME: This will also mark enums with braces in the arguments to enum
4065 // constants as "not simple". This is probably fine in practice, though.
4066 Tok = Tokens->getNextToken();
4067 }
4068 FormatTok = Tokens->setPosition(StoredPosition);
4069
4070 if (IsSimple) {
4071 nextToken();
4072 parseBracedList();
4073 addUnwrappedLine();
4074 return;
4075 }
4076
4077 // Parse the body of a more complex enum.
4078 // First add a line for everything up to the "{".
4079 nextToken();
4080 addUnwrappedLine();
4081 ++Line->Level;
4082
4083 // Parse the enum constants.
4084 while (!eof()) {
4085 if (FormatTok->is(tok::l_brace)) {
4086 // Parse the constant's class body.
4087 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
4088 /*MunchSemi=*/false);
4089 } else if (FormatTok->is(tok::l_paren)) {
4090 parseParens();
4091 } else if (FormatTok->is(tok::comma)) {
4092 nextToken();
4093 addUnwrappedLine();
4094 } else if (FormatTok->is(tok::semi)) {
4095 nextToken();
4096 addUnwrappedLine();
4097 break;
4098 } else if (FormatTok->is(tok::r_brace)) {
4099 addUnwrappedLine();
4100 break;
4101 } else {
4102 nextToken();
4103 }
4104 }
4105
4106 // Parse the class body after the enum's ";" if any.
4107 parseLevel(OpeningBrace);
4108 nextToken();
4109 --Line->Level;
4110 addUnwrappedLine();
4111}
4112
4113void UnwrappedLineParser::parseRecord(bool ParseAsExpr, bool IsJavaRecord) {
4114 assert(!IsJavaRecord || FormatTok->is(Keywords.kw_record));
4115 const FormatToken &InitialToken = *FormatTok;
4116 nextToken();
4117
4118 FormatToken *ClassName =
4119 IsJavaRecord && FormatTok->is(tok::identifier) ? FormatTok : nullptr;
4120 bool IsDerived = false;
4121 auto IsNonMacroIdentifier = [](const FormatToken *Tok) {
4122 return Tok->is(tok::identifier) && Tok->TokenText != Tok->TokenText.upper();
4123 };
4124 // JavaScript/TypeScript supports anonymous classes like:
4125 // a = class extends foo { }
4126 bool JSPastExtendsOrImplements = false;
4127 // The actual identifier can be a nested name specifier, and in macros
4128 // it is often token-pasted.
4129 // An [[attribute]] can be before the identifier.
4130 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
4131 tok::kw_alignas, tok::l_square) ||
4132 FormatTok->isAttribute() ||
4133 ((Style.isJava() || Style.isJavaScript()) &&
4134 FormatTok->isOneOf(tok::period, tok::comma))) {
4135 if (Style.isJavaScript() &&
4136 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
4137 JSPastExtendsOrImplements = true;
4138 // JavaScript/TypeScript supports inline object types in
4139 // extends/implements positions:
4140 // class Foo implements {bar: number} { }
4141 nextToken();
4142 if (FormatTok->is(tok::l_brace)) {
4143 tryToParseBracedList();
4144 continue;
4145 }
4146 }
4147 if (FormatTok->is(tok::l_square) && handleCppAttributes())
4148 continue;
4149 auto *Previous = FormatTok;
4150 nextToken();
4151 switch (FormatTok->Tok.getKind()) {
4152 case tok::l_paren:
4153 // We can have macros in between 'class' and the class name.
4154 if (IsJavaRecord || !IsNonMacroIdentifier(Previous) ||
4155 // e.g. `struct macro(a) S { int i; };`
4156 Previous->Previous == &InitialToken) {
4157 parseParens();
4158 }
4159 break;
4160 case tok::coloncolon:
4161 case tok::hashhash:
4162 break;
4163 default:
4164 if (JSPastExtendsOrImplements || ClassName ||
4165 Previous->isNot(tok::identifier) || Previous->is(TT_AttributeMacro)) {
4166 break;
4167 }
4168 if (const auto Text = Previous->TokenText;
4169 Text.size() == 1 || Text != Text.upper()) {
4170 ClassName = Previous;
4171 }
4172 }
4173 }
4174
4175 auto IsListInitialization = [&] {
4176 if (!ClassName || IsDerived || JSPastExtendsOrImplements)
4177 return false;
4178 assert(FormatTok->is(tok::l_brace));
4179 const auto *Prev = FormatTok->getPreviousNonComment();
4180 assert(Prev);
4181 return Prev != ClassName && Prev->is(tok::identifier) &&
4182 Prev->isNot(Keywords.kw_final) && tryToParseBracedList();
4183 };
4184
4185 if (FormatTok->isOneOf(tok::colon, tok::less)) {
4186 int AngleNestingLevel = 0;
4187 do {
4188 if (FormatTok->is(tok::less))
4189 ++AngleNestingLevel;
4190 else if (FormatTok->is(tok::greater))
4191 --AngleNestingLevel;
4192
4193 if (AngleNestingLevel == 0) {
4194 if (FormatTok->is(tok::colon)) {
4195 IsDerived = true;
4196 } else if (!IsDerived && FormatTok->is(tok::identifier) &&
4197 FormatTok->Previous->is(tok::coloncolon)) {
4198 ClassName = FormatTok;
4199 } else if (FormatTok->is(tok::l_paren) &&
4200 IsNonMacroIdentifier(FormatTok->Previous)) {
4201 break;
4202 }
4203 }
4204 if (FormatTok->is(tok::l_brace)) {
4205 if (AngleNestingLevel == 0 && IsListInitialization())
4206 return;
4207 calculateBraceTypes(/*ExpectClassBody=*/true);
4208 if (!tryToParseBracedList())
4209 break;
4210 }
4211 if (FormatTok->is(tok::l_square)) {
4212 FormatToken *Previous = FormatTok->Previous;
4213 if (!Previous || (Previous->isNot(tok::r_paren) &&
4214 !Previous->isTypeOrIdentifier(LangOpts))) {
4215 // Don't try parsing a lambda if we had a closing parenthesis before,
4216 // it was probably a pointer to an array: int (*)[].
4217 if (!tryToParseLambda())
4218 continue;
4219 } else {
4220 parseSquare();
4221 continue;
4222 }
4223 }
4224 if (FormatTok->is(tok::semi))
4225 return;
4226 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
4227 addUnwrappedLine();
4228 nextToken();
4229 parseCSharpGenericTypeConstraint();
4230 break;
4231 }
4232 nextToken();
4233 } while (!eof());
4234 }
4235
4236 auto GetBraceTypes =
4237 [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> {
4238 switch (RecordTok.Tok.getKind()) {
4239 case tok::kw_class:
4240 return {TT_ClassLBrace, TT_ClassRBrace};
4241 case tok::kw_struct:
4242 return {TT_StructLBrace, TT_StructRBrace};
4243 case tok::kw_union:
4244 return {TT_UnionLBrace, TT_UnionRBrace};
4245 default:
4246 // Useful for e.g. interface.
4247 return {TT_RecordLBrace, TT_RecordRBrace};
4248 }
4249 };
4250 if (FormatTok->is(tok::l_brace)) {
4251 if (IsListInitialization())
4252 return;
4253 if (ClassName)
4254 ClassName->setFinalizedType(TT_ClassHeadName);
4255 auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken);
4256 FormatTok->setFinalizedType(OpenBraceType);
4257 if (ParseAsExpr) {
4258 parseChildBlock();
4259 } else {
4260 if (ShouldBreakBeforeBrace(Style, InitialToken,
4261 Tokens->peekNextToken()->is(tok::r_brace),
4262 IsJavaRecord)) {
4263 addUnwrappedLine();
4264 }
4265
4266 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
4267 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
4268 }
4269 setPreviousRBraceType(ClosingBraceType);
4270 }
4271 // There is no addUnwrappedLine() here so that we fall through to parsing a
4272 // structural element afterwards. Thus, in "class A {} n, m;",
4273 // "} n, m;" will end up in one unwrapped line.
4274}
4275
4276void UnwrappedLineParser::parseObjCMethod() {
4277 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
4278 "'(' or identifier expected.");
4279 do {
4280 if (FormatTok->is(tok::semi)) {
4281 nextToken();
4282 addUnwrappedLine();
4283 return;
4284 } else if (FormatTok->is(tok::l_brace)) {
4285 if (Style.BraceWrapping.AfterFunction)
4286 addUnwrappedLine();
4287 parseBlock();
4288 addUnwrappedLine();
4289 return;
4290 } else {
4291 nextToken();
4292 }
4293 } while (!eof());
4294}
4295
4296void UnwrappedLineParser::parseObjCProtocolList() {
4297 assert(FormatTok->is(tok::less) && "'<' expected.");
4298 do {
4299 nextToken();
4300 // Early exit in case someone forgot a close angle.
4301 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::objc_end))
4302 return;
4303 } while (!eof() && FormatTok->isNot(tok::greater));
4304 nextToken(); // Skip '>'.
4305}
4306
4307void UnwrappedLineParser::parseObjCUntilAtEnd() {
4308 do {
4309 if (FormatTok->is(tok::objc_end)) {
4310 nextToken();
4311 addUnwrappedLine();
4312 break;
4313 }
4314 if (FormatTok->is(tok::l_brace)) {
4315 parseBlock();
4316 // In ObjC interfaces, nothing should be following the "}".
4317 addUnwrappedLine();
4318 } else if (FormatTok->is(tok::r_brace)) {
4319 // Ignore stray "}". parseStructuralElement doesn't consume them.
4320 nextToken();
4321 addUnwrappedLine();
4322 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
4323 nextToken();
4324 if (FormatTok->isOneOf(tok::l_paren, tok::identifier))
4325 parseObjCMethod();
4326 } else {
4327 parseStructuralElement();
4328 }
4329 } while (!eof());
4330}
4331
4332void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4333 assert(FormatTok->isOneOf(tok::objc_interface, tok::objc_implementation));
4334 nextToken();
4335 nextToken(); // interface name
4336
4337 // @interface can be followed by a lightweight generic
4338 // specialization list, then either a base class or a category.
4339 if (FormatTok->is(tok::less))
4340 parseObjCLightweightGenerics();
4341 if (FormatTok->is(tok::colon)) {
4342 nextToken();
4343 nextToken(); // base class name
4344 // The base class can also have lightweight generics applied to it.
4345 if (FormatTok->is(tok::less))
4346 parseObjCLightweightGenerics();
4347 } else if (FormatTok->is(tok::l_paren)) {
4348 // Skip category, if present.
4349 parseParens();
4350 }
4351
4352 if (FormatTok->is(tok::less))
4353 parseObjCProtocolList();
4354
4355 if (FormatTok->is(tok::l_brace)) {
4356 if (Style.BraceWrapping.AfterObjCDeclaration)
4357 addUnwrappedLine();
4358 parseBlock(/*MustBeDeclaration=*/true);
4359 }
4360
4361 // With instance variables, this puts '}' on its own line. Without instance
4362 // variables, this ends the @interface line.
4363 addUnwrappedLine();
4364
4365 parseObjCUntilAtEnd();
4366}
4367
4368void UnwrappedLineParser::parseObjCLightweightGenerics() {
4369 assert(FormatTok->is(tok::less));
4370 // Unlike protocol lists, generic parameterizations support
4371 // nested angles:
4372 //
4373 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4374 // NSObject <NSCopying, NSSecureCoding>
4375 //
4376 // so we need to count how many open angles we have left.
4377 unsigned NumOpenAngles = 1;
4378 do {
4379 nextToken();
4380 // Early exit in case someone forgot a close angle.
4381 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::objc_end))
4382 break;
4383 if (FormatTok->is(tok::less)) {
4384 ++NumOpenAngles;
4385 } else if (FormatTok->is(tok::greater)) {
4386 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4387 --NumOpenAngles;
4388 }
4389 } while (!eof() && NumOpenAngles != 0);
4390 nextToken(); // Skip '>'.
4391}
4392
4393// Returns true for the declaration/definition form of @protocol,
4394// false for the expression form.
4395bool UnwrappedLineParser::parseObjCProtocol() {
4396 assert(FormatTok->is(tok::objc_protocol));
4397 nextToken();
4398
4399 if (FormatTok->is(tok::l_paren)) {
4400 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4401 return false;
4402 }
4403
4404 // The definition/declaration form,
4405 // @protocol Foo
4406 // - (int)someMethod;
4407 // @end
4408
4409 nextToken(); // protocol name
4410
4411 if (FormatTok->is(tok::less))
4412 parseObjCProtocolList();
4413
4414 // Check for protocol declaration.
4415 if (FormatTok->is(tok::semi)) {
4416 nextToken();
4417 addUnwrappedLine();
4418 return true;
4419 }
4420
4421 addUnwrappedLine();
4422 parseObjCUntilAtEnd();
4423 return true;
4424}
4425
4426void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4427 bool IsImport = FormatTok->is(Keywords.kw_import);
4428 assert(IsImport || FormatTok->is(tok::kw_export));
4429 nextToken();
4430
4431 // Consume the "default" in "export default class/function".
4432 if (FormatTok->is(tok::kw_default))
4433 nextToken();
4434
4435 // Consume "async function", "function" and "default function", so that these
4436 // get parsed as free-standing JS functions, i.e. do not require a trailing
4437 // semicolon.
4438 if (FormatTok->is(Keywords.kw_async))
4439 nextToken();
4440 if (FormatTok->is(Keywords.kw_function)) {
4441 nextToken();
4442 return;
4443 }
4444
4445 // For imports, `export *`, `export {...}`, consume the rest of the line up
4446 // to the terminating `;`. For everything else, just return and continue
4447 // parsing the structural element, i.e. the declaration or expression for
4448 // `export default`.
4449 if (!IsImport && FormatTok->isNoneOf(tok::l_brace, tok::star) &&
4450 !FormatTok->isStringLiteral() &&
4451 !(FormatTok->is(Keywords.kw_type) &&
4452 Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) {
4453 return;
4454 }
4455
4456 while (!eof()) {
4457 if (FormatTok->is(tok::semi))
4458 return;
4459 if (Line->Tokens.empty()) {
4460 // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4461 // import statement should terminate.
4462 return;
4463 }
4464 if (FormatTok->is(tok::l_brace)) {
4465 FormatTok->setBlockKind(BK_Block);
4466 nextToken();
4467 parseBracedList();
4468 } else {
4469 nextToken();
4470 }
4471 }
4472}
4473
4474void UnwrappedLineParser::parseStatementMacro() {
4475 nextToken();
4476 if (FormatTok->is(tok::l_paren))
4477 parseParens();
4478 if (FormatTok->is(tok::semi))
4479 nextToken();
4480 addUnwrappedLine();
4481}
4482
4483void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4484 // consume things like a::`b.c[d:e] or a::*
4485 while (true) {
4486 if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar,
4487 tok::coloncolon, tok::hash) ||
4488 Keywords.isVerilogIdentifier(*FormatTok)) {
4489 nextToken();
4490 } else if (FormatTok->is(tok::l_square)) {
4491 parseSquare();
4492 } else {
4493 break;
4494 }
4495 }
4496}
4497
4498void UnwrappedLineParser::parseVerilogSensitivityList() {
4499 if (FormatTok->isNot(tok::at))
4500 return;
4501 nextToken();
4502 // A block event expression has 2 at signs.
4503 if (FormatTok->is(tok::at))
4504 nextToken();
4505 switch (FormatTok->Tok.getKind()) {
4506 case tok::star:
4507 nextToken();
4508 break;
4509 case tok::l_paren:
4510 parseParens();
4511 break;
4512 default:
4513 parseVerilogHierarchyIdentifier();
4514 break;
4515 }
4516}
4517
4518unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4519 unsigned AddLevels = 0;
4520
4521 if (FormatTok->is(Keywords.kw_clocking)) {
4522 nextToken();
4523 if (Keywords.isVerilogIdentifier(*FormatTok))
4524 nextToken();
4525 parseVerilogSensitivityList();
4526 if (FormatTok->is(tok::semi))
4527 nextToken();
4528 } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex,
4529 Keywords.kw_casez, Keywords.kw_randcase,
4530 Keywords.kw_randsequence)) {
4531 if (Style.IndentCaseLabels)
4532 AddLevels++;
4533 nextToken();
4534 if (FormatTok->is(tok::l_paren)) {
4535 FormatTok->setFinalizedType(TT_ConditionLParen);
4536 parseParens();
4537 }
4538 if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches))
4539 nextToken();
4540 // The case header has no semicolon.
4541 } else {
4542 // "module" etc.
4543 nextToken();
4544 // all the words like the name of the module and specifiers like
4545 // "automatic" and the width of function return type
4546 while (true) {
4547 if (FormatTok->is(tok::l_square)) {
4548 auto Prev = FormatTok->getPreviousNonComment();
4549 if (Prev && Keywords.isVerilogIdentifier(*Prev))
4550 Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
4551 parseSquare();
4552 } else if (Keywords.isVerilogIdentifier(*FormatTok) ||
4553 FormatTok->isOneOf(tok::hash, tok::hashhash, tok::coloncolon,
4554 Keywords.kw_automatic, tok::kw_static)) {
4555 nextToken();
4556 } else {
4557 break;
4558 }
4559 }
4560
4561 auto NewLine = [this]() {
4562 addUnwrappedLine();
4563 Line->IsContinuation = true;
4564 };
4565
4566 // package imports
4567 while (FormatTok->is(Keywords.kw_import)) {
4568 NewLine();
4569 nextToken();
4570 parseVerilogHierarchyIdentifier();
4571 if (FormatTok->is(tok::semi))
4572 nextToken();
4573 }
4574
4575 // parameters and ports
4576 if (FormatTok->is(Keywords.kw_verilogHash)) {
4577 NewLine();
4578 nextToken();
4579 if (FormatTok->is(tok::l_paren)) {
4580 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4581 parseParens();
4582 }
4583 }
4584 if (FormatTok->is(tok::l_paren)) {
4585 NewLine();
4586 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4587 parseParens();
4588 }
4589
4590 // extends and implements
4591 if (FormatTok->is(Keywords.kw_extends)) {
4592 NewLine();
4593 nextToken();
4594 parseVerilogHierarchyIdentifier();
4595 if (FormatTok->is(tok::l_paren))
4596 parseParens();
4597 }
4598 if (FormatTok->is(Keywords.kw_implements)) {
4599 NewLine();
4600 do {
4601 nextToken();
4602 parseVerilogHierarchyIdentifier();
4603 } while (FormatTok->is(tok::comma));
4604 }
4605
4606 // Coverage event for cover groups.
4607 if (FormatTok->is(tok::at)) {
4608 NewLine();
4609 parseVerilogSensitivityList();
4610 }
4611
4612 if (FormatTok->is(tok::semi))
4613 nextToken(/*LevelDifference=*/1);
4614 addUnwrappedLine();
4615 }
4616
4617 return AddLevels;
4618}
4619
4620void UnwrappedLineParser::parseVerilogTable() {
4621 assert(FormatTok->is(Keywords.kw_table));
4622 nextToken(/*LevelDifference=*/1);
4623 addUnwrappedLine();
4624
4625 auto InitialLevel = Line->Level++;
4626 while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) {
4627 FormatToken *Tok = FormatTok;
4628 nextToken();
4629 if (Tok->is(tok::semi))
4630 addUnwrappedLine();
4631 else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus))
4632 Tok->setFinalizedType(TT_VerilogTableItem);
4633 }
4634 Line->Level = InitialLevel;
4635 nextToken(/*LevelDifference=*/-1);
4636 addUnwrappedLine();
4637}
4638
4639void UnwrappedLineParser::parseVerilogCaseLabel() {
4640 // The label will get unindented in AnnotatingParser. If there are no leading
4641 // spaces, indent the rest here so that things inside the block will be
4642 // indented relative to things outside. We don't use parseLabel because we
4643 // don't know whether this colon is a label or a ternary expression at this
4644 // point.
4645 auto OrigLevel = Line->Level;
4646 auto FirstLine = CurrentLines->size();
4647 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4648 ++Line->Level;
4649 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok))
4650 --Line->Level;
4651 parseStructuralElement();
4652 // Restore the indentation in both the new line and the line that has the
4653 // label.
4654 if (CurrentLines->size() > FirstLine)
4655 (*CurrentLines)[FirstLine].Level = OrigLevel;
4656 Line->Level = OrigLevel;
4657}
4658
4659void UnwrappedLineParser::parseVerilogExtern() {
4660 assert(
4661 FormatTok->isOneOf(tok::kw_extern, tok::kw_export, Keywords.kw_import));
4662 nextToken();
4663 // "DPI-C"
4664 if (FormatTok->is(tok::string_literal))
4665 nextToken();
4666 skipVerilogQualifiers();
4667 if (Keywords.isVerilogIdentifier(*FormatTok))
4668 nextToken();
4669 if (FormatTok->is(tok::equal))
4670 nextToken();
4671 if (Keywords.isVerilogHierarchy(*FormatTok))
4672 parseVerilogHierarchyHeader();
4673}
4674
4675void UnwrappedLineParser::skipVerilogQualifiers() {
4676 while (FormatTok->isOneOf(tok::kw_protected, tok::kw_virtual, tok::kw_static,
4677 Keywords.kw_rand, Keywords.kw_context,
4678 Keywords.kw_pure, Keywords.kw_randc,
4679 Keywords.kw_local)) {
4680 nextToken();
4681 }
4682}
4683
4684bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4685 for (const auto &N : Line.Tokens) {
4686 if (N.Tok->MacroCtx)
4687 return true;
4688 for (const UnwrappedLine &Child : N.Children)
4689 if (containsExpansion(Child))
4690 return true;
4691 }
4692 return false;
4693}
4694
4695void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4696 if (Line->Tokens.empty())
4697 return;
4698 LLVM_DEBUG({
4699 if (!parsingPPDirective()) {
4700 llvm::dbgs() << "Adding unwrapped line:\n";
4701 printDebugInfo(*Line);
4702 }
4703 });
4704
4705 // If this line closes a block when in Whitesmiths mode, remember that
4706 // information so that the level can be decreased after the line is added.
4707 // This has to happen after the addition of the line since the line itself
4708 // needs to be indented.
4709 bool ClosesWhitesmithsBlock =
4710 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4711 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
4712
4713 // If the current line was expanded from a macro call, we use it to
4714 // reconstruct an unwrapped line from the structure of the expanded unwrapped
4715 // line and the unexpanded token stream.
4716 if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) {
4717 if (!Reconstruct)
4718 Reconstruct.emplace(Line->Level, Unexpanded);
4719 Reconstruct->addLine(*Line);
4720
4721 // While the reconstructed unexpanded lines are stored in the normal
4722 // flow of lines, the expanded lines are stored on the side to be analyzed
4723 // in an extra step.
4724 CurrentExpandedLines.push_back(std::move(*Line));
4725
4726 if (Reconstruct->finished()) {
4727 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
4728 assert(!Reconstructed.Tokens.empty() &&
4729 "Reconstructed must at least contain the macro identifier.");
4730 assert(!parsingPPDirective());
4731 LLVM_DEBUG({
4732 llvm::dbgs() << "Adding unexpanded line:\n";
4733 printDebugInfo(Reconstructed);
4734 });
4735 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
4736 Lines.push_back(std::move(Reconstructed));
4737 CurrentExpandedLines.clear();
4738 Reconstruct.reset();
4739 }
4740 } else {
4741 // At the top level we only get here when no unexpansion is going on, or
4742 // when conditional formatting led to unfinished macro reconstructions.
4743 assert(!Reconstruct || (CurrentLines != &Lines) || !PPStack.empty());
4744 CurrentLines->push_back(std::move(*Line));
4745 }
4746 Line->Tokens.clear();
4747 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4748 Line->FirstStartColumn = 0;
4749 Line->IsContinuation = false;
4750 Line->SeenDecltypeAuto = false;
4751 Line->IsModuleOrImportDecl = false;
4752
4753 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4754 --Line->Level;
4755 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
4756 CurrentLines->append(
4757 std::make_move_iterator(PreprocessorDirectives.begin()),
4758 std::make_move_iterator(PreprocessorDirectives.end()));
4759 PreprocessorDirectives.clear();
4760 }
4761 // Disconnect the current token from the last token on the previous line.
4762 FormatTok->Previous = nullptr;
4763}
4764
4765bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
4766
4767bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4768 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4769 FormatTok.NewlinesBefore > 0;
4770}
4771
4772// Checks if \p FormatTok is a line comment that continues the line comment
4773// section on \p Line.
4774static bool
4776 const UnwrappedLine &Line, const FormatStyle &Style,
4777 const llvm::Regex &CommentPragmasRegex) {
4778 if (Line.Tokens.empty() || Style.ReflowComments != FormatStyle::RCS_Always)
4779 return false;
4780
4781 StringRef IndentContent = FormatTok.TokenText;
4782 if (FormatTok.TokenText.starts_with("//") ||
4783 FormatTok.TokenText.starts_with("/*")) {
4784 IndentContent = FormatTok.TokenText.substr(2);
4785 }
4786 if (CommentPragmasRegex.match(IndentContent))
4787 return false;
4788
4789 // If Line starts with a line comment, then FormatTok continues the comment
4790 // section if its original column is greater or equal to the original start
4791 // column of the line.
4792 //
4793 // Define the min column token of a line as follows: if a line ends in '{' or
4794 // contains a '{' followed by a line comment, then the min column token is
4795 // that '{'. Otherwise, the min column token of the line is the first token of
4796 // the line.
4797 //
4798 // If Line starts with a token other than a line comment, then FormatTok
4799 // continues the comment section if its original column is greater than the
4800 // original start column of the min column token of the line.
4801 //
4802 // For example, the second line comment continues the first in these cases:
4803 //
4804 // // first line
4805 // // second line
4806 //
4807 // and:
4808 //
4809 // // first line
4810 // // second line
4811 //
4812 // and:
4813 //
4814 // int i; // first line
4815 // // second line
4816 //
4817 // and:
4818 //
4819 // do { // first line
4820 // // second line
4821 // int i;
4822 // } while (true);
4823 //
4824 // and:
4825 //
4826 // enum {
4827 // a, // first line
4828 // // second line
4829 // b
4830 // };
4831 //
4832 // The second line comment doesn't continue the first in these cases:
4833 //
4834 // // first line
4835 // // second line
4836 //
4837 // and:
4838 //
4839 // int i; // first line
4840 // // second line
4841 //
4842 // and:
4843 //
4844 // do { // first line
4845 // // second line
4846 // int i;
4847 // } while (true);
4848 //
4849 // and:
4850 //
4851 // enum {
4852 // a, // first line
4853 // // second line
4854 // };
4855 const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4856
4857 // Scan for '{//'. If found, use the column of '{' as a min column for line
4858 // comment section continuation.
4859 const FormatToken *PreviousToken = nullptr;
4860 for (const UnwrappedLineNode &Node : Line.Tokens) {
4861 if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4862 isLineComment(*Node.Tok)) {
4863 MinColumnToken = PreviousToken;
4864 break;
4865 }
4866 PreviousToken = Node.Tok;
4867
4868 // Grab the last newline preceding a token in this unwrapped line.
4869 if (Node.Tok->NewlinesBefore > 0)
4870 MinColumnToken = Node.Tok;
4871 }
4872 if (PreviousToken && PreviousToken->is(tok::l_brace))
4873 MinColumnToken = PreviousToken;
4874
4875 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4876 MinColumnToken);
4877}
4878
4879void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4880 bool JustComments = Line->Tokens.empty();
4881 for (FormatToken *Tok : CommentsBeforeNextToken) {
4882 // Line comments that belong to the same line comment section are put on the
4883 // same line since later we might want to reflow content between them.
4884 // Additional fine-grained breaking of line comment sections is controlled
4885 // by the class BreakableLineCommentSection in case it is desirable to keep
4886 // several line comment sections in the same unwrapped line.
4887 //
4888 // FIXME: Consider putting separate line comment sections as children to the
4889 // unwrapped line instead.
4890 Tok->ContinuesLineCommentSection =
4891 continuesLineCommentSection(*Tok, *Line, Style, CommentPragmasRegex);
4892 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4893 addUnwrappedLine();
4894 pushToken(Tok);
4895 }
4896 if (NewlineBeforeNext && JustComments)
4897 addUnwrappedLine();
4898 CommentsBeforeNextToken.clear();
4899}
4900
4901void UnwrappedLineParser::nextToken(int LevelDifference) {
4902 if (eof())
4903 return;
4904 flushComments(isOnNewLine(*FormatTok));
4905 pushToken(FormatTok);
4906 FormatToken *Previous = FormatTok;
4907 if (!Style.isJavaScript())
4908 readToken(LevelDifference);
4909 else
4910 readTokenWithJavaScriptASI();
4911 FormatTok->Previous = Previous;
4912 if (Style.isVerilog()) {
4913 // Blocks in Verilog can have `begin` and `end` instead of braces. For
4914 // keywords like `begin`, we can't treat them the same as left braces
4915 // because some contexts require one of them. For example structs use
4916 // braces and if blocks use keywords, and a left brace can occur in an if
4917 // statement, but it is not a block. For keywords like `end`, we simply
4918 // treat them the same as right braces.
4919 if (Keywords.isVerilogEnd(*FormatTok))
4920 FormatTok->Tok.setKind(tok::r_brace);
4921 }
4922}
4923
4924void UnwrappedLineParser::distributeComments(
4925 const ArrayRef<FormatToken *> &Comments, const FormatToken *NextTok) {
4926 // Whether or not a line comment token continues a line is controlled by
4927 // the method continuesLineCommentSection, with the following caveat:
4928 //
4929 // Define a trail of Comments to be a nonempty proper postfix of Comments such
4930 // that each comment line from the trail is aligned with the next token, if
4931 // the next token exists. If a trail exists, the beginning of the maximal
4932 // trail is marked as a start of a new comment section.
4933 //
4934 // For example in this code:
4935 //
4936 // int a; // line about a
4937 // // line 1 about b
4938 // // line 2 about b
4939 // int b;
4940 //
4941 // the two lines about b form a maximal trail, so there are two sections, the
4942 // first one consisting of the single comment "// line about a" and the
4943 // second one consisting of the next two comments.
4944 if (Comments.empty())
4945 return;
4946 bool ShouldPushCommentsInCurrentLine = true;
4947 bool HasTrailAlignedWithNextToken = false;
4948 unsigned StartOfTrailAlignedWithNextToken = 0;
4949 if (NextTok) {
4950 // We are skipping the first element intentionally.
4951 for (unsigned i = Comments.size() - 1; i > 0; --i) {
4952 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4953 HasTrailAlignedWithNextToken = true;
4954 StartOfTrailAlignedWithNextToken = i;
4955 }
4956 }
4957 }
4958 for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4959 FormatToken *FormatTok = Comments[i];
4960 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4961 FormatTok->ContinuesLineCommentSection = false;
4962 } else {
4963 FormatTok->ContinuesLineCommentSection = continuesLineCommentSection(
4964 *FormatTok, *Line, Style, CommentPragmasRegex);
4965 }
4966 if (!FormatTok->ContinuesLineCommentSection &&
4967 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4968 ShouldPushCommentsInCurrentLine = false;
4969 }
4970 if (ShouldPushCommentsInCurrentLine)
4971 pushToken(FormatTok);
4972 else
4973 CommentsBeforeNextToken.push_back(FormatTok);
4974 }
4975}
4976
4977void UnwrappedLineParser::readToken(int LevelDifference) {
4979 bool PreviousWasComment = false;
4980 bool FirstNonCommentOnLine = false;
4981 do {
4982 FormatTok = Tokens->getNextToken();
4983 assert(FormatTok);
4984 while (FormatTok->isOneOf(TT_ConflictStart, TT_ConflictEnd,
4985 TT_ConflictAlternative)) {
4986 if (FormatTok->is(TT_ConflictStart))
4987 conditionalCompilationStart(/*Unreachable=*/false);
4988 else if (FormatTok->is(TT_ConflictAlternative))
4989 conditionalCompilationAlternative();
4990 else if (FormatTok->is(TT_ConflictEnd))
4991 conditionalCompilationEnd();
4992 FormatTok = Tokens->getNextToken();
4993 FormatTok->MustBreakBefore = true;
4994 FormatTok->MustBreakBeforeFinalized = true;
4995 }
4996
4997 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4998 const FormatToken &Tok,
4999 bool PreviousWasComment) {
5000 auto IsFirstOnLine = [](const FormatToken &Tok) {
5001 return Tok.HasUnescapedNewline || Tok.IsFirst;
5002 };
5003
5004 // Consider preprocessor directives preceded by block comments as first
5005 // on line.
5006 if (PreviousWasComment)
5007 return FirstNonCommentOnLine || IsFirstOnLine(Tok);
5008 return IsFirstOnLine(Tok);
5009 };
5010
5011 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
5012 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
5013 PreviousWasComment = FormatTok->is(tok::comment);
5014
5015 while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
5016 FirstNonCommentOnLine) {
5017 // In Verilog, the backtick is used for macro invocations. In TableGen,
5018 // the single hash is used for the paste operator.
5019 const auto *Next = Tokens->peekNextToken();
5020 if ((Style.isVerilog() && !Keywords.isVerilogPPDirective(*Next)) ||
5021 (Style.isTableGen() &&
5022 Next->isNoneOf(tok::kw_else, tok::pp_define, tok::pp_ifdef,
5023 tok::pp_ifndef, tok::pp_endif))) {
5024 break;
5025 }
5026 distributeComments(Comments, FormatTok);
5027 Comments.clear();
5028 // If there is an unfinished unwrapped line, we flush the preprocessor
5029 // directives only after that unwrapped line was finished later.
5030 bool SwitchToPreprocessorLines = !Line->Tokens.empty();
5031 ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
5032 assert((LevelDifference >= 0 ||
5033 static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
5034 "LevelDifference makes Line->Level negative");
5035 Line->Level += LevelDifference;
5036 // Comments stored before the preprocessor directive need to be output
5037 // before the preprocessor directive, at the same level as the
5038 // preprocessor directive, as we consider them to apply to the directive.
5039 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
5040 PPBranchLevel > 0) {
5041 Line->Level += PPBranchLevel;
5042 }
5043 assert(Line->Level >= Line->UnbracedBodyLevel);
5044 Line->Level -= Line->UnbracedBodyLevel;
5045 flushComments(isOnNewLine(*FormatTok));
5046 const bool IsEndIf = Tokens->peekNextToken()->is(tok::pp_endif);
5047 parsePPDirective();
5048 PreviousWasComment = FormatTok->is(tok::comment);
5049 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
5050 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
5051 // If the #endif of a potential include guard is the last thing in the
5052 // file, then we found an include guard.
5053 if (IsEndIf && IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
5054 getIncludeGuardState(Style.IndentPPDirectives) == IG_Inited &&
5055 (eof() ||
5056 (PreviousWasComment &&
5057 Tokens->peekNextToken(/*SkipComment=*/true)->is(tok::eof)))) {
5058 IncludeGuard = IG_Found;
5059 }
5060 }
5061
5062 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
5063 !Line->InPPDirective) {
5064 continue;
5065 }
5066
5067 if (FormatTok->is(tok::identifier) &&
5068 Macros.defined(FormatTok->TokenText) &&
5069 // FIXME: Allow expanding macros in preprocessor directives.
5070 !Line->InPPDirective) {
5071 FormatToken *ID = FormatTok;
5072 unsigned Position = Tokens->getPosition();
5073
5074 // To correctly parse the code, we need to replace the tokens of the macro
5075 // call with its expansion.
5076 auto PreCall = std::move(Line);
5077 Line.reset(new UnwrappedLine);
5078 bool OldInExpansion = InExpansion;
5079 InExpansion = true;
5080 // We parse the macro call into a new line.
5081 auto Args = parseMacroCall();
5082 InExpansion = OldInExpansion;
5083 assert(Line->Tokens.front().Tok == ID);
5084 // And remember the unexpanded macro call tokens.
5085 auto UnexpandedLine = std::move(Line);
5086 // Reset to the old line.
5087 Line = std::move(PreCall);
5088
5089 LLVM_DEBUG({
5090 llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
5091 if (Args) {
5092 llvm::dbgs() << "(";
5093 for (const auto &Arg : Args.value())
5094 for (const auto &T : Arg)
5095 llvm::dbgs() << T->TokenText << " ";
5096 llvm::dbgs() << ")";
5097 }
5098 llvm::dbgs() << "\n";
5099 });
5100 if (Macros.objectLike(ID->TokenText) && Args &&
5101 !Macros.hasArity(ID->TokenText, Args->size())) {
5102 // The macro is either
5103 // - object-like, but we got arguments, or
5104 // - overloaded to be both object-like and function-like, but none of
5105 // the function-like arities match the number of arguments.
5106 // Thus, expand as object-like macro.
5107 LLVM_DEBUG(llvm::dbgs()
5108 << "Macro \"" << ID->TokenText
5109 << "\" not overloaded for arity " << Args->size()
5110 << "or not function-like, using object-like overload.");
5111 Args.reset();
5112 UnexpandedLine->Tokens.resize(1);
5113 Tokens->setPosition(Position);
5114 nextToken();
5115 assert(!Args && Macros.objectLike(ID->TokenText));
5116 }
5117 if ((!Args && Macros.objectLike(ID->TokenText)) ||
5118 (Args && Macros.hasArity(ID->TokenText, Args->size()))) {
5119 // Next, we insert the expanded tokens in the token stream at the
5120 // current position, and continue parsing.
5121 Unexpanded[ID] = std::move(UnexpandedLine);
5123 Macros.expand(ID, std::move(Args));
5124 if (!Expansion.empty())
5125 FormatTok = Tokens->insertTokens(Expansion);
5126
5127 LLVM_DEBUG({
5128 llvm::dbgs() << "Expanded: ";
5129 for (const auto &T : Expansion)
5130 llvm::dbgs() << T->TokenText << " ";
5131 llvm::dbgs() << "\n";
5132 });
5133 } else {
5134 LLVM_DEBUG({
5135 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
5136 << "\", because it was used ";
5137 if (Args)
5138 llvm::dbgs() << "with " << Args->size();
5139 else
5140 llvm::dbgs() << "without";
5141 llvm::dbgs() << " arguments, which doesn't match any definition.\n";
5142 });
5143 Tokens->setPosition(Position);
5144 FormatTok = ID;
5145 }
5146 }
5147
5148 if (FormatTok->isNot(tok::comment)) {
5149 distributeComments(Comments, FormatTok);
5150 Comments.clear();
5151 return;
5152 }
5153
5154 Comments.push_back(FormatTok);
5155 } while (!eof());
5156
5157 distributeComments(Comments, nullptr);
5158 Comments.clear();
5159}
5160
5161namespace {
5162template <typename Iterator>
5163void pushTokens(Iterator Begin, Iterator End,
5165 for (auto I = Begin; I != End; ++I) {
5166 Into.push_back(I->Tok);
5167 for (const auto &Child : I->Children)
5168 pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
5169 }
5170}
5171} // namespace
5172
5173std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
5174UnwrappedLineParser::parseMacroCall() {
5175 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
5176 assert(Line->Tokens.empty());
5177 nextToken();
5178 if (FormatTok->isNot(tok::l_paren))
5179 return Args;
5180 unsigned Position = Tokens->getPosition();
5181 FormatToken *Tok = FormatTok;
5182 nextToken();
5183 Args.emplace();
5184 auto ArgStart = std::prev(Line->Tokens.end());
5185
5186 int Parens = 0;
5187 do {
5188 switch (FormatTok->Tok.getKind()) {
5189 case tok::l_paren:
5190 ++Parens;
5191 nextToken();
5192 break;
5193 case tok::r_paren: {
5194 if (Parens > 0) {
5195 --Parens;
5196 nextToken();
5197 break;
5198 }
5199 Args->push_back({});
5200 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
5201 nextToken();
5202 return Args;
5203 }
5204 case tok::comma: {
5205 if (Parens > 0) {
5206 nextToken();
5207 break;
5208 }
5209 Args->push_back({});
5210 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
5211 nextToken();
5212 ArgStart = std::prev(Line->Tokens.end());
5213 break;
5214 }
5215 default:
5216 nextToken();
5217 break;
5218 }
5219 } while (!eof());
5220 Line->Tokens.resize(1);
5221 Tokens->setPosition(Position);
5222 FormatTok = Tok;
5223 return {};
5224}
5225
5226void UnwrappedLineParser::pushToken(FormatToken *Tok) {
5227 Line->Tokens.push_back(UnwrappedLineNode(Tok));
5228 if (AtEndOfPPLine) {
5229 auto &Tok = *Line->Tokens.back().Tok;
5230 Tok.MustBreakBefore = true;
5231 Tok.MustBreakBeforeFinalized = true;
5232 Tok.FirstAfterPPLine = true;
5233 AtEndOfPPLine = false;
5234 }
5235}
5236
5237} // end namespace format
5238} // end namespace clang
This file defines the FormatTokenSource interface, which provides a token stream as well as the abili...
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
FormatToken()
Token Tok
The Token.
unsigned OriginalColumn
The original 0-based column of this token, including expanded tabs.
FormatToken * Previous
The previous token in the unwrapped line.
FormatToken * Next
The next token in the unwrapped line.
This file contains the main building blocks of macro support in clang-format.
static bool HasAttribute(const QualType &T)
This file implements a token annotator, i.e.
Defines the clang::TokenKind enum and support functions.
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
Implements an efficient mapping from strings to IdentifierInfo nodes.
Parser - This implements a parser for the C family of languages.
Definition Parser.h:256
This class handles loading and caching of source files into memory.
Token - This structure provides full information about a lexed token.
Definition Token.h:36
IdentifierInfo * getIdentifierInfo() const
Definition Token.h:197
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition Token.h:126
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
Definition Token.h:104
tok::TokenKind getKind() const
Definition Token.h:99
bool isOneOf(Ts... Ks) const
Definition Token.h:105
bool isNot(tok::TokenKind K) const
Definition Token.h:111
CompoundStatementIndenter(UnwrappedLineParser *Parser, const FormatStyle &Style, unsigned &LineLevel)
CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, bool WrapBrace, bool IndentBrace)
ScopedLineState(UnwrappedLineParser &Parser, bool SwitchToPreprocessorLines=false)
Interface for users of the UnwrappedLineParser to receive the parsed lines.
UnwrappedLineParser(SourceManager &SourceMgr, const FormatStyle &Style, const AdditionalKeywords &Keywords, unsigned FirstStartColumn, ArrayRef< FormatToken * > Tokens, UnwrappedLineConsumer &Callback, llvm::SpecificBumpPtrAllocator< FormatToken > &Allocator, IdentifierTable &IdentTable)
static void hash_combine(std::size_t &seed, const T &v)
static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
std::ostream & operator<<(std::ostream &Stream, const UnwrappedLine &Line)
static bool tokenCanStartNewLine(const FormatToken &Tok)
static bool continuesLineCommentSection(const FormatToken &FormatTok, const UnwrappedLine &Line, const FormatStyle &Style, const llvm::Regex &CommentPragmasRegex)
static bool isC78Type(const FormatToken &Tok)
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
static void markOptionalBraces(FormatToken *LeftBrace)
static bool mustBeJSIdent(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
static bool isIIFE(const UnwrappedLine &Line, const AdditionalKeywords &Keywords)
static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, const FormatToken *FuncName)
static bool isGoogScope(const UnwrappedLine &Line)
static FormatToken * getLastNonComment(const UnwrappedLine &Line)
TokenType
Determines the semantic type of a syntactic token, e.g.
static bool ShouldBreakBeforeBrace(const FormatStyle &Style, const FormatToken &InitialToken, bool IsEmptyBlock, bool IsJavaRecord=false)
LangOptions getFormattingLangOpts(const FormatStyle &Style)
Definition Format.cpp:4458
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition TokenKinds.h:25
bool isLiteral(TokenKind K)
Return true if this is a "literal" kind, like a numeric constant, string, etc.
Definition TokenKinds.h:101
The JSON file list parser is used to communicate input to InstallAPI.
bool isLineComment(const FormatToken &FormatTok)
if(T->getSizeExpr()) TRY_TO(TraverseStmt(const_cast< Expr * >(T -> getSizeExpr())))
std::vector< std::string > Macros
A list of macros of the form <definition>=<expansion> .
Definition Format.h:3951
nullptr
This class represents a compute construct, representing a 'Kind' of ‘parallel’, 'serial',...
bool isCpp() const
Definition Format.h:3843
@ Default
Set to the current date and time.
@ Type
The name was classified as a type.
Definition Sema.h:564
bool continuesLineComment(const FormatToken &FormatTok, const FormatToken *Previous, const FormatToken *MinColumnToken)
@ Parens
New-expression has a C++98 paren-delimited initializer.
Definition ExprCXX.h:2249
#define false
Definition stdbool.h:26
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
IdentifierInfo * kw_instanceof
IdentifierInfo * kw_implements
IdentifierInfo * kw_override
IdentifierInfo * kw_await
IdentifierInfo * kw_extends
IdentifierInfo * kw_async
IdentifierInfo * kw_from
IdentifierInfo * kw_abstract
IdentifierInfo * kw_var
IdentifierInfo * kw_interface
IdentifierInfo * kw_function
IdentifierInfo * kw_yield
IdentifierInfo * kw_where
IdentifierInfo * kw_throws
IdentifierInfo * kw_let
IdentifierInfo * kw_import
IdentifierInfo * kw_finally
Represents a complete lambda introducer.
Definition DeclSpec.h:2884
A wrapper around a Token storing information about the whitespace characters preceding it.
bool Optional
Is optional and can be removed.
bool isNot(T Kind) const
StringRef TokenText
The raw text of the token.
bool isNoneOf(Ts... Ks) const
unsigned NewlinesBefore
The number of newlines immediately before the Token.
bool is(tok::TokenKind Kind) const
bool isOneOf(A K1, B K2) const
unsigned IsFirst
Indicates that this is the first token of the file.
FormatToken * MatchingParen
If this is a bracket, this points to the matching one.
FormatToken * Previous
The previous token in the unwrapped line.
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...