clang 17.0.0git
UnwrappedLineParser.cpp
Go to the documentation of this file.
1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains the implementation of the UnwrappedLineParser,
11/// which turns a stream of tokens into UnwrappedLines.
12///
13//===----------------------------------------------------------------------===//
14
15#include "UnwrappedLineParser.h"
16#include "FormatToken.h"
17#include "FormatTokenLexer.h"
18#include "FormatTokenSource.h"
19#include "Macros.h"
20#include "TokenAnnotator.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/StringRef.h"
24#include "llvm/Support/Debug.h"
25#include "llvm/Support/raw_os_ostream.h"
26#include "llvm/Support/raw_ostream.h"
27
28#include <algorithm>
29#include <utility>
30
31#define DEBUG_TYPE "format-parser"
32
33namespace clang {
34namespace format {
35
36namespace {
37
38void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
39 StringRef Prefix = "", bool PrintText = false) {
40 OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
41 << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
42 bool NewLine = false;
43 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
44 E = Line.Tokens.end();
45 I != E; ++I) {
46 if (NewLine) {
47 OS << Prefix;
48 NewLine = false;
49 }
50 OS << I->Tok->Tok.getName() << "["
51 << "T=" << (unsigned)I->Tok->getType()
52 << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
53 << "\"] ";
54 for (SmallVectorImpl<UnwrappedLine>::const_iterator
55 CI = I->Children.begin(),
56 CE = I->Children.end();
57 CI != CE; ++CI) {
58 OS << "\n";
59 printLine(OS, *CI, (Prefix + " ").str());
60 NewLine = true;
61 }
62 }
63 if (!NewLine)
64 OS << "\n";
65}
66
67LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
68 printLine(llvm::dbgs(), Line);
69}
70
71class ScopedDeclarationState {
72public:
73 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
74 bool MustBeDeclaration)
75 : Line(Line), Stack(Stack) {
76 Line.MustBeDeclaration = MustBeDeclaration;
77 Stack.push_back(MustBeDeclaration);
78 }
79 ~ScopedDeclarationState() {
80 Stack.pop_back();
81 if (!Stack.empty())
82 Line.MustBeDeclaration = Stack.back();
83 else
84 Line.MustBeDeclaration = true;
85 }
86
87private:
88 UnwrappedLine &Line;
89 llvm::BitVector &Stack;
90};
91
92} // end anonymous namespace
93
95public:
97 bool SwitchToPreprocessorLines = false)
98 : Parser(Parser), OriginalLines(Parser.CurrentLines) {
99 if (SwitchToPreprocessorLines)
100 Parser.CurrentLines = &Parser.PreprocessorDirectives;
101 else if (!Parser.Line->Tokens.empty())
102 Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
103 PreBlockLine = std::move(Parser.Line);
104 Parser.Line = std::make_unique<UnwrappedLine>();
105 Parser.Line->Level = PreBlockLine->Level;
106 Parser.Line->PPLevel = PreBlockLine->PPLevel;
107 Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
108 Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
109 }
110
112 if (!Parser.Line->Tokens.empty())
113 Parser.addUnwrappedLine();
114 assert(Parser.Line->Tokens.empty());
115 Parser.Line = std::move(PreBlockLine);
116 if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
117 Parser.MustBreakBeforeNextToken = true;
118 Parser.CurrentLines = OriginalLines;
119 }
120
121private:
123
124 std::unique_ptr<UnwrappedLine> PreBlockLine;
125 SmallVectorImpl<UnwrappedLine> *OriginalLines;
126};
127
129public:
131 const FormatStyle &Style, unsigned &LineLevel)
133 Style.BraceWrapping.AfterControlStatement,
134 Style.BraceWrapping.IndentBraces) {}
136 bool WrapBrace, bool IndentBrace)
137 : LineLevel(LineLevel), OldLineLevel(LineLevel) {
138 if (WrapBrace)
139 Parser->addUnwrappedLine();
140 if (IndentBrace)
141 ++LineLevel;
142 }
143 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
144
145private:
146 unsigned &LineLevel;
147 unsigned OldLineLevel;
148};
149
151 SourceManager &SourceMgr, const FormatStyle &Style,
152 const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
154 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
155 IdentifierTable &IdentTable)
156 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
157 CurrentLines(&Lines), Style(Style), Keywords(Keywords),
158 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
159 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
160 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
161 ? IG_Rejected
162 : IG_Inited),
163 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
164 Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}
165
166void UnwrappedLineParser::reset() {
167 PPBranchLevel = -1;
168 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
169 ? IG_Rejected
170 : IG_Inited;
171 IncludeGuardToken = nullptr;
172 Line.reset(new UnwrappedLine);
173 CommentsBeforeNextToken.clear();
174 FormatTok = nullptr;
175 MustBreakBeforeNextToken = false;
176 PreprocessorDirectives.clear();
177 CurrentLines = &Lines;
178 DeclarationScopeStack.clear();
179 NestedTooDeep.clear();
180 PPStack.clear();
181 Line->FirstStartColumn = FirstStartColumn;
182
183 if (!Unexpanded.empty())
184 for (FormatToken *Token : AllTokens)
185 Token->MacroCtx.reset();
186 CurrentExpandedLines.clear();
187 ExpandedLines.clear();
188 Unexpanded.clear();
189 InExpansion = false;
190 Reconstruct.reset();
191}
192
194 IndexedTokenSource TokenSource(AllTokens);
195 Line->FirstStartColumn = FirstStartColumn;
196 do {
197 LLVM_DEBUG(llvm::dbgs() << "----\n");
198 reset();
199 Tokens = &TokenSource;
200 TokenSource.reset();
201
202 readToken();
203 parseFile();
204
205 // If we found an include guard then all preprocessor directives (other than
206 // the guard) are over-indented by one.
207 if (IncludeGuard == IG_Found) {
208 for (auto &Line : Lines)
209 if (Line.InPPDirective && Line.Level > 0)
210 --Line.Level;
211 }
212
213 // Create line with eof token.
214 assert(FormatTok->is(tok::eof));
215 pushToken(FormatTok);
216 addUnwrappedLine();
217
218 // In a first run, format everything with the lines containing macro calls
219 // replaced by the expansion.
220 if (!ExpandedLines.empty()) {
221 LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
222 for (const auto &Line : Lines) {
223 if (!Line.Tokens.empty()) {
224 auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
225 if (it != ExpandedLines.end()) {
226 for (const auto &Expanded : it->second) {
227 LLVM_DEBUG(printDebugInfo(Expanded));
228 Callback.consumeUnwrappedLine(Expanded);
229 }
230 continue;
231 }
232 }
233 LLVM_DEBUG(printDebugInfo(Line));
234 Callback.consumeUnwrappedLine(Line);
235 }
236 Callback.finishRun();
237 }
238
239 LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
240 for (const UnwrappedLine &Line : Lines) {
241 LLVM_DEBUG(printDebugInfo(Line));
242 Callback.consumeUnwrappedLine(Line);
243 }
244 Callback.finishRun();
245 Lines.clear();
246 while (!PPLevelBranchIndex.empty() &&
247 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
248 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
249 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
250 }
251 if (!PPLevelBranchIndex.empty()) {
252 ++PPLevelBranchIndex.back();
253 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
254 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
255 }
256 } while (!PPLevelBranchIndex.empty());
257}
258
259void UnwrappedLineParser::parseFile() {
260 // The top-level context in a file always has declarations, except for pre-
261 // processor directives and JavaScript files.
262 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
263 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
264 MustBeDeclaration);
266 parseBracedList();
267 else
268 parseLevel();
269 // Make sure to format the remaining tokens.
270 //
271 // LK_TextProto is special since its top-level is parsed as the body of a
272 // braced list, which does not necessarily have natural line separators such
273 // as a semicolon. Comments after the last entry that have been determined to
274 // not belong to that line, as in:
275 // key: value
276 // // endfile comment
277 // do not have a chance to be put on a line of their own until this point.
278 // Here we add this newline before end-of-file comments.
279 if (Style.Language == FormatStyle::LK_TextProto &&
280 !CommentsBeforeNextToken.empty()) {
281 addUnwrappedLine();
282 }
283 flushComments(true);
284 addUnwrappedLine();
285}
286
287void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
288 do {
289 switch (FormatTok->Tok.getKind()) {
290 case tok::l_brace:
291 return;
292 default:
293 if (FormatTok->is(Keywords.kw_where)) {
294 addUnwrappedLine();
295 nextToken();
296 parseCSharpGenericTypeConstraint();
297 break;
298 }
299 nextToken();
300 break;
301 }
302 } while (!eof());
303}
304
305void UnwrappedLineParser::parseCSharpAttribute() {
306 int UnpairedSquareBrackets = 1;
307 do {
308 switch (FormatTok->Tok.getKind()) {
309 case tok::r_square:
310 nextToken();
311 --UnpairedSquareBrackets;
312 if (UnpairedSquareBrackets == 0) {
313 addUnwrappedLine();
314 return;
315 }
316 break;
317 case tok::l_square:
318 ++UnpairedSquareBrackets;
319 nextToken();
320 break;
321 default:
322 nextToken();
323 break;
324 }
325 } while (!eof());
326}
327
328bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
329 if (!Lines.empty() && Lines.back().InPPDirective)
330 return true;
331
332 const FormatToken *Previous = Tokens->getPreviousToken();
333 return Previous && Previous->is(tok::comment) &&
334 (Previous->IsMultiline || Previous->NewlinesBefore > 0);
335}
336
337/// \brief Parses a level, that is ???.
338/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level
339/// \param CanContainBracedList If the content can contain (at any level) a
340/// braced list.
341/// \param NextLBracesType The type for left brace found in this level.
342/// \param IfKind The \p if statement kind in the level.
343/// \param IfLeftBrace The left brace of the \p if block in the level.
344/// \returns true if a simple block of if/else/for/while, or false otherwise.
345/// (A simple block has a single statement.)
346bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
347 bool CanContainBracedList,
348 TokenType NextLBracesType,
349 IfStmtKind *IfKind,
350 FormatToken **IfLeftBrace) {
351 auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace
352 ? TT_BracedListLBrace
353 : TT_Unknown;
354 const bool IsPrecededByCommentOrPPDirective =
355 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
356 FormatToken *IfLBrace = nullptr;
357 bool HasDoWhile = false;
358 bool HasLabel = false;
359 unsigned StatementCount = 0;
360 bool SwitchLabelEncountered = false;
361
362 do {
363 if (FormatTok->getType() == TT_AttributeMacro) {
364 nextToken();
365 continue;
366 }
367 tok::TokenKind kind = FormatTok->Tok.getKind();
368 if (FormatTok->getType() == TT_MacroBlockBegin)
369 kind = tok::l_brace;
370 else if (FormatTok->getType() == TT_MacroBlockEnd)
371 kind = tok::r_brace;
372
373 auto ParseDefault = [this, OpeningBrace, NextLevelLBracesType, IfKind,
374 &IfLBrace, &HasDoWhile, &HasLabel, &StatementCount] {
375 parseStructuralElement(!OpeningBrace, NextLevelLBracesType, IfKind,
376 &IfLBrace, HasDoWhile ? nullptr : &HasDoWhile,
377 HasLabel ? nullptr : &HasLabel);
378 ++StatementCount;
379 assert(StatementCount > 0 && "StatementCount overflow!");
380 };
381
382 switch (kind) {
383 case tok::comment:
384 nextToken();
385 addUnwrappedLine();
386 break;
387 case tok::l_brace:
388 if (NextLBracesType != TT_Unknown) {
389 FormatTok->setFinalizedType(NextLBracesType);
390 } else if (FormatTok->Previous &&
391 FormatTok->Previous->ClosesRequiresClause) {
392 // We need the 'default' case here to correctly parse a function
393 // l_brace.
394 ParseDefault();
395 continue;
396 }
397 if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) &&
398 tryToParseBracedList()) {
399 continue;
400 }
401 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
402 /*MunchSemi=*/true, /*KeepBraces=*/true, /*IfKind=*/nullptr,
403 /*UnindentWhitesmithsBraces=*/false, CanContainBracedList,
404 NextLBracesType);
405 ++StatementCount;
406 assert(StatementCount > 0 && "StatementCount overflow!");
407 addUnwrappedLine();
408 break;
409 case tok::r_brace:
410 if (OpeningBrace) {
411 if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
412 !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
413 return false;
414 }
415 if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
416 HasDoWhile || IsPrecededByCommentOrPPDirective ||
417 precededByCommentOrPPDirective()) {
418 return false;
419 }
420 const FormatToken *Next = Tokens->peekNextToken();
421 if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
422 return false;
423 if (IfLeftBrace)
424 *IfLeftBrace = IfLBrace;
425 return true;
426 }
427 nextToken();
428 addUnwrappedLine();
429 break;
430 case tok::kw_default: {
431 unsigned StoredPosition = Tokens->getPosition();
432 FormatToken *Next;
433 do {
434 Next = Tokens->getNextToken();
435 assert(Next);
436 } while (Next->is(tok::comment));
437 FormatTok = Tokens->setPosition(StoredPosition);
438 if (Next->isNot(tok::colon)) {
439 // default not followed by ':' is not a case label; treat it like
440 // an identifier.
441 parseStructuralElement();
442 break;
443 }
444 // Else, if it is 'default:', fall through to the case handling.
445 [[fallthrough]];
446 }
447 case tok::kw_case:
448 if (Style.isProto() || Style.isVerilog() ||
449 (Style.isJavaScript() && Line->MustBeDeclaration)) {
450 // Proto: there are no switch/case statements
451 // Verilog: Case labels don't have this word. We handle case
452 // labels including default in TokenAnnotator.
453 // JavaScript: A 'case: string' style field declaration.
454 ParseDefault();
455 break;
456 }
457 if (!SwitchLabelEncountered &&
458 (Style.IndentCaseLabels ||
459 (Line->InPPDirective && Line->Level == 1))) {
460 ++Line->Level;
461 }
462 SwitchLabelEncountered = true;
463 parseStructuralElement();
464 break;
465 case tok::l_square:
466 if (Style.isCSharp()) {
467 nextToken();
468 parseCSharpAttribute();
469 break;
470 }
471 if (handleCppAttributes())
472 break;
473 [[fallthrough]];
474 default:
475 ParseDefault();
476 break;
477 }
478 } while (!eof());
479
480 return false;
481}
482
483void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
484 // We'll parse forward through the tokens until we hit
485 // a closing brace or eof - note that getNextToken() will
486 // parse macros, so this will magically work inside macro
487 // definitions, too.
488 unsigned StoredPosition = Tokens->getPosition();
489 FormatToken *Tok = FormatTok;
490 const FormatToken *PrevTok = Tok->Previous;
491 // Keep a stack of positions of lbrace tokens. We will
492 // update information about whether an lbrace starts a
493 // braced init list or a different block during the loop.
494 SmallVector<FormatToken *, 8> LBraceStack;
495 assert(Tok->is(tok::l_brace));
496 do {
497 // Get next non-comment token.
498 FormatToken *NextTok;
499 do {
500 NextTok = Tokens->getNextToken();
501 } while (NextTok->is(tok::comment));
502
503 switch (Tok->Tok.getKind()) {
504 case tok::l_brace:
505 if (Style.isJavaScript() && PrevTok) {
506 if (PrevTok->isOneOf(tok::colon, tok::less)) {
507 // A ':' indicates this code is in a type, or a braced list
508 // following a label in an object literal ({a: {b: 1}}).
509 // A '<' could be an object used in a comparison, but that is nonsense
510 // code (can never return true), so more likely it is a generic type
511 // argument (`X<{a: string; b: number}>`).
512 // The code below could be confused by semicolons between the
513 // individual members in a type member list, which would normally
514 // trigger BK_Block. In both cases, this must be parsed as an inline
515 // braced init.
517 } else if (PrevTok->is(tok::r_paren)) {
518 // `) { }` can only occur in function or method declarations in JS.
519 Tok->setBlockKind(BK_Block);
520 }
521 } else {
522 Tok->setBlockKind(BK_Unknown);
523 }
524 LBraceStack.push_back(Tok);
525 break;
526 case tok::r_brace:
527 if (LBraceStack.empty())
528 break;
529 if (LBraceStack.back()->is(BK_Unknown)) {
530 bool ProbablyBracedList = false;
531 if (Style.Language == FormatStyle::LK_Proto) {
532 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
533 } else {
534 // Skip NextTok over preprocessor lines, otherwise we may not
535 // properly diagnose the block as a braced intializer
536 // if the comma separator appears after the pp directive.
537 while (NextTok->is(tok::hash)) {
538 ScopedMacroState MacroState(*Line, Tokens, NextTok);
539 do {
540 NextTok = Tokens->getNextToken();
541 } while (NextTok->isNot(tok::eof));
542 }
543
544 // Using OriginalColumn to distinguish between ObjC methods and
545 // binary operators is a bit hacky.
546 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
547 NextTok->OriginalColumn == 0;
548
549 // Try to detect a braced list. Note that regardless how we mark inner
550 // braces here, we will overwrite the BlockKind later if we parse a
551 // braced list (where all blocks inside are by default braced lists),
552 // or when we explicitly detect blocks (for example while parsing
553 // lambdas).
554
555 // If we already marked the opening brace as braced list, the closing
556 // must also be part of it.
557 ProbablyBracedList = LBraceStack.back()->is(TT_BracedListLBrace);
558
559 ProbablyBracedList = ProbablyBracedList ||
560 (Style.isJavaScript() &&
561 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
562 Keywords.kw_as));
563 ProbablyBracedList = ProbablyBracedList ||
564 (Style.isCpp() && NextTok->is(tok::l_paren));
565
566 // If there is a comma, semicolon or right paren after the closing
567 // brace, we assume this is a braced initializer list.
568 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
569 // braced list in JS.
570 ProbablyBracedList =
571 ProbablyBracedList ||
572 NextTok->isOneOf(tok::comma, tok::period, tok::colon,
573 tok::r_paren, tok::r_square, tok::l_brace,
574 tok::ellipsis);
575
576 ProbablyBracedList =
577 ProbablyBracedList ||
578 (NextTok->is(tok::identifier) &&
579 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
580
581 ProbablyBracedList = ProbablyBracedList ||
582 (NextTok->is(tok::semi) &&
583 (!ExpectClassBody || LBraceStack.size() != 1));
584
585 ProbablyBracedList =
586 ProbablyBracedList ||
587 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
588
589 if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
590 // We can have an array subscript after a braced init
591 // list, but C++11 attributes are expected after blocks.
592 NextTok = Tokens->getNextToken();
593 ProbablyBracedList = NextTok->isNot(tok::l_square);
594 }
595 }
596 if (ProbablyBracedList) {
597 Tok->setBlockKind(BK_BracedInit);
598 LBraceStack.back()->setBlockKind(BK_BracedInit);
599 } else {
600 Tok->setBlockKind(BK_Block);
601 LBraceStack.back()->setBlockKind(BK_Block);
602 }
603 }
604 LBraceStack.pop_back();
605 break;
606 case tok::identifier:
607 if (!Tok->is(TT_StatementMacro))
608 break;
609 [[fallthrough]];
610 case tok::at:
611 case tok::semi:
612 case tok::kw_if:
613 case tok::kw_while:
614 case tok::kw_for:
615 case tok::kw_switch:
616 case tok::kw_try:
617 case tok::kw___try:
618 if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
619 LBraceStack.back()->setBlockKind(BK_Block);
620 break;
621 default:
622 break;
623 }
624 PrevTok = Tok;
625 Tok = NextTok;
626 } while (Tok->isNot(tok::eof) && !LBraceStack.empty());
627
628 // Assume other blocks for all unclosed opening braces.
629 for (FormatToken *LBrace : LBraceStack)
630 if (LBrace->is(BK_Unknown))
631 LBrace->setBlockKind(BK_Block);
632
633 FormatTok = Tokens->setPosition(StoredPosition);
634}
635
/// Folds the \c std::hash of \p v into \p seed.
/// \param seed Running hash value, updated in place.
/// \param v Value whose hash is mixed in.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
641
642size_t UnwrappedLineParser::computePPHash() const {
643 size_t h = 0;
644 for (const auto &i : PPStack) {
645 hash_combine(h, size_t(i.Kind));
646 hash_combine(h, i.Line);
647 }
648 return h;
649}
650
651// Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
652// is not null, subtracts its length (plus the preceding space) when computing
653// the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
654// running the token annotator on it so that we can restore them afterward.
655bool UnwrappedLineParser::mightFitOnOneLine(
656 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
657 const auto ColumnLimit = Style.ColumnLimit;
658 if (ColumnLimit == 0)
659 return true;
660
661 auto &Tokens = ParsedLine.Tokens;
662 assert(!Tokens.empty());
663
664 const auto *LastToken = Tokens.back().Tok;
665 assert(LastToken);
666
667 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
668
669 int Index = 0;
670 for (const auto &Token : Tokens) {
671 assert(Token.Tok);
672 auto &SavedToken = SavedTokens[Index++];
673 SavedToken.Tok = new FormatToken;
674 SavedToken.Tok->copyFrom(*Token.Tok);
675 SavedToken.Children = std::move(Token.Children);
676 }
677
678 AnnotatedLine Line(ParsedLine);
679 assert(Line.Last == LastToken);
680
681 TokenAnnotator Annotator(Style, Keywords);
682 Annotator.annotate(Line);
683 Annotator.calculateFormattingInformation(Line);
684
685 auto Length = LastToken->TotalLength;
686 if (OpeningBrace) {
687 assert(OpeningBrace != Tokens.front().Tok);
688 if (auto Prev = OpeningBrace->Previous;
689 Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
690 Length -= ColumnLimit;
691 }
692 Length -= OpeningBrace->TokenText.size() + 1;
693 }
694
695 if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
696 assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
697 Length -= FirstToken->TokenText.size() + 1;
698 }
699
700 Index = 0;
701 for (auto &Token : Tokens) {
702 const auto &SavedToken = SavedTokens[Index++];
703 Token.Tok->copyFrom(*SavedToken.Tok);
704 Token.Children = std::move(SavedToken.Children);
705 delete SavedToken.Tok;
706 }
707
708 // If these change PPLevel needs to be used for get correct indentation.
709 assert(!Line.InMacroBody);
710 assert(!Line.InPPDirective);
711 return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
712}
713
714FormatToken *UnwrappedLineParser::parseBlock(
715 bool MustBeDeclaration, unsigned AddLevels, bool MunchSemi, bool KeepBraces,
716 IfStmtKind *IfKind, bool UnindentWhitesmithsBraces,
717 bool CanContainBracedList, TokenType NextLBracesType) {
718 auto HandleVerilogBlockLabel = [this]() {
719 // ":" name
720 if (Style.isVerilog() && FormatTok->is(tok::colon)) {
721 nextToken();
722 if (Keywords.isVerilogIdentifier(*FormatTok))
723 nextToken();
724 }
725 };
726
727 // Whether this is a Verilog-specific block that has a special header like a
728 // module.
729 const bool VerilogHierarchy =
730 Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
731 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
732 (Style.isVerilog() &&
733 (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
734 "'{' or macro block token expected");
735 FormatToken *Tok = FormatTok;
736 const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
737 auto Index = CurrentLines->size();
738 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
739 FormatTok->setBlockKind(BK_Block);
740
741 // For Whitesmiths mode, jump to the next level prior to skipping over the
742 // braces.
743 if (!VerilogHierarchy && AddLevels > 0 &&
745 ++Line->Level;
746 }
747
748 size_t PPStartHash = computePPHash();
749
750 const unsigned InitialLevel = Line->Level;
751 if (VerilogHierarchy) {
752 AddLevels += parseVerilogHierarchyHeader();
753 } else {
754 nextToken(/*LevelDifference=*/AddLevels);
755 HandleVerilogBlockLabel();
756 }
757
758 // Bail out if there are too many levels. Otherwise, the stack might overflow.
759 if (Line->Level > 300)
760 return nullptr;
761
762 if (MacroBlock && FormatTok->is(tok::l_paren))
763 parseParens();
764
765 size_t NbPreprocessorDirectives =
766 !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
767 addUnwrappedLine();
768 size_t OpeningLineIndex =
769 CurrentLines->empty()
771 : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
772
773 // Whitesmiths is weird here. The brace needs to be indented for the namespace
774 // block, but the block itself may not be indented depending on the style
775 // settings. This allows the format to back up one level in those cases.
776 if (UnindentWhitesmithsBraces)
777 --Line->Level;
778
779 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
780 MustBeDeclaration);
781 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
782 Line->Level += AddLevels;
783
784 FormatToken *IfLBrace = nullptr;
785 const bool SimpleBlock =
786 parseLevel(Tok, CanContainBracedList, NextLBracesType, IfKind, &IfLBrace);
787
788 if (eof())
789 return IfLBrace;
790
791 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
792 : !FormatTok->is(tok::r_brace)) {
793 Line->Level = InitialLevel;
794 FormatTok->setBlockKind(BK_Block);
795 return IfLBrace;
796 }
797
798 const bool IsFunctionRBrace =
799 FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);
800
801 auto RemoveBraces = [=]() mutable {
802 if (!SimpleBlock)
803 return false;
804 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
805 assert(FormatTok->is(tok::r_brace));
806 const bool WrappedOpeningBrace = !Tok->Previous;
807 if (WrappedOpeningBrace && FollowedByComment)
808 return false;
809 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
810 if (KeepBraces && !HasRequiredIfBraces)
811 return false;
812 if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
813 const FormatToken *Previous = Tokens->getPreviousToken();
814 assert(Previous);
815 if (Previous->is(tok::r_brace) && !Previous->Optional)
816 return false;
817 }
818 assert(!CurrentLines->empty());
819 auto &LastLine = CurrentLines->back();
820 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
821 return false;
822 if (Tok->is(TT_ElseLBrace))
823 return true;
824 if (WrappedOpeningBrace) {
825 assert(Index > 0);
826 --Index; // The line above the wrapped l_brace.
827 Tok = nullptr;
828 }
829 return mightFitOnOneLine((*CurrentLines)[Index], Tok);
830 };
831 if (RemoveBraces()) {
832 Tok->MatchingParen = FormatTok;
833 FormatTok->MatchingParen = Tok;
834 }
835
836 size_t PPEndHash = computePPHash();
837
838 // Munch the closing brace.
839 nextToken(/*LevelDifference=*/-AddLevels);
840
841 // When this is a function block and there is an unnecessary semicolon
842 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
843 // it later).
844 if (Style.RemoveSemicolon && IsFunctionRBrace) {
845 while (FormatTok->is(tok::semi)) {
846 FormatTok->Optional = true;
847 nextToken();
848 }
849 }
850
851 HandleVerilogBlockLabel();
852
853 if (MacroBlock && FormatTok->is(tok::l_paren))
854 parseParens();
855
856 Line->Level = InitialLevel;
857
858 if (FormatTok->is(tok::kw_noexcept)) {
859 // A noexcept in a requires expression.
860 nextToken();
861 }
862
863 if (FormatTok->is(tok::arrow)) {
864 // Following the } or noexcept we can find a trailing return type arrow
865 // as part of an implicit conversion constraint.
866 nextToken();
867 parseStructuralElement();
868 }
869
870 if (MunchSemi && FormatTok->is(tok::semi))
871 nextToken();
872
873 if (PPStartHash == PPEndHash) {
874 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
875 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
876 // Update the opening line to add the forward reference as well
877 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
878 CurrentLines->size() - 1;
879 }
880 }
881
882 return IfLBrace;
883}
884
885static bool isGoogScope(const UnwrappedLine &Line) {
886 // FIXME: Closure-library specific stuff should not be hard-coded but be
887 // configurable.
888 if (Line.Tokens.size() < 4)
889 return false;
890 auto I = Line.Tokens.begin();
891 if (I->Tok->TokenText != "goog")
892 return false;
893 ++I;
894 if (I->Tok->isNot(tok::period))
895 return false;
896 ++I;
897 if (I->Tok->TokenText != "scope")
898 return false;
899 ++I;
900 return I->Tok->is(tok::l_paren);
901}
902
903static bool isIIFE(const UnwrappedLine &Line,
904 const AdditionalKeywords &Keywords) {
905 // Look for the start of an immediately invoked anonymous function.
906 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
907 // This is commonly done in JavaScript to create a new, anonymous scope.
908 // Example: (function() { ... })()
909 if (Line.Tokens.size() < 3)
910 return false;
911 auto I = Line.Tokens.begin();
912 if (I->Tok->isNot(tok::l_paren))
913 return false;
914 ++I;
915 if (I->Tok->isNot(Keywords.kw_function))
916 return false;
917 ++I;
918 return I->Tok->is(tok::l_paren);
919}
920
921static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
922 const FormatToken &InitialToken) {
923 tok::TokenKind Kind = InitialToken.Tok.getKind();
924 if (InitialToken.is(TT_NamespaceMacro))
925 Kind = tok::kw_namespace;
926
927 switch (Kind) {
928 case tok::kw_namespace:
929 return Style.BraceWrapping.AfterNamespace;
930 case tok::kw_class:
931 return Style.BraceWrapping.AfterClass;
932 case tok::kw_union:
933 return Style.BraceWrapping.AfterUnion;
934 case tok::kw_struct:
935 return Style.BraceWrapping.AfterStruct;
936 case tok::kw_enum:
937 return Style.BraceWrapping.AfterEnum;
938 default:
939 return false;
940 }
941}
942
943void UnwrappedLineParser::parseChildBlock(
944 bool CanContainBracedList, clang::format::TokenType NextLBracesType) {
945 assert(FormatTok->is(tok::l_brace));
946 FormatTok->setBlockKind(BK_Block);
947 const FormatToken *OpeningBrace = FormatTok;
948 nextToken();
949 {
950 bool SkipIndent = (Style.isJavaScript() &&
951 (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
952 ScopedLineState LineState(*this);
953 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
954 /*MustBeDeclaration=*/false);
955 Line->Level += SkipIndent ? 0 : 1;
956 parseLevel(OpeningBrace, CanContainBracedList, NextLBracesType);
957 flushComments(isOnNewLine(*FormatTok));
958 Line->Level -= SkipIndent ? 0 : 1;
959 }
960 nextToken();
961}
962
963void UnwrappedLineParser::parsePPDirective() {
964 assert(FormatTok->is(tok::hash) && "'#' expected");
965 ScopedMacroState MacroState(*Line, Tokens, FormatTok);
966
967 nextToken();
968
969 if (!FormatTok->Tok.getIdentifierInfo()) {
970 parsePPUnknown();
971 return;
972 }
973
974 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
975 case tok::pp_define:
976 parsePPDefine();
977 return;
978 case tok::pp_if:
979 parsePPIf(/*IfDef=*/false);
980 break;
981 case tok::pp_ifdef:
982 case tok::pp_ifndef:
983 parsePPIf(/*IfDef=*/true);
984 break;
985 case tok::pp_else:
986 case tok::pp_elifdef:
987 case tok::pp_elifndef:
988 case tok::pp_elif:
989 parsePPElse();
990 break;
991 case tok::pp_endif:
992 parsePPEndIf();
993 break;
994 case tok::pp_pragma:
995 parsePPPragma();
996 break;
997 default:
998 parsePPUnknown();
999 break;
1000 }
1001}
1002
1003void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1004 size_t Line = CurrentLines->size();
1005 if (CurrentLines == &PreprocessorDirectives)
1006 Line += Lines.size();
1007
1008 if (Unreachable ||
1009 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1010 PPStack.push_back({PP_Unreachable, Line});
1011 } else {
1012 PPStack.push_back({PP_Conditional, Line});
1013 }
1014}
1015
1016void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1017 ++PPBranchLevel;
1018 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1019 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1020 PPLevelBranchIndex.push_back(0);
1021 PPLevelBranchCount.push_back(0);
1022 }
1023 PPChainBranchIndex.push(Unreachable ? -1 : 0);
1024 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1025 conditionalCompilationCondition(Unreachable || Skip);
1026}
1027
1028void UnwrappedLineParser::conditionalCompilationAlternative() {
1029 if (!PPStack.empty())
1030 PPStack.pop_back();
1031 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1032 if (!PPChainBranchIndex.empty())
1033 ++PPChainBranchIndex.top();
1034 conditionalCompilationCondition(
1035 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1036 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1037}
1038
1039void UnwrappedLineParser::conditionalCompilationEnd() {
1040 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1041 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1042 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1043 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1044 }
1045 // Guard against #endif's without #if.
1046 if (PPBranchLevel > -1)
1047 --PPBranchLevel;
1048 if (!PPChainBranchIndex.empty())
1049 PPChainBranchIndex.pop();
1050 if (!PPStack.empty())
1051 PPStack.pop_back();
1052}
1053
1054void UnwrappedLineParser::parsePPIf(bool IfDef) {
1055 bool IfNDef = FormatTok->is(tok::pp_ifndef);
1056 nextToken();
1057 bool Unreachable = false;
1058 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1059 Unreachable = true;
1060 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1061 Unreachable = true;
1062 conditionalCompilationStart(Unreachable);
1063 FormatToken *IfCondition = FormatTok;
1064 // If there's a #ifndef on the first line, and the only lines before it are
1065 // comments, it could be an include guard.
1066 bool MaybeIncludeGuard = IfNDef;
1067 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1068 for (auto &Line : Lines) {
1069 if (!Line.Tokens.front().Tok->is(tok::comment)) {
1070 MaybeIncludeGuard = false;
1071 IncludeGuard = IG_Rejected;
1072 break;
1073 }
1074 }
1075 }
1076 --PPBranchLevel;
1077 parsePPUnknown();
1078 ++PPBranchLevel;
1079 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1080 IncludeGuard = IG_IfNdefed;
1081 IncludeGuardToken = IfCondition;
1082 }
1083}
1084
1085void UnwrappedLineParser::parsePPElse() {
1086 // If a potential include guard has an #else, it's not an include guard.
1087 if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1088 IncludeGuard = IG_Rejected;
1089 // Don't crash when there is an #else without an #if.
1090 assert(PPBranchLevel >= -1);
1091 if (PPBranchLevel == -1)
1092 conditionalCompilationStart(/*Unreachable=*/true);
1093 conditionalCompilationAlternative();
1094 --PPBranchLevel;
1095 parsePPUnknown();
1096 ++PPBranchLevel;
1097}
1098
1099void UnwrappedLineParser::parsePPEndIf() {
1100 conditionalCompilationEnd();
1101 parsePPUnknown();
1102 // If the #endif of a potential include guard is the last thing in the file,
1103 // then we found an include guard.
1104 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1106 IncludeGuard = IG_Found;
1107 }
1108}
1109
1110void UnwrappedLineParser::parsePPDefine() {
1111 nextToken();
1112
1113 if (!FormatTok->Tok.getIdentifierInfo()) {
1114 IncludeGuard = IG_Rejected;
1115 IncludeGuardToken = nullptr;
1116 parsePPUnknown();
1117 return;
1118 }
1119
1120 if (IncludeGuard == IG_IfNdefed &&
1121 IncludeGuardToken->TokenText == FormatTok->TokenText) {
1122 IncludeGuard = IG_Defined;
1123 IncludeGuardToken = nullptr;
1124 for (auto &Line : Lines) {
1125 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1126 IncludeGuard = IG_Rejected;
1127 break;
1128 }
1129 }
1130 }
1131
1132 // In the context of a define, even keywords should be treated as normal
1133 // identifiers. Setting the kind to identifier is not enough, because we need
1134 // to treat additional keywords like __except as well, which are already
1135 // identifiers. Setting the identifier info to null interferes with include
1136 // guard processing above, and changes preprocessing nesting.
1137 FormatTok->Tok.setKind(tok::identifier);
1139 nextToken();
1140 if (FormatTok->Tok.getKind() == tok::l_paren &&
1141 !FormatTok->hasWhitespaceBefore()) {
1142 parseParens();
1143 }
1145 Line->Level += PPBranchLevel + 1;
1146 addUnwrappedLine();
1147 ++Line->Level;
1148
1149 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
1150 assert((int)Line->PPLevel >= 0);
1151 Line->InMacroBody = true;
1152
1153 // Errors during a preprocessor directive can only affect the layout of the
1154 // preprocessor directive, and thus we ignore them. An alternative approach
1155 // would be to use the same approach we use on the file level (no
1156 // re-indentation if there was a structural error) within the macro
1157 // definition.
1158 parseFile();
1159}
1160
1161void UnwrappedLineParser::parsePPPragma() {
1162 Line->InPragmaDirective = true;
1163 parsePPUnknown();
1164}
1165
1166void UnwrappedLineParser::parsePPUnknown() {
1167 do {
1168 nextToken();
1169 } while (!eof());
1171 Line->Level += PPBranchLevel + 1;
1172 addUnwrappedLine();
1173}
1174
1175// Here we exclude certain tokens that are not usually the first token in an
1176// unwrapped line. This is used in attempt to distinguish macro calls without
1177// trailing semicolons from other constructs split to several lines.
1178static bool tokenCanStartNewLine(const FormatToken &Tok) {
1179 // Semicolon can be a null-statement, l_square can be a start of a macro or
1180 // a C++11 attribute, but this doesn't seem to be common.
1181 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1182 Tok.isNot(TT_AttributeSquare) &&
1183 // Tokens that can only be used as binary operators and a part of
1184 // overloaded operator names.
1185 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1186 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1187 Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1188 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1189 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1190 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1191 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1192 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1193 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1194 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1195 Tok.isNot(tok::lesslessequal) &&
1196 // Colon is used in labels, base class lists, initializer lists,
1197 // range-based for loops, ternary operator, but should never be the
1198 // first token in an unwrapped line.
1199 Tok.isNot(tok::colon) &&
1200 // 'noexcept' is a trailing annotation.
1201 Tok.isNot(tok::kw_noexcept);
1202}
1203
1204static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1205 const FormatToken *FormatTok) {
1206 // FIXME: This returns true for C/C++ keywords like 'struct'.
1207 return FormatTok->is(tok::identifier) &&
1208 (!FormatTok->Tok.getIdentifierInfo() ||
1209 !FormatTok->isOneOf(
1210 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1211 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1212 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1213 Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1214 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1215 Keywords.kw_instanceof, Keywords.kw_interface,
1216 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1217}
1218
1219static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1220 const FormatToken *FormatTok) {
1221 return FormatTok->Tok.isLiteral() ||
1222 FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1223 mustBeJSIdent(Keywords, FormatTok);
1224}
1225
1226// isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1227// when encountered after a value (see mustBeJSIdentOrValue).
1228static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1229 const FormatToken *FormatTok) {
1230 return FormatTok->isOneOf(
1231 tok::kw_return, Keywords.kw_yield,
1232 // conditionals
1233 tok::kw_if, tok::kw_else,
1234 // loops
1235 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1236 // switch/case
1237 tok::kw_switch, tok::kw_case,
1238 // exceptions
1239 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1240 // declaration
1241 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1242 Keywords.kw_async, Keywords.kw_function,
1243 // import/export
1244 Keywords.kw_import, tok::kw_export);
1245}
1246
1247// Checks whether a token is a type in K&R C (aka C78).
1248static bool isC78Type(const FormatToken &Tok) {
1249 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1250 tok::kw_unsigned, tok::kw_float, tok::kw_double,
1251 tok::identifier);
1252}
1253
1254// This function checks whether a token starts the first parameter declaration
1255// in a K&R C (aka C78) function definition, e.g.:
1256// int f(a, b)
1257// short a, b;
1258// {
1259// return a + b;
1260// }
1261static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1262 const FormatToken *FuncName) {
1263 assert(Tok);
1264 assert(Next);
1265 assert(FuncName);
1266
1267 if (FuncName->isNot(tok::identifier))
1268 return false;
1269
1270 const FormatToken *Prev = FuncName->Previous;
1271 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1272 return false;
1273
1274 if (!isC78Type(*Tok) &&
1275 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1276 return false;
1277 }
1278
1279 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1280 return false;
1281
1282 Tok = Tok->Previous;
1283 if (!Tok || Tok->isNot(tok::r_paren))
1284 return false;
1285
1286 Tok = Tok->Previous;
1287 if (!Tok || Tok->isNot(tok::identifier))
1288 return false;
1289
1290 return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1291}
1292
1293bool UnwrappedLineParser::parseModuleImport() {
1294 assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1295
1296 if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1297 !Token->Tok.getIdentifierInfo() &&
1298 !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
1299 return false;
1300 }
1301
1302 nextToken();
1303 while (!eof()) {
1304 if (FormatTok->is(tok::colon)) {
1305 FormatTok->setFinalizedType(TT_ModulePartitionColon);
1306 }
1307 // Handle import <foo/bar.h> as we would an include statement.
1308 else if (FormatTok->is(tok::less)) {
1309 nextToken();
1310 while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1311 // Mark tokens up to the trailing line comments as implicit string
1312 // literals.
1313 if (FormatTok->isNot(tok::comment) &&
1314 !FormatTok->TokenText.startswith("//")) {
1315 FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1316 }
1317 nextToken();
1318 }
1319 }
1320 if (FormatTok->is(tok::semi)) {
1321 nextToken();
1322 break;
1323 }
1324 nextToken();
1325 }
1326
1327 addUnwrappedLine();
1328 return true;
1329}
1330
1331// readTokenWithJavaScriptASI reads the next token and terminates the current
1332// line if JavaScript Automatic Semicolon Insertion must
1333// happen between the current token and the next token.
1334//
1335// This method is conservative - it cannot cover all edge cases of JavaScript,
1336// but only aims to correctly handle certain well known cases. It *must not*
1337// return true in speculative cases.
1338void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1339 FormatToken *Previous = FormatTok;
1340 readToken();
1341 FormatToken *Next = FormatTok;
1342
1343 bool IsOnSameLine =
1344 CommentsBeforeNextToken.empty()
1345 ? Next->NewlinesBefore == 0
1346 : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1347 if (IsOnSameLine)
1348 return;
1349
1350 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1351 bool PreviousStartsTemplateExpr =
1352 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1353 if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1354 // If the line contains an '@' sign, the previous token might be an
1355 // annotation, which can precede another identifier/value.
1356 bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1357 return LineNode.Tok->is(tok::at);
1358 });
1359 if (HasAt)
1360 return;
1361 }
1362 if (Next->is(tok::exclaim) && PreviousMustBeValue)
1363 return addUnwrappedLine();
1364 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1365 bool NextEndsTemplateExpr =
1366 Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1367 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1368 (PreviousMustBeValue ||
1369 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1370 tok::minusminus))) {
1371 return addUnwrappedLine();
1372 }
1373 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1374 isJSDeclOrStmt(Keywords, Next)) {
1375 return addUnwrappedLine();
1376 }
1377}
1378
1379void UnwrappedLineParser::parseStructuralElement(
1380 bool IsTopLevel, TokenType NextLBracesType, IfStmtKind *IfKind,
1381 FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1382 if (Style.Language == FormatStyle::LK_TableGen &&
1383 FormatTok->is(tok::pp_include)) {
1384 nextToken();
1385 if (FormatTok->is(tok::string_literal))
1386 nextToken();
1387 addUnwrappedLine();
1388 return;
1389 }
1390
1391 if (Style.isVerilog()) {
1392 if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
1393 parseForOrWhileLoop(/*HasParens=*/false);
1394 return;
1395 }
1396
1397 // Skip things that can exist before keywords like 'if' and 'case'.
1398 while (true) {
1399 if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
1400 Keywords.kw_unique0)) {
1401 nextToken();
1402 } else if (FormatTok->is(tok::l_paren) &&
1403 Tokens->peekNextToken()->is(tok::star)) {
1404 parseParens();
1405 } else {
1406 break;
1407 }
1408 }
1409 }
1410
1411 // Tokens that only make sense at the beginning of a line.
1412 switch (FormatTok->Tok.getKind()) {
1413 case tok::kw_asm:
1414 nextToken();
1415 if (FormatTok->is(tok::l_brace)) {
1416 FormatTok->setFinalizedType(TT_InlineASMBrace);
1417 nextToken();
1418 while (FormatTok && !eof()) {
1419 if (FormatTok->is(tok::r_brace)) {
1420 FormatTok->setFinalizedType(TT_InlineASMBrace);
1421 nextToken();
1422 addUnwrappedLine();
1423 break;
1424 }
1425 FormatTok->Finalized = true;
1426 nextToken();
1427 }
1428 }
1429 break;
1430 case tok::kw_namespace:
1431 parseNamespace();
1432 return;
1433 case tok::kw_public:
1434 case tok::kw_protected:
1435 case tok::kw_private:
1436 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1437 Style.isCSharp()) {
1438 nextToken();
1439 } else {
1440 parseAccessSpecifier();
1441 }
1442 return;
1443 case tok::kw_if: {
1444 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1445 // field/method declaration.
1446 break;
1447 }
1448 FormatToken *Tok = parseIfThenElse(IfKind);
1449 if (IfLeftBrace)
1450 *IfLeftBrace = Tok;
1451 return;
1452 }
1453 case tok::kw_for:
1454 case tok::kw_while:
1455 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1456 // field/method declaration.
1457 break;
1458 }
1459 parseForOrWhileLoop();
1460 return;
1461 case tok::kw_do:
1462 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1463 // field/method declaration.
1464 break;
1465 }
1466 parseDoWhile();
1467 if (HasDoWhile)
1468 *HasDoWhile = true;
1469 return;
1470 case tok::kw_switch:
1471 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1472 // 'switch: string' field declaration.
1473 break;
1474 }
1475 parseSwitch();
1476 return;
1477 case tok::kw_default:
1478 // In Verilog default along with other labels are handled in the next loop.
1479 if (Style.isVerilog())
1480 break;
1481 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1482 // 'default: string' field declaration.
1483 break;
1484 }
1485 nextToken();
1486 if (FormatTok->is(tok::colon)) {
1487 parseLabel();
1488 return;
1489 }
1490 // e.g. "default void f() {}" in a Java interface.
1491 break;
1492 case tok::kw_case:
1493 // Proto: there are no switch/case statements.
1494 if (Style.isProto()) {
1495 nextToken();
1496 return;
1497 }
1498 if (Style.isVerilog()) {
1499 parseBlock();
1500 addUnwrappedLine();
1501 return;
1502 }
1503 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1504 // 'case: string' field declaration.
1505 nextToken();
1506 break;
1507 }
1508 parseCaseLabel();
1509 return;
1510 case tok::kw_try:
1511 case tok::kw___try:
1512 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1513 // field/method declaration.
1514 break;
1515 }
1516 parseTryCatch();
1517 return;
1518 case tok::kw_extern:
1519 nextToken();
1520 if (Style.isVerilog()) {
1521 // In Verilog and extern module declaration looks like a start of module.
1522 // But there is no body and endmodule. So we handle it separately.
1523 if (Keywords.isVerilogHierarchy(*FormatTok)) {
1524 parseVerilogHierarchyHeader();
1525 return;
1526 }
1527 } else if (FormatTok->is(tok::string_literal)) {
1528 nextToken();
1529 if (FormatTok->is(tok::l_brace)) {
1531 addUnwrappedLine();
1532 // Either we indent or for backwards compatibility we follow the
1533 // AfterExternBlock style.
1534 unsigned AddLevels =
1537 Style.IndentExternBlock ==
1539 ? 1u
1540 : 0u;
1541 parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1542 addUnwrappedLine();
1543 return;
1544 }
1545 }
1546 break;
1547 case tok::kw_export:
1548 if (Style.isJavaScript()) {
1549 parseJavaScriptEs6ImportExport();
1550 return;
1551 }
1552 if (Style.isCpp()) {
1553 nextToken();
1554 if (FormatTok->is(tok::kw_namespace)) {
1555 parseNamespace();
1556 return;
1557 }
1558 if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
1559 return;
1560 }
1561 break;
1562 case tok::kw_inline:
1563 nextToken();
1564 if (FormatTok->is(tok::kw_namespace)) {
1565 parseNamespace();
1566 return;
1567 }
1568 break;
1569 case tok::identifier:
1570 if (FormatTok->is(TT_ForEachMacro)) {
1571 parseForOrWhileLoop();
1572 return;
1573 }
1574 if (FormatTok->is(TT_MacroBlockBegin)) {
1575 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1576 /*MunchSemi=*/false);
1577 return;
1578 }
1579 if (FormatTok->is(Keywords.kw_import)) {
1580 if (Style.isJavaScript()) {
1581 parseJavaScriptEs6ImportExport();
1582 return;
1583 }
1584 if (Style.Language == FormatStyle::LK_Proto) {
1585 nextToken();
1586 if (FormatTok->is(tok::kw_public))
1587 nextToken();
1588 if (!FormatTok->is(tok::string_literal))
1589 return;
1590 nextToken();
1591 if (FormatTok->is(tok::semi))
1592 nextToken();
1593 addUnwrappedLine();
1594 return;
1595 }
1596 if (Style.isCpp() && parseModuleImport())
1597 return;
1598 }
1599 if (Style.isCpp() &&
1600 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1601 Keywords.kw_slots, Keywords.kw_qslots)) {
1602 nextToken();
1603 if (FormatTok->is(tok::colon)) {
1604 nextToken();
1605 addUnwrappedLine();
1606 return;
1607 }
1608 }
1609 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1610 parseStatementMacro();
1611 return;
1612 }
1613 if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1614 parseNamespace();
1615 return;
1616 }
1617 // In all other cases, parse the declaration.
1618 break;
1619 default:
1620 break;
1621 }
1622 do {
1623 const FormatToken *Previous = FormatTok->Previous;
1624 switch (FormatTok->Tok.getKind()) {
1625 case tok::at:
1626 nextToken();
1627 if (FormatTok->is(tok::l_brace)) {
1628 nextToken();
1629 parseBracedList();
1630 break;
1631 } else if (Style.Language == FormatStyle::LK_Java &&
1632 FormatTok->is(Keywords.kw_interface)) {
1633 nextToken();
1634 break;
1635 }
1636 switch (FormatTok->Tok.getObjCKeywordID()) {
1637 case tok::objc_public:
1638 case tok::objc_protected:
1639 case tok::objc_package:
1640 case tok::objc_private:
1641 return parseAccessSpecifier();
1642 case tok::objc_interface:
1643 case tok::objc_implementation:
1644 return parseObjCInterfaceOrImplementation();
1645 case tok::objc_protocol:
1646 if (parseObjCProtocol())
1647 return;
1648 break;
1649 case tok::objc_end:
1650 return; // Handled by the caller.
1651 case tok::objc_optional:
1652 case tok::objc_required:
1653 nextToken();
1654 addUnwrappedLine();
1655 return;
1656 case tok::objc_autoreleasepool:
1657 nextToken();
1658 if (FormatTok->is(tok::l_brace)) {
1661 addUnwrappedLine();
1662 }
1663 parseBlock();
1664 }
1665 addUnwrappedLine();
1666 return;
1667 case tok::objc_synchronized:
1668 nextToken();
1669 if (FormatTok->is(tok::l_paren)) {
1670 // Skip synchronization object
1671 parseParens();
1672 }
1673 if (FormatTok->is(tok::l_brace)) {
1676 addUnwrappedLine();
1677 }
1678 parseBlock();
1679 }
1680 addUnwrappedLine();
1681 return;
1682 case tok::objc_try:
1683 // This branch isn't strictly necessary (the kw_try case below would
1684 // do this too after the tok::at is parsed above). But be explicit.
1685 parseTryCatch();
1686 return;
1687 default:
1688 break;
1689 }
1690 break;
1691 case tok::kw_requires: {
1692 if (Style.isCpp()) {
1693 bool ParsedClause = parseRequires();
1694 if (ParsedClause)
1695 return;
1696 } else {
1697 nextToken();
1698 }
1699 break;
1700 }
1701 case tok::kw_enum:
1702 // Ignore if this is part of "template <enum ...".
1703 if (Previous && Previous->is(tok::less)) {
1704 nextToken();
1705 break;
1706 }
1707
1708 // parseEnum falls through and does not yet add an unwrapped line as an
1709 // enum definition can start a structural element.
1710 if (!parseEnum())
1711 break;
1712 // This only applies for C++.
1713 if (!Style.isCpp()) {
1714 addUnwrappedLine();
1715 return;
1716 }
1717 break;
1718 case tok::kw_typedef:
1719 nextToken();
1720 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1721 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1722 Keywords.kw_CF_CLOSED_ENUM,
1723 Keywords.kw_NS_CLOSED_ENUM)) {
1724 parseEnum();
1725 }
1726 break;
1727 case tok::kw_class:
1728 if (Style.isVerilog()) {
1729 parseBlock();
1730 addUnwrappedLine();
1731 return;
1732 }
1733 [[fallthrough]];
1734 case tok::kw_struct:
1735 case tok::kw_union:
1736 if (parseStructLike())
1737 return;
1738 break;
1739 case tok::period:
1740 nextToken();
1741 // In Java, classes have an implicit static member "class".
1742 if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1743 FormatTok->is(tok::kw_class)) {
1744 nextToken();
1745 }
1746 if (Style.isJavaScript() && FormatTok &&
1747 FormatTok->Tok.getIdentifierInfo()) {
1748 // JavaScript only has pseudo keywords, all keywords are allowed to
1749 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1750 nextToken();
1751 }
1752 break;
1753 case tok::semi:
1754 nextToken();
1755 addUnwrappedLine();
1756 return;
1757 case tok::r_brace:
1758 addUnwrappedLine();
1759 return;
1760 case tok::l_paren: {
1761 parseParens();
1762 // Break the unwrapped line if a K&R C function definition has a parameter
1763 // declaration.
1764 if (!IsTopLevel || !Style.isCpp() || !Previous || eof())
1765 break;
1766 if (isC78ParameterDecl(FormatTok,
1767 Tokens->peekNextToken(/*SkipComment=*/true),
1768 Previous)) {
1769 addUnwrappedLine();
1770 return;
1771 }
1772 break;
1773 }
1774 case tok::kw_operator:
1775 nextToken();
1776 if (FormatTok->isBinaryOperator())
1777 nextToken();
1778 break;
1779 case tok::caret:
1780 nextToken();
1781 if (FormatTok->Tok.isAnyIdentifier() ||
1782 FormatTok->isSimpleTypeSpecifier()) {
1783 nextToken();
1784 }
1785 if (FormatTok->is(tok::l_paren))
1786 parseParens();
1787 if (FormatTok->is(tok::l_brace))
1788 parseChildBlock();
1789 break;
1790 case tok::l_brace:
1791 if (NextLBracesType != TT_Unknown)
1792 FormatTok->setFinalizedType(NextLBracesType);
1793 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1794 // A block outside of parentheses must be the last part of a
1795 // structural element.
1796 // FIXME: Figure out cases where this is not true, and add projections
1797 // for them (the one we know is missing are lambdas).
1798 if (Style.Language == FormatStyle::LK_Java &&
1799 Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1800 // If necessary, we could set the type to something different than
1801 // TT_FunctionLBrace.
1804 addUnwrappedLine();
1805 }
1806 } else if (Style.BraceWrapping.AfterFunction) {
1807 addUnwrappedLine();
1808 }
1809 FormatTok->setFinalizedType(TT_FunctionLBrace);
1810 parseBlock();
1811 addUnwrappedLine();
1812 return;
1813 }
1814 // Otherwise this was a braced init list, and the structural
1815 // element continues.
1816 break;
1817 case tok::kw_try:
1818 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1819 // field/method declaration.
1820 nextToken();
1821 break;
1822 }
1823 // We arrive here when parsing function-try blocks.
1824 if (Style.BraceWrapping.AfterFunction)
1825 addUnwrappedLine();
1826 parseTryCatch();
1827 return;
1828 case tok::identifier: {
1829 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1830 Line->MustBeDeclaration) {
1831 addUnwrappedLine();
1832 parseCSharpGenericTypeConstraint();
1833 break;
1834 }
1835 if (FormatTok->is(TT_MacroBlockEnd)) {
1836 addUnwrappedLine();
1837 return;
1838 }
1839
1840 // Function declarations (as opposed to function expressions) are parsed
1841 // on their own unwrapped line by continuing this loop. Function
1842 // expressions (functions that are not on their own line) must not create
1843 // a new unwrapped line, so they are special cased below.
1844 size_t TokenCount = Line->Tokens.size();
1845 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1846 (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1847 Keywords.kw_async)))) {
1848 tryToParseJSFunction();
1849 break;
1850 }
1851 if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1852 FormatTok->is(Keywords.kw_interface)) {
1853 if (Style.isJavaScript()) {
1854 // In JavaScript/TypeScript, "interface" can be used as a standalone
1855 // identifier, e.g. in `var interface = 1;`. If "interface" is
1856 // followed by another identifier, it is very like to be an actual
1857 // interface declaration.
1858 unsigned StoredPosition = Tokens->getPosition();
1859 FormatToken *Next = Tokens->getNextToken();
1860 FormatTok = Tokens->setPosition(StoredPosition);
1861 if (!mustBeJSIdent(Keywords, Next)) {
1862 nextToken();
1863 break;
1864 }
1865 }
1866 parseRecord();
1867 addUnwrappedLine();
1868 return;
1869 }
1870
1871 if (Style.isVerilog()) {
1872 if (FormatTok->is(Keywords.kw_table)) {
1873 parseVerilogTable();
1874 return;
1875 }
1876 if (Keywords.isVerilogBegin(*FormatTok) ||
1877 Keywords.isVerilogHierarchy(*FormatTok)) {
1878 parseBlock();
1879 addUnwrappedLine();
1880 return;
1881 }
1882 }
1883
1884 if (FormatTok->is(Keywords.kw_interface)) {
1885 if (parseStructLike())
1886 return;
1887 break;
1888 }
1889
1890 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1891 parseStatementMacro();
1892 return;
1893 }
1894
1895 // See if the following token should start a new unwrapped line.
1896 StringRef Text = FormatTok->TokenText;
1897
1898 FormatToken *PreviousToken = FormatTok;
1899 nextToken();
1900
1901 // JS doesn't have macros, and within classes colons indicate fields, not
1902 // labels.
1903 if (Style.isJavaScript())
1904 break;
1905
1906 auto OneTokenSoFar = [&]() {
1907 auto I = Line->Tokens.begin(), E = Line->Tokens.end();
1908 while (I != E && I->Tok->is(tok::comment))
1909 ++I;
1910 while (I != E && Style.isVerilog() && I->Tok->is(tok::hash))
1911 ++I;
1912 return I != E && (++I == E);
1913 };
1914 if (OneTokenSoFar()) {
1915 // In Verilog labels can be any expression, so we don't do them here.
1916 if (!Style.isVerilog() && FormatTok->is(tok::colon) &&
1917 !Line->MustBeDeclaration) {
1918 Line->Tokens.begin()->Tok->MustBreakBefore = true;
1919 parseLabel(!Style.IndentGotoLabels);
1920 if (HasLabel)
1921 *HasLabel = true;
1922 return;
1923 }
1924 // Recognize function-like macro usages without trailing semicolon as
1925 // well as free-standing macros like Q_OBJECT.
1926 bool FunctionLike = FormatTok->is(tok::l_paren);
1927 if (FunctionLike)
1928 parseParens();
1929
1930 bool FollowedByNewline =
1931 CommentsBeforeNextToken.empty()
1932 ? FormatTok->NewlinesBefore > 0
1933 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1934
1935 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1936 tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
1937 if (PreviousToken->isNot(TT_UntouchableMacroFunc))
1938 PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
1939 addUnwrappedLine();
1940 return;
1941 }
1942 }
1943 break;
1944 }
1945 case tok::equal:
1946 if ((Style.isJavaScript() || Style.isCSharp()) &&
1947 FormatTok->is(TT_FatArrow)) {
1948 tryToParseChildBlock();
1949 break;
1950 }
1951
1952 nextToken();
1953 if (FormatTok->is(tok::l_brace)) {
1954 // Block kind should probably be set to BK_BracedInit for any language.
1955 // C# needs this change to ensure that array initialisers and object
1956 // initialisers are indented the same way.
1957 if (Style.isCSharp())
1958 FormatTok->setBlockKind(BK_BracedInit);
1959 nextToken();
1960 parseBracedList();
1961 } else if (Style.Language == FormatStyle::LK_Proto &&
1962 FormatTok->is(tok::less)) {
1963 nextToken();
1964 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1965 /*ClosingBraceKind=*/tok::greater);
1966 }
1967 break;
1968 case tok::l_square:
1969 parseSquare();
1970 break;
1971 case tok::kw_new:
1972 parseNew();
1973 break;
1974 case tok::kw_case:
1975 // Proto: there are no switch/case statements.
1976 if (Style.isProto()) {
1977 nextToken();
1978 return;
1979 }
1980 // In Verilog switch is called case.
1981 if (Style.isVerilog()) {
1982 parseBlock();
1983 addUnwrappedLine();
1984 return;
1985 }
1986 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1987 // 'case: string' field declaration.
1988 nextToken();
1989 break;
1990 }
1991 parseCaseLabel();
1992 break;
1993 case tok::kw_default:
1994 nextToken();
1995 if (Style.isVerilog()) {
1996 if (FormatTok->is(tok::colon)) {
1997 // The label will be handled in the next iteration.
1998 break;
1999 }
2000 if (FormatTok->is(Keywords.kw_clocking)) {
2001 // A default clocking block.
2002 parseBlock();
2003 addUnwrappedLine();
2004 return;
2005 }
2006 parseVerilogCaseLabel();
2007 return;
2008 }
2009 break;
2010 case tok::colon:
2011 nextToken();
2012 if (Style.isVerilog()) {
2013 parseVerilogCaseLabel();
2014 return;
2015 }
2016 break;
2017 default:
2018 nextToken();
2019 break;
2020 }
2021 } while (!eof());
2022}
2023
2024bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2025 assert(FormatTok->is(tok::l_brace));
2026 if (!Style.isCSharp())
2027 return false;
2028 // See if it's a property accessor.
2029 if (FormatTok->Previous->isNot(tok::identifier))
2030 return false;
2031
2032 // See if we are inside a property accessor.
2033 //
2034 // Record the current tokenPosition so that we can advance and
2035 // reset the current token. `Next` is not set yet so we need
2036 // another way to advance along the token stream.
2037 unsigned int StoredPosition = Tokens->getPosition();
2038 FormatToken *Tok = Tokens->getNextToken();
2039
2040 // A trivial property accessor is of the form:
2041 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2042 // Track these as they do not require line breaks to be introduced.
2043 bool HasSpecialAccessor = false;
2044 bool IsTrivialPropertyAccessor = true;
2045 while (!eof()) {
2046 if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
2047 tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
2048 Keywords.kw_init, Keywords.kw_set)) {
2049 if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
2050 HasSpecialAccessor = true;
2051 Tok = Tokens->getNextToken();
2052 continue;
2053 }
2054 if (Tok->isNot(tok::r_brace))
2055 IsTrivialPropertyAccessor = false;
2056 break;
2057 }
2058
2059 if (!HasSpecialAccessor) {
2060 Tokens->setPosition(StoredPosition);
2061 return false;
2062 }
2063
2064 // Try to parse the property accessor:
2065 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2066 Tokens->setPosition(StoredPosition);
2067 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2068 addUnwrappedLine();
2069 nextToken();
2070 do {
2071 switch (FormatTok->Tok.getKind()) {
2072 case tok::r_brace:
2073 nextToken();
2074 if (FormatTok->is(tok::equal)) {
2075 while (!eof() && FormatTok->isNot(tok::semi))
2076 nextToken();
2077 nextToken();
2078 }
2079 addUnwrappedLine();
2080 return true;
2081 case tok::l_brace:
2082 ++Line->Level;
2083 parseBlock(/*MustBeDeclaration=*/true);
2084 addUnwrappedLine();
2085 --Line->Level;
2086 break;
2087 case tok::equal:
2088 if (FormatTok->is(TT_FatArrow)) {
2089 ++Line->Level;
2090 do {
2091 nextToken();
2092 } while (!eof() && FormatTok->isNot(tok::semi));
2093 nextToken();
2094 addUnwrappedLine();
2095 --Line->Level;
2096 break;
2097 }
2098 nextToken();
2099 break;
2100 default:
2101 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2102 Keywords.kw_set) &&
2103 !IsTrivialPropertyAccessor) {
2104 // Non-trivial get/set needs to be on its own line.
2105 addUnwrappedLine();
2106 }
2107 nextToken();
2108 }
2109 } while (!eof());
2110
2111 // Unreachable for well-formed code (paired '{' and '}').
2112 return true;
2113}
2114
2115bool UnwrappedLineParser::tryToParseLambda() {
2116 assert(FormatTok->is(tok::l_square));
2117 if (!Style.isCpp()) {
2118 nextToken();
2119 return false;
2120 }
2121 FormatToken &LSquare = *FormatTok;
2122 if (!tryToParseLambdaIntroducer())
2123 return false;
2124
2125 bool SeenArrow = false;
2126 bool InTemplateParameterList = false;
2127
2128 while (FormatTok->isNot(tok::l_brace)) {
2129 if (FormatTok->isSimpleTypeSpecifier()) {
2130 nextToken();
2131 continue;
2132 }
2133 switch (FormatTok->Tok.getKind()) {
2134 case tok::l_brace:
2135 break;
2136 case tok::l_paren:
2137 parseParens();
2138 break;
2139 case tok::l_square:
2140 parseSquare();
2141 break;
2142 case tok::less:
2143 assert(FormatTok->Previous);
2144 if (FormatTok->Previous->is(tok::r_square))
2145 InTemplateParameterList = true;
2146 nextToken();
2147 break;
2148 case tok::kw_auto:
2149 case tok::kw_class:
2150 case tok::kw_template:
2151 case tok::kw_typename:
2152 case tok::amp:
2153 case tok::star:
2154 case tok::kw_const:
2155 case tok::kw_constexpr:
2156 case tok::kw_consteval:
2157 case tok::comma:
2158 case tok::greater:
2159 case tok::identifier:
2160 case tok::numeric_constant:
2161 case tok::coloncolon:
2162 case tok::kw_mutable:
2163 case tok::kw_noexcept:
2164 case tok::kw_static:
2165 nextToken();
2166 break;
2167 // Specialization of a template with an integer parameter can contain
2168 // arithmetic, logical, comparison and ternary operators.
2169 //
2170 // FIXME: This also accepts sequences of operators that are not in the scope
2171 // of a template argument list.
2172 //
2173 // In a C++ lambda a template type can only occur after an arrow. We use
2174 // this as an heuristic to distinguish between Objective-C expressions
2175 // followed by an `a->b` expression, such as:
2176 // ([obj func:arg] + a->b)
2177 // Otherwise the code below would parse as a lambda.
2178 //
2179 // FIXME: This heuristic is incorrect for C++20 generic lambdas with
2180 // explicit template lists: []<bool b = true && false>(U &&u){}
2181 case tok::plus:
2182 case tok::minus:
2183 case tok::exclaim:
2184 case tok::tilde:
2185 case tok::slash:
2186 case tok::percent:
2187 case tok::lessless:
2188 case tok::pipe:
2189 case tok::pipepipe:
2190 case tok::ampamp:
2191 case tok::caret:
2192 case tok::equalequal:
2193 case tok::exclaimequal:
2194 case tok::greaterequal:
2195 case tok::lessequal:
2196 case tok::question:
2197 case tok::colon:
2198 case tok::ellipsis:
2199 case tok::kw_true:
2200 case tok::kw_false:
2201 if (SeenArrow || InTemplateParameterList) {
2202 nextToken();
2203 break;
2204 }
2205 return true;
2206 case tok::arrow:
2207 // This might or might not actually be a lambda arrow (this could be an
2208 // ObjC method invocation followed by a dereferencing arrow). We might
2209 // reset this back to TT_Unknown in TokenAnnotator.
2210 FormatTok->setFinalizedType(TT_LambdaArrow);
2211 SeenArrow = true;
2212 nextToken();
2213 break;
2214 default:
2215 return true;
2216 }
2217 }
2218 FormatTok->setFinalizedType(TT_LambdaLBrace);
2219 LSquare.setFinalizedType(TT_LambdaLSquare);
2220 parseChildBlock();
2221 return true;
2222}
2223
2224bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2225 const FormatToken *Previous = FormatTok->Previous;
2226 const FormatToken *LeftSquare = FormatTok;
2227 nextToken();
2228 if (Previous &&
2229 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
2230 tok::kw_delete, tok::l_square) ||
2231 LeftSquare->isCppStructuredBinding(Style) || Previous->closesScope() ||
2232 Previous->isSimpleTypeSpecifier())) {
2233 return false;
2234 }
2235 if (FormatTok->is(tok::l_square))
2236 return false;
2237 if (FormatTok->is(tok::r_square)) {
2238 const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2239 if (Next->is(tok::greater))
2240 return false;
2241 }
2242 parseSquare(/*LambdaIntroducer=*/true);
2243 return true;
2244}
2245
2246void UnwrappedLineParser::tryToParseJSFunction() {
2247 assert(FormatTok->is(Keywords.kw_function) ||
2248 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
2249 if (FormatTok->is(Keywords.kw_async))
2250 nextToken();
2251 // Consume "function".
2252 nextToken();
2253
2254 // Consume * (generator function). Treat it like C++'s overloaded operators.
2255 if (FormatTok->is(tok::star)) {
2256 FormatTok->setFinalizedType(TT_OverloadedOperator);
2257 nextToken();
2258 }
2259
2260 // Consume function name.
2261 if (FormatTok->is(tok::identifier))
2262 nextToken();
2263
2264 if (FormatTok->isNot(tok::l_paren))
2265 return;
2266
2267 // Parse formal parameter list.
2268 parseParens();
2269
2270 if (FormatTok->is(tok::colon)) {
2271 // Parse a type definition.
2272 nextToken();
2273
2274 // Eat the type declaration. For braced inline object types, balance braces,
2275 // otherwise just parse until finding an l_brace for the function body.
2276 if (FormatTok->is(tok::l_brace))
2277 tryToParseBracedList();
2278 else
2279 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2280 nextToken();
2281 }
2282
2283 if (FormatTok->is(tok::semi))
2284 return;
2285
2286 parseChildBlock();
2287}
2288
2289bool UnwrappedLineParser::tryToParseBracedList() {
2290 if (FormatTok->is(BK_Unknown))
2291 calculateBraceTypes();
2292 assert(FormatTok->isNot(BK_Unknown));
2293 if (FormatTok->is(BK_Block))
2294 return false;
2295 nextToken();
2296 parseBracedList();
2297 return true;
2298}
2299
2300bool UnwrappedLineParser::tryToParseChildBlock() {
2301 assert(Style.isJavaScript() || Style.isCSharp());
2302 assert(FormatTok->is(TT_FatArrow));
2303 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2304 // They always start an expression or a child block if followed by a curly
2305 // brace.
2306 nextToken();
2307 if (FormatTok->isNot(tok::l_brace))
2308 return false;
2309 parseChildBlock();
2310 return true;
2311}
2312
2313bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
2314 bool IsEnum,
2315 tok::TokenKind ClosingBraceKind) {
2316 bool HasError = false;
2317
2318 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2319 // replace this by using parseAssignmentExpression() inside.
2320 do {
2321 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2322 tryToParseChildBlock()) {
2323 continue;
2324 }
2325 if (Style.isJavaScript()) {
2326 if (FormatTok->is(Keywords.kw_function) ||
2327 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
2328 tryToParseJSFunction();
2329 continue;
2330 }
2331 if (FormatTok->is(tok::l_brace)) {
2332 // Could be a method inside of a braced list `{a() { return 1; }}`.
2333 if (tryToParseBracedList())
2334 continue;
2335 parseChildBlock();
2336 }
2337 }
2338 if (FormatTok->Tok.getKind() == ClosingBraceKind) {
2339 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2340 addUnwrappedLine();
2341 nextToken();
2342 return !HasError;
2343 }
2344 switch (FormatTok->Tok.getKind()) {
2345 case tok::l_square:
2346 if (Style.isCSharp())
2347 parseSquare();
2348 else
2349 tryToParseLambda();
2350 break;
2351 case tok::l_paren:
2352 parseParens();
2353 // JavaScript can just have free standing methods and getters/setters in
2354 // object literals. Detect them by a "{" following ")".
2355 if (Style.isJavaScript()) {
2356 if (FormatTok->is(tok::l_brace))
2357 parseChildBlock();
2358 break;
2359 }
2360 break;
2361 case tok::l_brace:
2362 // Assume there are no blocks inside a braced init list apart
2363 // from the ones we explicitly parse out (like lambdas).
2364 FormatTok->setBlockKind(BK_BracedInit);
2365 nextToken();
2366 parseBracedList();
2367 break;
2368 case tok::less:
2369 if (Style.Language == FormatStyle::LK_Proto ||
2370 ClosingBraceKind == tok::greater) {
2371 nextToken();
2372 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2373 /*ClosingBraceKind=*/tok::greater);
2374 } else {
2375 nextToken();
2376 }
2377 break;
2378 case tok::semi:
2379 // JavaScript (or more precisely TypeScript) can have semicolons in braced
2380 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2381 // used for error recovery if we have otherwise determined that this is
2382 // a braced list.
2383 if (Style.isJavaScript()) {
2384 nextToken();
2385 break;
2386 }
2387 HasError = true;
2388 if (!ContinueOnSemicolons)
2389 return !HasError;
2390 nextToken();
2391 break;
2392 case tok::comma:
2393 nextToken();
2394 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2395 addUnwrappedLine();
2396 break;
2397 default:
2398 nextToken();
2399 break;
2400 }
2401 } while (!eof());
2402 return false;
2403}
2404
2405/// \brief Parses a pair of parentheses (and everything between them).
2406/// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2407/// double ampersands. This only counts for the current parens scope.
2408void UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2409 assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2410 nextToken();
2411 do {
2412 switch (FormatTok->Tok.getKind()) {
2413 case tok::l_paren:
2414 parseParens();
2415 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2416 parseChildBlock();
2417 break;
2418 case tok::r_paren:
2419 nextToken();
2420 return;
2421 case tok::r_brace:
2422 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2423 return;
2424 case tok::l_square:
2425 tryToParseLambda();
2426 break;
2427 case tok::l_brace:
2428 if (!tryToParseBracedList())
2429 parseChildBlock();
2430 break;
2431 case tok::at:
2432 nextToken();
2433 if (FormatTok->is(tok::l_brace)) {
2434 nextToken();
2435 parseBracedList();
2436 }
2437 break;
2438 case tok::equal:
2439 if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2440 tryToParseChildBlock();
2441 else
2442 nextToken();
2443 break;
2444 case tok::kw_class:
2445 if (Style.isJavaScript())
2446 parseRecord(/*ParseAsExpr=*/true);
2447 else
2448 nextToken();
2449 break;
2450 case tok::identifier:
2451 if (Style.isJavaScript() &&
2452 (FormatTok->is(Keywords.kw_function) ||
2453 FormatTok->startsSequence(Keywords.kw_async,
2454 Keywords.kw_function))) {
2455 tryToParseJSFunction();
2456 } else {
2457 nextToken();
2458 }
2459 break;
2460 case tok::kw_requires: {
2461 auto RequiresToken = FormatTok;
2462 nextToken();
2463 parseRequiresExpression(RequiresToken);
2464 break;
2465 }
2466 case tok::ampamp:
2467 if (AmpAmpTokenType != TT_Unknown)
2468 FormatTok->setFinalizedType(AmpAmpTokenType);
2469 [[fallthrough]];
2470 default:
2471 nextToken();
2472 break;
2473 }
2474 } while (!eof());
2475}
2476
2477void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2478 if (!LambdaIntroducer) {
2479 assert(FormatTok->is(tok::l_square) && "'[' expected.");
2480 if (tryToParseLambda())
2481 return;
2482 }
2483 do {
2484 switch (FormatTok->Tok.getKind()) {
2485 case tok::l_paren:
2486 parseParens();
2487 break;
2488 case tok::r_square:
2489 nextToken();
2490 return;
2491 case tok::r_brace:
2492 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2493 return;
2494 case tok::l_square:
2495 parseSquare();
2496 break;
2497 case tok::l_brace: {
2498 if (!tryToParseBracedList())
2499 parseChildBlock();
2500 break;
2501 }
2502 case tok::at:
2503 nextToken();
2504 if (FormatTok->is(tok::l_brace)) {
2505 nextToken();
2506 parseBracedList();
2507 }
2508 break;
2509 default:
2510 nextToken();
2511 break;
2512 }
2513 } while (!eof());
2514}
2515
2516void UnwrappedLineParser::keepAncestorBraces() {
2517 if (!Style.RemoveBracesLLVM)
2518 return;
2519
2520 const int MaxNestingLevels = 2;
2521 const int Size = NestedTooDeep.size();
2522 if (Size >= MaxNestingLevels)
2523 NestedTooDeep[Size - MaxNestingLevels] = true;
2524 NestedTooDeep.push_back(false);
2525}
2526
2528 for (const auto &Token : llvm::reverse(Line.Tokens))
2529 if (Token.Tok->isNot(tok::comment))
2530 return Token.Tok;
2531
2532 return nullptr;
2533}
2534
2535void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2536 FormatToken *Tok = nullptr;
2537
2538 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2539 PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) {
2541 ? getLastNonComment(*Line)
2542 : Line->Tokens.back().Tok;
2543 assert(Tok);
2544 if (Tok->BraceCount < 0) {
2545 assert(Tok->BraceCount == -1);
2546 Tok = nullptr;
2547 } else {
2548 Tok->BraceCount = -1;
2549 }
2550 }
2551
2552 addUnwrappedLine();
2553 ++Line->Level;
2554 parseStructuralElement();
2555
2556 if (Tok) {
2557 assert(!Line->InPPDirective);
2558 Tok = nullptr;
2559 for (const auto &L : llvm::reverse(*CurrentLines)) {
2560 if (!L.InPPDirective && getLastNonComment(L)) {
2561 Tok = L.Tokens.back().Tok;
2562 break;
2563 }
2564 }
2565 assert(Tok);
2566 ++Tok->BraceCount;
2567 }
2568
2569 if (CheckEOF && eof())
2570 addUnwrappedLine();
2571
2572 --Line->Level;
2573}
2574
2575static void markOptionalBraces(FormatToken *LeftBrace) {
2576 if (!LeftBrace)
2577 return;
2578
2579 assert(LeftBrace->is(tok::l_brace));
2580
2581 FormatToken *RightBrace = LeftBrace->MatchingParen;
2582 if (!RightBrace) {
2583 assert(!LeftBrace->Optional);
2584 return;
2585 }
2586
2587 assert(RightBrace->is(tok::r_brace));
2588 assert(RightBrace->MatchingParen == LeftBrace);
2589 assert(LeftBrace->Optional == RightBrace->Optional);
2590
2591 LeftBrace->Optional = true;
2592 RightBrace->Optional = true;
2593}
2594
2595void UnwrappedLineParser::handleAttributes() {
2596 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2597 if (FormatTok->is(TT_AttributeMacro))
2598 nextToken();
2599 handleCppAttributes();
2600}
2601
2602bool UnwrappedLineParser::handleCppAttributes() {
2603 // Handle [[likely]] / [[unlikely]] attributes.
2604 if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute()) {
2605 parseSquare();
2606 return true;
2607 }
2608 return false;
2609}
2610
2611/// Returns whether \c Tok begins a block.
2612bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2613 // FIXME: rename the function or make
2614 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2615 return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2616 : Tok.is(tok::l_brace);
2617}
2618
2619FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2620 bool KeepBraces) {
2621 assert(FormatTok->is(tok::kw_if) && "'if' expected");
2622 nextToken();
2623 if (FormatTok->is(tok::exclaim))
2624 nextToken();
2625
2626 bool KeepIfBraces = true;
2627 if (FormatTok->is(tok::kw_consteval)) {
2628 nextToken();
2629 } else {
2630 KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
2631 if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
2632 nextToken();
2633 if (FormatTok->is(tok::l_paren))
2634 parseParens();
2635 }
2636 handleAttributes();
2637
2638 bool NeedsUnwrappedLine = false;
2639 keepAncestorBraces();
2640
2641 FormatToken *IfLeftBrace = nullptr;
2642 IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2643
2644 if (isBlockBegin(*FormatTok)) {
2645 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2646 IfLeftBrace = FormatTok;
2647 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2648 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2649 /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
2650 if (Style.BraceWrapping.BeforeElse)
2651 addUnwrappedLine();
2652 else
2653 NeedsUnwrappedLine = true;
2654 } else {
2655 parseUnbracedBody();
2656 }
2657
2658 if (Style.RemoveBracesLLVM) {
2659 assert(!NestedTooDeep.empty());
2660 KeepIfBraces = KeepIfBraces ||
2661 (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2662 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2663 IfBlockKind == IfStmtKind::IfElseIf;
2664 }
2665
2666 bool KeepElseBraces = KeepIfBraces;
2667 FormatToken *ElseLeftBrace = nullptr;
2668 IfStmtKind Kind = IfStmtKind::IfOnly;
2669
2670 if (FormatTok->is(tok::kw_else)) {
2671 if (Style.RemoveBracesLLVM) {
2672 NestedTooDeep.back() = false;
2673 Kind = IfStmtKind::IfElse;
2674 }
2675 nextToken();
2676 handleAttributes();
2677 if (isBlockBegin(*FormatTok)) {
2678 const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
2679 FormatTok->setFinalizedType(TT_ElseLBrace);
2680 ElseLeftBrace = FormatTok;
2681 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2682 IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
2683 FormatToken *IfLBrace =
2684 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2685 /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
2686 if (FormatTok->is(tok::kw_else)) {
2687 KeepElseBraces = KeepElseBraces ||
2688 ElseBlockKind == IfStmtKind::IfOnly ||
2689 ElseBlockKind == IfStmtKind::IfElseIf;
2690 } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
2691 KeepElseBraces = true;
2692 assert(ElseLeftBrace->MatchingParen);
2693 markOptionalBraces(ElseLeftBrace);
2694 }
2695 addUnwrappedLine();
2696 } else if (FormatTok->is(tok::kw_if)) {
2697 const FormatToken *Previous = Tokens->getPreviousToken();
2698 assert(Previous);
2699 const bool IsPrecededByComment = Previous->is(tok::comment);
2700 if (IsPrecededByComment) {
2701 addUnwrappedLine();
2702 ++Line->Level;
2703 }
2704 bool TooDeep = true;
2705 if (Style.RemoveBracesLLVM) {
2706 Kind = IfStmtKind::IfElseIf;
2707 TooDeep = NestedTooDeep.pop_back_val();
2708 }
2709 ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
2710 if (Style.RemoveBracesLLVM)
2711 NestedTooDeep.push_back(TooDeep);
2712 if (IsPrecededByComment)
2713 --Line->Level;
2714 } else {
2715 parseUnbracedBody(/*CheckEOF=*/true);
2716 }
2717 } else {
2718 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
2719 if (NeedsUnwrappedLine)
2720 addUnwrappedLine();
2721 }
2722
2723 if (!Style.RemoveBracesLLVM)
2724 return nullptr;
2725
2726 assert(!NestedTooDeep.empty());
2727 KeepElseBraces = KeepElseBraces ||
2728 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
2729 NestedTooDeep.back();
2730
2731 NestedTooDeep.pop_back();
2732
2733 if (!KeepIfBraces && !KeepElseBraces) {
2734 markOptionalBraces(IfLeftBrace);
2735 markOptionalBraces(ElseLeftBrace);
2736 } else if (IfLeftBrace) {
2737 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
2738 if (IfRightBrace) {
2739 assert(IfRightBrace->MatchingParen == IfLeftBrace);
2740 assert(!IfLeftBrace->Optional);
2741 assert(!IfRightBrace->Optional);
2742 IfLeftBrace->MatchingParen = nullptr;
2743 IfRightBrace->MatchingParen = nullptr;
2744 }
2745 }
2746
2747 if (IfKind)
2748 *IfKind = Kind;
2749
2750 return IfLeftBrace;
2751}
2752
2753void UnwrappedLineParser::parseTryCatch() {
2754 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2755 nextToken();
2756 bool NeedsUnwrappedLine = false;
2757 if (FormatTok->is(tok::colon)) {
2758 // We are in a function try block, what comes is an initializer list.
2759 nextToken();
2760
2761 // In case identifiers were removed by clang-tidy, what might follow is
2762 // multiple commas in sequence - before the first identifier.
2763 while (FormatTok->is(tok::comma))
2764 nextToken();
2765
2766 while (FormatTok->is(tok::identifier)) {
2767 nextToken();
2768 if (FormatTok->is(tok::l_paren))
2769 parseParens();
2770 if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2771 FormatTok->is(tok::l_brace)) {
2772 do {
2773 nextToken();
2774 } while (!FormatTok->is(tok::r_brace));
2775 nextToken();
2776 }
2777
2778 // In case identifiers were removed by clang-tidy, what might follow is
2779 // multiple commas in sequence - after the first identifier.
2780 while (FormatTok->is(tok::comma))
2781 nextToken();
2782 }
2783 }
2784 // Parse try with resource.
2785 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2786 parseParens();
2787
2788 keepAncestorBraces();
2789
2790 if (FormatTok->is(tok::l_brace)) {
2791 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2792 parseBlock();
2793 if (Style.BraceWrapping.BeforeCatch)
2794 addUnwrappedLine();
2795 else
2796 NeedsUnwrappedLine = true;
2797 } else if (!FormatTok->is(tok::kw_catch)) {
2798 // The C++ standard requires a compound-statement after a try.
2799 // If there's none, we try to assume there's a structuralElement
2800 // and try to continue.
2801 addUnwrappedLine();
2802 ++Line->Level;
2803 parseStructuralElement();
2804 --Line->Level;
2805 }
2806 while (true) {
2807 if (FormatTok->is(tok::at))
2808 nextToken();
2809 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2810 tok::kw___finally) ||
2811 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2812 FormatTok->is(Keywords.kw_finally)) ||
2813 (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2814 FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
2815 break;
2816 }
2817 nextToken();
2818 while (FormatTok->isNot(tok::l_brace)) {
2819 if (FormatTok->is(tok::l_paren)) {
2820 parseParens();
2821 continue;
2822 }
2823 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2824 if (Style.RemoveBracesLLVM)
2825 NestedTooDeep.pop_back();
2826 return;
2827 }
2828 nextToken();
2829 }
2830 NeedsUnwrappedLine = false;
2831 Line->MustBeDeclaration = false;
2832 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2833 parseBlock();
2834 if (Style.BraceWrapping.BeforeCatch)
2835 addUnwrappedLine();
2836 else
2837 NeedsUnwrappedLine = true;
2838 }
2839
2840 if (Style.RemoveBracesLLVM)
2841 NestedTooDeep.pop_back();
2842
2843 if (NeedsUnwrappedLine)
2844 addUnwrappedLine();
2845}
2846
2847void UnwrappedLineParser::parseNamespace() {
2848 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2849 "'namespace' expected");
2850
2851 const FormatToken &InitialToken = *FormatTok;
2852 nextToken();
2853 if (InitialToken.is(TT_NamespaceMacro)) {
2854 parseParens();
2855 } else {
2856 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2857 tok::l_square, tok::period, tok::l_paren) ||
2858 (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
2859 if (FormatTok->is(tok::l_square))
2860 parseSquare();
2861 else if (FormatTok->is(tok::l_paren))
2862 parseParens();
2863 else
2864 nextToken();
2865 }
2866 }
2867 if (FormatTok->is(tok::l_brace)) {
2868 if (ShouldBreakBeforeBrace(Style, InitialToken))
2869 addUnwrappedLine();
2870
2871 unsigned AddLevels =
2874 DeclarationScopeStack.size() > 1)
2875 ? 1u
2876 : 0u;
2877 bool ManageWhitesmithsBraces =
2878 AddLevels == 0u &&
2880
2881 // If we're in Whitesmiths mode, indent the brace if we're not indenting
2882 // the whole block.
2883 if (ManageWhitesmithsBraces)
2884 ++Line->Level;
2885
2886 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
2887 /*KeepBraces=*/true, /*IfKind=*/nullptr,
2888 ManageWhitesmithsBraces);
2889
2890 // Munch the semicolon after a namespace. This is more common than one would
2891 // think. Putting the semicolon into its own line is very ugly.
2892 if (FormatTok->is(tok::semi))
2893 nextToken();
2894
2895 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2896
2897 if (ManageWhitesmithsBraces)
2898 --Line->Level;
2899 }
2900 // FIXME: Add error handling.
2901}
2902
2903void UnwrappedLineParser::parseNew() {
2904 assert(FormatTok->is(tok::kw_new) && "'new' expected");
2905 nextToken();
2906
2907 if (Style.isCSharp()) {
2908 do {
2909 // Handle constructor invocation, e.g. `new(field: value)`.
2910 if (FormatTok->is(tok::l_paren))
2911 parseParens();
2912
2913 // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
2914 if (FormatTok->is(tok::l_brace))
2915 parseBracedList();
2916
2917 if (FormatTok->isOneOf(tok::semi, tok::comma))
2918 return;
2919
2920 nextToken();
2921 } while (!eof());
2922 }
2923
2924 if (Style.Language != FormatStyle::LK_Java)
2925 return;
2926
2927 // In Java, we can parse everything up to the parens, which aren't optional.
2928 do {
2929 // There should not be a ;, { or } before the new's open paren.
2930 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2931 return;
2932
2933 // Consume the parens.
2934 if (FormatTok->is(tok::l_paren)) {
2935 parseParens();
2936
2937 // If there is a class body of an anonymous class, consume that as child.
2938 if (FormatTok->is(tok::l_brace))
2939 parseChildBlock();
2940 return;
2941 }
2942 nextToken();
2943 } while (!eof());
2944}
2945
2946void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
2947 keepAncestorBraces();
2948
2949 if (isBlockBegin(*FormatTok)) {
2950 if (!KeepBraces)
2951 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2952 FormatToken *LeftBrace = FormatTok;
2953 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2954 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2955 /*MunchSemi=*/true, KeepBraces);
2956 if (!KeepBraces) {
2957 assert(!NestedTooDeep.empty());
2958 if (!NestedTooDeep.back())
2959 markOptionalBraces(LeftBrace);
2960 }
2961 if (WrapRightBrace)
2962 addUnwrappedLine();
2963 } else {
2964 parseUnbracedBody();
2965 }
2966
2967 if (!KeepBraces)
2968 NestedTooDeep.pop_back();
2969}
2970
2971void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
2972 assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
2973 (Style.isVerilog() &&
2974 FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
2975 Keywords.kw_always_ff, Keywords.kw_always_latch,
2976 Keywords.kw_final, Keywords.kw_initial,
2977 Keywords.kw_foreach, Keywords.kw_forever,
2978 Keywords.kw_repeat))) &&
2979 "'for', 'while' or foreach macro expected");
2980 const bool KeepBraces = !Style.RemoveBracesLLVM ||
2981 !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
2982
2983 nextToken();
2984 // JS' for await ( ...
2985 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
2986 nextToken();
2987 if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
2988 nextToken();
2989 if (HasParens && FormatTok->is(tok::l_paren))
2990 parseParens();
2991 // Event control.
2992 if (Style.isVerilog())
2993 parseVerilogSensitivityList();
2994
2995 handleAttributes();
2996 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
2997}
2998
2999void UnwrappedLineParser::parseDoWhile() {
3000 assert(FormatTok->is(tok::kw_do) && "'do' expected");
3001 nextToken();
3002
3003 parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
3004
3005 // FIXME: Add error handling.
3006 if (!FormatTok->is(tok::kw_while)) {
3007 addUnwrappedLine();
3008 return;
3009 }
3010
3011 // If in Whitesmiths mode, the line with the while() needs to be indented
3012 // to the same level as the block.
3014 ++Line->Level;
3015
3016 nextToken();
3017 parseStructuralElement();
3018}
3019
3020void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
3021 nextToken();
3022 unsigned OldLineLevel = Line->Level;
3023 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3024 --Line->Level;
3025 if (LeftAlignLabel)
3026 Line->Level = 0;
3027
3028 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3029 FormatTok->is(tok::l_brace)) {
3030
3031 CompoundStatementIndenter Indenter(this, Line->Level,
3034 parseBlock();
3035 if (FormatTok->is(tok::kw_break)) {
3038 addUnwrappedLine();
3039 if (!Style.IndentCaseBlocks &&
3041 ++Line->Level;
3042 }
3043 }
3044 parseStructuralElement();
3045 }
3046 addUnwrappedLine();
3047 } else {
3048 if (FormatTok->is(tok::semi))
3049 nextToken();
3050 addUnwrappedLine();
3051 }
3052 Line->Level = OldLineLevel;
3053 if (FormatTok->isNot(tok::l_brace)) {
3054 parseStructuralElement();
3055 addUnwrappedLine();
3056 }
3057}
3058
3059void UnwrappedLineParser::parseCaseLabel() {
3060 assert(FormatTok->is(tok::kw_case) && "'case' expected");
3061
3062 // FIXME: fix handling of complex expressions here.
3063 do {
3064 nextToken();
3065 } while (!eof() && !FormatTok->is(tok::colon));
3066 parseLabel();
3067}
3068
3069void UnwrappedLineParser::parseSwitch() {
3070 assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3071 nextToken();
3072 if (FormatTok->is(tok::l_paren))
3073 parseParens();
3074
3075 keepAncestorBraces();
3076
3077 if (FormatTok->is(tok::l_brace)) {
3078 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3079 parseBlock();
3080 addUnwrappedLine();
3081 } else {
3082 addUnwrappedLine();
3083 ++Line->Level;
3084 parseStructuralElement();
3085 --Line->Level;
3086 }
3087
3088 if (Style.RemoveBracesLLVM)
3089 NestedTooDeep.pop_back();
3090}
3091
3092// Operators that can follow a C variable.
3094 switch (kind) {
3095 case tok::ampamp:
3096 case tok::ampequal:
3097 case tok::arrow:
3098 case tok::caret:
3099 case tok::caretequal:
3100 case tok::comma:
3101 case tok::ellipsis:
3102 case tok::equal:
3103 case tok::equalequal:
3104 case tok::exclaim:
3105 case tok::exclaimequal:
3106 case tok::greater:
3107 case tok::greaterequal:
3108 case tok::greatergreater:
3109 case tok::greatergreaterequal:
3110 case tok::l_paren:
3111 case tok::l_square:
3112 case tok::less:
3113 case tok::lessequal:
3114 case tok::lessless:
3115 case tok::lesslessequal:
3116 case tok::minus:
3117 case tok::minusequal:
3118 case tok::minusminus:
3119 case tok::percent:
3120 case tok::percentequal:
3121 case tok::period:
3122 case tok::pipe:
3123 case tok::pipeequal:
3124 case tok::pipepipe:
3125 case tok::plus:
3126 case tok::plusequal:
3127 case tok::plusplus:
3128 case tok::question:
3129 case tok::r_brace:
3130 case tok::r_paren:
3131 case tok::r_square:
3132 case tok::semi:
3133 case tok::slash:
3134 case tok::slashequal:
3135 case tok::star:
3136 case tok::starequal:
3137 return true;
3138 default:
3139 return false;
3140 }
3141}
3142
3143void UnwrappedLineParser::parseAccessSpecifier() {
3144 FormatToken *AccessSpecifierCandidate = FormatTok;
3145 nextToken();
3146 // Understand Qt's slots.
3147 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3148 nextToken();
3149 // Otherwise, we don't know what it is, and we'd better keep the next token.
3150 if (FormatTok->is(tok::colon)) {
3151 nextToken();
3152 addUnwrappedLine();
3153 } else if (!FormatTok->is(tok::coloncolon) &&
3154 !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
3155 // Not a variable name nor namespace name.
3156 addUnwrappedLine();
3157 } else if (AccessSpecifierCandidate) {
3158 // Consider the access specifier to be a C identifier.
3159 AccessSpecifierCandidate->Tok.setKind(tok::identifier);
3160 }
3161}
3162
3163/// \brief Parses a requires, decides if it is a clause or an expression.
3164/// \pre The current token has to be the requires keyword.
3165/// \returns true if it parsed a clause.
3166bool clang::format::UnwrappedLineParser::parseRequires() {
3167 assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3168 auto RequiresToken = FormatTok;
3169
3170 // We try to guess if it is a requires clause, or a requires expression. For
3171 // that we first consume the keyword and check the next token.
3172 nextToken();
3173
3174 switch (FormatTok->Tok.getKind()) {
3175 case tok::l_brace:
3176 // This can only be an expression, never a clause.
3177 parseRequiresExpression(RequiresToken);
3178 return false;
3179 case tok::l_paren:
3180 // Clauses and expression can start with a paren, it's unclear what we have.
3181 break;
3182 default:
3183 // All other tokens can only be a clause.
3184 parseRequiresClause(RequiresToken);
3185 return true;
3186 }
3187
3188 // Looking forward we would have to decide if there are function declaration
3189 // like arguments to the requires expression:
3190 // requires (T t) {
3191 // Or there is a constraint expression for the requires clause:
3192 // requires (C<T> && ...
3193
3194 // But first let's look behind.
3195 auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3196
3197 if (!PreviousNonComment ||
3198 PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
3199 // If there is no token, or an expression left brace, we are a requires
3200 // clause within a requires expression.
3201 parseRequiresClause(RequiresToken);
3202 return true;
3203 }
3204
3205 switch (PreviousNonComment->Tok.getKind()) {
3206 case tok::greater:
3207 case tok::r_paren:
3208 case tok::kw_noexcept:
3209 case tok::kw_const:
3210 // This is a requires clause.
3211 parseRequiresClause(RequiresToken);
3212 return true;
3213 case tok::amp:
3214 case tok::ampamp: {
3215 // This can be either:
3216 // if (... && requires (T t) ...)
3217 // Or
3218 // void member(...) && requires (C<T> ...
3219 // We check the one token before that for a const:
3220 // void member(...) const && requires (C<T> ...
3221 auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3222 if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3223 parseRequiresClause(RequiresToken);
3224 return true;
3225 }
3226 break;
3227 }
3228 default:
3229 if (PreviousNonComment->isTypeOrIdentifier()) {
3230 // This is a requires clause.
3231 parseRequiresClause(RequiresToken);
3232 return true;
3233 }
3234 // It's an expression.
3235 parseRequiresExpression(RequiresToken);
3236 return false;
3237 }
3238
3239 // Now we look forward and try to check if the paren content is a parameter
3240 // list. The parameters can be cv-qualified and contain references or
3241 // pointers.
3242 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3243 // of stuff: typename, const, *, &, &&, ::, identifiers.
3244
3245 unsigned StoredPosition = Tokens->getPosition();
3246 FormatToken *NextToken = Tokens->getNextToken();
3247 int Lookahead = 0;
3248 auto PeekNext = [&Lookahead, &NextToken, this] {
3249 ++Lookahead;
3250 NextToken = Tokens->getNextToken();
3251 };
3252
3253 bool FoundType = false;
3254 bool LastWasColonColon = false;
3255 int OpenAngles = 0;
3256
3257 for (; Lookahead < 50; PeekNext()) {
3258 switch (NextToken->Tok.getKind()) {
3259 case tok::kw_volatile:
3260 case tok::kw_const:
3261 case tok::comma:
3262 FormatTok = Tokens->setPosition(StoredPosition);
3263 parseRequiresExpression(RequiresToken);
3264 return false;
3265 case tok::r_paren:
3266 case tok::pipepipe:
3267 FormatTok = Tokens->setPosition(StoredPosition);
3268 parseRequiresClause(RequiresToken);
3269 return true;
3270 case tok::eof:
3271 // Break out of the loop.
3272 Lookahead = 50;
3273 break;
3274 case tok::coloncolon:
3275 LastWasColonColon = true;
3276 break;
3277 case tok::identifier:
3278 if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3279 FormatTok = Tokens->setPosition(StoredPosition);
3280 parseRequiresExpression(RequiresToken);
3281 return false;
3282 }
3283 FoundType = true;
3284 LastWasColonColon = false;
3285 break;
3286 case tok::less:
3287 ++OpenAngles;
3288 break;
3289 case tok::greater:
3290 --OpenAngles;
3291 break;
3292 default:
3293 if (NextToken->isSimpleTypeSpecifier()) {
3294 FormatTok = Tokens->setPosition(StoredPosition);
3295 parseRequiresExpression(RequiresToken);
3296 return false;
3297 }
3298 break;
3299 }
3300 }
3301 // This seems to be a complicated expression, just assume it's a clause.
3302 FormatTok = Tokens->setPosition(StoredPosition);
3303 parseRequiresClause(RequiresToken);
3304 return true;
3305}
3306
3307/// \brief Parses a requires clause.
3308/// \param RequiresToken The requires keyword token, which starts this clause.
3309/// \pre We need to be on the next token after the requires keyword.
3310/// \sa parseRequiresExpression
3311///
3312/// Returns if it either has finished parsing the clause, or it detects, that
3313/// the clause is incorrect.
3314void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3315 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3316 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3317
3318 // If there is no previous token, we are within a requires expression,
3319 // otherwise we will always have the template or function declaration in front
3320 // of it.
3321 bool InRequiresExpression =
3322 !RequiresToken->Previous ||
3323 RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3324
3325 RequiresToken->setFinalizedType(InRequiresExpression
3326 ? TT_RequiresClauseInARequiresExpression
3327 : TT_RequiresClause);
3328
3329 // NOTE: parseConstraintExpression is only ever called from this function.
3330 // It could be inlined into here.
3331 parseConstraintExpression();
3332
3333 if (!InRequiresExpression)
3334 FormatTok->Previous->ClosesRequiresClause = true;
3335}
3336
3337/// \brief Parses a requires expression.
3338/// \param RequiresToken The requires keyword token, which starts this clause.
3339/// \pre We need to be on the next token after the requires keyword.
3340/// \sa parseRequiresClause
3341///
3342/// Returns if it either has finished parsing the expression, or it detects,
3343/// that the expression is incorrect.
3344void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3345 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3346 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3347
3348 RequiresToken->setFinalizedType(TT_RequiresExpression);
3349
3350 if (FormatTok->is(tok::l_paren)) {
3351 FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3352 parseParens();
3353 }
3354
3355 if (FormatTok->is(tok::l_brace)) {
3356 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3357 parseChildBlock(/*CanContainBracedList=*/false,
3358 /*NextLBracesType=*/TT_CompoundRequirementLBrace);
3359 }
3360}
3361
3362/// \brief Parses a constraint expression.
3363///
3364/// This is the body of a requires clause. It returns, when the parsing is
3365/// complete, or the expression is incorrect.
3366void UnwrappedLineParser::parseConstraintExpression() {
3367 // The special handling for lambdas is needed since tryToParseLambda() eats a
3368 // token and if a requires expression is the last part of a requires clause
3369 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3370 // not set on the correct token. Thus we need to be aware if we even expect a
3371 // lambda to be possible.
3372 // template <typename T> requires requires { ... } [[nodiscard]] ...;
3373 bool LambdaNextTimeAllowed = true;
3374 do {
3375 bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);
3376
3377 switch (FormatTok->Tok.getKind()) {
3378 case tok::kw_requires: {
3379 auto RequiresToken = FormatTok;
3380 nextToken();
3381 parseRequiresExpression(RequiresToken);
3382 break;
3383 }
3384
3385 case tok::l_paren:
3386 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3387 break;
3388
3389 case tok::l_square:
3390 if (!LambdaThisTimeAllowed || !tryToParseLambda())
3391 return;
3392 break;
3393
3394 case tok::kw_const:
3395 case tok::semi:
3396 case tok::kw_class:
3397 case tok::kw_struct:
3398 case tok::kw_union:
3399 return;
3400
3401 case tok::l_brace:
3402 // Potential function body.
3403 return;
3404
3405 case tok::ampamp:
3406 case tok::pipepipe:
3407 FormatTok->setFinalizedType(TT_BinaryOperator);
3408 nextToken();
3409 LambdaNextTimeAllowed = true;
3410 break;
3411
3412 case tok::comma:
3413 case tok::comment:
3414 LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3415 nextToken();
3416 break;
3417
3418 case tok::kw_sizeof:
3419 case tok::greater:
3420 case tok::greaterequal:
3421 case tok::greatergreater:
3422 case tok::less:
3423 case tok::lessequal:
3424 case tok::lessless:
3425 case tok::equalequal:
3426 case tok::exclaim:
3427 case tok::exclaimequal:
3428 case tok::plus:
3429 case tok::minus:
3430 case tok::star:
3431 case tok::slash:
3432 LambdaNextTimeAllowed = true;
3433 // Just eat them.
3434 nextToken();
3435 break;
3436
3437 case tok::numeric_constant:
3438 case tok::coloncolon:
3439 case tok::kw_true:
3440 case tok::kw_false:
3441 // Just eat them.
3442 nextToken();
3443 break;
3444
3445 case tok::kw_static_cast:
3446 case tok::kw_const_cast:
3447 case tok::kw_reinterpret_cast:
3448 case tok::kw_dynamic_cast:
3449 nextToken();
3450 if (!FormatTok->is(tok::less))
3451 return;
3452
3453 nextToken();
3454 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3455 /*ClosingBraceKind=*/tok::greater);
3456 break;
3457
3458 default:
3459 if (!FormatTok->Tok.getIdentifierInfo()) {
3460 // Identifiers are part of the default case, we check for more then
3461 // tok::identifier to handle builtin type traits.
3462 return;
3463 }
3464
3465 // We need to differentiate identifiers for a template deduction guide,
3466 // variables, or function return types (the constraint expression has
3467 // ended before that), and basically all other cases. But it's easier to
3468 // check the other way around.
3469 assert(FormatTok->Previous);
3470 switch (FormatTok->Previous->Tok.getKind()) {
3471 case tok::coloncolon: // Nested identifier.
3472 case tok::ampamp: // Start of a function or variable for the
3473 case tok::pipepipe: // constraint expression. (binary)
3474 case tok::exclaim: // The same as above, but unary.
3475 case tok::kw_requires: // Initial identifier of a requires clause.
3476 case tok::equal: // Initial identifier of a concept declaration.
3477 break;
3478 default:
3479 return;
3480 }
3481
3482 // Read identifier with optional template declaration.
3483 nextToken();
3484 if (FormatTok->is(tok::less)) {
3485 nextToken();
3486 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3487 /*ClosingBraceKind=*/tok::greater);
3488 }
3489 break;
3490 }
3491 } while (!eof());
3492}
3493
3494bool UnwrappedLineParser::parseEnum() {
3495 const FormatToken &InitialToken = *FormatTok;
3496
3497 // Won't be 'enum' for NS_ENUMs.
3498 if (FormatTok->is(tok::kw_enum))
3499 nextToken();
3500
3501 // In TypeScript, "enum" can also be used as property name, e.g. in interface
3502 // declarations. An "enum" keyword followed by a colon would be a syntax
3503 // error and thus assume it is just an identifier.
3504 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3505 return false;
3506
3507 // In protobuf, "enum" can be used as a field name.
3508 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3509 return false;
3510
3511 // Eat up enum class ...
3512 if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3513 nextToken();
3514
3515 while (FormatTok->Tok.getIdentifierInfo() ||
3516 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3517 tok::greater, tok::comma, tok::question,
3518 tok::l_square, tok::r_square)) {
3519 nextToken();
3520 // We can have macros or attributes in between 'enum' and the enum name.
3521 if (FormatTok->is(tok::l_paren))
3522 parseParens();
3523 if (FormatTok->is(TT_AttributeSquare)) {
3524 parseSquare();
3525 // Consume the closing TT_AttributeSquare.
3526 if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
3527 nextToken();
3528 }
3529 if (FormatTok->is(tok::identifier)) {
3530 nextToken();
3531 // If there are two identifiers in a row, this is likely an elaborate
3532 // return type. In Java, this can be "implements", etc.
3533 if (Style.isCpp() && FormatTok->is(tok::identifier))
3534 return false;
3535 }
3536 }
3537
3538 // Just a declaration or something is wrong.
3539 if (FormatTok->isNot(tok::l_brace))
3540 return true;
3541 FormatTok->setFinalizedType(TT_EnumLBrace);
3542 FormatTok->setBlockKind(BK_Block);
3543
3544 if (Style.Language == FormatStyle::LK_Java) {
3545 // Java enums are different.
3546 parseJavaEnumBody();
3547 return true;
3548 }
3549 if (Style.Language == FormatStyle::LK_Proto) {
3550 parseBlock(/*MustBeDeclaration=*/true);
3551 return true;
3552 }
3553
3554 if (!Style.AllowShortEnumsOnASingleLine &&
3555 ShouldBreakBeforeBrace(Style, InitialToken)) {
3556 addUnwrappedLine();
3557 }
3558 // Parse enum body.
3559 nextToken();
3560 if (!Style.AllowShortEnumsOnASingleLine) {
3561 addUnwrappedLine();
3562 Line->Level += 1;
3563 }
3564 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
3565 /*IsEnum=*/true);
3567 Line->Level -= 1;
3568 if (HasError) {
3569 if (FormatTok->is(tok::semi))
3570 nextToken();
3571 addUnwrappedLine();
3572 }
3573 return true;
3574
3575 // There is no addUnwrappedLine() here so that we fall through to parsing a
3576 // structural element afterwards. Thus, in "enum A {} n, m;",
3577 // "} n, m;" will end up in one unwrapped line.
3578}
3579
3580bool UnwrappedLineParser::parseStructLike() {
3581 // parseRecord falls through and does not yet add an unwrapped line as a
3582 // record declaration or definition can start a structural element.
3583 parseRecord();
3584 // This does not apply to Java, JavaScript and C#.
3585 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3586 Style.isCSharp()) {
3587 if (FormatTok->is(tok::semi))
3588 nextToken();
3589 addUnwrappedLine();
3590 return true;
3591 }
3592 return false;
3593}
3594
3595namespace {
3596// A class used to set and restore the Token position when peeking
3597// ahead in the token source.
3598class ScopedTokenPosition {
3599 unsigned StoredPosition;
3600 FormatTokenSource *Tokens;
3601
3602public:
3603 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3604 assert(Tokens && "Tokens expected to not be null");
3605 StoredPosition = Tokens->getPosition();
3606 }
3607
3608 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3609};
3610} // namespace
3611
3612// Look to see if we have [[ by looking ahead, if
3613// its not then rewind to the original position.
3614bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3615 ScopedTokenPosition AutoPosition(Tokens);
3616 FormatToken *Tok = Tokens->getNextToken();
3617 // We already read the first [ check for the second.
3618 if (!Tok->is(tok::l_square))
3619 return false;
3620 // Double check that the attribute is just something
3621 // fairly simple.
3622 while (Tok->isNot(tok::eof)) {
3623 if (Tok->is(tok::r_square))
3624 break;
3625 Tok = Tokens->getNextToken();
3626 }
3627 if (Tok->is(tok::eof))
3628 return false;
3629 Tok = Tokens->getNextToken();
3630 if (!Tok->is(tok::r_square))
3631 return false;
3632 Tok = Tokens->getNextToken();
3633 if (Tok->is(tok::semi))
3634 return false;
3635 return true;
3636}
3637
3638void UnwrappedLineParser::parseJavaEnumBody() {
3639 assert(FormatTok->is(tok::l_brace));
3640 const FormatToken *OpeningBrace = FormatTok;
3641
3642 // Determine whether the enum is simple, i.e. does not have a semicolon or
3643 // constants with class bodies. Simple enums can be formatted like braced
3644 // lists, contracted to a single line, etc.
3645 unsigned StoredPosition = Tokens->getPosition();
3646 bool IsSimple = true;
3647 FormatToken *Tok = Tokens->getNextToken();
3648 while (!Tok->is(tok::eof)) {
3649 if (Tok->is(tok::r_brace))
3650 break;
3651 if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3652 IsSimple = false;
3653 break;
3654 }
3655 // FIXME: This will also mark enums with braces in the arguments to enum
3656 // constants as "not simple". This is probably fine in practice, though.
3657 Tok = Tokens->getNextToken();
3658 }
3659 FormatTok = Tokens->setPosition(StoredPosition);
3660
3661 if (IsSimple) {
3662 nextToken();
3663 parseBracedList();
3664 addUnwrappedLine();
3665 return;
3666 }
3667
3668 // Parse the body of a more complex enum.
3669 // First add a line for everything up to the "{".
3670 nextToken();
3671 addUnwrappedLine();
3672 ++Line->Level;
3673
3674 // Parse the enum constants.
3675 while (!eof()) {
3676 if (FormatTok->is(tok::l_brace)) {
3677 // Parse the constant's class body.
3678 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3679 /*MunchSemi=*/false);
3680 } else if (FormatTok->is(tok::l_paren)) {
3681 parseParens();
3682 } else if (FormatTok->is(tok::comma)) {
3683 nextToken();
3684 addUnwrappedLine();
3685 } else if (FormatTok->is(tok::semi)) {
3686 nextToken();
3687 addUnwrappedLine();
3688 break;
3689 } else if (FormatTok->is(tok::r_brace)) {
3690 addUnwrappedLine();
3691 break;
3692 } else {
3693 nextToken();
3694 }
3695 }
3696
3697 // Parse the class body after the enum's ";" if any.
3698 parseLevel(OpeningBrace);
3699 nextToken();
3700 --Line->Level;
3701 addUnwrappedLine();
3702}
3703
3704void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
3705 const FormatToken &InitialToken = *FormatTok;
3706 nextToken();
3707 handleAttributes();
3708
3709 // The actual identifier can be a nested name specifier, and in macros
3710 // it is often token-pasted.
3711 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
3712 tok::kw___attribute, tok::kw___declspec,
3713 tok::kw_alignas) ||
3714 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3715 FormatTok->isOneOf(tok::period, tok::comma))) {
3716 if (Style.isJavaScript() &&
3717 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
3718 // JavaScript/TypeScript supports inline object types in
3719 // extends/implements positions:
3720 // class Foo implements {bar: number} { }
3721 nextToken();
3722 if (FormatTok->is(tok::l_brace)) {
3723 tryToParseBracedList();
3724 continue;
3725 }
3726 }
3727 bool IsNonMacroIdentifier =
3728 FormatTok->is(tok::identifier) &&
3729 FormatTok->TokenText != FormatTok->TokenText.upper();
3730 nextToken();
3731 // We can have macros in between 'class' and the class name.
3732 if (!IsNonMacroIdentifier) {
3733 if (FormatTok->is(tok::l_paren)) {
3734 parseParens();
3735 }
3736 }
3737 }
3738
3739 // Note that parsing away template declarations here leads to incorrectly
3740 // accepting function declarations as record declarations.
3741 // In general, we cannot solve this problem. Consider:
3742 // class A<int> B() {}
3743 // which can be a function definition or a class definition when B() is a
3744 // macro. If we find enough real-world cases where this is a problem, we
3745 // can parse for the 'template' keyword in the beginning of the statement,
3746 // and thus rule out the record production in case there is no template
3747 // (this would still leave us with an ambiguity between template function
3748 // and class declarations).
3749 if (FormatTok->isOneOf(tok::colon, tok::less)) {
3750 do {
3751 if (FormatTok->is(tok::l_brace)) {
3752 calculateBraceTypes(/*ExpectClassBody=*/true);
3753 if (!tryToParseBracedList())
3754 break;
3755 }
3756 if (FormatTok->is(tok::l_square)) {
3757 FormatToken *Previous = FormatTok->Previous;
3758 if (!Previous ||
3759 !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
3760 // Don't try parsing a lambda if we had a closing parenthesis before,
3761 // it was probably a pointer to an array: int (*)[].
3762 if (!tryToParseLambda())
3763 break;
3764 } else {
3765 parseSquare();
3766 continue;
3767 }
3768 }
3769 if (FormatTok->is(tok::semi))
3770 return;
3771 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
3772 addUnwrappedLine();
3773 nextToken();
3774 parseCSharpGenericTypeConstraint();
3775 break;
3776 }
3777 nextToken();
3778 } while (!eof());
3779 }
3780
3781 auto GetBraceType = [](const FormatToken &RecordTok) {
3782 switch (RecordTok.Tok.getKind()) {
3783 case tok::kw_class:
3784 return TT_ClassLBrace;
3785 case tok::kw_struct:
3786 return TT_StructLBrace;
3787 case tok::kw_union:
3788 return TT_UnionLBrace;
3789 default:
3790 // Useful for e.g. interface.
3791 return TT_RecordLBrace;
3792 }
3793 };
3794 if (FormatTok->is(tok::l_brace)) {
3795 FormatTok->setFinalizedType(GetBraceType(InitialToken));
3796 if (ParseAsExpr) {
3797 parseChildBlock();
3798 } else {
3799 if (ShouldBreakBeforeBrace(Style, InitialToken))
3800 addUnwrappedLine();
3801
3802 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
3803 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
3804 }
3805 }
3806 // There is no addUnwrappedLine() here so that we fall through to parsing a
3807 // structural element afterwards. Thus, in "class A {} n, m;",
3808 // "} n, m;" will end up in one unwrapped line.
3809}
3810
3811void UnwrappedLineParser::parseObjCMethod() {
3812 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
3813 "'(' or identifier expected.");
3814 do {
3815 if (FormatTok->is(tok::semi)) {
3816 nextToken();
3817 addUnwrappedLine();
3818 return;
3819 } else if (FormatTok->is(tok::l_brace)) {
3820 if (Style.BraceWrapping.AfterFunction)
3821 addUnwrappedLine();
3822 parseBlock();
3823 addUnwrappedLine();
3824 return;
3825 } else {
3826 nextToken();
3827 }
3828 } while (!eof());
3829}
3830
3831void UnwrappedLineParser::parseObjCProtocolList() {
3832 assert(FormatTok->is(tok::less) && "'<' expected.");
3833 do {
3834 nextToken();
3835 // Early exit in case someone forgot a close angle.
3836 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3837 FormatTok->isObjCAtKeyword(tok::objc_end)) {
3838 return;
3839 }
3840 } while (!eof() && FormatTok->isNot(tok::greater));
3841 nextToken(); // Skip '>'.
3842}
3843
3844void UnwrappedLineParser::parseObjCUntilAtEnd() {
3845 do {
3846 if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
3847 nextToken();
3848 addUnwrappedLine();
3849 break;
3850 }
3851 if (FormatTok->is(tok::l_brace)) {
3852 parseBlock();
3853 // In ObjC interfaces, nothing should be following the "}".
3854 addUnwrappedLine();
3855 } else if (FormatTok->is(tok::r_brace)) {
3856 // Ignore stray "}". parseStructuralElement doesn't consume them.
3857 nextToken();
3858 addUnwrappedLine();
3859 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
3860 nextToken();
3861 parseObjCMethod();
3862 } else {
3863 parseStructuralElement();
3864 }
3865 } while (!eof());
3866}
3867
3868void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
3869 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
3870 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
3871 nextToken();
3872 nextToken(); // interface name
3873
3874 // @interface can be followed by a lightweight generic
3875 // specialization list, then either a base class or a category.
3876 if (FormatTok->is(tok::less))
3877 parseObjCLightweightGenerics();
3878 if (FormatTok->is(tok::colon)) {
3879 nextToken();
3880 nextToken(); // base class name
3881 // The base class can also have lightweight generics applied to it.
3882 if (FormatTok->is(tok::less))
3883 parseObjCLightweightGenerics();
3884 } else if (FormatTok->is(tok::l_paren)) {
3885 // Skip category, if present.
3886 parseParens();
3887 }
3888
3889 if (FormatTok->is(tok::less))
3890 parseObjCProtocolList();
3891
3892 if (FormatTok->is(tok::l_brace)) {
3894 addUnwrappedLine();
3895 parseBlock(/*MustBeDeclaration=*/true);
3896 }
3897
3898 // With instance variables, this puts '}' on its own line. Without instance
3899 // variables, this ends the @interface line.
3900 addUnwrappedLine();
3901
3902 parseObjCUntilAtEnd();
3903}
3904
3905void UnwrappedLineParser::parseObjCLightweightGenerics() {
3906 assert(FormatTok->is(tok::less));
3907 // Unlike protocol lists, generic parameterizations support
3908 // nested angles:
3909 //
3910 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
3911 // NSObject <NSCopying, NSSecureCoding>
3912 //
3913 // so we need to count how many open angles we have left.
3914 unsigned NumOpenAngles = 1;
3915 do {
3916 nextToken();
3917 // Early exit in case someone forgot a close angle.
3918 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3919 FormatTok->isObjCAtKeyword(tok::objc_end)) {
3920 break;
3921 }
3922 if (FormatTok->is(tok::less)) {
3923 ++NumOpenAngles;
3924 } else if (FormatTok->is(tok::greater)) {
3925 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
3926 --NumOpenAngles;
3927 }
3928 } while (!eof() && NumOpenAngles != 0);
3929 nextToken(); // Skip '>'.
3930}
3931
3932// Returns true for the declaration/definition form of @protocol,
3933// false for the expression form.
3934bool UnwrappedLineParser::parseObjCProtocol() {
3935 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
3936 nextToken();
3937
3938 if (FormatTok->is(tok::l_paren)) {
3939 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
3940 return false;
3941 }
3942
3943 // The definition/declaration form,
3944 // @protocol Foo
3945 // - (int)someMethod;
3946 // @end
3947
3948 nextToken(); // protocol name
3949
3950 if (FormatTok->is(tok::less))
3951 parseObjCProtocolList();
3952
3953 // Check for protocol declaration.
3954 if (FormatTok->is(tok::semi)) {
3955 nextToken();
3956 addUnwrappedLine();
3957 return true;
3958 }
3959
3960 addUnwrappedLine();
3961 parseObjCUntilAtEnd();
3962 return true;
3963}
3964
3965void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
3966 bool IsImport = FormatTok->is(Keywords.kw_import);
3967 assert(IsImport || FormatTok->is(tok::kw_export));
3968 nextToken();
3969
3970 // Consume the "default" in "export default class/function".
3971 if (FormatTok->is(tok::kw_default))
3972 nextToken();
3973
3974 // Consume "async function", "function" and "default function", so that these
3975 // get parsed as free-standing JS functions, i.e. do not require a trailing
3976 // semicolon.
3977 if (FormatTok->is(Keywords.kw_async))
3978 nextToken();
3979 if (FormatTok->is(Keywords.kw_function)) {
3980 nextToken();
3981 return;
3982 }
3983
3984 // For imports, `export *`, `export {...}`, consume the rest of the line up
3985 // to the terminating `;`. For everything else, just return and continue
3986 // parsing the structural element, i.e. the declaration or expression for
3987 // `export default`.
3988 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
3989 !FormatTok->isStringLiteral()) {
3990 return;
3991 }
3992
3993 while (!eof()) {
3994 if (FormatTok->is(tok::semi))
3995 return;
3996 if (Line->Tokens.empty()) {
3997 // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3998 // import statement should terminate.
3999 return;
4000 }
4001 if (FormatTok->is(tok::l_brace)) {
4002 FormatTok->setBlockKind(BK_Block);
4003 nextToken();
4004 parseBracedList();
4005 } else {
4006 nextToken();
4007 }
4008 }
4009}
4010
4011void UnwrappedLineParser::parseStatementMacro() {
4012 nextToken();
4013 if (FormatTok->is(tok::l_paren))
4014 parseParens();
4015 if (FormatTok->is(tok::semi))
4016 nextToken();
4017 addUnwrappedLine();
4018}
4019
4020void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4021 // consume things like a::`b.c[d:e] or a::*
4022 while (true) {
4023 if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar,
4024 tok::coloncolon, tok::hash) ||
4025 Keywords.isVerilogIdentifier(*FormatTok)) {
4026 nextToken();
4027 } else if (FormatTok->is(tok::l_square)) {
4028 parseSquare();
4029 } else {
4030 break;
4031 }
4032 }
4033}
4034
4035void UnwrappedLineParser::parseVerilogSensitivityList() {
4036 if (!FormatTok->is(tok::at))
4037 return;
4038 nextToken();
4039 // A block event expression has 2 at signs.
4040 if (FormatTok->is(tok::at))
4041 nextToken();
4042 switch (FormatTok->Tok.getKind()) {
4043 case tok::star:
4044 nextToken();
4045 break;
4046 case tok::l_paren:
4047 parseParens();
4048 break;
4049 default:
4050 parseVerilogHierarchyIdentifier();
4051 break;
4052 }
4053}
4054
4055unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4056 unsigned AddLevels = 0;
4057
4058 if (FormatTok->is(Keywords.kw_clocking)) {
4059 nextToken();
4060 if (Keywords.isVerilogIdentifier(*FormatTok))
4061 nextToken();
4062 parseVerilogSensitivityList();
4063 if (FormatTok->is(tok::semi))
4064 nextToken();
4065 } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex,
4066 Keywords.kw_casez, Keywords.kw_randcase,
4067 Keywords.kw_randsequence)) {
4068 if (Style.IndentCaseLabels)
4069 AddLevels++;
4070 nextToken();
4071 if (FormatTok->is(tok::l_paren)) {
4072 FormatTok->setFinalizedType(TT_ConditionLParen);
4073 parseParens();
4074 }
4075 if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches))
4076 nextToken();
4077 // The case header has no semicolon.
4078 } else {
4079 // "module" etc.
4080 nextToken();
4081 // all the words like the name of the module and specifiers like
4082 // "automatic" and the width of function return type
4083 while (true) {
4084 if (FormatTok->is(tok::l_square)) {
4085 auto Prev = FormatTok->getPreviousNonComment();
4086 if (Prev && Keywords.isVerilogIdentifier(*Prev))
4087 Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
4088 parseSquare();
4089 } else if (Keywords.isVerilogIdentifier(*FormatTok) ||
4090 FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) {
4091 nextToken();
4092 } else {
4093 break;
4094 }
4095 }
4096
4097 auto NewLine = [this]() {
4098 addUnwrappedLine();
4099 Line->IsContinuation = true;
4100 };
4101
4102 // package imports
4103 while (FormatTok->is(Keywords.kw_import)) {
4104 NewLine();
4105 nextToken();
4106 parseVerilogHierarchyIdentifier();
4107 if (FormatTok->is(tok::semi))
4108 nextToken();
4109 }
4110
4111 // parameters and ports
4112 if (FormatTok->is(Keywords.kw_verilogHash)) {
4113 NewLine();
4114 nextToken();
4115 if (FormatTok->is(tok::l_paren))
4116 parseParens();
4117 }
4118 if (FormatTok->is(tok::l_paren)) {
4119 NewLine();
4120 parseParens();
4121 }
4122
4123 // extends and implements
4124 if (FormatTok->is(Keywords.kw_extends)) {
4125 NewLine();
4126 nextToken();
4127 parseVerilogHierarchyIdentifier();
4128 if (FormatTok->is(tok::l_paren))
4129 parseParens();
4130 }
4131 if (FormatTok->is(Keywords.kw_implements)) {
4132 NewLine();
4133 do {
4134 nextToken();
4135 parseVerilogHierarchyIdentifier();
4136 } while (FormatTok->is(tok::comma));
4137 }
4138
4139 // Coverage event for cover groups.
4140 if (FormatTok->is(tok::at)) {
4141 NewLine();
4142 parseVerilogSensitivityList();
4143 }
4144
4145 if (FormatTok->is(tok::semi))
4146 nextToken(/*LevelDifference=*/1);
4147 addUnwrappedLine();
4148 }
4149
4150 return AddLevels;
4151}
4152
4153void UnwrappedLineParser::parseVerilogTable() {
4154 assert(FormatTok->is(Keywords.kw_table));
4155 nextToken(/*LevelDifference=*/1);
4156 addUnwrappedLine();
4157
4158 auto InitialLevel = Line->Level++;
4159 while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) {
4160 FormatToken *Tok = FormatTok;
4161 nextToken();
4162 if (Tok->is(tok::semi))
4163 addUnwrappedLine();
4164 else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus))
4165 Tok->setFinalizedType(TT_VerilogTableItem);
4166 }
4167 Line->Level = InitialLevel;
4168 nextToken(/*LevelDifference=*/-1);
4169 addUnwrappedLine();
4170}
4171
4172void UnwrappedLineParser::parseVerilogCaseLabel() {
4173 // The label will get unindented in AnnotatingParser. If there are no leading
4174 // spaces, indent the rest here so that things inside the block will be
4175 // indented relative to things outside. We don't use parseLabel because we
4176 // don't know whether this colon is a label or a ternary expression at this
4177 // point.
4178 auto OrigLevel = Line->Level;
4179 auto FirstLine = CurrentLines->size();
4180 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4181 ++Line->Level;
4182 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok))
4183 --Line->Level;
4184 parseStructuralElement();
4185 // Restore the indentation in both the new line and the line that has the
4186 // label.
4187 if (CurrentLines->size() > FirstLine)
4188 (*CurrentLines)[FirstLine].Level = OrigLevel;
4189 Line->Level = OrigLevel;
4190}
4191
4192bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4193 for (const auto &N : Line.Tokens) {
4194 if (N.Tok->MacroCtx)
4195 return true;
4196 for (const UnwrappedLine &Child : N.Children)
4197 if (containsExpansion(Child))
4198 return true;
4199 }
4200 return false;
4201}
4202
4203void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4204 if (Line->Tokens.empty())
4205 return;
4206 LLVM_DEBUG({
4207 if (!parsingPPDirective()) {
4208 llvm::dbgs() << "Adding unwrapped line:\n";
4209 printDebugInfo(*Line);
4210 }
4211 });
4212
4213 // If this line closes a block when in Whitesmiths mode, remember that
4214 // information so that the level can be decreased after the line is added.
4215 // This has to happen after the addition of the line since the line itself
4216 // needs to be indented.
4217 bool ClosesWhitesmithsBlock =
4218 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4220
4221 // If the current line was expanded from a macro call, we use it to
4222 // reconstruct an unwrapped line from the structure of the expanded unwrapped
4223 // line and the unexpanded token stream.
4224 if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) {
4225 if (!Reconstruct)
4226 Reconstruct.emplace(Line->Level, Unexpanded);
4227 Reconstruct->addLine(*Line);
4228
4229 // While the reconstructed unexpanded lines are stored in the normal
4230 // flow of lines, the expanded lines are stored on the side to be analyzed
4231 // in an extra step.
4232 CurrentExpandedLines.push_back(std::move(*Line));
4233
4234 if (Reconstruct->finished()) {
4235 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
4236 assert(!Reconstructed.Tokens.empty() &&
4237 "Reconstructed must at least contain the macro identifier.");
4238 assert(!parsingPPDirective());
4239 LLVM_DEBUG({
4240 llvm::dbgs() << "Adding unexpanded line:\n";
4241 printDebugInfo(Reconstructed);
4242 });
4243 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
4244 Lines.push_back(std::move(Reconstructed));
4245 CurrentExpandedLines.clear();
4246 Reconstruct.reset();
4247 }
4248 } else {
4249 // At the top level we only get here when no unexpansion is going on, or
4250 // when conditional formatting led to unfinished macro reconstructions.
4251 assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0);
4252 CurrentLines->push_back(std::move(*Line));
4253 }
4254 Line->Tokens.clear();
4255 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4256 Line->FirstStartColumn = 0;
4257 Line->IsContinuation = false;
4258
4259 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4260 --Line->Level;
4261 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
4262 CurrentLines->append(
4263 std::make_move_iterator(PreprocessorDirectives.begin()),
4264 std::make_move_iterator(PreprocessorDirectives.end()));
4265 PreprocessorDirectives.clear();
4266 }
4267 // Disconnect the current token from the last token on the previous line.
4268 FormatTok->Previous = nullptr;
4269}
4270
4271bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
4272
4273bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4274 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4275 FormatTok.NewlinesBefore > 0;
4276}
4277
4278// Checks if \p FormatTok is a line comment that continues the line comment
4279// section on \p Line.
4280static bool
4282 const UnwrappedLine &Line,
4283 const llvm::Regex &CommentPragmasRegex) {
4284 if (Line.Tokens.empty())
4285 return false;
4286
4287 StringRef IndentContent = FormatTok.TokenText;
4288 if (FormatTok.TokenText.startswith("//") ||
4289 FormatTok.TokenText.startswith("/*")) {
4290 IndentContent = FormatTok.TokenText.substr(2);
4291 }
4292 if (CommentPragmasRegex.match(IndentContent))
4293 return false;
4294
4295 // If Line starts with a line comment, then FormatTok continues the comment
4296 // section if its original column is greater or equal to the original start
4297 // column of the line.
4298 //
4299 // Define the min column token of a line as follows: if a line ends in '{' or
4300 // contains a '{' followed by a line comment, then the min column token is
4301 // that '{'. Otherwise, the min column token of the line is the first token of
4302 // the line.
4303 //
4304 // If Line starts with a token other than a line comment, then FormatTok
4305 // continues the comment section if its original column is greater than the
4306 // original start column of the min column token of the line.
4307 //
4308 // For example, the second line comment continues the first in these cases:
4309 //
4310 // // first line
4311 // // second line
4312 //
4313 // and:
4314 //
4315 // // first line
4316 // // second line
4317 //
4318 // and:
4319 //
4320 // int i; // first line
4321 // // second line
4322 //
4323 // and:
4324 //
4325 // do { // first line
4326 // // second line
4327 // int i;
4328 // } while (true);
4329 //
4330 // and:
4331 //
4332 // enum {
4333 // a, // first line
4334 // // second line
4335 // b
4336 // };
4337 //
4338 // The second line comment doesn't continue the first in these cases:
4339 //
4340 // // first line
4341 // // second line
4342 //
4343 // and:
4344 //
4345 // int i; // first line
4346 // // second line
4347 //
4348 // and:
4349 //
4350 // do { // first line
4351 // // second line
4352 // int i;
4353 // } while (true);
4354 //
4355 // and:
4356 //
4357 // enum {
4358 // a, // first line
4359 // // second line
4360 // };
4361 const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4362
4363 // Scan for '{//'. If found, use the column of '{' as a min column for line
4364 // comment section continuation.
4365 const FormatToken *PreviousToken = nullptr;
4366 for (const UnwrappedLineNode &Node : Line.Tokens) {
4367 if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4368 isLineComment(*Node.Tok)) {
4369 MinColumnToken = PreviousToken;
4370 break;
4371 }
4372 PreviousToken = Node.Tok;
4373
4374 // Grab the last newline preceding a token in this unwrapped line.
4375 if (Node.Tok->NewlinesBefore > 0)
4376 MinColumnToken = Node.Tok;
4377 }
4378 if (PreviousToken && PreviousToken->is(tok::l_brace))
4379 MinColumnToken = PreviousToken;
4380
4381 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4382 MinColumnToken);
4383}
4384
4385void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4386 bool JustComments = Line->Tokens.empty();
4387 for (FormatToken *Tok : CommentsBeforeNextToken) {
4388 // Line comments that belong to the same line comment section are put on the
4389 // same line since later we might want to reflow content between them.
4390 // Additional fine-grained breaking of line comment sections is controlled
4391 // by the class BreakableLineCommentSection in case it is desirable to keep
4392 // several line comment sections in the same unwrapped line.
4393 //
4394 // FIXME: Consider putting separate line comment sections as children to the
4395 // unwrapped line instead.
4396 Tok->ContinuesLineCommentSection =
4397 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
4398 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4399 addUnwrappedLine();
4400 pushToken(Tok);
4401 }
4402 if (NewlineBeforeNext && JustComments)
4403 addUnwrappedLine();
4404 CommentsBeforeNextToken.clear();
4405}
4406
4407void UnwrappedLineParser::nextToken(int LevelDifference) {
4408 if (eof())
4409 return;
4410 flushComments(isOnNewLine(*FormatTok));
4411 pushToken(FormatTok);
4412 FormatToken *Previous = FormatTok;
4413 if (!Style.isJavaScript())
4414 readToken(LevelDifference);
4415 else
4416 readTokenWithJavaScriptASI();
4417 FormatTok->Previous = Previous;
4418 if (Style.isVerilog()) {
4419 // Blocks in Verilog can have `begin` and `end` instead of braces. For
4420 // keywords like `begin`, we can't treat them the same as left braces
4421 // because some contexts require one of them. For example structs use
4422 // braces and if blocks use keywords, and a left brace can occur in an if
4423 // statement, but it is not a block. For keywords like `end`, we simply
4424 // treat them the same as right braces.
4425 if (Keywords.isVerilogEnd(*FormatTok))
4426 FormatTok->Tok.setKind(tok::r_brace);
4427 }
4428}
4429
4430void UnwrappedLineParser::distributeComments(
4431 const SmallVectorImpl<FormatToken *> &Comments,
4432 const FormatToken *NextTok) {
4433 // Whether or not a line comment token continues a line is controlled by
4434 // the method continuesLineCommentSection, with the following caveat:
4435 //
4436 // Define a trail of Comments to be a nonempty proper postfix of Comments such
4437 // that each comment line from the trail is aligned with the next token, if
4438 // the next token exists. If a trail exists, the beginning of the maximal
4439 // trail is marked as a start of a new comment section.
4440 //
4441 // For example in this code:
4442 //
4443 // int a; // line about a
4444 // // line 1 about b
4445 // // line 2 about b
4446 // int b;
4447 //
4448 // the two lines about b form a maximal trail, so there are two sections, the
4449 // first one consisting of the single comment "// line about a" and the
4450 // second one consisting of the next two comments.
4451 if (Comments.empty())
4452 return;
4453 bool ShouldPushCommentsInCurrentLine = true;
4454 bool HasTrailAlignedWithNextToken = false;
4455 unsigned StartOfTrailAlignedWithNextToken = 0;
4456 if (NextTok) {
4457 // We are skipping the first element intentionally.
4458 for (unsigned i = Comments.size() - 1; i > 0; --i) {
4459 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4460 HasTrailAlignedWithNextToken = true;
4461 StartOfTrailAlignedWithNextToken = i;
4462 }
4463 }
4464 }
4465 for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4466 FormatToken *FormatTok = Comments[i];
4467 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4468 FormatTok->ContinuesLineCommentSection = false;
4469 } else {
4470 FormatTok->ContinuesLineCommentSection =
4471 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4472 }
4473 if (!FormatTok->ContinuesLineCommentSection &&
4474 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4475 ShouldPushCommentsInCurrentLine = false;
4476 }
4477 if (ShouldPushCommentsInCurrentLine)
4478 pushToken(FormatTok);
4479 else
4480 CommentsBeforeNextToken.push_back(FormatTok);
4481 }
4482}
4483
4484void UnwrappedLineParser::readToken(int LevelDifference) {
4485 SmallVector<FormatToken *, 1> Comments;
4486 bool PreviousWasComment = false;
4487 bool FirstNonCommentOnLine = false;
4488 do {
4489 FormatTok = Tokens->getNextToken();
4490 assert(FormatTok);
4491 while (FormatTok->getType() == TT_ConflictStart ||
4492 FormatTok->getType() == TT_ConflictEnd ||
4493 FormatTok->getType() == TT_ConflictAlternative) {
4494 if (FormatTok->getType() == TT_ConflictStart)
4495 conditionalCompilationStart(/*Unreachable=*/false);
4496 else if (FormatTok->getType() == TT_ConflictAlternative)
4497 conditionalCompilationAlternative();
4498 else if (FormatTok->getType() == TT_ConflictEnd)
4499 conditionalCompilationEnd();
4500 FormatTok = Tokens->getNextToken();
4501 FormatTok->MustBreakBefore = true;
4502 }
4503
4504 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4505 const FormatToken &Tok,
4506 bool PreviousWasComment) {
4507 auto IsFirstOnLine = [](const FormatToken &Tok) {
4508 return Tok.HasUnescapedNewline || Tok.IsFirst;
4509 };
4510
4511 // Consider preprocessor directives preceded by block comments as first
4512 // on line.
4513 if (PreviousWasComment)
4514 return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4515 return IsFirstOnLine(Tok);
4516 };
4517
4518 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4519 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4520 PreviousWasComment = FormatTok->is(tok::comment);
4521
4522 while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4523 (!Style.isVerilog() ||
4524 Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
4525 FirstNonCommentOnLine) {
4526 distributeComments(Comments, FormatTok);
4527 Comments.clear();
4528 // If there is an unfinished unwrapped line, we flush the preprocessor
4529 // directives only after that unwrapped line was finished later.
4530 bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4531 ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4532 assert((LevelDifference >= 0 ||
4533 static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4534 "LevelDifference makes Line->Level negative");
4535 Line->Level += LevelDifference;
4536 // Comments stored before the preprocessor directive need to be output
4537 // before the preprocessor directive, at the same level as the
4538 // preprocessor directive, as we consider them to apply to the directive.
4540 PPBranchLevel > 0) {
4541 Line->Level += PPBranchLevel;
4542 }
4543 flushComments(isOnNewLine(*FormatTok));
4544 parsePPDirective();
4545 PreviousWasComment = FormatTok->is(tok::comment);
4546 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4547 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4548 }
4549
4550 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4551 !Line->InPPDirective) {
4552 continue;
4553 }
4554
4555 if (FormatTok->is(tok::identifier) &&
4556 Macros.defined(FormatTok->TokenText) &&
4557 // FIXME: Allow expanding macros in preprocessor directives.
4558 !Line->InPPDirective) {
4559 FormatToken *ID = FormatTok;
4560 unsigned Position = Tokens->getPosition();
4561
4562 // To correctly parse the code, we need to replace the tokens of the macro
4563 // call with its expansion.
4564 auto PreCall = std::move(Line);
4565 Line.reset(new UnwrappedLine);
4566 bool OldInExpansion = InExpansion;
4567 InExpansion = true;
4568 // We parse the macro call into a new line.
4569 auto Args = parseMacroCall();
4570 InExpansion = OldInExpansion;
4571 assert(Line->Tokens.front().Tok == ID);
4572 // And remember the unexpanded macro call tokens.
4573 auto UnexpandedLine = std::move(Line);
4574 // Reset to the old line.
4575 Line = std::move(PreCall);
4576
4577 LLVM_DEBUG({
4578 llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
4579 if (Args) {
4580 llvm::dbgs() << "(";
4581 for (const auto &Arg : Args.value())
4582 for (const auto &T : Arg)
4583 llvm::dbgs() << T->TokenText << " ";
4584 llvm::dbgs() << ")";
4585 }
4586 llvm::dbgs() << "\n";
4587 });
4588 if (Macros.objectLike(ID->TokenText) && Args &&
4589 !Macros.hasArity(ID->TokenText, Args->size())) {
4590 // The macro is either
4591 // - object-like, but we got argumnets, or
4592 // - overloaded to be both object-like and function-like, but none of
4593 // the function-like arities match the number of arguments.
4594 // Thus, expand as object-like macro.
4595 LLVM_DEBUG(llvm::dbgs()
4596 << "Macro \"" << ID->TokenText
4597 << "\" not overloaded for arity " << Args->size()
4598 << "or not function-like, using object-like overload.");
4599 Args.reset();
4600 UnexpandedLine->Tokens.resize(1);
4601 Tokens->setPosition(Position);
4602 nextToken();
4603 assert(!Args && Macros.objectLike(ID->TokenText));
4604 }
4605 if ((!Args && Macros.objectLike(ID->TokenText)) ||
4606 (Args && Macros.hasArity(ID->TokenText, Args->size()))) {
4607 // Next, we insert the expanded tokens in the token stream at the
4608 // current position, and continue parsing.
4609 Unexpanded[ID] = std::move(UnexpandedLine);
4610 SmallVector<FormatToken *, 8> Expansion =
4611 Macros.expand(ID, std::move(Args));
4612 if (!Expansion.empty())
4613 FormatTok = Tokens->insertTokens(Expansion);
4614
4615 LLVM_DEBUG({
4616 llvm::dbgs() << "Expanded: ";
4617 for (const auto &T : Expansion)
4618 llvm::dbgs() << T->TokenText << " ";
4619 llvm::dbgs() << "\n";
4620 });
4621 } else {
4622 LLVM_DEBUG({
4623 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
4624 << "\", because it was used ";
4625 if (Args)
4626 llvm::dbgs() << "with " << Args->size();
4627 else
4628 llvm::dbgs() << "without";
4629 llvm::dbgs() << " arguments, which doesn't match any definition.\n";
4630 });
4631 Tokens->setPosition(Position);
4632 FormatTok = ID;
4633 }
4634 }
4635
4636 if (!FormatTok->is(tok::comment)) {
4637 distributeComments(Comments, FormatTok);
4638 Comments.clear();
4639 return;
4640 }
4641
4642 Comments.push_back(FormatTok);
4643 } while (!eof());
4644
4645 distributeComments(Comments, nullptr);
4646 Comments.clear();
4647}
4648
4649namespace {
4650template <typename Iterator>
4651void pushTokens(Iterator Begin, Iterator End,
4653 for (auto I = Begin; I != End; ++I) {
4654 Into.push_back(I->Tok);
4655 for (const auto &Child : I->Children)
4656 pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
4657 }
4658}
4659} // namespace
4660
4661std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
4662UnwrappedLineParser::parseMacroCall() {
4663 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
4664 assert(Line->Tokens.empty());
4665 nextToken();
4666 if (!FormatTok->is(tok::l_paren))
4667 return Args;
4668 unsigned Position = Tokens->getPosition();
4669 FormatToken *Tok = FormatTok;
4670 nextToken();
4671 Args.emplace();
4672 auto ArgStart = std::prev(Line->Tokens.end());
4673
4674 int Parens = 0;
4675 do {
4676 switch (FormatTok->Tok.getKind()) {
4677 case tok::l_paren:
4678 ++Parens;
4679 nextToken();
4680 break;
4681 case tok::r_paren: {
4682 if (Parens > 0) {
4683 --Parens;
4684 nextToken();
4685 break;
4686 }
4687 Args->push_back({});
4688 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4689 nextToken();
4690 return Args;
4691 }
4692 case tok::comma: {
4693 if (Parens > 0) {
4694 nextToken();
4695 break;
4696 }
4697 Args->push_back({});
4698 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4699 nextToken();
4700 ArgStart = std::prev(Line->Tokens.end());
4701 break;
4702 }
4703 default:
4704 nextToken();
4705 break;
4706 }
4707 } while (!eof());
4708 Line->Tokens.resize(1);
4709 Tokens->setPosition(Position);
4710 FormatTok = Tok;
4711 return {};
4712}
4713
4714void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4715 Line->Tokens.push_back(UnwrappedLineNode(Tok));
4716 if (MustBreakBeforeNextToken) {
4717 Line->Tokens.back().Tok->MustBreakBefore = true;
4718 MustBreakBeforeNextToken = false;
4719 }
4720}
4721
4722} // end namespace format
4723} // end namespace clang
DynTypedNode Node
This file contains FormatTokenLexer, which tokenizes a source file into a token stream suitable for C...
This file defines the FormatTokenSource interface, which provides a token stream as well as the abili...
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
StringRef Text
Definition: Format.cpp:2775
This file contains the main building blocks of macro support in clang-format.
This file implements a token annotator, i.e.
Defines the clang::TokenKind enum and support functions.
SourceLocation Begin
StateNode * Previous
ContinuationIndenter * Indenter
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
do v
Definition: arm_acle.h:76
tok::PPKeywordKind getPPKeywordID() const
Return the preprocessor keyword ID for this identifier.
Implements an efficient mapping from strings to IdentifierInfo nodes.
Parser - This implements a parser for the C family of languages.
Definition: Parser.h:61
This class handles loading and caching of source files into memory.
Token - This structure provides full information about a lexed token.
Definition: Token.h:35
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:181
bool isAnyIdentifier() const
Return true if this is a raw identifier (when lexing in raw mode) or a non-keyword identifier (when l...
Definition: Token.h:109
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:115
void setKind(tok::TokenKind K)
Definition: Token.h:94
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
Definition: Lexer.cpp:66
tok::TokenKind getKind() const
Definition: Token.h:93
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const
Definition: Token.h:100
bool isNot(tok::TokenKind K) const
Definition: Token.h:99
void setIdentifierInfo(IdentifierInfo *II)
Definition: Token.h:190
CompoundStatementIndenter(UnwrappedLineParser *Parser, const FormatStyle &Style, unsigned &LineLevel)
CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, bool WrapBrace, bool IndentBrace)
virtual FormatToken * peekNextToken(bool SkipComment=false)=0
virtual unsigned getPosition()=0
virtual FormatToken * getPreviousToken()=0
virtual FormatToken * setPosition(unsigned Position)=0
virtual FormatToken * getNextToken()=0
bool defined(llvm::StringRef Name) const
Returns whether any macro Name is defined, regardless of overloads.
llvm::SmallVector< FormatToken *, 8 > expand(FormatToken *ID, std::optional< ArgsList > OptionalArgs) const
Returns the expanded stream of format tokens for ID, where each element in Args is a positional argum...
bool objectLike(llvm::StringRef Name) const
Returns whether there is an object-like overload, i.e.
bool hasArity(llvm::StringRef Name, unsigned Arity) const
Returns whether macro Name provides an overload with the given arity.
ScopedLineState(UnwrappedLineParser &Parser, bool SwitchToPreprocessorLines=false)
Interface for users of the UnwrappedLineParser to receive the parsed lines.
virtual void consumeUnwrappedLine(const UnwrappedLine &Line)=0
UnwrappedLineParser(SourceManager &SourceMgr, const FormatStyle &Style, const AdditionalKeywords &Keywords, unsigned FirstStartColumn, ArrayRef< FormatToken * > Tokens, UnwrappedLineConsumer &Callback, llvm::SpecificBumpPtrAllocator< FormatToken > &Allocator, IdentifierTable &IdentTable)
unsigned kind
All of the diagnostics that can be emitted by the frontend.
Definition: DiagnosticIDs.h:66
static void hash_combine(std::size_t &seed, const T &v)
static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
bool continuesLineComment(const FormatToken &FormatTok, const FormatToken *Previous, const FormatToken *MinColumnToken)
Definition: FormatToken.h:1847
static bool continuesLineCommentSection(const FormatToken &FormatTok, const UnwrappedLine &Line, const llvm::Regex &CommentPragmasRegex)
static bool isCOperatorFollowingVar(tok::TokenKind kind)
static bool tokenCanStartNewLine(const FormatToken &Tok)
static bool isC78Type(const FormatToken &Tok)
bool isLineComment(const FormatToken &FormatTok)
Definition: FormatToken.h:1840
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
static bool ShouldBreakBeforeBrace(const FormatStyle &Style, const FormatToken &InitialToken)
static void markOptionalBraces(FormatToken *LeftBrace)
static bool mustBeJSIdent(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
static bool isIIFE(const UnwrappedLine &Line, const AdditionalKeywords &Keywords)
static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, const FormatToken *FuncName)
static bool isGoogScope(const UnwrappedLine &Line)
static FormatToken * getLastNonComment(const UnwrappedLine &Line)
TokenType
Determines the semantic type of a syntactic token, e.g.
Definition: FormatToken.h:162
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
#define false
Definition: stdbool.h:22
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:933
bool isVerilogEnd(const FormatToken &Tok) const
Returns whether Tok is a Verilog keyword that closes a block.
Definition: FormatToken.h:1765
bool isVerilogBegin(const FormatToken &Tok) const
Returns whether Tok is a Verilog keyword that opens a block.
Definition: FormatToken.h:1758
bool isVerilogStructuredProcedure(const FormatToken &Tok) const
Returns whether Tok is a Verilog keyword that starts a structured procedure like 'always'.
Definition: FormatToken.h:1803
bool isVerilogHierarchy(const FormatToken &Tok) const
Returns whether Tok is a Verilog keyword that opens a module, etc.
Definition: FormatToken.h:1777
bool isVerilogPPDirective(const FormatToken &Tok) const
Returns whether Tok is a Verilog preprocessor directive.
Definition: FormatToken.h:1731
IdentifierInfo * kw_internal_ident_after_define
Definition: FormatToken.h:1386
bool isVerilogIdentifier(const FormatToken &Tok) const
Definition: FormatToken.h:1696
bool AfterClass
Wrap class definitions.
Definition: Format.h:1014
bool AfterStruct
Wrap struct definitions.
Definition: Format.h:1079
bool AfterUnion
Wrap union definitions.
Definition: Format.h:1093
bool AfterEnum
Wrap enum definitions.
Definition: Format.h:1029
bool IndentBraces
Indent the wrapped braces themselves.
Definition: Format.h:1170
bool AfterObjCDeclaration
Wrap ObjC definitions (interfaces, implementations...).
Definition: Format.h:1065
bool AfterNamespace
Wrap namespace definitions.
Definition: Format.h:1061
BraceWrappingAfterControlStatementStyle AfterControlStatement
Wrap control statements (if/for/while/switch/..).
Definition: Format.h:1017
bool AfterFunction
Wrap function definitions.
Definition: Format.h:1045
bool AfterExternBlock
Wrap extern blocks.
Definition: Format.h:1107
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
@ LK_Java
Should be used for Java.
Definition: Format.h:2668
@ LK_TableGen
Should be used for TableGen code.
Definition: Format.h:2679
@ LK_Proto
Should be used for Protocol Buffers (https://developers.google.com/protocol-buffers/).
Definition: Format.h:2677
@ LK_TextProto
Should be used for Protocol Buffer messages in text format (https://developers.google....
Definition: Format.h:2682
unsigned IndentWidth
The number of columns to use for indentation.
Definition: Format.h:2406
bool IndentCaseLabels
Indent case labels one level from the switch statement.
Definition: Format.h:2278
PPDirectiveIndentStyle IndentPPDirectives
The preprocessor directive indenting style to use.
Definition: Format.h:2370
bool RemoveSemicolon
Remove semicolons after the closing brace of a non-empty function.
Definition: Format.h:3308
@ IEBS_AfterExternBlock
Backwards compatible with AfterExternBlock's indenting.
Definition: Format.h:2316
@ IEBS_Indent
Indents extern blocks.
Definition: Format.h:2330
bool IndentCaseBlocks
Indent case label blocks one level from the case label.
Definition: Format.h:2259
bool InsertBraces
Insert braces after control statements (if, else, for, do, and while) in C++ unless the control state...
Definition: Format.h:2452
LanguageKind Language
Language, this format style is targeted at.
Definition: Format.h:2697
bool RemoveBracesLLVM
Remove optional braces of control statements (if, else, for, and while) in C++ according to the LLVM ...
Definition: Format.h:3291
@ PPDIS_BeforeHash
Indents directives before the hash.
Definition: Format.h:2365
@ PPDIS_None
Does not indent any directives.
Definition: Format.h:2347
bool AllowShortEnumsOnASingleLine
Allow short enums on a single line.
Definition: Format.h:553
NamespaceIndentationKind NamespaceIndentation
The indentation used for namespaces.
Definition: Format.h:2831
BraceBreakingStyle BreakBeforeBraces
The brace breaking style to use.
Definition: Format.h:1760
bool isCSharp() const
Definition: Format.h:2689
@ BWACS_Always
Always wrap braces after a control statement.
Definition: Format.h:978
@ BWACS_Never
Never wrap braces after a control statement.
Definition: Format.h:957
@ BS_Whitesmiths
Like Allman but always indent braces and line up code with braces.
Definition: Format.h:1643
bool isProto() const
Definition: Format.h:2693
bool isVerilog() const
Definition: Format.h:2692
bool isJavaScript() const
Definition: Format.h:2691
bool IndentGotoLabels
Indent goto labels.
Definition: Format.h:2295
BraceWrappingFlags BraceWrapping
Control of individual brace wrapping cases.
Definition: Format.h:1223
@ NI_All
Indent in all namespaces.
Definition: Format.h:2826
@ NI_Inner
Indent only in inner namespaces (nested in other namespaces).
Definition: Format.h:2816
bool IndentAccessModifiers
Specify whether access modifiers should have their own indentation level.
Definition: Format.h:2236
IndentExternBlockStyle IndentExternBlock
IndentExternBlockStyle is the type of indenting of extern blocks.
Definition: Format.h:2335
unsigned ColumnLimit
The column limit.
Definition: Format.h:1890
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:247
bool Optional
Is optional and can be removed.
Definition: FormatToken.h:516
bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const
Definition: FormatToken.h:595
bool isNot(T Kind) const
Definition: FormatToken.h:560
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:266
bool startsSequence(A K1, Ts... Tokens) const
true if this token starts a sequence with the given tokens in order, following the Next pointers,...
Definition: FormatToken.h:578
FormatToken * getPreviousNonComment() const
Returns the previous token ignoring comments.
Definition: FormatToken.h:744
bool isSimpleTypeSpecifier() const
Determine whether the token is a simple-type-specifier.
Definition: FormatToken.cpp:39
unsigned Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside),...
Definition: FormatToken.h:321
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:504
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:407
void setBlockKind(BraceBlockKind BBK)
Definition: FormatToken.h:337
bool isStringLiteral() const
Definition: FormatToken.h:593
bool isBinaryOperator() const
Definition: FormatToken.h:672
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:541
bool hasWhitespaceBefore() const
Returns true if the range of whitespace immediately preceding the Token is not empty.
Definition: FormatToken.h:732
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:553
TokenType getType() const
Returns the token's type, e.g.
Definition: FormatToken.h:375
unsigned ClosesRequiresClause
true if this is the last token within requires clause.
Definition: FormatToken.h:324
FormatToken * MatchingParen
If this is a bracket, this points to the matching one.
Definition: FormatToken.h:498
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:501
void setFinalizedType(TokenType T)
Sets the type and also the finalized flag.
Definition: FormatToken.h:390
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...
static const size_t kInvalidIndex