clang 18.0.0git
UnwrappedLineParser.cpp
Go to the documentation of this file.
1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains the implementation of the UnwrappedLineParser,
11/// which turns a stream of tokens into UnwrappedLines.
12///
13//===----------------------------------------------------------------------===//
14
15#include "UnwrappedLineParser.h"
16#include "FormatToken.h"
17#include "FormatTokenLexer.h"
18#include "FormatTokenSource.h"
19#include "Macros.h"
20#include "TokenAnnotator.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/StringRef.h"
24#include "llvm/Support/Debug.h"
25#include "llvm/Support/raw_os_ostream.h"
26#include "llvm/Support/raw_ostream.h"
27
28#include <algorithm>
29#include <utility>
30
31#define DEBUG_TYPE "format-parser"
32
33namespace clang {
34namespace format {
35
36namespace {
37
38void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
39 StringRef Prefix = "", bool PrintText = false) {
40 OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
41 << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
42 bool NewLine = false;
43 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
44 E = Line.Tokens.end();
45 I != E; ++I) {
46 if (NewLine) {
47 OS << Prefix;
48 NewLine = false;
49 }
50 OS << I->Tok->Tok.getName() << "["
51 << "T=" << (unsigned)I->Tok->getType()
52 << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
53 << "\"] ";
54 for (SmallVectorImpl<UnwrappedLine>::const_iterator
55 CI = I->Children.begin(),
56 CE = I->Children.end();
57 CI != CE; ++CI) {
58 OS << "\n";
59 printLine(OS, *CI, (Prefix + " ").str());
60 NewLine = true;
61 }
62 }
63 if (!NewLine)
64 OS << "\n";
65}
66
67LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
68 printLine(llvm::dbgs(), Line);
69}
70
71class ScopedDeclarationState {
72public:
73 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
74 bool MustBeDeclaration)
75 : Line(Line), Stack(Stack) {
76 Line.MustBeDeclaration = MustBeDeclaration;
77 Stack.push_back(MustBeDeclaration);
78 }
79 ~ScopedDeclarationState() {
80 Stack.pop_back();
81 if (!Stack.empty())
82 Line.MustBeDeclaration = Stack.back();
83 else
84 Line.MustBeDeclaration = true;
85 }
86
87private:
88 UnwrappedLine &Line;
89 llvm::BitVector &Stack;
90};
91
92} // end anonymous namespace
93
95public:
97 bool SwitchToPreprocessorLines = false)
98 : Parser(Parser), OriginalLines(Parser.CurrentLines) {
99 if (SwitchToPreprocessorLines)
100 Parser.CurrentLines = &Parser.PreprocessorDirectives;
101 else if (!Parser.Line->Tokens.empty())
102 Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
103 PreBlockLine = std::move(Parser.Line);
104 Parser.Line = std::make_unique<UnwrappedLine>();
105 Parser.Line->Level = PreBlockLine->Level;
106 Parser.Line->PPLevel = PreBlockLine->PPLevel;
107 Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
108 Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
109 }
110
112 if (!Parser.Line->Tokens.empty())
113 Parser.addUnwrappedLine();
114 assert(Parser.Line->Tokens.empty());
115 Parser.Line = std::move(PreBlockLine);
116 if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
117 Parser.MustBreakBeforeNextToken = true;
118 Parser.CurrentLines = OriginalLines;
119 }
120
121private:
123
124 std::unique_ptr<UnwrappedLine> PreBlockLine;
125 SmallVectorImpl<UnwrappedLine> *OriginalLines;
126};
127
129public:
131 const FormatStyle &Style, unsigned &LineLevel)
133 Style.BraceWrapping.AfterControlStatement,
134 Style.BraceWrapping.IndentBraces) {}
136 bool WrapBrace, bool IndentBrace)
137 : LineLevel(LineLevel), OldLineLevel(LineLevel) {
138 if (WrapBrace)
139 Parser->addUnwrappedLine();
140 if (IndentBrace)
141 ++LineLevel;
142 }
143 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
144
145private:
146 unsigned &LineLevel;
147 unsigned OldLineLevel;
148};
149
151 SourceManager &SourceMgr, const FormatStyle &Style,
152 const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
154 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
155 IdentifierTable &IdentTable)
156 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
157 CurrentLines(&Lines), Style(Style), Keywords(Keywords),
158 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
159 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
160 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
161 ? IG_Rejected
162 : IG_Inited),
163 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
164 Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}
165
166void UnwrappedLineParser::reset() {
167 PPBranchLevel = -1;
168 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
169 ? IG_Rejected
170 : IG_Inited;
171 IncludeGuardToken = nullptr;
172 Line.reset(new UnwrappedLine);
173 CommentsBeforeNextToken.clear();
174 FormatTok = nullptr;
175 MustBreakBeforeNextToken = false;
176 PreprocessorDirectives.clear();
177 CurrentLines = &Lines;
178 DeclarationScopeStack.clear();
179 NestedTooDeep.clear();
180 PPStack.clear();
181 Line->FirstStartColumn = FirstStartColumn;
182
183 if (!Unexpanded.empty())
184 for (FormatToken *Token : AllTokens)
185 Token->MacroCtx.reset();
186 CurrentExpandedLines.clear();
187 ExpandedLines.clear();
188 Unexpanded.clear();
189 InExpansion = false;
190 Reconstruct.reset();
191}
192
194 IndexedTokenSource TokenSource(AllTokens);
195 Line->FirstStartColumn = FirstStartColumn;
196 do {
197 LLVM_DEBUG(llvm::dbgs() << "----\n");
198 reset();
199 Tokens = &TokenSource;
200 TokenSource.reset();
201
202 readToken();
203 parseFile();
204
205 // If we found an include guard then all preprocessor directives (other than
206 // the guard) are over-indented by one.
207 if (IncludeGuard == IG_Found) {
208 for (auto &Line : Lines)
209 if (Line.InPPDirective && Line.Level > 0)
210 --Line.Level;
211 }
212
213 // Create line with eof token.
214 assert(FormatTok->is(tok::eof));
215 pushToken(FormatTok);
216 addUnwrappedLine();
217
218 // In a first run, format everything with the lines containing macro calls
219 // replaced by the expansion.
220 if (!ExpandedLines.empty()) {
221 LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
222 for (const auto &Line : Lines) {
223 if (!Line.Tokens.empty()) {
224 auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
225 if (it != ExpandedLines.end()) {
226 for (const auto &Expanded : it->second) {
227 LLVM_DEBUG(printDebugInfo(Expanded));
228 Callback.consumeUnwrappedLine(Expanded);
229 }
230 continue;
231 }
232 }
233 LLVM_DEBUG(printDebugInfo(Line));
234 Callback.consumeUnwrappedLine(Line);
235 }
236 Callback.finishRun();
237 }
238
239 LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
240 for (const UnwrappedLine &Line : Lines) {
241 LLVM_DEBUG(printDebugInfo(Line));
242 Callback.consumeUnwrappedLine(Line);
243 }
244 Callback.finishRun();
245 Lines.clear();
246 while (!PPLevelBranchIndex.empty() &&
247 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
248 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
249 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
250 }
251 if (!PPLevelBranchIndex.empty()) {
252 ++PPLevelBranchIndex.back();
253 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
254 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
255 }
256 } while (!PPLevelBranchIndex.empty());
257}
258
// Parses the top level of the token stream: sets up the declaration state for
// the top-level scope, parses the content either as a braced list body or as a
// regular level, then flushes pending comments and emits the last line.
259void UnwrappedLineParser::parseFile() {
260 // The top-level context in a file always has declarations, except for pre-
261 // processor directives and JavaScript files.
262 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
263 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
264 MustBeDeclaration);
// NOTE(review): the `if (...)` line selecting parseBracedList() over
// parseLevel() is not visible in this listing (original line 265 is absent).
// The comment below suggests it tests for LK_TextProto - confirm against the
// upstream file.
266 parseBracedList();
267 else
268 parseLevel();
269 // Make sure to format the remaining tokens.
270 //
271 // LK_TextProto is special since its top-level is parsed as the body of a
272 // braced list, which does not necessarily have natural line separators such
273 // as a semicolon. Comments after the last entry that have been determined to
274 // not belong to that line, as in:
275 // key: value
276 // // endfile comment
277 // do not have a chance to be put on a line of their own until this point.
278 // Here we add this newline before end-of-file comments.
279 if (Style.Language == FormatStyle::LK_TextProto &&
280 !CommentsBeforeNextToken.empty()) {
281 addUnwrappedLine();
282 }
// Emit any comments still pending, then finish the current line.
283 flushComments(true);
284 addUnwrappedLine();
285}
286
287void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
288 do {
289 switch (FormatTok->Tok.getKind()) {
290 case tok::l_brace:
291 return;
292 default:
293 if (FormatTok->is(Keywords.kw_where)) {
294 addUnwrappedLine();
295 nextToken();
296 parseCSharpGenericTypeConstraint();
297 break;
298 }
299 nextToken();
300 break;
301 }
302 } while (!eof());
303}
304
305void UnwrappedLineParser::parseCSharpAttribute() {
306 int UnpairedSquareBrackets = 1;
307 do {
308 switch (FormatTok->Tok.getKind()) {
309 case tok::r_square:
310 nextToken();
311 --UnpairedSquareBrackets;
312 if (UnpairedSquareBrackets == 0) {
313 addUnwrappedLine();
314 return;
315 }
316 break;
317 case tok::l_square:
318 ++UnpairedSquareBrackets;
319 nextToken();
320 break;
321 default:
322 nextToken();
323 break;
324 }
325 } while (!eof());
326}
327
328bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
329 if (!Lines.empty() && Lines.back().InPPDirective)
330 return true;
331
332 const FormatToken *Previous = Tokens->getPreviousToken();
333 return Previous && Previous->is(tok::comment) &&
334 (Previous->IsMultiline || Previous->NewlinesBefore > 0);
335}
336
337/// \brief Parses a level, that is, a run of structural elements, blocks and
/// comments that is consumed token by token until either end of file is
/// reached or, when \p OpeningBrace is set, the closing brace of that level
/// is found (the closing brace itself is left unconsumed in that case).
338/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
339/// \param IfKind The \p if statement kind in the level.
340/// \param IfLeftBrace The left brace of the \p if block in the level.
341/// \returns true if a simple block of if/else/for/while, or false otherwise.
342/// (A simple block has a single statement.)
343bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
344 IfStmtKind *IfKind,
345 FormatToken **IfLeftBrace) {
346 const bool InRequiresExpression =
347 OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
// Always true when RemoveBracesLLVM is off; the flag is only consulted on the
// brace-removal path below, which is itself gated on RemoveBracesLLVM.
348 const bool IsPrecededByCommentOrPPDirective =
349 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
350 FormatToken *IfLBrace = nullptr;
351 bool HasDoWhile = false;
352 bool HasLabel = false;
353 unsigned StatementCount = 0;
354 bool SwitchLabelEncountered = false;
355
356 do {
357 if (FormatTok->isAttribute()) {
358 nextToken();
359 continue;
360 }
// Macro block begin/end tokens are dispatched as if they were braces.
361 tok::TokenKind kind = FormatTok->Tok.getKind();
362 if (FormatTok->getType() == TT_MacroBlockBegin)
363 kind = tok::l_brace;
364 else if (FormatTok->getType() == TT_MacroBlockEnd)
365 kind = tok::r_brace;
366
// Shared "parse one structural element and count it" action. Once HasDoWhile
// or HasLabel has been set, nullptr is passed instead so the callee cannot
// reset it.
367 auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
368 &HasLabel, &StatementCount] {
369 parseStructuralElement(OpeningBrace, IfKind, &IfLBrace,
370 HasDoWhile ? nullptr : &HasDoWhile,
371 HasLabel ? nullptr : &HasLabel);
372 ++StatementCount;
373 assert(StatementCount > 0 && "StatementCount overflow!");
374 };
375
376 switch (kind) {
377 case tok::comment:
378 nextToken();
379 addUnwrappedLine();
380 break;
381 case tok::l_brace:
382 if (InRequiresExpression) {
383 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
384 } else if (FormatTok->Previous &&
385 FormatTok->Previous->ClosesRequiresClause) {
386 // We need the 'default' case here to correctly parse a function
387 // l_brace.
388 ParseDefault();
389 continue;
390 }
391 if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin) &&
392 tryToParseBracedList()) {
393 continue;
394 }
395 parseBlock();
396 ++StatementCount;
397 assert(StatementCount > 0 && "StatementCount overflow!");
398 addUnwrappedLine();
399 break;
400 case tok::r_brace:
// With an opening brace this ends the level. The result is true only when
// brace removal is enabled and every check below passes.
401 if (OpeningBrace) {
402 if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
403 !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
404 return false;
405 }
// `kind` may be r_brace because of a macro block end token, hence the
// explicit check on the token itself.
406 if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
407 HasDoWhile || IsPrecededByCommentOrPPDirective ||
408 precededByCommentOrPPDirective()) {
409 return false;
410 }
// A comment on the same line after the closing brace also rules it out.
411 const FormatToken *Next = Tokens->peekNextToken();
412 if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
413 return false;
414 if (IfLeftBrace)
415 *IfLeftBrace = IfLBrace;
416 return true;
417 }
// Closing brace without an opening brace for this level: consume it and
// continue.
418 nextToken();
419 addUnwrappedLine();
420 break;
421 case tok::kw_default: {
// Look ahead past comments for the token after `default`, then rewind.
422 unsigned StoredPosition = Tokens->getPosition();
423 FormatToken *Next;
424 do {
425 Next = Tokens->getNextToken();
426 assert(Next);
427 } while (Next->is(tok::comment));
428 FormatTok = Tokens->setPosition(StoredPosition);
429 if (Next->isNot(tok::colon)) {
430 // default not followed by ':' is not a case label; treat it like
431 // an identifier.
432 parseStructuralElement();
433 break;
434 }
435 // Else, if it is 'default:', fall through to the case handling.
436 [[fallthrough]];
437 }
438 case tok::kw_case:
439 if (Style.isProto() || Style.isVerilog() ||
440 (Style.isJavaScript() && Line->MustBeDeclaration)) {
441 // Proto: there are no switch/case statements
442 // Verilog: Case labels don't have this word. We handle case
443 // labels including default in TokenAnnotator.
444 // JavaScript: A 'case: string' style field declaration.
445 ParseDefault();
446 break;
447 }
// Only the first label seen in this level may bump the indentation level.
448 if (!SwitchLabelEncountered &&
449 (Style.IndentCaseLabels ||
450 (Line->InPPDirective && Line->Level == 1))) {
451 ++Line->Level;
452 }
453 SwitchLabelEncountered = true;
454 parseStructuralElement();
455 break;
456 case tok::l_square:
457 if (Style.isCSharp()) {
458 nextToken();
459 parseCSharpAttribute();
460 break;
461 }
462 if (handleCppAttributes())
463 break;
464 [[fallthrough]];
465 default:
466 ParseDefault();
467 break;
468 }
469 } while (!eof());
470
471 return false;
472}
473
// Looks ahead from the current '{' and assigns a block kind (BK_Block or
// BK_BracedInit) to the braces it encounters, using the token following each
// closing brace as the main hint. The token position is restored before
// returning, so no tokens are consumed.
// \param ExpectClassBody When set, a ';' after the closing brace of the
// outermost brace pair is not taken as a sign of a braced list.
474void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
475 // We'll parse forward through the tokens until we hit
476 // a closing brace or eof - note that getNextToken() will
477 // parse macros, so this will magically work inside macro
478 // definitions, too.
479 unsigned StoredPosition = Tokens->getPosition();
480 FormatToken *Tok = FormatTok;
481 const FormatToken *PrevTok = Tok->Previous;
482 // Keep a stack of positions of lbrace tokens. We will
483 // update information about whether an lbrace starts a
484 // braced init list or a different block during the loop.
485 struct StackEntry {
486 FormatToken *Tok;
487 const FormatToken *PrevTok;
488 };
489 SmallVector<StackEntry, 8> LBraceStack;
490 assert(Tok->is(tok::l_brace));
491 do {
492 // Get next non-comment token.
493 FormatToken *NextTok;
494 do {
495 NextTok = Tokens->getNextToken();
496 } while (NextTok->is(tok::comment));
497
498 switch (Tok->Tok.getKind()) {
499 case tok::l_brace:
500 if (Style.isJavaScript() && PrevTok) {
501 if (PrevTok->isOneOf(tok::colon, tok::less)) {
502 // A ':' indicates this code is in a type, or a braced list
503 // following a label in an object literal ({a: {b: 1}}).
504 // A '<' could be an object used in a comparison, but that is nonsense
505 // code (can never return true), so more likely it is a generic type
506 // argument (`X<{a: string; b: number}>`).
507 // The code below could be confused by semicolons between the
508 // individual members in a type member list, which would normally
509 // trigger BK_Block. In both cases, this must be parsed as an inline
510 // braced init.
// NOTE(review): the statement belonging to this branch is not visible in this
// listing (original line 511 is absent); per the comment above it presumably
// marks the brace as a braced init - confirm against the upstream file.
512 } else if (PrevTok->is(tok::r_paren)) {
513 // `) { }` can only occur in function or method declarations in JS.
514 Tok->setBlockKind(BK_Block);
515 }
516 } else {
517 Tok->setBlockKind(BK_Unknown);
518 }
519 LBraceStack.push_back({Tok, PrevTok});
520 break;
521 case tok::r_brace:
522 if (LBraceStack.empty())
523 break;
// Only decide here if the matching opening brace is still undecided.
524 if (LBraceStack.back().Tok->is(BK_Unknown)) {
525 bool ProbablyBracedList = false;
526 if (Style.Language == FormatStyle::LK_Proto) {
527 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
528 } else {
529 // Skip NextTok over preprocessor lines, otherwise we may not
530 // properly diagnose the block as a braced initializer
531 // if the comma separator appears after the pp directive.
532 while (NextTok->is(tok::hash)) {
533 ScopedMacroState MacroState(*Line, Tokens, NextTok);
534 do {
535 NextTok = Tokens->getNextToken();
536 } while (NextTok->isNot(tok::eof));
537 }
538
539 // Using OriginalColumn to distinguish between ObjC methods and
540 // binary operators is a bit hacky.
541 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
542 NextTok->OriginalColumn == 0;
543
544 // Try to detect a braced list. Note that regardless how we mark inner
545 // braces here, we will overwrite the BlockKind later if we parse a
546 // braced list (where all blocks inside are by default braced lists),
547 // or when we explicitly detect blocks (for example while parsing
548 // lambdas).
549
550 // If we already marked the opening brace as braced list, the closing
551 // must also be part of it.
552 ProbablyBracedList = LBraceStack.back().Tok->is(TT_BracedListLBrace);
553
554 ProbablyBracedList = ProbablyBracedList ||
555 (Style.isJavaScript() &&
556 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
557 Keywords.kw_as));
558 ProbablyBracedList = ProbablyBracedList ||
559 (Style.isCpp() && NextTok->is(tok::l_paren));
560
561 // If there is a comma, semicolon or right paren after the closing
562 // brace, we assume this is a braced initializer list.
563 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
564 // braced list in JS.
565 ProbablyBracedList =
566 ProbablyBracedList ||
567 NextTok->isOneOf(tok::comma, tok::period, tok::colon,
568 tok::r_paren, tok::r_square, tok::ellipsis);
569
570 // Distinguish between braced list in a constructor initializer list
571 // followed by constructor body, or just adjacent blocks.
572 ProbablyBracedList =
573 ProbablyBracedList ||
574 (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
575 LBraceStack.back().PrevTok->isOneOf(tok::identifier,
576 tok::greater));
577
578 ProbablyBracedList =
579 ProbablyBracedList ||
580 (NextTok->is(tok::identifier) &&
581 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
582
583 ProbablyBracedList = ProbablyBracedList ||
584 (NextTok->is(tok::semi) &&
585 (!ExpectClassBody || LBraceStack.size() != 1));
586
587 ProbablyBracedList =
588 ProbablyBracedList ||
589 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
590
// Note: this last check overwrites (rather than ORs into) the result of all
// heuristics above when the next token is '['.
591 if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
592 // We can have an array subscript after a braced init
593 // list, but C++11 attributes are expected after blocks.
594 NextTok = Tokens->getNextToken();
595 ProbablyBracedList = NextTok->isNot(tok::l_square);
596 }
597 }
// Mark both the closing brace and its matching opening brace.
598 if (ProbablyBracedList) {
599 Tok->setBlockKind(BK_BracedInit);
600 LBraceStack.back().Tok->setBlockKind(BK_BracedInit);
601 } else {
602 Tok->setBlockKind(BK_Block);
603 LBraceStack.back().Tok->setBlockKind(BK_Block);
604 }
605 }
606 LBraceStack.pop_back();
607 break;
608 case tok::identifier:
609 if (Tok->isNot(TT_StatementMacro))
610 break;
611 [[fallthrough]];
612 case tok::kw_if:
613 if (PrevTok->is(tok::hash))
614 break;
615 [[fallthrough]];
// Any of the tokens below inside a still-undecided brace marks it as a block.
616 case tok::at:
617 case tok::semi:
618 case tok::kw_while:
619 case tok::kw_for:
620 case tok::kw_switch:
621 case tok::kw_try:
622 case tok::kw___try:
623 if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
624 LBraceStack.back().Tok->setBlockKind(BK_Block);
625 break;
626 default:
627 break;
628 }
629 PrevTok = Tok;
630 Tok = NextTok;
631 } while (Tok->isNot(tok::eof) && !LBraceStack.empty());
632
633 // Assume other blocks for all unclosed opening braces.
634 for (const auto &Entry : LBraceStack)
635 if (Entry.Tok->is(BK_Unknown))
636 Entry.Tok->setBlockKind(BK_Block);
637
// Rewind to where the lookahead started.
638 FormatTok = Tokens->setPosition(StoredPosition);
639}
640
/// Mixes the std::hash of \p v into \p seed in place.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t ValueHash = std::hash<T>{}(v);
  const std::size_t Mixed =
      ValueHash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
646
647size_t UnwrappedLineParser::computePPHash() const {
648 size_t h = 0;
649 for (const auto &i : PPStack) {
650 hash_combine(h, size_t(i.Kind));
651 hash_combine(h, i.Line);
652 }
653 return h;
654}
655
656// Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
657// is not null, subtracts its length (plus the preceding space) when computing
658// the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
659// running the token annotator on it so that we can restore them afterward.
660bool UnwrappedLineParser::mightFitOnOneLine(
661 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
662 const auto ColumnLimit = Style.ColumnLimit;
663 if (ColumnLimit == 0)
664 return true;
665
666 auto &Tokens = ParsedLine.Tokens;
667 assert(!Tokens.empty());
668
669 const auto *LastToken = Tokens.back().Tok;
670 assert(LastToken);
671
672 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
673
674 int Index = 0;
675 for (const auto &Token : Tokens) {
676 assert(Token.Tok);
677 auto &SavedToken = SavedTokens[Index++];
678 SavedToken.Tok = new FormatToken;
679 SavedToken.Tok->copyFrom(*Token.Tok);
680 SavedToken.Children = std::move(Token.Children);
681 }
682
683 AnnotatedLine Line(ParsedLine);
684 assert(Line.Last == LastToken);
685
686 TokenAnnotator Annotator(Style, Keywords);
687 Annotator.annotate(Line);
688 Annotator.calculateFormattingInformation(Line);
689
690 auto Length = LastToken->TotalLength;
691 if (OpeningBrace) {
692 assert(OpeningBrace != Tokens.front().Tok);
693 if (auto Prev = OpeningBrace->Previous;
694 Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
695 Length -= ColumnLimit;
696 }
697 Length -= OpeningBrace->TokenText.size() + 1;
698 }
699
700 if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
701 assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
702 Length -= FirstToken->TokenText.size() + 1;
703 }
704
705 Index = 0;
706 for (auto &Token : Tokens) {
707 const auto &SavedToken = SavedTokens[Index++];
708 Token.Tok->copyFrom(*SavedToken.Tok);
709 Token.Children = std::move(SavedToken.Children);
710 delete SavedToken.Tok;
711 }
712
713 // If these change PPLevel needs to be used for get correct indentation.
714 assert(!Line.InMacroBody);
715 assert(!Line.InPPDirective);
716 return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
717}
718
// Parses a block that starts at the current token ('{', a macro block begin
// token, or in Verilog a begin/hierarchy token), including its closing token
// and a few constructs that may directly follow it.
// \param MustBeDeclaration Declaration state installed for the block's content.
// \param AddLevels Number of indentation levels added for the block's content.
// \param MunchSemi Whether a ';' directly after the block is consumed as well.
// \param KeepBraces Consulted by the brace-removal check (see RemoveBraces).
// \param IfKind Forwarded to parseLevel().
// \param UnindentWhitesmithsBraces Whether to decrement the level once after
// the opening line has been emitted.
// \returns The if-left-brace reported by parseLevel(), or nullptr when the
// nesting level exceeds the hard limit.
719FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
720 unsigned AddLevels, bool MunchSemi,
721 bool KeepBraces,
722 IfStmtKind *IfKind,
723 bool UnindentWhitesmithsBraces) {
724 auto HandleVerilogBlockLabel = [this]() {
725 // ":" name
726 if (Style.isVerilog() && FormatTok->is(tok::colon)) {
727 nextToken();
728 if (Keywords.isVerilogIdentifier(*FormatTok))
729 nextToken();
730 }
731 };
732
733 // Whether this is a Verilog-specific block that has a special header like a
734 // module.
735 const bool VerilogHierarchy =
736 Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
737 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
738 (Style.isVerilog() &&
739 (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
740 "'{' or macro block token expected");
// Remember the opening token and the index of the line it will be part of.
741 FormatToken *Tok = FormatTok;
742 const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
743 auto Index = CurrentLines->size();
744 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
745 FormatTok->setBlockKind(BK_Block);
746
747 // For Whitesmiths mode, jump to the next level prior to skipping over the
748 // braces.
// NOTE(review): the remainder of this condition is not visible in this listing
// (original line 750 is absent); per the comment above it presumably tests for
// the Whitesmiths brace style - confirm against the upstream file.
749 if (!VerilogHierarchy && AddLevels > 0 &&
751 ++Line->Level;
752 }
753
// Hash of the preprocessor branch stack at block start; compared with the hash
// at block end before the opening and closing lines are linked.
754 size_t PPStartHash = computePPHash();
755
756 const unsigned InitialLevel = Line->Level;
757 if (VerilogHierarchy) {
758 AddLevels += parseVerilogHierarchyHeader();
759 } else {
760 nextToken(/*LevelDifference=*/AddLevels);
761 HandleVerilogBlockLabel();
762 }
763
764 // Bail out if there are too many levels. Otherwise, the stack might overflow.
765 if (Line->Level > 300)
766 return nullptr;
767
768 if (MacroBlock && FormatTok->is(tok::l_paren))
769 parseParens();
770
771 size_t NbPreprocessorDirectives =
772 !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
773 addUnwrappedLine();
// NOTE(review): the value used when CurrentLines is empty is not visible in
// this listing (original line 776 is absent); the later comparison against
// UnwrappedLine::kInvalidIndex suggests that constant - confirm upstream.
774 size_t OpeningLineIndex =
775 CurrentLines->empty()
777 : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
778
779 // Whitesmiths is weird here. The brace needs to be indented for the namespace
780 // block, but the block itself may not be indented depending on the style
781 // settings. This allows the format to back up one level in those cases.
782 if (UnindentWhitesmithsBraces)
783 --Line->Level;
784
785 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
786 MustBeDeclaration);
787 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
788 Line->Level += AddLevels;
789
790 FormatToken *IfLBrace = nullptr;
791 const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace);
792
793 if (eof())
794 return IfLBrace;
795
// The level did not stop at the expected closing token: restore the level and
// give up on this block.
796 if (MacroBlock ? FormatTok->isNot(TT_MacroBlockEnd)
797 : FormatTok->isNot(tok::r_brace)) {
798 Line->Level = InitialLevel;
799 FormatTok->setBlockKind(BK_Block);
800 return IfLBrace;
801 }
802
803 if (FormatTok->is(tok::r_brace) && Tok->is(TT_NamespaceLBrace))
804 FormatTok->setFinalizedType(TT_NamespaceRBrace);
805
806 const bool IsFunctionRBrace =
807 FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);
808
// Decides whether the braces of this block are candidates for removal.
// Captures by copy and is `mutable`: the `--Index` and `Tok = nullptr` below
// change only the lambda's own copies, so the outer `Tok` used after the call
// is unaffected.
809 auto RemoveBraces = [=]() mutable {
810 if (!SimpleBlock)
811 return false;
812 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
813 assert(FormatTok->is(tok::r_brace));
814 const bool WrappedOpeningBrace = !Tok->Previous;
815 if (WrappedOpeningBrace && FollowedByComment)
816 return false;
817 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
818 if (KeepBraces && !HasRequiredIfBraces)
819 return false;
820 if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
821 const FormatToken *Previous = Tokens->getPreviousToken();
822 assert(Previous);
823 if (Previous->is(tok::r_brace) && !Previous->Optional)
824 return false;
825 }
826 assert(!CurrentLines->empty());
827 auto &LastLine = CurrentLines->back();
828 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
829 return false;
830 if (Tok->is(TT_ElseLBrace))
831 return true;
832 if (WrappedOpeningBrace) {
833 assert(Index > 0);
834 --Index; // The line above the wrapped l_brace.
835 Tok = nullptr;
836 }
837 return mightFitOnOneLine((*CurrentLines)[Index], Tok);
838 };
// Removable braces are recorded by linking the two brace tokens to each other.
839 if (RemoveBraces()) {
840 Tok->MatchingParen = FormatTok;
841 FormatTok->MatchingParen = Tok;
842 }
843
844 size_t PPEndHash = computePPHash();
845
846 // Munch the closing brace.
847 nextToken(/*LevelDifference=*/-AddLevels);
848
849 // When this is a function block and there is an unnecessary semicolon
850 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
851 // it later).
852 if (Style.RemoveSemicolon && IsFunctionRBrace) {
853 while (FormatTok->is(tok::semi)) {
854 FormatTok->Optional = true;
855 nextToken();
856 }
857 }
858
859 HandleVerilogBlockLabel();
860
861 if (MacroBlock && FormatTok->is(tok::l_paren))
862 parseParens();
863
864 Line->Level = InitialLevel;
865
866 if (FormatTok->is(tok::kw_noexcept)) {
867 // A noexcept in a requires expression.
868 nextToken();
869 }
870
871 if (FormatTok->is(tok::arrow)) {
872 // Following the } or noexcept we can find a trailing return type arrow
873 // as part of an implicit conversion constraint.
874 nextToken();
875 parseStructuralElement();
876 }
877
878 if (MunchSemi && FormatTok->is(tok::semi))
879 nextToken();
880
// Link opening and closing lines only if the preprocessor branch stack hashes
// to the same value at both ends of the block.
881 if (PPStartHash == PPEndHash) {
882 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
883 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
884 // Update the opening line to add the forward reference as well
885 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
886 CurrentLines->size() - 1;
887 }
888 }
889
890 return IfLBrace;
891}
892
893static bool isGoogScope(const UnwrappedLine &Line) {
894 // FIXME: Closure-library specific stuff should not be hard-coded but be
895 // configurable.
896 if (Line.Tokens.size() < 4)
897 return false;
898 auto I = Line.Tokens.begin();
899 if (I->Tok->TokenText != "goog")
900 return false;
901 ++I;
902 if (I->Tok->isNot(tok::period))
903 return false;
904 ++I;
905 if (I->Tok->TokenText != "scope")
906 return false;
907 ++I;
908 return I->Tok->is(tok::l_paren);
909}
910
911static bool isIIFE(const UnwrappedLine &Line,
912 const AdditionalKeywords &Keywords) {
913 // Look for the start of an immediately invoked anonymous function.
914 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
915 // This is commonly done in JavaScript to create a new, anonymous scope.
916 // Example: (function() { ... })()
917 if (Line.Tokens.size() < 3)
918 return false;
919 auto I = Line.Tokens.begin();
920 if (I->Tok->isNot(tok::l_paren))
921 return false;
922 ++I;
923 if (I->Tok->isNot(Keywords.kw_function))
924 return false;
925 ++I;
926 return I->Tok->is(tok::l_paren);
927}
928
929static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
930 const FormatToken &InitialToken) {
931 tok::TokenKind Kind = InitialToken.Tok.getKind();
932 if (InitialToken.is(TT_NamespaceMacro))
933 Kind = tok::kw_namespace;
934
935 switch (Kind) {
936 case tok::kw_namespace:
937 return Style.BraceWrapping.AfterNamespace;
938 case tok::kw_class:
939 return Style.BraceWrapping.AfterClass;
940 case tok::kw_union:
941 return Style.BraceWrapping.AfterUnion;
942 case tok::kw_struct:
943 return Style.BraceWrapping.AfterStruct;
944 case tok::kw_enum:
945 return Style.BraceWrapping.AfterEnum;
946 default:
947 return false;
948 }
949}
950
951void UnwrappedLineParser::parseChildBlock() {
952 assert(FormatTok->is(tok::l_brace));
953 FormatTok->setBlockKind(BK_Block);
954 const FormatToken *OpeningBrace = FormatTok;
955 nextToken();
956 {
957 bool SkipIndent = (Style.isJavaScript() &&
958 (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
959 ScopedLineState LineState(*this);
960 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
961 /*MustBeDeclaration=*/false);
962 Line->Level += SkipIndent ? 0 : 1;
963 parseLevel(OpeningBrace);
964 flushComments(isOnNewLine(*FormatTok));
965 Line->Level -= SkipIndent ? 0 : 1;
966 }
967 nextToken();
968}
969
970void UnwrappedLineParser::parsePPDirective() {
971 assert(FormatTok->is(tok::hash) && "'#' expected");
972 ScopedMacroState MacroState(*Line, Tokens, FormatTok);
973
974 nextToken();
975
976 if (!FormatTok->Tok.getIdentifierInfo()) {
977 parsePPUnknown();
978 return;
979 }
980
981 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
982 case tok::pp_define:
983 parsePPDefine();
984 return;
985 case tok::pp_if:
986 parsePPIf(/*IfDef=*/false);
987 break;
988 case tok::pp_ifdef:
989 case tok::pp_ifndef:
990 parsePPIf(/*IfDef=*/true);
991 break;
992 case tok::pp_else:
993 case tok::pp_elifdef:
994 case tok::pp_elifndef:
995 case tok::pp_elif:
996 parsePPElse();
997 break;
998 case tok::pp_endif:
999 parsePPEndIf();
1000 break;
1001 case tok::pp_pragma:
1002 parsePPPragma();
1003 break;
1004 default:
1005 parsePPUnknown();
1006 break;
1007 }
1008}
1009
1010void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1011 size_t Line = CurrentLines->size();
1012 if (CurrentLines == &PreprocessorDirectives)
1013 Line += Lines.size();
1014
1015 if (Unreachable ||
1016 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1017 PPStack.push_back({PP_Unreachable, Line});
1018 } else {
1019 PPStack.push_back({PP_Conditional, Line});
1020 }
1021}
1022
1023void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1024 ++PPBranchLevel;
1025 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1026 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1027 PPLevelBranchIndex.push_back(0);
1028 PPLevelBranchCount.push_back(0);
1029 }
1030 PPChainBranchIndex.push(Unreachable ? -1 : 0);
1031 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1032 conditionalCompilationCondition(Unreachable || Skip);
1033}
1034
1035void UnwrappedLineParser::conditionalCompilationAlternative() {
1036 if (!PPStack.empty())
1037 PPStack.pop_back();
1038 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1039 if (!PPChainBranchIndex.empty())
1040 ++PPChainBranchIndex.top();
1041 conditionalCompilationCondition(
1042 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1043 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1044}
1045
1046void UnwrappedLineParser::conditionalCompilationEnd() {
1047 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1048 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1049 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1050 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1051 }
1052 // Guard against #endif's without #if.
1053 if (PPBranchLevel > -1)
1054 --PPBranchLevel;
1055 if (!PPChainBranchIndex.empty())
1056 PPChainBranchIndex.pop();
1057 if (!PPStack.empty())
1058 PPStack.pop_back();
1059}
1060
1061void UnwrappedLineParser::parsePPIf(bool IfDef) {
1062 bool IfNDef = FormatTok->is(tok::pp_ifndef);
1063 nextToken();
1064 bool Unreachable = false;
1065 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1066 Unreachable = true;
1067 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1068 Unreachable = true;
1069 conditionalCompilationStart(Unreachable);
1070 FormatToken *IfCondition = FormatTok;
1071 // If there's a #ifndef on the first line, and the only lines before it are
1072 // comments, it could be an include guard.
1073 bool MaybeIncludeGuard = IfNDef;
1074 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1075 for (auto &Line : Lines) {
1076 if (Line.Tokens.front().Tok->isNot(tok::comment)) {
1077 MaybeIncludeGuard = false;
1078 IncludeGuard = IG_Rejected;
1079 break;
1080 }
1081 }
1082 }
1083 --PPBranchLevel;
1084 parsePPUnknown();
1085 ++PPBranchLevel;
1086 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1087 IncludeGuard = IG_IfNdefed;
1088 IncludeGuardToken = IfCondition;
1089 }
1090}
1091
1092void UnwrappedLineParser::parsePPElse() {
1093 // If a potential include guard has an #else, it's not an include guard.
1094 if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1095 IncludeGuard = IG_Rejected;
1096 // Don't crash when there is an #else without an #if.
1097 assert(PPBranchLevel >= -1);
1098 if (PPBranchLevel == -1)
1099 conditionalCompilationStart(/*Unreachable=*/true);
1100 conditionalCompilationAlternative();
1101 --PPBranchLevel;
1102 parsePPUnknown();
1103 ++PPBranchLevel;
1104}
1105
1106void UnwrappedLineParser::parsePPEndIf() {
1107 conditionalCompilationEnd();
1108 parsePPUnknown();
1109 // If the #endif of a potential include guard is the last thing in the file,
1110 // then we found an include guard.
1111 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1113 IncludeGuard = IG_Found;
1114 }
1115}
1116
1117void UnwrappedLineParser::parsePPDefine() {
1118 nextToken();
1119
1120 if (!FormatTok->Tok.getIdentifierInfo()) {
1121 IncludeGuard = IG_Rejected;
1122 IncludeGuardToken = nullptr;
1123 parsePPUnknown();
1124 return;
1125 }
1126
1127 if (IncludeGuard == IG_IfNdefed &&
1128 IncludeGuardToken->TokenText == FormatTok->TokenText) {
1129 IncludeGuard = IG_Defined;
1130 IncludeGuardToken = nullptr;
1131 for (auto &Line : Lines) {
1132 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1133 IncludeGuard = IG_Rejected;
1134 break;
1135 }
1136 }
1137 }
1138
1139 // In the context of a define, even keywords should be treated as normal
1140 // identifiers. Setting the kind to identifier is not enough, because we need
1141 // to treat additional keywords like __except as well, which are already
1142 // identifiers. Setting the identifier info to null interferes with include
1143 // guard processing above, and changes preprocessing nesting.
1144 FormatTok->Tok.setKind(tok::identifier);
1146 nextToken();
1147 if (FormatTok->Tok.getKind() == tok::l_paren &&
1148 !FormatTok->hasWhitespaceBefore()) {
1149 parseParens();
1150 }
1152 Line->Level += PPBranchLevel + 1;
1153 addUnwrappedLine();
1154 ++Line->Level;
1155
1156 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
1157 assert((int)Line->PPLevel >= 0);
1158 Line->InMacroBody = true;
1159
1160 // Errors during a preprocessor directive can only affect the layout of the
1161 // preprocessor directive, and thus we ignore them. An alternative approach
1162 // would be to use the same approach we use on the file level (no
1163 // re-indentation if there was a structural error) within the macro
1164 // definition.
1165 parseFile();
1166}
1167
1168void UnwrappedLineParser::parsePPPragma() {
1169 Line->InPragmaDirective = true;
1170 parsePPUnknown();
1171}
1172
1173void UnwrappedLineParser::parsePPUnknown() {
1174 do {
1175 nextToken();
1176 } while (!eof());
1178 Line->Level += PPBranchLevel + 1;
1179 addUnwrappedLine();
1180}
1181
1182// Here we exclude certain tokens that are not usually the first token in an
1183// unwrapped line. This is used in attempt to distinguish macro calls without
1184// trailing semicolons from other constructs split to several lines.
1185static bool tokenCanStartNewLine(const FormatToken &Tok) {
1186 // Semicolon can be a null-statement, l_square can be a start of a macro or
1187 // a C++11 attribute, but this doesn't seem to be common.
1188 assert(Tok.isNot(TT_AttributeSquare));
1189 return !Tok.isOneOf(tok::semi, tok::l_brace,
1190 // Tokens that can only be used as binary operators and a
1191 // part of overloaded operator names.
1192 tok::period, tok::periodstar, tok::arrow, tok::arrowstar,
1193 tok::less, tok::greater, tok::slash, tok::percent,
1194 tok::lessless, tok::greatergreater, tok::equal,
1195 tok::plusequal, tok::minusequal, tok::starequal,
1196 tok::slashequal, tok::percentequal, tok::ampequal,
1197 tok::pipeequal, tok::caretequal, tok::greatergreaterequal,
1198 tok::lesslessequal,
1199 // Colon is used in labels, base class lists, initializer
1200 // lists, range-based for loops, ternary operator, but
1201 // should never be the first token in an unwrapped line.
1202 tok::colon,
1203 // 'noexcept' is a trailing annotation.
1204 tok::kw_noexcept);
1205}
1206
1207static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1208 const FormatToken *FormatTok) {
1209 // FIXME: This returns true for C/C++ keywords like 'struct'.
1210 return FormatTok->is(tok::identifier) &&
1211 (!FormatTok->Tok.getIdentifierInfo() ||
1212 !FormatTok->isOneOf(
1213 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1214 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1215 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1216 Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1217 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1218 Keywords.kw_instanceof, Keywords.kw_interface,
1219 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1220}
1221
1222static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1223 const FormatToken *FormatTok) {
1224 return FormatTok->Tok.isLiteral() ||
1225 FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1226 mustBeJSIdent(Keywords, FormatTok);
1227}
1228
1229// isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1230// when encountered after a value (see mustBeJSIdentOrValue).
1231static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1232 const FormatToken *FormatTok) {
1233 return FormatTok->isOneOf(
1234 tok::kw_return, Keywords.kw_yield,
1235 // conditionals
1236 tok::kw_if, tok::kw_else,
1237 // loops
1238 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1239 // switch/case
1240 tok::kw_switch, tok::kw_case,
1241 // exceptions
1242 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1243 // declaration
1244 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1245 Keywords.kw_async, Keywords.kw_function,
1246 // import/export
1247 Keywords.kw_import, tok::kw_export);
1248}
1249
1250// Checks whether a token is a type in K&R C (aka C78).
1251static bool isC78Type(const FormatToken &Tok) {
1252 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1253 tok::kw_unsigned, tok::kw_float, tok::kw_double,
1254 tok::identifier);
1255}
1256
1257// This function checks whether a token starts the first parameter declaration
1258// in a K&R C (aka C78) function definition, e.g.:
1259// int f(a, b)
1260// short a, b;
1261// {
1262// return a + b;
1263// }
1264static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1265 const FormatToken *FuncName) {
1266 assert(Tok);
1267 assert(Next);
1268 assert(FuncName);
1269
1270 if (FuncName->isNot(tok::identifier))
1271 return false;
1272
1273 const FormatToken *Prev = FuncName->Previous;
1274 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1275 return false;
1276
1277 if (!isC78Type(*Tok) &&
1278 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1279 return false;
1280 }
1281
1282 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1283 return false;
1284
1285 Tok = Tok->Previous;
1286 if (!Tok || Tok->isNot(tok::r_paren))
1287 return false;
1288
1289 Tok = Tok->Previous;
1290 if (!Tok || Tok->isNot(tok::identifier))
1291 return false;
1292
1293 return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1294}
1295
1296bool UnwrappedLineParser::parseModuleImport() {
1297 assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1298
1299 if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1300 !Token->Tok.getIdentifierInfo() &&
1301 !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
1302 return false;
1303 }
1304
1305 nextToken();
1306 while (!eof()) {
1307 if (FormatTok->is(tok::colon)) {
1308 FormatTok->setFinalizedType(TT_ModulePartitionColon);
1309 }
1310 // Handle import <foo/bar.h> as we would an include statement.
1311 else if (FormatTok->is(tok::less)) {
1312 nextToken();
1313 while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1314 // Mark tokens up to the trailing line comments as implicit string
1315 // literals.
1316 if (FormatTok->isNot(tok::comment) &&
1317 !FormatTok->TokenText.startswith("//")) {
1318 FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1319 }
1320 nextToken();
1321 }
1322 }
1323 if (FormatTok->is(tok::semi)) {
1324 nextToken();
1325 break;
1326 }
1327 nextToken();
1328 }
1329
1330 addUnwrappedLine();
1331 return true;
1332}
1333
1334// readTokenWithJavaScriptASI reads the next token and terminates the current
1335// line if JavaScript Automatic Semicolon Insertion must
1336// happen between the current token and the next token.
1337//
1338// This method is conservative - it cannot cover all edge cases of JavaScript,
1339// but only aims to correctly handle certain well known cases. It *must not*
1340// return true in speculative cases.
1341void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1342 FormatToken *Previous = FormatTok;
1343 readToken();
1344 FormatToken *Next = FormatTok;
1345
1346 bool IsOnSameLine =
1347 CommentsBeforeNextToken.empty()
1348 ? Next->NewlinesBefore == 0
1349 : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1350 if (IsOnSameLine)
1351 return;
1352
1353 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1354 bool PreviousStartsTemplateExpr =
1355 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1356 if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1357 // If the line contains an '@' sign, the previous token might be an
1358 // annotation, which can precede another identifier/value.
1359 bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1360 return LineNode.Tok->is(tok::at);
1361 });
1362 if (HasAt)
1363 return;
1364 }
1365 if (Next->is(tok::exclaim) && PreviousMustBeValue)
1366 return addUnwrappedLine();
1367 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1368 bool NextEndsTemplateExpr =
1369 Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1370 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1371 (PreviousMustBeValue ||
1372 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1373 tok::minusminus))) {
1374 return addUnwrappedLine();
1375 }
1376 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1377 isJSDeclOrStmt(Keywords, Next)) {
1378 return addUnwrappedLine();
1379 }
1380}
1381
1382void UnwrappedLineParser::parseStructuralElement(
1383 const FormatToken *OpeningBrace, IfStmtKind *IfKind,
1384 FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1385 if (Style.Language == FormatStyle::LK_TableGen &&
1386 FormatTok->is(tok::pp_include)) {
1387 nextToken();
1388 if (FormatTok->is(tok::string_literal))
1389 nextToken();
1390 addUnwrappedLine();
1391 return;
1392 }
1393
1394 if (Style.isCpp()) {
1395 while (FormatTok->is(tok::l_square) && handleCppAttributes()) {
1396 }
1397 } else if (Style.isVerilog()) {
1398 if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
1399 parseForOrWhileLoop(/*HasParens=*/false);
1400 return;
1401 }
1402 if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
1403 parseForOrWhileLoop();
1404 return;
1405 }
1406 if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
1407 Keywords.kw_assume, Keywords.kw_cover)) {
1408 parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1409 return;
1410 }
1411
1412 // Skip things that can exist before keywords like 'if' and 'case'.
1413 while (true) {
1414 if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
1415 Keywords.kw_unique0)) {
1416 nextToken();
1417 } else if (FormatTok->is(tok::l_paren) &&
1418 Tokens->peekNextToken()->is(tok::star)) {
1419 parseParens();
1420 } else {
1421 break;
1422 }
1423 }
1424 }
1425
1426 // Tokens that only make sense at the beginning of a line.
1427 switch (FormatTok->Tok.getKind()) {
1428 case tok::kw_asm:
1429 nextToken();
1430 if (FormatTok->is(tok::l_brace)) {
1431 FormatTok->setFinalizedType(TT_InlineASMBrace);
1432 nextToken();
1433 while (FormatTok && !eof()) {
1434 if (FormatTok->is(tok::r_brace)) {
1435 FormatTok->setFinalizedType(TT_InlineASMBrace);
1436 nextToken();
1437 addUnwrappedLine();
1438 break;
1439 }
1440 FormatTok->Finalized = true;
1441 nextToken();
1442 }
1443 }
1444 break;
1445 case tok::kw_namespace:
1446 parseNamespace();
1447 return;
1448 case tok::kw_public:
1449 case tok::kw_protected:
1450 case tok::kw_private:
1451 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1452 Style.isCSharp()) {
1453 nextToken();
1454 } else {
1455 parseAccessSpecifier();
1456 }
1457 return;
1458 case tok::kw_if: {
1459 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1460 // field/method declaration.
1461 break;
1462 }
1463 FormatToken *Tok = parseIfThenElse(IfKind);
1464 if (IfLeftBrace)
1465 *IfLeftBrace = Tok;
1466 return;
1467 }
1468 case tok::kw_for:
1469 case tok::kw_while:
1470 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1471 // field/method declaration.
1472 break;
1473 }
1474 parseForOrWhileLoop();
1475 return;
1476 case tok::kw_do:
1477 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1478 // field/method declaration.
1479 break;
1480 }
1481 parseDoWhile();
1482 if (HasDoWhile)
1483 *HasDoWhile = true;
1484 return;
1485 case tok::kw_switch:
1486 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1487 // 'switch: string' field declaration.
1488 break;
1489 }
1490 parseSwitch();
1491 return;
1492 case tok::kw_default:
1493 // In Verilog default along with other labels are handled in the next loop.
1494 if (Style.isVerilog())
1495 break;
1496 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1497 // 'default: string' field declaration.
1498 break;
1499 }
1500 nextToken();
1501 if (FormatTok->is(tok::colon)) {
1502 FormatTok->setFinalizedType(TT_CaseLabelColon);
1503 parseLabel();
1504 return;
1505 }
1506 // e.g. "default void f() {}" in a Java interface.
1507 break;
1508 case tok::kw_case:
1509 // Proto: there are no switch/case statements.
1510 if (Style.isProto()) {
1511 nextToken();
1512 return;
1513 }
1514 if (Style.isVerilog()) {
1515 parseBlock();
1516 addUnwrappedLine();
1517 return;
1518 }
1519 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1520 // 'case: string' field declaration.
1521 nextToken();
1522 break;
1523 }
1524 parseCaseLabel();
1525 return;
1526 case tok::kw_try:
1527 case tok::kw___try:
1528 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1529 // field/method declaration.
1530 break;
1531 }
1532 parseTryCatch();
1533 return;
1534 case tok::kw_extern:
1535 nextToken();
1536 if (Style.isVerilog()) {
1537 // In Verilog and extern module declaration looks like a start of module.
1538 // But there is no body and endmodule. So we handle it separately.
1539 if (Keywords.isVerilogHierarchy(*FormatTok)) {
1540 parseVerilogHierarchyHeader();
1541 return;
1542 }
1543 } else if (FormatTok->is(tok::string_literal)) {
1544 nextToken();
1545 if (FormatTok->is(tok::l_brace)) {
1547 addUnwrappedLine();
1548 // Either we indent or for backwards compatibility we follow the
1549 // AfterExternBlock style.
1550 unsigned AddLevels =
1553 Style.IndentExternBlock ==
1555 ? 1u
1556 : 0u;
1557 parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1558 addUnwrappedLine();
1559 return;
1560 }
1561 }
1562 break;
1563 case tok::kw_export:
1564 if (Style.isJavaScript()) {
1565 parseJavaScriptEs6ImportExport();
1566 return;
1567 }
1568 if (Style.isCpp()) {
1569 nextToken();
1570 if (FormatTok->is(tok::kw_namespace)) {
1571 parseNamespace();
1572 return;
1573 }
1574 if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
1575 return;
1576 }
1577 break;
1578 case tok::kw_inline:
1579 nextToken();
1580 if (FormatTok->is(tok::kw_namespace)) {
1581 parseNamespace();
1582 return;
1583 }
1584 break;
1585 case tok::identifier:
1586 if (FormatTok->is(TT_ForEachMacro)) {
1587 parseForOrWhileLoop();
1588 return;
1589 }
1590 if (FormatTok->is(TT_MacroBlockBegin)) {
1591 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1592 /*MunchSemi=*/false);
1593 return;
1594 }
1595 if (FormatTok->is(Keywords.kw_import)) {
1596 if (Style.isJavaScript()) {
1597 parseJavaScriptEs6ImportExport();
1598 return;
1599 }
1600 if (Style.Language == FormatStyle::LK_Proto) {
1601 nextToken();
1602 if (FormatTok->is(tok::kw_public))
1603 nextToken();
1604 if (FormatTok->isNot(tok::string_literal))
1605 return;
1606 nextToken();
1607 if (FormatTok->is(tok::semi))
1608 nextToken();
1609 addUnwrappedLine();
1610 return;
1611 }
1612 if (Style.isCpp() && parseModuleImport())
1613 return;
1614 }
1615 if (Style.isCpp() &&
1616 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1617 Keywords.kw_slots, Keywords.kw_qslots)) {
1618 nextToken();
1619 if (FormatTok->is(tok::colon)) {
1620 nextToken();
1621 addUnwrappedLine();
1622 return;
1623 }
1624 }
1625 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1626 parseStatementMacro();
1627 return;
1628 }
1629 if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1630 parseNamespace();
1631 return;
1632 }
1633 // In Verilog labels can be any expression, so we don't do them here.
1634 if (!Style.isVerilog() && Tokens->peekNextToken()->is(tok::colon) &&
1635 !Line->MustBeDeclaration) {
1636 nextToken();
1637 Line->Tokens.begin()->Tok->MustBreakBefore = true;
1638 FormatTok->setFinalizedType(TT_GotoLabelColon);
1639 parseLabel(!Style.IndentGotoLabels);
1640 if (HasLabel)
1641 *HasLabel = true;
1642 return;
1643 }
1644 // In all other cases, parse the declaration.
1645 break;
1646 default:
1647 break;
1648 }
1649
1650 const bool InRequiresExpression =
1651 OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
1652 do {
1653 const FormatToken *Previous = FormatTok->Previous;
1654 switch (FormatTok->Tok.getKind()) {
1655 case tok::at:
1656 nextToken();
1657 if (FormatTok->is(tok::l_brace)) {
1658 nextToken();
1659 parseBracedList();
1660 break;
1661 } else if (Style.Language == FormatStyle::LK_Java &&
1662 FormatTok->is(Keywords.kw_interface)) {
1663 nextToken();
1664 break;
1665 }
1666 switch (FormatTok->Tok.getObjCKeywordID()) {
1667 case tok::objc_public:
1668 case tok::objc_protected:
1669 case tok::objc_package:
1670 case tok::objc_private:
1671 return parseAccessSpecifier();
1672 case tok::objc_interface:
1673 case tok::objc_implementation:
1674 return parseObjCInterfaceOrImplementation();
1675 case tok::objc_protocol:
1676 if (parseObjCProtocol())
1677 return;
1678 break;
1679 case tok::objc_end:
1680 return; // Handled by the caller.
1681 case tok::objc_optional:
1682 case tok::objc_required:
1683 nextToken();
1684 addUnwrappedLine();
1685 return;
1686 case tok::objc_autoreleasepool:
1687 nextToken();
1688 if (FormatTok->is(tok::l_brace)) {
1691 addUnwrappedLine();
1692 }
1693 parseBlock();
1694 }
1695 addUnwrappedLine();
1696 return;
1697 case tok::objc_synchronized:
1698 nextToken();
1699 if (FormatTok->is(tok::l_paren)) {
1700 // Skip synchronization object
1701 parseParens();
1702 }
1703 if (FormatTok->is(tok::l_brace)) {
1706 addUnwrappedLine();
1707 }
1708 parseBlock();
1709 }
1710 addUnwrappedLine();
1711 return;
1712 case tok::objc_try:
1713 // This branch isn't strictly necessary (the kw_try case below would
1714 // do this too after the tok::at is parsed above). But be explicit.
1715 parseTryCatch();
1716 return;
1717 default:
1718 break;
1719 }
1720 break;
1721 case tok::kw_requires: {
1722 if (Style.isCpp()) {
1723 bool ParsedClause = parseRequires();
1724 if (ParsedClause)
1725 return;
1726 } else {
1727 nextToken();
1728 }
1729 break;
1730 }
1731 case tok::kw_enum:
1732 // Ignore if this is part of "template <enum ...".
1733 if (Previous && Previous->is(tok::less)) {
1734 nextToken();
1735 break;
1736 }
1737
1738 // parseEnum falls through and does not yet add an unwrapped line as an
1739 // enum definition can start a structural element.
1740 if (!parseEnum())
1741 break;
1742 // This only applies to C++ and Verilog.
1743 if (!Style.isCpp() && !Style.isVerilog()) {
1744 addUnwrappedLine();
1745 return;
1746 }
1747 break;
1748 case tok::kw_typedef:
1749 nextToken();
1750 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1751 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1752 Keywords.kw_CF_CLOSED_ENUM,
1753 Keywords.kw_NS_CLOSED_ENUM)) {
1754 parseEnum();
1755 }
1756 break;
1757 case tok::kw_class:
1758 if (Style.isVerilog()) {
1759 parseBlock();
1760 addUnwrappedLine();
1761 return;
1762 }
1763 [[fallthrough]];
1764 case tok::kw_struct:
1765 case tok::kw_union:
1766 if (parseStructLike())
1767 return;
1768 break;
1769 case tok::period:
1770 nextToken();
1771 // In Java, classes have an implicit static member "class".
1772 if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1773 FormatTok->is(tok::kw_class)) {
1774 nextToken();
1775 }
1776 if (Style.isJavaScript() && FormatTok &&
1777 FormatTok->Tok.getIdentifierInfo()) {
1778 // JavaScript only has pseudo keywords, all keywords are allowed to
1779 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1780 nextToken();
1781 }
1782 break;
1783 case tok::semi:
1784 nextToken();
1785 addUnwrappedLine();
1786 return;
1787 case tok::r_brace:
1788 addUnwrappedLine();
1789 return;
1790 case tok::l_paren: {
1791 parseParens();
1792 // Break the unwrapped line if a K&R C function definition has a parameter
1793 // declaration.
1794 if (OpeningBrace || !Style.isCpp() || !Previous || eof())
1795 break;
1796 if (isC78ParameterDecl(FormatTok,
1797 Tokens->peekNextToken(/*SkipComment=*/true),
1798 Previous)) {
1799 addUnwrappedLine();
1800 return;
1801 }
1802 break;
1803 }
1804 case tok::kw_operator:
1805 nextToken();
1806 if (FormatTok->isBinaryOperator())
1807 nextToken();
1808 break;
1809 case tok::caret:
1810 nextToken();
1811 // Block return type.
1812 if (FormatTok->Tok.isAnyIdentifier() ||
1813 FormatTok->isSimpleTypeSpecifier()) {
1814 nextToken();
1815 // Return types: pointers are ok too.
1816 while (FormatTok->is(tok::star))
1817 nextToken();
1818 }
1819 // Block argument list.
1820 if (FormatTok->is(tok::l_paren))
1821 parseParens();
1822 // Block body.
1823 if (FormatTok->is(tok::l_brace))
1824 parseChildBlock();
1825 break;
1826 case tok::l_brace:
1827 if (InRequiresExpression)
1828 FormatTok->setFinalizedType(TT_BracedListLBrace);
1829 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1830 // A block outside of parentheses must be the last part of a
1831 // structural element.
1832 // FIXME: Figure out cases where this is not true, and add projections
1833 // for them (the one we know is missing are lambdas).
1834 if (Style.Language == FormatStyle::LK_Java &&
1835 Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1836 // If necessary, we could set the type to something different than
1837 // TT_FunctionLBrace.
1840 addUnwrappedLine();
1841 }
1842 } else if (Style.BraceWrapping.AfterFunction) {
1843 addUnwrappedLine();
1844 }
1845 FormatTok->setFinalizedType(TT_FunctionLBrace);
1846 parseBlock();
1847 addUnwrappedLine();
1848 return;
1849 }
1850 // Otherwise this was a braced init list, and the structural
1851 // element continues.
1852 break;
1853 case tok::kw_try:
1854 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1855 // field/method declaration.
1856 nextToken();
1857 break;
1858 }
1859 // We arrive here when parsing function-try blocks.
1860 if (Style.BraceWrapping.AfterFunction)
1861 addUnwrappedLine();
1862 parseTryCatch();
1863 return;
1864 case tok::identifier: {
1865 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1866 Line->MustBeDeclaration) {
1867 addUnwrappedLine();
1868 parseCSharpGenericTypeConstraint();
1869 break;
1870 }
1871 if (FormatTok->is(TT_MacroBlockEnd)) {
1872 addUnwrappedLine();
1873 return;
1874 }
1875
1876 // Function declarations (as opposed to function expressions) are parsed
1877 // on their own unwrapped line by continuing this loop. Function
1878 // expressions (functions that are not on their own line) must not create
1879 // a new unwrapped line, so they are special cased below.
1880 size_t TokenCount = Line->Tokens.size();
1881 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1882 (TokenCount > 1 ||
1883 (TokenCount == 1 &&
1884 Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) {
1885 tryToParseJSFunction();
1886 break;
1887 }
1888 if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1889 FormatTok->is(Keywords.kw_interface)) {
1890 if (Style.isJavaScript()) {
1891 // In JavaScript/TypeScript, "interface" can be used as a standalone
1892 // identifier, e.g. in `var interface = 1;`. If "interface" is
1893 // followed by another identifier, it is very like to be an actual
1894 // interface declaration.
1895 unsigned StoredPosition = Tokens->getPosition();
1896 FormatToken *Next = Tokens->getNextToken();
1897 FormatTok = Tokens->setPosition(StoredPosition);
1898 if (!mustBeJSIdent(Keywords, Next)) {
1899 nextToken();
1900 break;
1901 }
1902 }
1903 parseRecord();
1904 addUnwrappedLine();
1905 return;
1906 }
1907
1908 if (Style.isVerilog()) {
1909 if (FormatTok->is(Keywords.kw_table)) {
1910 parseVerilogTable();
1911 return;
1912 }
1913 if (Keywords.isVerilogBegin(*FormatTok) ||
1914 Keywords.isVerilogHierarchy(*FormatTok)) {
1915 parseBlock();
1916 addUnwrappedLine();
1917 return;
1918 }
1919 }
1920
1921 if (!Style.isCpp() && FormatTok->is(Keywords.kw_interface)) {
1922 if (parseStructLike())
1923 return;
1924 break;
1925 }
1926
1927 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1928 parseStatementMacro();
1929 return;
1930 }
1931
1932 // See if the following token should start a new unwrapped line.
1933 StringRef Text = FormatTok->TokenText;
1934
1935 FormatToken *PreviousToken = FormatTok;
1936 nextToken();
1937
1938 // JS doesn't have macros, and within classes colons indicate fields, not
1939 // labels.
1940 if (Style.isJavaScript())
1941 break;
1942
1943 auto OneTokenSoFar = [&]() {
1944 auto I = Line->Tokens.begin(), E = Line->Tokens.end();
1945 while (I != E && I->Tok->is(tok::comment))
1946 ++I;
1947 while (I != E && Style.isVerilog() && I->Tok->is(tok::hash))
1948 ++I;
1949 return I != E && (++I == E);
1950 };
1951 if (OneTokenSoFar()) {
1952 // Recognize function-like macro usages without trailing semicolon as
1953 // well as free-standing macros like Q_OBJECT.
1954 bool FunctionLike = FormatTok->is(tok::l_paren);
1955 if (FunctionLike)
1956 parseParens();
1957
1958 bool FollowedByNewline =
1959 CommentsBeforeNextToken.empty()
1960 ? FormatTok->NewlinesBefore > 0
1961 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1962
1963 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1964 tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
1965 if (PreviousToken->isNot(TT_UntouchableMacroFunc))
1966 PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
1967 addUnwrappedLine();
1968 return;
1969 }
1970 }
1971 break;
1972 }
1973 case tok::equal:
1974 if ((Style.isJavaScript() || Style.isCSharp()) &&
1975 FormatTok->is(TT_FatArrow)) {
1976 tryToParseChildBlock();
1977 break;
1978 }
1979
1980 nextToken();
1981 if (FormatTok->is(tok::l_brace)) {
1982 // Block kind should probably be set to BK_BracedInit for any language.
1983 // C# needs this change to ensure that array initialisers and object
1984 // initialisers are indented the same way.
1985 if (Style.isCSharp())
1986 FormatTok->setBlockKind(BK_BracedInit);
1987 nextToken();
1988 parseBracedList();
1989 } else if (Style.Language == FormatStyle::LK_Proto &&
1990 FormatTok->is(tok::less)) {
1991 nextToken();
1992 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1993 /*ClosingBraceKind=*/tok::greater);
1994 }
1995 break;
1996 case tok::l_square:
1997 parseSquare();
1998 break;
1999 case tok::kw_new:
2000 parseNew();
2001 break;
2002 case tok::kw_case:
2003 // Proto: there are no switch/case statements.
2004 if (Style.isProto()) {
2005 nextToken();
2006 return;
2007 }
2008 // In Verilog switch is called case.
2009 if (Style.isVerilog()) {
2010 parseBlock();
2011 addUnwrappedLine();
2012 return;
2013 }
2014 if (Style.isJavaScript() && Line->MustBeDeclaration) {
2015 // 'case: string' field declaration.
2016 nextToken();
2017 break;
2018 }
2019 parseCaseLabel();
2020 break;
2021 case tok::kw_default:
2022 nextToken();
2023 if (Style.isVerilog()) {
2024 if (FormatTok->is(tok::colon)) {
2025 // The label will be handled in the next iteration.
2026 break;
2027 }
2028 if (FormatTok->is(Keywords.kw_clocking)) {
2029 // A default clocking block.
2030 parseBlock();
2031 addUnwrappedLine();
2032 return;
2033 }
2034 parseVerilogCaseLabel();
2035 return;
2036 }
2037 break;
2038 case tok::colon:
2039 nextToken();
2040 if (Style.isVerilog()) {
2041 parseVerilogCaseLabel();
2042 return;
2043 }
2044 break;
2045 default:
2046 nextToken();
2047 break;
2048 }
2049 } while (!eof());
2050}
2051
2052bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2053 assert(FormatTok->is(tok::l_brace));
2054 if (!Style.isCSharp())
2055 return false;
2056 // See if it's a property accessor.
2057 if (FormatTok->Previous->isNot(tok::identifier))
2058 return false;
2059
2060 // See if we are inside a property accessor.
2061 //
2062 // Record the current tokenPosition so that we can advance and
2063 // reset the current token. `Next` is not set yet so we need
2064 // another way to advance along the token stream.
2065 unsigned int StoredPosition = Tokens->getPosition();
2066 FormatToken *Tok = Tokens->getNextToken();
2067
2068 // A trivial property accessor is of the form:
2069 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2070 // Track these as they do not require line breaks to be introduced.
2071 bool HasSpecialAccessor = false;
2072 bool IsTrivialPropertyAccessor = true;
2073 while (!eof()) {
2074 if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
2075 tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
2076 Keywords.kw_init, Keywords.kw_set)) {
2077 if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
2078 HasSpecialAccessor = true;
2079 Tok = Tokens->getNextToken();
2080 continue;
2081 }
2082 if (Tok->isNot(tok::r_brace))
2083 IsTrivialPropertyAccessor = false;
2084 break;
2085 }
2086
2087 if (!HasSpecialAccessor) {
2088 Tokens->setPosition(StoredPosition);
2089 return false;
2090 }
2091
2092 // Try to parse the property accessor:
2093 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2094 Tokens->setPosition(StoredPosition);
2095 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2096 addUnwrappedLine();
2097 nextToken();
2098 do {
2099 switch (FormatTok->Tok.getKind()) {
2100 case tok::r_brace:
2101 nextToken();
2102 if (FormatTok->is(tok::equal)) {
2103 while (!eof() && FormatTok->isNot(tok::semi))
2104 nextToken();
2105 nextToken();
2106 }
2107 addUnwrappedLine();
2108 return true;
2109 case tok::l_brace:
2110 ++Line->Level;
2111 parseBlock(/*MustBeDeclaration=*/true);
2112 addUnwrappedLine();
2113 --Line->Level;
2114 break;
2115 case tok::equal:
2116 if (FormatTok->is(TT_FatArrow)) {
2117 ++Line->Level;
2118 do {
2119 nextToken();
2120 } while (!eof() && FormatTok->isNot(tok::semi));
2121 nextToken();
2122 addUnwrappedLine();
2123 --Line->Level;
2124 break;
2125 }
2126 nextToken();
2127 break;
2128 default:
2129 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2130 Keywords.kw_set) &&
2131 !IsTrivialPropertyAccessor) {
2132 // Non-trivial get/set needs to be on its own line.
2133 addUnwrappedLine();
2134 }
2135 nextToken();
2136 }
2137 } while (!eof());
2138
2139 // Unreachable for well-formed code (paired '{' and '}').
2140 return true;
2141}
2142
2143bool UnwrappedLineParser::tryToParseLambda() {
2144 assert(FormatTok->is(tok::l_square));
2145 if (!Style.isCpp()) {
2146 nextToken();
2147 return false;
2148 }
2149 FormatToken &LSquare = *FormatTok;
2150 if (!tryToParseLambdaIntroducer())
2151 return false;
2152
2153 bool SeenArrow = false;
2154 bool InTemplateParameterList = false;
2155
2156 while (FormatTok->isNot(tok::l_brace)) {
2157 if (FormatTok->isSimpleTypeSpecifier()) {
2158 nextToken();
2159 continue;
2160 }
2161 switch (FormatTok->Tok.getKind()) {
2162 case tok::l_brace:
2163 break;
2164 case tok::l_paren:
2165 parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
2166 break;
2167 case tok::l_square:
2168 parseSquare();
2169 break;
2170 case tok::less:
2171 assert(FormatTok->Previous);
2172 if (FormatTok->Previous->is(tok::r_square))
2173 InTemplateParameterList = true;
2174 nextToken();
2175 break;
2176 case tok::kw_auto:
2177 case tok::kw_class:
2178 case tok::kw_template:
2179 case tok::kw_typename:
2180 case tok::amp:
2181 case tok::star:
2182 case tok::kw_const:
2183 case tok::kw_constexpr:
2184 case tok::kw_consteval:
2185 case tok::comma:
2186 case tok::greater:
2187 case tok::identifier:
2188 case tok::numeric_constant:
2189 case tok::coloncolon:
2190 case tok::kw_mutable:
2191 case tok::kw_noexcept:
2192 case tok::kw_static:
2193 nextToken();
2194 break;
2195 // Specialization of a template with an integer parameter can contain
2196 // arithmetic, logical, comparison and ternary operators.
2197 //
2198 // FIXME: This also accepts sequences of operators that are not in the scope
2199 // of a template argument list.
2200 //
2201 // In a C++ lambda a template type can only occur after an arrow. We use
2202 // this as an heuristic to distinguish between Objective-C expressions
2203 // followed by an `a->b` expression, such as:
2204 // ([obj func:arg] + a->b)
2205 // Otherwise the code below would parse as a lambda.
2206 //
2207 // FIXME: This heuristic is incorrect for C++20 generic lambdas with
2208 // explicit template lists: []<bool b = true && false>(U &&u){}
2209 case tok::plus:
2210 case tok::minus:
2211 case tok::exclaim:
2212 case tok::tilde:
2213 case tok::slash:
2214 case tok::percent:
2215 case tok::lessless:
2216 case tok::pipe:
2217 case tok::pipepipe:
2218 case tok::ampamp:
2219 case tok::caret:
2220 case tok::equalequal:
2221 case tok::exclaimequal:
2222 case tok::greaterequal:
2223 case tok::lessequal:
2224 case tok::question:
2225 case tok::colon:
2226 case tok::ellipsis:
2227 case tok::kw_true:
2228 case tok::kw_false:
2229 if (SeenArrow || InTemplateParameterList) {
2230 nextToken();
2231 break;
2232 }
2233 return true;
2234 case tok::arrow:
2235 // This might or might not actually be a lambda arrow (this could be an
2236 // ObjC method invocation followed by a dereferencing arrow). We might
2237 // reset this back to TT_Unknown in TokenAnnotator.
2238 FormatTok->setFinalizedType(TT_LambdaArrow);
2239 SeenArrow = true;
2240 nextToken();
2241 break;
2242 case tok::kw_requires: {
2243 auto *RequiresToken = FormatTok;
2244 nextToken();
2245 parseRequiresClause(RequiresToken);
2246 break;
2247 }
2248 default:
2249 return true;
2250 }
2251 }
2252 FormatTok->setFinalizedType(TT_LambdaLBrace);
2253 LSquare.setFinalizedType(TT_LambdaLSquare);
2254 parseChildBlock();
2255 return true;
2256}
2257
2258bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2259 const FormatToken *Previous = FormatTok->Previous;
2260 const FormatToken *LeftSquare = FormatTok;
2261 nextToken();
2262 if ((Previous && ((Previous->Tok.getIdentifierInfo() &&
2263 !Previous->isOneOf(tok::kw_return, tok::kw_co_await,
2264 tok::kw_co_yield, tok::kw_co_return)) ||
2265 Previous->closesScope())) ||
2266 LeftSquare->isCppStructuredBinding(Style)) {
2267 return false;
2268 }
2269 if (FormatTok->is(tok::l_square))
2270 return false;
2271 if (FormatTok->is(tok::r_square)) {
2272 const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2273 if (Next->is(tok::greater))
2274 return false;
2275 }
2276 parseSquare(/*LambdaIntroducer=*/true);
2277 return true;
2278}
2279
2280void UnwrappedLineParser::tryToParseJSFunction() {
2281 assert(FormatTok->is(Keywords.kw_function));
2282 if (FormatTok->is(Keywords.kw_async))
2283 nextToken();
2284 // Consume "function".
2285 nextToken();
2286
2287 // Consume * (generator function). Treat it like C++'s overloaded operators.
2288 if (FormatTok->is(tok::star)) {
2289 FormatTok->setFinalizedType(TT_OverloadedOperator);
2290 nextToken();
2291 }
2292
2293 // Consume function name.
2294 if (FormatTok->is(tok::identifier))
2295 nextToken();
2296
2297 if (FormatTok->isNot(tok::l_paren))
2298 return;
2299
2300 // Parse formal parameter list.
2301 parseParens();
2302
2303 if (FormatTok->is(tok::colon)) {
2304 // Parse a type definition.
2305 nextToken();
2306
2307 // Eat the type declaration. For braced inline object types, balance braces,
2308 // otherwise just parse until finding an l_brace for the function body.
2309 if (FormatTok->is(tok::l_brace))
2310 tryToParseBracedList();
2311 else
2312 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2313 nextToken();
2314 }
2315
2316 if (FormatTok->is(tok::semi))
2317 return;
2318
2319 parseChildBlock();
2320}
2321
2322bool UnwrappedLineParser::tryToParseBracedList() {
2323 if (FormatTok->is(BK_Unknown))
2324 calculateBraceTypes();
2325 assert(FormatTok->isNot(BK_Unknown));
2326 if (FormatTok->is(BK_Block))
2327 return false;
2328 nextToken();
2329 parseBracedList();
2330 return true;
2331}
2332
2333bool UnwrappedLineParser::tryToParseChildBlock() {
2334 assert(Style.isJavaScript() || Style.isCSharp());
2335 assert(FormatTok->is(TT_FatArrow));
2336 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2337 // They always start an expression or a child block if followed by a curly
2338 // brace.
2339 nextToken();
2340 if (FormatTok->isNot(tok::l_brace))
2341 return false;
2342 parseChildBlock();
2343 return true;
2344}
2345
2346bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
2347 bool IsEnum,
2348 tok::TokenKind ClosingBraceKind) {
2349 bool HasError = false;
2350
2351 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2352 // replace this by using parseAssignmentExpression() inside.
2353 do {
2354 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2355 tryToParseChildBlock()) {
2356 continue;
2357 }
2358 if (Style.isJavaScript()) {
2359 if (FormatTok->is(Keywords.kw_function)) {
2360 tryToParseJSFunction();
2361 continue;
2362 }
2363 if (FormatTok->is(tok::l_brace)) {
2364 // Could be a method inside of a braced list `{a() { return 1; }}`.
2365 if (tryToParseBracedList())
2366 continue;
2367 parseChildBlock();
2368 }
2369 }
2370 if (FormatTok->Tok.getKind() == ClosingBraceKind) {
2371 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2372 addUnwrappedLine();
2373 nextToken();
2374 return !HasError;
2375 }
2376 switch (FormatTok->Tok.getKind()) {
2377 case tok::l_square:
2378 if (Style.isCSharp())
2379 parseSquare();
2380 else
2381 tryToParseLambda();
2382 break;
2383 case tok::l_paren:
2384 parseParens();
2385 // JavaScript can just have free standing methods and getters/setters in
2386 // object literals. Detect them by a "{" following ")".
2387 if (Style.isJavaScript()) {
2388 if (FormatTok->is(tok::l_brace))
2389 parseChildBlock();
2390 break;
2391 }
2392 break;
2393 case tok::l_brace:
2394 // Assume there are no blocks inside a braced init list apart
2395 // from the ones we explicitly parse out (like lambdas).
2396 FormatTok->setBlockKind(BK_BracedInit);
2397 nextToken();
2398 parseBracedList();
2399 break;
2400 case tok::less:
2401 if (Style.Language == FormatStyle::LK_Proto ||
2402 ClosingBraceKind == tok::greater) {
2403 nextToken();
2404 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2405 /*ClosingBraceKind=*/tok::greater);
2406 } else {
2407 nextToken();
2408 }
2409 break;
2410 case tok::semi:
2411 // JavaScript (or more precisely TypeScript) can have semicolons in braced
2412 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2413 // used for error recovery if we have otherwise determined that this is
2414 // a braced list.
2415 if (Style.isJavaScript()) {
2416 nextToken();
2417 break;
2418 }
2419 HasError = true;
2420 if (!ContinueOnSemicolons)
2421 return !HasError;
2422 nextToken();
2423 break;
2424 case tok::comma:
2425 nextToken();
2426 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2427 addUnwrappedLine();
2428 break;
2429 default:
2430 nextToken();
2431 break;
2432 }
2433 } while (!eof());
2434 return false;
2435}
2436
2437/// \brief Parses a pair of parentheses (and everything between them).
2438/// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2439/// double ampersands. This applies for all nested scopes as well.
2440///
2441/// Returns whether there is a `=` token between the parentheses.
2442bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2443 assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2444 auto *LeftParen = FormatTok;
2445 bool SeenEqual = false;
2446 const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace);
2447 nextToken();
2448 do {
2449 switch (FormatTok->Tok.getKind()) {
2450 case tok::l_paren:
2451 if (parseParens(AmpAmpTokenType))
2452 SeenEqual = true;
2453 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2454 parseChildBlock();
2455 break;
2456 case tok::r_paren:
2457 if (!MightBeStmtExpr &&
2459 const auto *Prev = LeftParen->Previous;
2460 const auto *Next = Tokens->peekNextToken();
2461 const bool DoubleParens =
2462 Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren);
2463 const auto *PrevPrev = Prev ? Prev->getPreviousNonComment() : nullptr;
2464 const bool Blacklisted =
2465 PrevPrev &&
2466 (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) ||
2467 (SeenEqual &&
2468 (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) ||
2469 PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if))));
2470 const bool ReturnParens =
2472 Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next &&
2473 Next->is(tok::semi);
2474 if ((DoubleParens && !Blacklisted) || ReturnParens) {
2475 LeftParen->Optional = true;
2476 FormatTok->Optional = true;
2477 }
2478 }
2479 nextToken();
2480 return SeenEqual;
2481 case tok::r_brace:
2482 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2483 return SeenEqual;
2484 case tok::l_square:
2485 tryToParseLambda();
2486 break;
2487 case tok::l_brace:
2488 if (!tryToParseBracedList())
2489 parseChildBlock();
2490 break;
2491 case tok::at:
2492 nextToken();
2493 if (FormatTok->is(tok::l_brace)) {
2494 nextToken();
2495 parseBracedList();
2496 }
2497 break;
2498 case tok::equal:
2499 SeenEqual = true;
2500 if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2501 tryToParseChildBlock();
2502 else
2503 nextToken();
2504 break;
2505 case tok::kw_class:
2506 if (Style.isJavaScript())
2507 parseRecord(/*ParseAsExpr=*/true);
2508 else
2509 nextToken();
2510 break;
2511 case tok::identifier:
2512 if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function)))
2513 tryToParseJSFunction();
2514 else
2515 nextToken();
2516 break;
2517 case tok::kw_requires: {
2518 auto RequiresToken = FormatTok;
2519 nextToken();
2520 parseRequiresExpression(RequiresToken);
2521 break;
2522 }
2523 case tok::ampamp:
2524 if (AmpAmpTokenType != TT_Unknown)
2525 FormatTok->setFinalizedType(AmpAmpTokenType);
2526 [[fallthrough]];
2527 default:
2528 nextToken();
2529 break;
2530 }
2531 } while (!eof());
2532 return SeenEqual;
2533}
2534
2535void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2536 if (!LambdaIntroducer) {
2537 assert(FormatTok->is(tok::l_square) && "'[' expected.");
2538 if (tryToParseLambda())
2539 return;
2540 }
2541 do {
2542 switch (FormatTok->Tok.getKind()) {
2543 case tok::l_paren:
2544 parseParens();
2545 break;
2546 case tok::r_square:
2547 nextToken();
2548 return;
2549 case tok::r_brace:
2550 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2551 return;
2552 case tok::l_square:
2553 parseSquare();
2554 break;
2555 case tok::l_brace: {
2556 if (!tryToParseBracedList())
2557 parseChildBlock();
2558 break;
2559 }
2560 case tok::at:
2561 nextToken();
2562 if (FormatTok->is(tok::l_brace)) {
2563 nextToken();
2564 parseBracedList();
2565 }
2566 break;
2567 default:
2568 nextToken();
2569 break;
2570 }
2571 } while (!eof());
2572}
2573
2574void UnwrappedLineParser::keepAncestorBraces() {
2575 if (!Style.RemoveBracesLLVM)
2576 return;
2577
2578 const int MaxNestingLevels = 2;
2579 const int Size = NestedTooDeep.size();
2580 if (Size >= MaxNestingLevels)
2581 NestedTooDeep[Size - MaxNestingLevels] = true;
2582 NestedTooDeep.push_back(false);
2583}
2584
2586 for (const auto &Token : llvm::reverse(Line.Tokens))
2587 if (Token.Tok->isNot(tok::comment))
2588 return Token.Tok;
2589
2590 return nullptr;
2591}
2592
2593void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2594 FormatToken *Tok = nullptr;
2595
2596 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2597 PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) {
2599 ? getLastNonComment(*Line)
2600 : Line->Tokens.back().Tok;
2601 assert(Tok);
2602 if (Tok->BraceCount < 0) {
2603 assert(Tok->BraceCount == -1);
2604 Tok = nullptr;
2605 } else {
2606 Tok->BraceCount = -1;
2607 }
2608 }
2609
2610 addUnwrappedLine();
2611 ++Line->Level;
2612 parseStructuralElement();
2613
2614 if (Tok) {
2615 assert(!Line->InPPDirective);
2616 Tok = nullptr;
2617 for (const auto &L : llvm::reverse(*CurrentLines)) {
2618 if (!L.InPPDirective && getLastNonComment(L)) {
2619 Tok = L.Tokens.back().Tok;
2620 break;
2621 }
2622 }
2623 assert(Tok);
2624 ++Tok->BraceCount;
2625 }
2626
2627 if (CheckEOF && eof())
2628 addUnwrappedLine();
2629
2630 --Line->Level;
2631}
2632
2633static void markOptionalBraces(FormatToken *LeftBrace) {
2634 if (!LeftBrace)
2635 return;
2636
2637 assert(LeftBrace->is(tok::l_brace));
2638
2639 FormatToken *RightBrace = LeftBrace->MatchingParen;
2640 if (!RightBrace) {
2641 assert(!LeftBrace->Optional);
2642 return;
2643 }
2644
2645 assert(RightBrace->is(tok::r_brace));
2646 assert(RightBrace->MatchingParen == LeftBrace);
2647 assert(LeftBrace->Optional == RightBrace->Optional);
2648
2649 LeftBrace->Optional = true;
2650 RightBrace->Optional = true;
2651}
2652
2653void UnwrappedLineParser::handleAttributes() {
2654 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2655 if (FormatTok->isAttribute())
2656 nextToken();
2657 else if (FormatTok->is(tok::l_square))
2658 handleCppAttributes();
2659}
2660
2661bool UnwrappedLineParser::handleCppAttributes() {
2662 // Handle [[likely]] / [[unlikely]] attributes.
2663 assert(FormatTok->is(tok::l_square));
2664 if (!tryToParseSimpleAttribute())
2665 return false;
2666 parseSquare();
2667 return true;
2668}
2669
2670/// Returns whether \c Tok begins a block.
2671bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2672 // FIXME: rename the function or make
2673 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2674 return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2675 : Tok.is(tok::l_brace);
2676}
2677
2678FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2679 bool KeepBraces,
2680 bool IsVerilogAssert) {
2681 assert((FormatTok->is(tok::kw_if) ||
2682 (Style.isVerilog() &&
2683 FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
2684 Keywords.kw_assume, Keywords.kw_cover))) &&
2685 "'if' expected");
2686 nextToken();
2687
2688 if (IsVerilogAssert) {
2689 // Handle `assert #0` and `assert final`.
2690 if (FormatTok->is(Keywords.kw_verilogHash)) {
2691 nextToken();
2692 if (FormatTok->is(tok::numeric_constant))
2693 nextToken();
2694 } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
2695 Keywords.kw_sequence)) {
2696 nextToken();
2697 }
2698 }
2699
2700 // Handle `if !consteval`.
2701 if (FormatTok->is(tok::exclaim))
2702 nextToken();
2703
2704 bool KeepIfBraces = true;
2705 if (FormatTok->is(tok::kw_consteval)) {
2706 nextToken();
2707 } else {
2708 KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
2709 if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
2710 nextToken();
2711 if (FormatTok->is(tok::l_paren)) {
2712 FormatTok->setFinalizedType(TT_ConditionLParen);
2713 parseParens();
2714 }
2715 }
2716 handleAttributes();
2717 // The then action is optional in Verilog assert statements.
2718 if (IsVerilogAssert && FormatTok->is(tok::semi)) {
2719 nextToken();
2720 addUnwrappedLine();
2721 return nullptr;
2722 }
2723
2724 bool NeedsUnwrappedLine = false;
2725 keepAncestorBraces();
2726
2727 FormatToken *IfLeftBrace = nullptr;
2728 IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2729
2730 if (isBlockBegin(*FormatTok)) {
2731 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2732 IfLeftBrace = FormatTok;
2733 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2734 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2735 /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
2736 if (Style.BraceWrapping.BeforeElse)
2737 addUnwrappedLine();
2738 else
2739 NeedsUnwrappedLine = true;
2740 } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
2741 addUnwrappedLine();
2742 } else {
2743 parseUnbracedBody();
2744 }
2745
2746 if (Style.RemoveBracesLLVM) {
2747 assert(!NestedTooDeep.empty());
2748 KeepIfBraces = KeepIfBraces ||
2749 (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2750 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2751 IfBlockKind == IfStmtKind::IfElseIf;
2752 }
2753
2754 bool KeepElseBraces = KeepIfBraces;
2755 FormatToken *ElseLeftBrace = nullptr;
2756 IfStmtKind Kind = IfStmtKind::IfOnly;
2757
2758 if (FormatTok->is(tok::kw_else)) {
2759 if (Style.RemoveBracesLLVM) {
2760 NestedTooDeep.back() = false;
2761 Kind = IfStmtKind::IfElse;
2762 }
2763 nextToken();
2764 handleAttributes();
2765 if (isBlockBegin(*FormatTok)) {
2766 const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
2767 FormatTok->setFinalizedType(TT_ElseLBrace);
2768 ElseLeftBrace = FormatTok;
2769 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2770 IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
2771 FormatToken *IfLBrace =
2772 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2773 /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
2774 if (FormatTok->is(tok::kw_else)) {
2775 KeepElseBraces = KeepElseBraces ||
2776 ElseBlockKind == IfStmtKind::IfOnly ||
2777 ElseBlockKind == IfStmtKind::IfElseIf;
2778 } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
2779 KeepElseBraces = true;
2780 assert(ElseLeftBrace->MatchingParen);
2781 markOptionalBraces(ElseLeftBrace);
2782 }
2783 addUnwrappedLine();
2784 } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
2785 const FormatToken *Previous = Tokens->getPreviousToken();
2786 assert(Previous);
2787 const bool IsPrecededByComment = Previous->is(tok::comment);
2788 if (IsPrecededByComment) {
2789 addUnwrappedLine();
2790 ++Line->Level;
2791 }
2792 bool TooDeep = true;
2793 if (Style.RemoveBracesLLVM) {
2794 Kind = IfStmtKind::IfElseIf;
2795 TooDeep = NestedTooDeep.pop_back_val();
2796 }
2797 ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
2798 if (Style.RemoveBracesLLVM)
2799 NestedTooDeep.push_back(TooDeep);
2800 if (IsPrecededByComment)
2801 --Line->Level;
2802 } else {
2803 parseUnbracedBody(/*CheckEOF=*/true);
2804 }
2805 } else {
2806 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
2807 if (NeedsUnwrappedLine)
2808 addUnwrappedLine();
2809 }
2810
2811 if (!Style.RemoveBracesLLVM)
2812 return nullptr;
2813
2814 assert(!NestedTooDeep.empty());
2815 KeepElseBraces = KeepElseBraces ||
2816 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
2817 NestedTooDeep.back();
2818
2819 NestedTooDeep.pop_back();
2820
2821 if (!KeepIfBraces && !KeepElseBraces) {
2822 markOptionalBraces(IfLeftBrace);
2823 markOptionalBraces(ElseLeftBrace);
2824 } else if (IfLeftBrace) {
2825 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
2826 if (IfRightBrace) {
2827 assert(IfRightBrace->MatchingParen == IfLeftBrace);
2828 assert(!IfLeftBrace->Optional);
2829 assert(!IfRightBrace->Optional);
2830 IfLeftBrace->MatchingParen = nullptr;
2831 IfRightBrace->MatchingParen = nullptr;
2832 }
2833 }
2834
2835 if (IfKind)
2836 *IfKind = Kind;
2837
2838 return IfLeftBrace;
2839}
2840
2841void UnwrappedLineParser::parseTryCatch() {
2842 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2843 nextToken();
2844 bool NeedsUnwrappedLine = false;
2845 if (FormatTok->is(tok::colon)) {
2846 // We are in a function try block, what comes is an initializer list.
2847 nextToken();
2848
2849 // In case identifiers were removed by clang-tidy, what might follow is
2850 // multiple commas in sequence - before the first identifier.
2851 while (FormatTok->is(tok::comma))
2852 nextToken();
2853
2854 while (FormatTok->is(tok::identifier)) {
2855 nextToken();
2856 if (FormatTok->is(tok::l_paren))
2857 parseParens();
2858 if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2859 FormatTok->is(tok::l_brace)) {
2860 do {
2861 nextToken();
2862 } while (FormatTok->isNot(tok::r_brace));
2863 nextToken();
2864 }
2865
2866 // In case identifiers were removed by clang-tidy, what might follow is
2867 // multiple commas in sequence - after the first identifier.
2868 while (FormatTok->is(tok::comma))
2869 nextToken();
2870 }
2871 }
2872 // Parse try with resource.
2873 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2874 parseParens();
2875
2876 keepAncestorBraces();
2877
2878 if (FormatTok->is(tok::l_brace)) {
2879 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2880 parseBlock();
2881 if (Style.BraceWrapping.BeforeCatch)
2882 addUnwrappedLine();
2883 else
2884 NeedsUnwrappedLine = true;
2885 } else if (FormatTok->isNot(tok::kw_catch)) {
2886 // The C++ standard requires a compound-statement after a try.
2887 // If there's none, we try to assume there's a structuralElement
2888 // and try to continue.
2889 addUnwrappedLine();
2890 ++Line->Level;
2891 parseStructuralElement();
2892 --Line->Level;
2893 }
2894 while (true) {
2895 if (FormatTok->is(tok::at))
2896 nextToken();
2897 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2898 tok::kw___finally) ||
2899 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2900 FormatTok->is(Keywords.kw_finally)) ||
2901 (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2902 FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
2903 break;
2904 }
2905 nextToken();
2906 while (FormatTok->isNot(tok::l_brace)) {
2907 if (FormatTok->is(tok::l_paren)) {
2908 parseParens();
2909 continue;
2910 }
2911 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2912 if (Style.RemoveBracesLLVM)
2913 NestedTooDeep.pop_back();
2914 return;
2915 }
2916 nextToken();
2917 }
2918 NeedsUnwrappedLine = false;
2919 Line->MustBeDeclaration = false;
2920 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2921 parseBlock();
2922 if (Style.BraceWrapping.BeforeCatch)
2923 addUnwrappedLine();
2924 else
2925 NeedsUnwrappedLine = true;
2926 }
2927
2928 if (Style.RemoveBracesLLVM)
2929 NestedTooDeep.pop_back();
2930
2931 if (NeedsUnwrappedLine)
2932 addUnwrappedLine();
2933}
2934
2935void UnwrappedLineParser::parseNamespace() {
2936 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2937 "'namespace' expected");
2938
2939 const FormatToken &InitialToken = *FormatTok;
2940 nextToken();
2941 if (InitialToken.is(TT_NamespaceMacro)) {
2942 parseParens();
2943 } else {
2944 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2945 tok::l_square, tok::period, tok::l_paren) ||
2946 (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
2947 if (FormatTok->is(tok::l_square))
2948 parseSquare();
2949 else if (FormatTok->is(tok::l_paren))
2950 parseParens();
2951 else
2952 nextToken();
2953 }
2954 }
2955 if (FormatTok->is(tok::l_brace)) {
2956 FormatTok->setFinalizedType(TT_NamespaceLBrace);
2957
2958 if (ShouldBreakBeforeBrace(Style, InitialToken))
2959 addUnwrappedLine();
2960
2961 unsigned AddLevels =
2964 DeclarationScopeStack.size() > 1)
2965 ? 1u
2966 : 0u;
2967 bool ManageWhitesmithsBraces =
2968 AddLevels == 0u &&
2970
2971 // If we're in Whitesmiths mode, indent the brace if we're not indenting
2972 // the whole block.
2973 if (ManageWhitesmithsBraces)
2974 ++Line->Level;
2975
2976 // Munch the semicolon after a namespace. This is more common than one would
2977 // think. Putting the semicolon into its own line is very ugly.
2978 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
2979 /*KeepBraces=*/true, /*IfKind=*/nullptr,
2980 ManageWhitesmithsBraces);
2981
2982 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2983
2984 if (ManageWhitesmithsBraces)
2985 --Line->Level;
2986 }
2987 // FIXME: Add error handling.
2988}
2989
2990void UnwrappedLineParser::parseNew() {
2991 assert(FormatTok->is(tok::kw_new) && "'new' expected");
2992 nextToken();
2993
2994 if (Style.isCSharp()) {
2995 do {
2996 // Handle constructor invocation, e.g. `new(field: value)`.
2997 if (FormatTok->is(tok::l_paren))
2998 parseParens();
2999
3000 // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3001 if (FormatTok->is(tok::l_brace))
3002 parseBracedList();
3003
3004 if (FormatTok->isOneOf(tok::semi, tok::comma))
3005 return;
3006
3007 nextToken();
3008 } while (!eof());
3009 }
3010
3011 if (Style.Language != FormatStyle::LK_Java)
3012 return;
3013
3014 // In Java, we can parse everything up to the parens, which aren't optional.
3015 do {
3016 // There should not be a ;, { or } before the new's open paren.
3017 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
3018 return;
3019
3020 // Consume the parens.
3021 if (FormatTok->is(tok::l_paren)) {
3022 parseParens();
3023
3024 // If there is a class body of an anonymous class, consume that as child.
3025 if (FormatTok->is(tok::l_brace))
3026 parseChildBlock();
3027 return;
3028 }
3029 nextToken();
3030 } while (!eof());
3031}
3032
3033void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3034 keepAncestorBraces();
3035
3036 if (isBlockBegin(*FormatTok)) {
3037 if (!KeepBraces)
3038 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3039 FormatToken *LeftBrace = FormatTok;
3040 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3041 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3042 /*MunchSemi=*/true, KeepBraces);
3043 if (!KeepBraces) {
3044 assert(!NestedTooDeep.empty());
3045 if (!NestedTooDeep.back())
3046 markOptionalBraces(LeftBrace);
3047 }
3048 if (WrapRightBrace)
3049 addUnwrappedLine();
3050 } else {
3051 parseUnbracedBody();
3052 }
3053
3054 if (!KeepBraces)
3055 NestedTooDeep.pop_back();
3056}
3057
3058void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3059 assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3060 (Style.isVerilog() &&
3061 FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3062 Keywords.kw_always_ff, Keywords.kw_always_latch,
3063 Keywords.kw_final, Keywords.kw_initial,
3064 Keywords.kw_foreach, Keywords.kw_forever,
3065 Keywords.kw_repeat))) &&
3066 "'for', 'while' or foreach macro expected");
3067 const bool KeepBraces = !Style.RemoveBracesLLVM ||
3068 !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
3069
3070 nextToken();
3071 // JS' for await ( ...
3072 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
3073 nextToken();
3074 if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
3075 nextToken();
3076 if (HasParens && FormatTok->is(tok::l_paren)) {
3077 // The type is only set for Verilog basically because we were afraid to
3078 // change the existing behavior for loops. See the discussion on D121756 for
3079 // details.
3080 if (Style.isVerilog())
3081 FormatTok->setFinalizedType(TT_ConditionLParen);
3082 parseParens();
3083 }
3084 // Event control.
3085 if (Style.isVerilog())
3086 parseVerilogSensitivityList();
3087
3088 handleAttributes();
3089 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3090}
3091
3092void UnwrappedLineParser::parseDoWhile() {
3093 assert(FormatTok->is(tok::kw_do) && "'do' expected");
3094 nextToken();
3095
3096 parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
3097
3098 // FIXME: Add error handling.
3099 if (FormatTok->isNot(tok::kw_while)) {
3100 addUnwrappedLine();
3101 return;
3102 }
3103
3104 // If in Whitesmiths mode, the line with the while() needs to be indented
3105 // to the same level as the block.
3107 ++Line->Level;
3108
3109 nextToken();
3110 parseStructuralElement();
3111}
3112
3113void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
3114 nextToken();
3115 unsigned OldLineLevel = Line->Level;
3116 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3117 --Line->Level;
3118 if (LeftAlignLabel)
3119 Line->Level = 0;
3120
3121 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3122 FormatTok->is(tok::l_brace)) {
3123
3124 CompoundStatementIndenter Indenter(this, Line->Level,
3127 parseBlock();
3128 if (FormatTok->is(tok::kw_break)) {
3131 addUnwrappedLine();
3132 if (!Style.IndentCaseBlocks &&
3134 ++Line->Level;
3135 }
3136 }
3137 parseStructuralElement();
3138 }
3139 addUnwrappedLine();
3140 } else {
3141 if (FormatTok->is(tok::semi))
3142 nextToken();
3143 addUnwrappedLine();
3144 }
3145 Line->Level = OldLineLevel;
3146 if (FormatTok->isNot(tok::l_brace)) {
3147 parseStructuralElement();
3148 addUnwrappedLine();
3149 }
3150}
3151
3152void UnwrappedLineParser::parseCaseLabel() {
3153 assert(FormatTok->is(tok::kw_case) && "'case' expected");
3154
3155 // FIXME: fix handling of complex expressions here.
3156 do {
3157 nextToken();
3158 if (FormatTok->is(tok::colon)) {
3159 FormatTok->setFinalizedType(TT_CaseLabelColon);
3160 break;
3161 }
3162 } while (!eof());
3163 parseLabel();
3164}
3165
3166void UnwrappedLineParser::parseSwitch() {
3167 assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3168 nextToken();
3169 if (FormatTok->is(tok::l_paren))
3170 parseParens();
3171
3172 keepAncestorBraces();
3173
3174 if (FormatTok->is(tok::l_brace)) {
3175 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3176 parseBlock();
3177 addUnwrappedLine();
3178 } else {
3179 addUnwrappedLine();
3180 ++Line->Level;
3181 parseStructuralElement();
3182 --Line->Level;
3183 }
3184
3185 if (Style.RemoveBracesLLVM)
3186 NestedTooDeep.pop_back();
3187}
3188
3189// Operators that can follow a C variable.
3191 switch (kind) {
3192 case tok::ampamp:
3193 case tok::ampequal:
3194 case tok::arrow:
3195 case tok::caret:
3196 case tok::caretequal:
3197 case tok::comma:
3198 case tok::ellipsis:
3199 case tok::equal:
3200 case tok::equalequal:
3201 case tok::exclaim:
3202 case tok::exclaimequal:
3203 case tok::greater:
3204 case tok::greaterequal:
3205 case tok::greatergreater:
3206 case tok::greatergreaterequal:
3207 case tok::l_paren:
3208 case tok::l_square:
3209 case tok::less:
3210 case tok::lessequal:
3211 case tok::lessless:
3212 case tok::lesslessequal:
3213 case tok::minus:
3214 case tok::minusequal:
3215 case tok::minusminus:
3216 case tok::percent:
3217 case tok::percentequal:
3218 case tok::period:
3219 case tok::pipe:
3220 case tok::pipeequal:
3221 case tok::pipepipe:
3222 case tok::plus:
3223 case tok::plusequal:
3224 case tok::plusplus:
3225 case tok::question:
3226 case tok::r_brace:
3227 case tok::r_paren:
3228 case tok::r_square:
3229 case tok::semi:
3230 case tok::slash:
3231 case tok::slashequal:
3232 case tok::star:
3233 case tok::starequal:
3234 return true;
3235 default:
3236 return false;
3237 }
3238}
3239
3240void UnwrappedLineParser::parseAccessSpecifier() {
3241 FormatToken *AccessSpecifierCandidate = FormatTok;
3242 nextToken();
3243 // Understand Qt's slots.
3244 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3245 nextToken();
3246 // Otherwise, we don't know what it is, and we'd better keep the next token.
3247 if (FormatTok->is(tok::colon)) {
3248 nextToken();
3249 addUnwrappedLine();
3250 } else if (FormatTok->isNot(tok::coloncolon) &&
3251 !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
3252 // Not a variable name nor namespace name.
3253 addUnwrappedLine();
3254 } else if (AccessSpecifierCandidate) {
3255 // Consider the access specifier to be a C identifier.
3256 AccessSpecifierCandidate->Tok.setKind(tok::identifier);
3257 }
3258}
3259
3260/// \brief Parses a requires, decides if it is a clause or an expression.
3261/// \pre The current token has to be the requires keyword.
3262/// \returns true if it parsed a clause.
3263bool clang::format::UnwrappedLineParser::parseRequires() {
3264 assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3265 auto RequiresToken = FormatTok;
3266
3267 // We try to guess if it is a requires clause, or a requires expression. For
3268 // that we first consume the keyword and check the next token.
3269 nextToken();
3270
3271 switch (FormatTok->Tok.getKind()) {
3272 case tok::l_brace:
3273 // This can only be an expression, never a clause.
3274 parseRequiresExpression(RequiresToken);
3275 return false;
3276 case tok::l_paren:
3277 // Clauses and expression can start with a paren, it's unclear what we have.
3278 break;
3279 default:
3280 // All other tokens can only be a clause.
3281 parseRequiresClause(RequiresToken);
3282 return true;
3283 }
3284
3285 // Looking forward we would have to decide if there are function declaration
3286 // like arguments to the requires expression:
3287 // requires (T t) {
3288 // Or there is a constraint expression for the requires clause:
3289 // requires (C<T> && ...
3290
3291 // But first let's look behind.
3292 auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3293
3294 if (!PreviousNonComment ||
3295 PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
3296 // If there is no token, or an expression left brace, we are a requires
3297 // clause within a requires expression.
3298 parseRequiresClause(RequiresToken);
3299 return true;
3300 }
3301
3302 switch (PreviousNonComment->Tok.getKind()) {
3303 case tok::greater:
3304 case tok::r_paren:
3305 case tok::kw_noexcept:
3306 case tok::kw_const:
3307 // This is a requires clause.
3308 parseRequiresClause(RequiresToken);
3309 return true;
3310 case tok::amp:
3311 case tok::ampamp: {
3312 // This can be either:
3313 // if (... && requires (T t) ...)
3314 // Or
3315 // void member(...) && requires (C<T> ...
3316 // We check the one token before that for a const:
3317 // void member(...) const && requires (C<T> ...
3318 auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3319 if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3320 parseRequiresClause(RequiresToken);
3321 return true;
3322 }
3323 break;
3324 }
3325 default:
3326 if (PreviousNonComment->isTypeOrIdentifier()) {
3327 // This is a requires clause.
3328 parseRequiresClause(RequiresToken);
3329 return true;
3330 }
3331 // It's an expression.
3332 parseRequiresExpression(RequiresToken);
3333 return false;
3334 }
3335
3336 // Now we look forward and try to check if the paren content is a parameter
3337 // list. The parameters can be cv-qualified and contain references or
3338 // pointers.
3339 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3340 // of stuff: typename, const, *, &, &&, ::, identifiers.
3341
3342 unsigned StoredPosition = Tokens->getPosition();
3343 FormatToken *NextToken = Tokens->getNextToken();
3344 int Lookahead = 0;
3345 auto PeekNext = [&Lookahead, &NextToken, this] {
3346 ++Lookahead;
3347 NextToken = Tokens->getNextToken();
3348 };
3349
3350 bool FoundType = false;
3351 bool LastWasColonColon = false;
3352 int OpenAngles = 0;
3353
3354 for (; Lookahead < 50; PeekNext()) {
3355 switch (NextToken->Tok.getKind()) {
3356 case tok::kw_volatile:
3357 case tok::kw_const:
3358 case tok::comma:
3359 if (OpenAngles == 0) {
3360 FormatTok = Tokens->setPosition(StoredPosition);
3361 parseRequiresExpression(RequiresToken);
3362 return false;
3363 }
3364 break;
3365 case tok::r_paren:
3366 case tok::pipepipe:
3367 FormatTok = Tokens->setPosition(StoredPosition);
3368 parseRequiresClause(RequiresToken);
3369 return true;
3370 case tok::eof:
3371 // Break out of the loop.
3372 Lookahead = 50;
3373 break;
3374 case tok::coloncolon:
3375 LastWasColonColon = true;
3376 break;
3377 case tok::identifier:
3378 if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3379 FormatTok = Tokens->setPosition(StoredPosition);
3380 parseRequiresExpression(RequiresToken);
3381 return false;
3382 }
3383 FoundType = true;
3384 LastWasColonColon = false;
3385 break;
3386 case tok::less:
3387 ++OpenAngles;
3388 break;
3389 case tok::greater:
3390 --OpenAngles;
3391 break;
3392 default:
3393 if (NextToken->isSimpleTypeSpecifier()) {
3394 FormatTok = Tokens->setPosition(StoredPosition);
3395 parseRequiresExpression(RequiresToken);
3396 return false;
3397 }
3398 break;
3399 }
3400 }
3401 // This seems to be a complicated expression, just assume it's a clause.
3402 FormatTok = Tokens->setPosition(StoredPosition);
3403 parseRequiresClause(RequiresToken);
3404 return true;
3405}
3406
3407/// \brief Parses a requires clause.
3408/// \param RequiresToken The requires keyword token, which starts this clause.
3409/// \pre We need to be on the next token after the requires keyword.
3410/// \sa parseRequiresExpression
3411///
3412/// Returns if it either has finished parsing the clause, or it detects, that
3413/// the clause is incorrect.
3414void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3415 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3416 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3417
3418 // If there is no previous token, we are within a requires expression,
3419 // otherwise we will always have the template or function declaration in front
3420 // of it.
3421 bool InRequiresExpression =
3422 !RequiresToken->Previous ||
3423 RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3424
3425 RequiresToken->setFinalizedType(InRequiresExpression
3426 ? TT_RequiresClauseInARequiresExpression
3427 : TT_RequiresClause);
3428
3429 // NOTE: parseConstraintExpression is only ever called from this function.
3430 // It could be inlined into here.
3431 parseConstraintExpression();
3432
3433 if (!InRequiresExpression)
3434 FormatTok->Previous->ClosesRequiresClause = true;
3435}
3436
3437/// \brief Parses a requires expression.
3438/// \param RequiresToken The requires keyword token, which starts this clause.
3439/// \pre We need to be on the next token after the requires keyword.
3440/// \sa parseRequiresClause
3441///
3442/// Returns if it either has finished parsing the expression, or it detects,
3443/// that the expression is incorrect.
3444void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3445 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3446 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3447
3448 RequiresToken->setFinalizedType(TT_RequiresExpression);
3449
3450 if (FormatTok->is(tok::l_paren)) {
3451 FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3452 parseParens();
3453 }
3454
3455 if (FormatTok->is(tok::l_brace)) {
3456 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3457 parseChildBlock();
3458 }
3459}
3460
3461/// \brief Parses a constraint expression.
3462///
3463/// This is the body of a requires clause. It returns, when the parsing is
3464/// complete, or the expression is incorrect.
3465void UnwrappedLineParser::parseConstraintExpression() {
3466 // The special handling for lambdas is needed since tryToParseLambda() eats a
3467 // token and if a requires expression is the last part of a requires clause
3468 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3469 // not set on the correct token. Thus we need to be aware if we even expect a
3470 // lambda to be possible.
3471 // template <typename T> requires requires { ... } [[nodiscard]] ...;
3472 bool LambdaNextTimeAllowed = true;
3473
3474 // Within lambda declarations, it is permitted to put a requires clause after
3475 // its template parameter list, which would place the requires clause right
3476 // before the parentheses of the parameters of the lambda declaration. Thus,
3477 // we track if we expect to see grouping parentheses at all.
3478 // Without this check, `requires foo<T> (T t)` in the below example would be
3479 // seen as the whole requires clause, accidentally eating the parameters of
3480 // the lambda.
3481 // [&]<typename T> requires foo<T> (T t) { ... };
3482 bool TopLevelParensAllowed = true;
3483
3484 do {
3485 bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);
3486
3487 switch (FormatTok->Tok.getKind()) {
3488 case tok::kw_requires: {
3489 auto RequiresToken = FormatTok;
3490 nextToken();
3491 parseRequiresExpression(RequiresToken);
3492 break;
3493 }
3494
3495 case tok::l_paren:
3496 if (!TopLevelParensAllowed)
3497 return;
3498 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3499 TopLevelParensAllowed = false;
3500 break;
3501
3502 case tok::l_square:
3503 if (!LambdaThisTimeAllowed || !tryToParseLambda())
3504 return;
3505 break;
3506
3507 case tok::kw_const:
3508 case tok::semi:
3509 case tok::kw_class:
3510 case tok::kw_struct:
3511 case tok::kw_union:
3512 return;
3513
3514 case tok::l_brace:
3515 // Potential function body.
3516 return;
3517
3518 case tok::ampamp:
3519 case tok::pipepipe:
3520 FormatTok->setFinalizedType(TT_BinaryOperator);
3521 nextToken();
3522 LambdaNextTimeAllowed = true;
3523 TopLevelParensAllowed = true;
3524 break;
3525
3526 case tok::comma:
3527 case tok::comment:
3528 LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3529 nextToken();
3530 break;
3531
3532 case tok::kw_sizeof:
3533 case tok::greater:
3534 case tok::greaterequal:
3535 case tok::greatergreater:
3536 case tok::less:
3537 case tok::lessequal:
3538 case tok::lessless:
3539 case tok::equalequal:
3540 case tok::exclaim:
3541 case tok::exclaimequal:
3542 case tok::plus:
3543 case tok::minus:
3544 case tok::star:
3545 case tok::slash:
3546 LambdaNextTimeAllowed = true;
3547 TopLevelParensAllowed = true;
3548 // Just eat them.
3549 nextToken();
3550 break;
3551
3552 case tok::numeric_constant:
3553 case tok::coloncolon:
3554 case tok::kw_true:
3555 case tok::kw_false:
3556 TopLevelParensAllowed = false;
3557 // Just eat them.
3558 nextToken();
3559 break;
3560
3561 case tok::kw_static_cast:
3562 case tok::kw_const_cast:
3563 case tok::kw_reinterpret_cast:
3564 case tok::kw_dynamic_cast:
3565 nextToken();
3566 if (FormatTok->isNot(tok::less))
3567 return;
3568
3569 nextToken();
3570 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3571 /*ClosingBraceKind=*/tok::greater);
3572 break;
3573
3574 default:
3575 if (!FormatTok->Tok.getIdentifierInfo()) {
3576 // Identifiers are part of the default case, we check for more then
3577 // tok::identifier to handle builtin type traits.
3578 return;
3579 }
3580
3581 // We need to differentiate identifiers for a template deduction guide,
3582 // variables, or function return types (the constraint expression has
3583 // ended before that), and basically all other cases. But it's easier to
3584 // check the other way around.
3585 assert(FormatTok->Previous);
3586 switch (FormatTok->Previous->Tok.getKind()) {
3587 case tok::coloncolon: // Nested identifier.
3588 case tok::ampamp: // Start of a function or variable for the
3589 case tok::pipepipe: // constraint expression. (binary)
3590 case tok::exclaim: // The same as above, but unary.
3591 case tok::kw_requires: // Initial identifier of a requires clause.
3592 case tok::equal: // Initial identifier of a concept declaration.
3593 break;
3594 default:
3595 return;
3596 }
3597
3598 // Read identifier with optional template declaration.
3599 nextToken();
3600 if (FormatTok->is(tok::less)) {
3601 nextToken();
3602 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3603 /*ClosingBraceKind=*/tok::greater);
3604 }
3605 TopLevelParensAllowed = false;
3606 break;
3607 }
3608 } while (!eof());
3609}
3610
3611bool UnwrappedLineParser::parseEnum() {
3612 const FormatToken &InitialToken = *FormatTok;
3613
3614 // Won't be 'enum' for NS_ENUMs.
3615 if (FormatTok->is(tok::kw_enum))
3616 nextToken();
3617
3618 // In TypeScript, "enum" can also be used as property name, e.g. in interface
3619 // declarations. An "enum" keyword followed by a colon would be a syntax
3620 // error and thus assume it is just an identifier.
3621 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3622 return false;
3623
3624 // In protobuf, "enum" can be used as a field name.
3625 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3626 return false;
3627
3628 // Eat up enum class ...
3629 if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3630 nextToken();
3631
3632 while (FormatTok->Tok.getIdentifierInfo() ||
3633 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3634 tok::greater, tok::comma, tok::question,
3635 tok::l_square, tok::r_square)) {
3636 if (Style.isVerilog()) {
3637 FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
3638 nextToken();
3639 // In Verilog the base type can have dimensions.
3640 while (FormatTok->is(tok::l_square))
3641 parseSquare();
3642 } else {
3643 nextToken();
3644 }
3645 // We can have macros or attributes in between 'enum' and the enum name.
3646 if (FormatTok->is(tok::l_paren))
3647 parseParens();
3648 assert(FormatTok->isNot(TT_AttributeSquare));
3649 if (FormatTok->is(tok::identifier)) {
3650 nextToken();
3651 // If there are two identifiers in a row, this is likely an elaborate
3652 // return type. In Java, this can be "implements", etc.
3653 if (Style.isCpp() && FormatTok->is(tok::identifier))
3654 return false;
3655 }
3656 }
3657
3658 // Just a declaration or something is wrong.
3659 if (FormatTok->isNot(tok::l_brace))
3660 return true;
3661 FormatTok->setFinalizedType(TT_EnumLBrace);
3662 FormatTok->setBlockKind(BK_Block);
3663
3664 if (Style.Language == FormatStyle::LK_Java) {
3665 // Java enums are different.
3666 parseJavaEnumBody();
3667 return true;
3668 }
3669 if (Style.Language == FormatStyle::LK_Proto) {
3670 parseBlock(/*MustBeDeclaration=*/true);
3671 return true;
3672 }
3673
3674 if (!Style.AllowShortEnumsOnASingleLine &&
3675 ShouldBreakBeforeBrace(Style, InitialToken)) {
3676 addUnwrappedLine();
3677 }
3678 // Parse enum body.
3679 nextToken();
3680 if (!Style.AllowShortEnumsOnASingleLine) {
3681 addUnwrappedLine();
3682 Line->Level += 1;
3683 }
3684 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
3685 /*IsEnum=*/true);
3687 Line->Level -= 1;
3688 if (HasError) {
3689 if (FormatTok->is(tok::semi))
3690 nextToken();
3691 addUnwrappedLine();
3692 }
3693 return true;
3694
3695 // There is no addUnwrappedLine() here so that we fall through to parsing a
3696 // structural element afterwards. Thus, in "enum A {} n, m;",
3697 // "} n, m;" will end up in one unwrapped line.
3698}
3699
3700bool UnwrappedLineParser::parseStructLike() {
3701 // parseRecord falls through and does not yet add an unwrapped line as a
3702 // record declaration or definition can start a structural element.
3703 parseRecord();
3704 // This does not apply to Java, JavaScript and C#.
3705 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3706 Style.isCSharp()) {
3707 if (FormatTok->is(tok::semi))
3708 nextToken();
3709 addUnwrappedLine();
3710 return true;
3711 }
3712 return false;
3713}
3714
3715namespace {
3716// A class used to set and restore the Token position when peeking
3717// ahead in the token source.
3718class ScopedTokenPosition {
3719 unsigned StoredPosition;
3720 FormatTokenSource *Tokens;
3721
3722public:
3723 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3724 assert(Tokens && "Tokens expected to not be null");
3725 StoredPosition = Tokens->getPosition();
3726 }
3727
3728 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3729};
3730} // namespace
3731
3732// Look to see if we have [[ by looking ahead, if
3733// its not then rewind to the original position.
3734bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3735 ScopedTokenPosition AutoPosition(Tokens);
3736 FormatToken *Tok = Tokens->getNextToken();
3737 // We already read the first [ check for the second.
3738 if (Tok->isNot(tok::l_square))
3739 return false;
3740 // Double check that the attribute is just something
3741 // fairly simple.
3742 while (Tok->isNot(tok::eof)) {
3743 if (Tok->is(tok::r_square))
3744 break;
3745 Tok = Tokens->getNextToken();
3746 }
3747 if (Tok->is(tok::eof))
3748 return false;
3749 Tok = Tokens->getNextToken();
3750 if (Tok->isNot(tok::r_square))
3751 return false;
3752 Tok = Tokens->getNextToken();
3753 if (Tok->is(tok::semi))
3754 return false;
3755 return true;
3756}
3757
3758void UnwrappedLineParser::parseJavaEnumBody() {
3759 assert(FormatTok->is(tok::l_brace));
3760 const FormatToken *OpeningBrace = FormatTok;
3761
3762 // Determine whether the enum is simple, i.e. does not have a semicolon or
3763 // constants with class bodies. Simple enums can be formatted like braced
3764 // lists, contracted to a single line, etc.
3765 unsigned StoredPosition = Tokens->getPosition();
3766 bool IsSimple = true;
3767 FormatToken *Tok = Tokens->getNextToken();
3768 while (Tok->isNot(tok::eof)) {
3769 if (Tok->is(tok::r_brace))
3770 break;
3771 if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3772 IsSimple = false;
3773 break;
3774 }
3775 // FIXME: This will also mark enums with braces in the arguments to enum
3776 // constants as "not simple". This is probably fine in practice, though.
3777 Tok = Tokens->getNextToken();
3778 }
3779 FormatTok = Tokens->setPosition(StoredPosition);
3780
3781 if (IsSimple) {
3782 nextToken();
3783 parseBracedList();
3784 addUnwrappedLine();
3785 return;
3786 }
3787
3788 // Parse the body of a more complex enum.
3789 // First add a line for everything up to the "{".
3790 nextToken();
3791 addUnwrappedLine();
3792 ++Line->Level;
3793
3794 // Parse the enum constants.
3795 while (!eof()) {
3796 if (FormatTok->is(tok::l_brace)) {
3797 // Parse the constant's class body.
3798 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3799 /*MunchSemi=*/false);
3800 } else if (FormatTok->is(tok::l_paren)) {
3801 parseParens();
3802 } else if (FormatTok->is(tok::comma)) {
3803 nextToken();
3804 addUnwrappedLine();
3805 } else if (FormatTok->is(tok::semi)) {
3806 nextToken();
3807 addUnwrappedLine();
3808 break;
3809 } else if (FormatTok->is(tok::r_brace)) {
3810 addUnwrappedLine();
3811 break;
3812 } else {
3813 nextToken();
3814 }
3815 }
3816
3817 // Parse the class body after the enum's ";" if any.
3818 parseLevel(OpeningBrace);
3819 nextToken();
3820 --Line->Level;
3821 addUnwrappedLine();
3822}
3823
3824void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
3825 const FormatToken &InitialToken = *FormatTok;
3826 nextToken();
3827
3828 // The actual identifier can be a nested name specifier, and in macros
3829 // it is often token-pasted.
3830 // An [[attribute]] can be before the identifier.
3831 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
3832 tok::kw_alignas, tok::l_square) ||
3833 FormatTok->isAttribute() ||
3834 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3835 FormatTok->isOneOf(tok::period, tok::comma))) {
3836 if (Style.isJavaScript() &&
3837 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
3838 // JavaScript/TypeScript supports inline object types in
3839 // extends/implements positions:
3840 // class Foo implements {bar: number} { }
3841 nextToken();
3842 if (FormatTok->is(tok::l_brace)) {
3843 tryToParseBracedList();
3844 continue;
3845 }
3846 }
3847 if (FormatTok->is(tok::l_square) && handleCppAttributes())
3848 continue;
3849 bool IsNonMacroIdentifier =
3850 FormatTok->is(tok::identifier) &&
3851 FormatTok->TokenText != FormatTok->TokenText.upper();
3852 nextToken();
3853 // We can have macros in between 'class' and the class name.
3854 if (!IsNonMacroIdentifier && FormatTok->is(tok::l_paren))
3855 parseParens();
3856 }
3857
3858 // Note that parsing away template declarations here leads to incorrectly
3859 // accepting function declarations as record declarations.
3860 // In general, we cannot solve this problem. Consider:
3861 // class A<int> B() {}
3862 // which can be a function definition or a class definition when B() is a
3863 // macro. If we find enough real-world cases where this is a problem, we
3864 // can parse for the 'template' keyword in the beginning of the statement,
3865 // and thus rule out the record production in case there is no template
3866 // (this would still leave us with an ambiguity between template function
3867 // and class declarations).
3868 if (FormatTok->isOneOf(tok::colon, tok::less)) {
3869 do {
3870 if (FormatTok->is(tok::l_brace)) {
3871 calculateBraceTypes(/*ExpectClassBody=*/true);
3872 if (!tryToParseBracedList())
3873 break;
3874 }
3875 if (FormatTok->is(tok::l_square)) {
3876 FormatToken *Previous = FormatTok->Previous;
3877 if (!Previous ||
3878 !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
3879 // Don't try parsing a lambda if we had a closing parenthesis before,
3880 // it was probably a pointer to an array: int (*)[].
3881 if (!tryToParseLambda())
3882 continue;
3883 } else {
3884 parseSquare();
3885 continue;
3886 }
3887 }
3888 if (FormatTok->is(tok::semi))
3889 return;
3890 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
3891 addUnwrappedLine();
3892 nextToken();
3893 parseCSharpGenericTypeConstraint();
3894 break;
3895 }
3896 nextToken();
3897 } while (!eof());
3898 }
3899
3900 auto GetBraceType = [](const FormatToken &RecordTok) {
3901 switch (RecordTok.Tok.getKind()) {
3902 case tok::kw_class:
3903 return TT_ClassLBrace;
3904 case tok::kw_struct:
3905 return TT_StructLBrace;
3906 case tok::kw_union:
3907 return TT_UnionLBrace;
3908 default:
3909 // Useful for e.g. interface.
3910 return TT_RecordLBrace;
3911 }
3912 };
3913 if (FormatTok->is(tok::l_brace)) {
3914 FormatTok->setFinalizedType(GetBraceType(InitialToken));
3915 if (ParseAsExpr) {
3916 parseChildBlock();
3917 } else {
3918 if (ShouldBreakBeforeBrace(Style, InitialToken))
3919 addUnwrappedLine();
3920
3921 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
3922 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
3923 }
3924 }
3925 // There is no addUnwrappedLine() here so that we fall through to parsing a
3926 // structural element afterwards. Thus, in "class A {} n, m;",
3927 // "} n, m;" will end up in one unwrapped line.
3928}
3929
3930void UnwrappedLineParser::parseObjCMethod() {
3931 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
3932 "'(' or identifier expected.");
3933 do {
3934 if (FormatTok->is(tok::semi)) {
3935 nextToken();
3936 addUnwrappedLine();
3937 return;
3938 } else if (FormatTok->is(tok::l_brace)) {
3939 if (Style.BraceWrapping.AfterFunction)
3940 addUnwrappedLine();
3941 parseBlock();
3942 addUnwrappedLine();
3943 return;
3944 } else {
3945 nextToken();
3946 }
3947 } while (!eof());
3948}
3949
3950void UnwrappedLineParser::parseObjCProtocolList() {
3951 assert(FormatTok->is(tok::less) && "'<' expected.");
3952 do {
3953 nextToken();
3954 // Early exit in case someone forgot a close angle.
3955 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3956 FormatTok->isObjCAtKeyword(tok::objc_end)) {
3957 return;
3958 }
3959 } while (!eof() && FormatTok->isNot(tok::greater));
3960 nextToken(); // Skip '>'.
3961}
3962
3963void UnwrappedLineParser::parseObjCUntilAtEnd() {
3964 do {
3965 if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
3966 nextToken();
3967 addUnwrappedLine();
3968 break;
3969 }
3970 if (FormatTok->is(tok::l_brace)) {
3971 parseBlock();
3972 // In ObjC interfaces, nothing should be following the "}".
3973 addUnwrappedLine();
3974 } else if (FormatTok->is(tok::r_brace)) {
3975 // Ignore stray "}". parseStructuralElement doesn't consume them.
3976 nextToken();
3977 addUnwrappedLine();
3978 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
3979 nextToken();
3980 parseObjCMethod();
3981 } else {
3982 parseStructuralElement();
3983 }
3984 } while (!eof());
3985}
3986
3987void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
3988 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
3989 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
3990 nextToken();
3991 nextToken(); // interface name
3992
3993 // @interface can be followed by a lightweight generic
3994 // specialization list, then either a base class or a category.
3995 if (FormatTok->is(tok::less))
3996 parseObjCLightweightGenerics();
3997 if (FormatTok->is(tok::colon)) {
3998 nextToken();
3999 nextToken(); // base class name
4000 // The base class can also have lightweight generics applied to it.
4001 if (FormatTok->is(tok::less))
4002 parseObjCLightweightGenerics();
4003 } else if (FormatTok->is(tok::l_paren)) {
4004 // Skip category, if present.
4005 parseParens();
4006 }
4007
4008 if (FormatTok->is(tok::less))
4009 parseObjCProtocolList();
4010
4011 if (FormatTok->is(tok::l_brace)) {
4013 addUnwrappedLine();
4014 parseBlock(/*MustBeDeclaration=*/true);
4015 }
4016
4017 // With instance variables, this puts '}' on its own line. Without instance
4018 // variables, this ends the @interface line.
4019 addUnwrappedLine();
4020
4021 parseObjCUntilAtEnd();
4022}
4023
4024void UnwrappedLineParser::parseObjCLightweightGenerics() {
4025 assert(FormatTok->is(tok::less));
4026 // Unlike protocol lists, generic parameterizations support
4027 // nested angles:
4028 //
4029 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4030 // NSObject <NSCopying, NSSecureCoding>
4031 //
4032 // so we need to count how many open angles we have left.
4033 unsigned NumOpenAngles = 1;
4034 do {
4035 nextToken();
4036 // Early exit in case someone forgot a close angle.
4037 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4038 FormatTok->isObjCAtKeyword(tok::objc_end)) {
4039 break;
4040 }
4041 if (FormatTok->is(tok::less)) {
4042 ++NumOpenAngles;
4043 } else if (FormatTok->is(tok::greater)) {
4044 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4045 --NumOpenAngles;
4046 }
4047 } while (!eof() && NumOpenAngles != 0);
4048 nextToken(); // Skip '>'.
4049}
4050
4051// Returns true for the declaration/definition form of @protocol,
4052// false for the expression form.
4053bool UnwrappedLineParser::parseObjCProtocol() {
4054 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
4055 nextToken();
4056
4057 if (FormatTok->is(tok::l_paren)) {
4058 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4059 return false;
4060 }
4061
4062 // The definition/declaration form,
4063 // @protocol Foo
4064 // - (int)someMethod;
4065 // @end
4066
4067 nextToken(); // protocol name
4068
4069 if (FormatTok->is(tok::less))
4070 parseObjCProtocolList();
4071
4072 // Check for protocol declaration.
4073 if (FormatTok->is(tok::semi)) {
4074 nextToken();
4075 addUnwrappedLine();
4076 return true;
4077 }
4078
4079 addUnwrappedLine();
4080 parseObjCUntilAtEnd();
4081 return true;
4082}
4083
4084void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4085 bool IsImport = FormatTok->is(Keywords.kw_import);
4086 assert(IsImport || FormatTok->is(tok::kw_export));
4087 nextToken();
4088
4089 // Consume the "default" in "export default class/function".
4090 if (FormatTok->is(tok::kw_default))
4091 nextToken();
4092
4093 // Consume "async function", "function" and "default function", so that these
4094 // get parsed as free-standing JS functions, i.e. do not require a trailing
4095 // semicolon.
4096 if (FormatTok->is(Keywords.kw_async))
4097 nextToken();
4098 if (FormatTok->is(Keywords.kw_function)) {
4099 nextToken();
4100 return;
4101 }
4102
4103 // For imports, `export *`, `export {...}`, consume the rest of the line up
4104 // to the terminating `;`. For everything else, just return and continue
4105 // parsing the structural element, i.e. the declaration or expression for
4106 // `export default`.
4107 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
4108 !FormatTok->isStringLiteral() &&
4109 !(FormatTok->is(Keywords.kw_type) &&
4110 Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) {
4111 return;
4112 }
4113
4114 while (!eof()) {
4115 if (FormatTok->is(tok::semi))
4116 return;
4117 if (Line->Tokens.empty()) {
4118 // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4119 // import statement should terminate.
4120 return;
4121 }
4122 if (FormatTok->is(tok::l_brace)) {
4123 FormatTok->setBlockKind(BK_Block);
4124 nextToken();
4125 parseBracedList();
4126 } else {
4127 nextToken();
4128 }
4129 }
4130}
4131
4132void UnwrappedLineParser::parseStatementMacro() {
4133 nextToken();
4134 if (FormatTok->is(tok::l_paren))
4135 parseParens();
4136 if (FormatTok->is(tok::semi))
4137 nextToken();
4138 addUnwrappedLine();
4139}
4140
4141void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4142 // consume things like a::`b.c[d:e] or a::*
4143 while (true) {
4144 if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar,
4145 tok::coloncolon, tok::hash) ||
4146 Keywords.isVerilogIdentifier(*FormatTok)) {
4147 nextToken();
4148 } else if (FormatTok->is(tok::l_square)) {
4149 parseSquare();
4150 } else {
4151 break;
4152 }
4153 }
4154}
4155
4156void UnwrappedLineParser::parseVerilogSensitivityList() {
4157 if (FormatTok->isNot(tok::at))
4158 return;
4159 nextToken();
4160 // A block event expression has 2 at signs.
4161 if (FormatTok->is(tok::at))
4162 nextToken();
4163 switch (FormatTok->Tok.getKind()) {
4164 case tok::star:
4165 nextToken();
4166 break;
4167 case tok::l_paren:
4168 parseParens();
4169 break;
4170 default:
4171 parseVerilogHierarchyIdentifier();
4172 break;
4173 }
4174}
4175
4176unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4177 unsigned AddLevels = 0;
4178
4179 if (FormatTok->is(Keywords.kw_clocking)) {
4180 nextToken();
4181 if (Keywords.isVerilogIdentifier(*FormatTok))
4182 nextToken();
4183 parseVerilogSensitivityList();
4184 if (FormatTok->is(tok::semi))
4185 nextToken();
4186 } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex,
4187 Keywords.kw_casez, Keywords.kw_randcase,
4188 Keywords.kw_randsequence)) {
4189 if (Style.IndentCaseLabels)
4190 AddLevels++;
4191 nextToken();
4192 if (FormatTok->is(tok::l_paren)) {
4193 FormatTok->setFinalizedType(TT_ConditionLParen);
4194 parseParens();
4195 }
4196 if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches))
4197 nextToken();
4198 // The case header has no semicolon.
4199 } else {
4200 // "module" etc.
4201 nextToken();
4202 // all the words like the name of the module and specifiers like
4203 // "automatic" and the width of function return type
4204 while (true) {
4205 if (FormatTok->is(tok::l_square)) {
4206 auto Prev = FormatTok->getPreviousNonComment();
4207 if (Prev && Keywords.isVerilogIdentifier(*Prev))
4208 Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
4209 parseSquare();
4210 } else if (Keywords.isVerilogIdentifier(*FormatTok) ||
4211 FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) {
4212 nextToken();
4213 } else {
4214 break;
4215 }
4216 }
4217
4218 auto NewLine = [this]() {
4219 addUnwrappedLine();
4220 Line->IsContinuation = true;
4221 };
4222
4223 // package imports
4224 while (FormatTok->is(Keywords.kw_import)) {
4225 NewLine();
4226 nextToken();
4227 parseVerilogHierarchyIdentifier();
4228 if (FormatTok->is(tok::semi))
4229 nextToken();
4230 }
4231
4232 // parameters and ports
4233 if (FormatTok->is(Keywords.kw_verilogHash)) {
4234 NewLine();
4235 nextToken();
4236 if (FormatTok->is(tok::l_paren)) {
4237 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4238 parseParens();
4239 }
4240 }
4241 if (FormatTok->is(tok::l_paren)) {
4242 NewLine();
4243 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4244 parseParens();
4245 }
4246
4247 // extends and implements
4248 if (FormatTok->is(Keywords.kw_extends)) {
4249 NewLine();
4250 nextToken();
4251 parseVerilogHierarchyIdentifier();
4252 if (FormatTok->is(tok::l_paren))
4253 parseParens();
4254 }
4255 if (FormatTok->is(Keywords.kw_implements)) {
4256 NewLine();
4257 do {
4258 nextToken();
4259 parseVerilogHierarchyIdentifier();
4260 } while (FormatTok->is(tok::comma));
4261 }
4262
4263 // Coverage event for cover groups.
4264 if (FormatTok->is(tok::at)) {
4265 NewLine();
4266 parseVerilogSensitivityList();
4267 }
4268
4269 if (FormatTok->is(tok::semi))
4270 nextToken(/*LevelDifference=*/1);
4271 addUnwrappedLine();
4272 }
4273
4274 return AddLevels;
4275}
4276
4277void UnwrappedLineParser::parseVerilogTable() {
4278 assert(FormatTok->is(Keywords.kw_table));
4279 nextToken(/*LevelDifference=*/1);
4280 addUnwrappedLine();
4281
4282 auto InitialLevel = Line->Level++;
4283 while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) {
4284 FormatToken *Tok = FormatTok;
4285 nextToken();
4286 if (Tok->is(tok::semi))
4287 addUnwrappedLine();
4288 else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus))
4289 Tok->setFinalizedType(TT_VerilogTableItem);
4290 }
4291 Line->Level = InitialLevel;
4292 nextToken(/*LevelDifference=*/-1);
4293 addUnwrappedLine();
4294}
4295
4296void UnwrappedLineParser::parseVerilogCaseLabel() {
4297 // The label will get unindented in AnnotatingParser. If there are no leading
4298 // spaces, indent the rest here so that things inside the block will be
4299 // indented relative to things outside. We don't use parseLabel because we
4300 // don't know whether this colon is a label or a ternary expression at this
4301 // point.
4302 auto OrigLevel = Line->Level;
4303 auto FirstLine = CurrentLines->size();
4304 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4305 ++Line->Level;
4306 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok))
4307 --Line->Level;
4308 parseStructuralElement();
4309 // Restore the indentation in both the new line and the line that has the
4310 // label.
4311 if (CurrentLines->size() > FirstLine)
4312 (*CurrentLines)[FirstLine].Level = OrigLevel;
4313 Line->Level = OrigLevel;
4314}
4315
4316bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4317 for (const auto &N : Line.Tokens) {
4318 if (N.Tok->MacroCtx)
4319 return true;
4320 for (const UnwrappedLine &Child : N.Children)
4321 if (containsExpansion(Child))
4322 return true;
4323 }
4324 return false;
4325}
4326
4327void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4328 if (Line->Tokens.empty())
4329 return;
4330 LLVM_DEBUG({
4331 if (!parsingPPDirective()) {
4332 llvm::dbgs() << "Adding unwrapped line:\n";
4333 printDebugInfo(*Line);
4334 }
4335 });
4336
4337 // If this line closes a block when in Whitesmiths mode, remember that
4338 // information so that the level can be decreased after the line is added.
4339 // This has to happen after the addition of the line since the line itself
4340 // needs to be indented.
4341 bool ClosesWhitesmithsBlock =
4342 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4344
4345 // If the current line was expanded from a macro call, we use it to
4346 // reconstruct an unwrapped line from the structure of the expanded unwrapped
4347 // line and the unexpanded token stream.
4348 if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) {
4349 if (!Reconstruct)
4350 Reconstruct.emplace(Line->Level, Unexpanded);
4351 Reconstruct->addLine(*Line);
4352
4353 // While the reconstructed unexpanded lines are stored in the normal
4354 // flow of lines, the expanded lines are stored on the side to be analyzed
4355 // in an extra step.
4356 CurrentExpandedLines.push_back(std::move(*Line));
4357
4358 if (Reconstruct->finished()) {
4359 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
4360 assert(!Reconstructed.Tokens.empty() &&
4361 "Reconstructed must at least contain the macro identifier.");
4362 assert(!parsingPPDirective());
4363 LLVM_DEBUG({
4364 llvm::dbgs() << "Adding unexpanded line:\n";
4365 printDebugInfo(Reconstructed);
4366 });
4367 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
4368 Lines.push_back(std::move(Reconstructed));
4369 CurrentExpandedLines.clear();
4370 Reconstruct.reset();
4371 }
4372 } else {
4373 // At the top level we only get here when no unexpansion is going on, or
4374 // when conditional formatting led to unfinished macro reconstructions.
4375 assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0);
4376 CurrentLines->push_back(std::move(*Line));
4377 }
4378 Line->Tokens.clear();
4379 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4380 Line->FirstStartColumn = 0;
4381 Line->IsContinuation = false;
4382
4383 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4384 --Line->Level;
4385 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
4386 CurrentLines->append(
4387 std::make_move_iterator(PreprocessorDirectives.begin()),
4388 std::make_move_iterator(PreprocessorDirectives.end()));
4389 PreprocessorDirectives.clear();
4390 }
4391 // Disconnect the current token from the last token on the previous line.
4392 FormatTok->Previous = nullptr;
4393}
4394
4395bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
4396
4397bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4398 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4399 FormatTok.NewlinesBefore > 0;
4400}
4401
4402// Checks if \p FormatTok is a line comment that continues the line comment
4403// section on \p Line.
4404static bool
4406 const UnwrappedLine &Line,
4407 const llvm::Regex &CommentPragmasRegex) {
4408 if (Line.Tokens.empty())
4409 return false;
4410
4411 StringRef IndentContent = FormatTok.TokenText;
4412 if (FormatTok.TokenText.startswith("//") ||
4413 FormatTok.TokenText.startswith("/*")) {
4414 IndentContent = FormatTok.TokenText.substr(2);
4415 }
4416 if (CommentPragmasRegex.match(IndentContent))
4417 return false;
4418
4419 // If Line starts with a line comment, then FormatTok continues the comment
4420 // section if its original column is greater or equal to the original start
4421 // column of the line.
4422 //
4423 // Define the min column token of a line as follows: if a line ends in '{' or
4424 // contains a '{' followed by a line comment, then the min column token is
4425 // that '{'. Otherwise, the min column token of the line is the first token of
4426 // the line.
4427 //
4428 // If Line starts with a token other than a line comment, then FormatTok
4429 // continues the comment section if its original column is greater than the
4430 // original start column of the min column token of the line.
4431 //
4432 // For example, the second line comment continues the first in these cases:
4433 //
4434 // // first line
4435 // // second line
4436 //
4437 // and:
4438 //
4439 // // first line
4440 // // second line
4441 //
4442 // and:
4443 //
4444 // int i; // first line
4445 // // second line
4446 //
4447 // and:
4448 //
4449 // do { // first line
4450 // // second line
4451 // int i;
4452 // } while (true);
4453 //
4454 // and:
4455 //
4456 // enum {
4457 // a, // first line
4458 // // second line
4459 // b
4460 // };
4461 //
4462 // The second line comment doesn't continue the first in these cases:
4463 //
4464 // // first line
4465 // // second line
4466 //
4467 // and:
4468 //
4469 // int i; // first line
4470 // // second line
4471 //
4472 // and:
4473 //
4474 // do { // first line
4475 // // second line
4476 // int i;
4477 // } while (true);
4478 //
4479 // and:
4480 //
4481 // enum {
4482 // a, // first line
4483 // // second line
4484 // };
4485 const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4486
4487 // Scan for '{//'. If found, use the column of '{' as a min column for line
4488 // comment section continuation.
4489 const FormatToken *PreviousToken = nullptr;
4490 for (const UnwrappedLineNode &Node : Line.Tokens) {
4491 if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4492 isLineComment(*Node.Tok)) {
4493 MinColumnToken = PreviousToken;
4494 break;
4495 }
4496 PreviousToken = Node.Tok;
4497
4498 // Grab the last newline preceding a token in this unwrapped line.
4499 if (Node.Tok->NewlinesBefore > 0)
4500 MinColumnToken = Node.Tok;
4501 }
4502 if (PreviousToken && PreviousToken->is(tok::l_brace))
4503 MinColumnToken = PreviousToken;
4504
4505 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4506 MinColumnToken);
4507}
4508
4509void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4510 bool JustComments = Line->Tokens.empty();
4511 for (FormatToken *Tok : CommentsBeforeNextToken) {
4512 // Line comments that belong to the same line comment section are put on the
4513 // same line since later we might want to reflow content between them.
4514 // Additional fine-grained breaking of line comment sections is controlled
4515 // by the class BreakableLineCommentSection in case it is desirable to keep
4516 // several line comment sections in the same unwrapped line.
4517 //
4518 // FIXME: Consider putting separate line comment sections as children to the
4519 // unwrapped line instead.
4520 Tok->ContinuesLineCommentSection =
4521 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
4522 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4523 addUnwrappedLine();
4524 pushToken(Tok);
4525 }
4526 if (NewlineBeforeNext && JustComments)
4527 addUnwrappedLine();
4528 CommentsBeforeNextToken.clear();
4529}
4530
4531void UnwrappedLineParser::nextToken(int LevelDifference) {
4532 if (eof())
4533 return;
4534 flushComments(isOnNewLine(*FormatTok));
4535 pushToken(FormatTok);
4536 FormatToken *Previous = FormatTok;
4537 if (!Style.isJavaScript())
4538 readToken(LevelDifference);
4539 else
4540 readTokenWithJavaScriptASI();
4541 FormatTok->Previous = Previous;
4542 if (Style.isVerilog()) {
4543 // Blocks in Verilog can have `begin` and `end` instead of braces. For
4544 // keywords like `begin`, we can't treat them the same as left braces
4545 // because some contexts require one of them. For example structs use
4546 // braces and if blocks use keywords, and a left brace can occur in an if
4547 // statement, but it is not a block. For keywords like `end`, we simply
4548 // treat them the same as right braces.
4549 if (Keywords.isVerilogEnd(*FormatTok))
4550 FormatTok->Tok.setKind(tok::r_brace);
4551 }
4552}
4553
4554void UnwrappedLineParser::distributeComments(
4555 const SmallVectorImpl<FormatToken *> &Comments,
4556 const FormatToken *NextTok) {
4557 // Whether or not a line comment token continues a line is controlled by
4558 // the method continuesLineCommentSection, with the following caveat:
4559 //
4560 // Define a trail of Comments to be a nonempty proper postfix of Comments such
4561 // that each comment line from the trail is aligned with the next token, if
4562 // the next token exists. If a trail exists, the beginning of the maximal
4563 // trail is marked as a start of a new comment section.
4564 //
4565 // For example in this code:
4566 //
4567 // int a; // line about a
4568 // // line 1 about b
4569 // // line 2 about b
4570 // int b;
4571 //
4572 // the two lines about b form a maximal trail, so there are two sections, the
4573 // first one consisting of the single comment "// line about a" and the
4574 // second one consisting of the next two comments.
4575 if (Comments.empty())
4576 return;
4577 bool ShouldPushCommentsInCurrentLine = true;
4578 bool HasTrailAlignedWithNextToken = false;
4579 unsigned StartOfTrailAlignedWithNextToken = 0;
4580 if (NextTok) {
4581 // We are skipping the first element intentionally.
4582 for (unsigned i = Comments.size() - 1; i > 0; --i) {
4583 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4584 HasTrailAlignedWithNextToken = true;
4585 StartOfTrailAlignedWithNextToken = i;
4586 }
4587 }
4588 }
4589 for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4590 FormatToken *FormatTok = Comments[i];
4591 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4592 FormatTok->ContinuesLineCommentSection = false;
4593 } else {
4594 FormatTok->ContinuesLineCommentSection =
4595 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4596 }
4597 if (!FormatTok->ContinuesLineCommentSection &&
4598 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4599 ShouldPushCommentsInCurrentLine = false;
4600 }
4601 if (ShouldPushCommentsInCurrentLine)
4602 pushToken(FormatTok);
4603 else
4604 CommentsBeforeNextToken.push_back(FormatTok);
4605 }
4606}
4607
4608void UnwrappedLineParser::readToken(int LevelDifference) {
4609 SmallVector<FormatToken *, 1> Comments;
4610 bool PreviousWasComment = false;
4611 bool FirstNonCommentOnLine = false;
4612 do {
4613 FormatTok = Tokens->getNextToken();
4614 assert(FormatTok);
4615 while (FormatTok->getType() == TT_ConflictStart ||
4616 FormatTok->getType() == TT_ConflictEnd ||
4617 FormatTok->getType() == TT_ConflictAlternative) {
4618 if (FormatTok->getType() == TT_ConflictStart)
4619 conditionalCompilationStart(/*Unreachable=*/false);
4620 else if (FormatTok->getType() == TT_ConflictAlternative)
4621 conditionalCompilationAlternative();
4622 else if (FormatTok->getType() == TT_ConflictEnd)
4623 conditionalCompilationEnd();
4624 FormatTok = Tokens->getNextToken();
4625 FormatTok->MustBreakBefore = true;
4626 }
4627
4628 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4629 const FormatToken &Tok,
4630 bool PreviousWasComment) {
4631 auto IsFirstOnLine = [](const FormatToken &Tok) {
4632 return Tok.HasUnescapedNewline || Tok.IsFirst;
4633 };
4634
4635 // Consider preprocessor directives preceded by block comments as first
4636 // on line.
4637 if (PreviousWasComment)
4638 return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4639 return IsFirstOnLine(Tok);
4640 };
4641
4642 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4643 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4644 PreviousWasComment = FormatTok->is(tok::comment);
4645
4646 while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4647 (!Style.isVerilog() ||
4648 Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
4649 FirstNonCommentOnLine) {
4650 distributeComments(Comments, FormatTok);
4651 Comments.clear();
4652 // If there is an unfinished unwrapped line, we flush the preprocessor
4653 // directives only after that unwrapped line was finished later.
4654 bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4655 ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4656 assert((LevelDifference >= 0 ||
4657 static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4658 "LevelDifference makes Line->Level negative");
4659 Line->Level += LevelDifference;
4660 // Comments stored before the preprocessor directive need to be output
4661 // before the preprocessor directive, at the same level as the
4662 // preprocessor directive, as we consider them to apply to the directive.
4664 PPBranchLevel > 0) {
4665 Line->Level += PPBranchLevel;
4666 }
4667 flushComments(isOnNewLine(*FormatTok));
4668 parsePPDirective();
4669 PreviousWasComment = FormatTok->is(tok::comment);
4670 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4671 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4672 }
4673
4674 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4675 !Line->InPPDirective) {
4676 continue;
4677 }
4678
4679 if (FormatTok->is(tok::identifier) &&
4680 Macros.defined(FormatTok->TokenText) &&
4681 // FIXME: Allow expanding macros in preprocessor directives.
4682 !Line->InPPDirective) {
4683 FormatToken *ID = FormatTok;
4684 unsigned Position = Tokens->getPosition();
4685
4686 // To correctly parse the code, we need to replace the tokens of the macro
4687 // call with its expansion.
4688 auto PreCall = std::move(Line);
4689 Line.reset(new UnwrappedLine);
4690 bool OldInExpansion = InExpansion;
4691 InExpansion = true;
4692 // We parse the macro call into a new line.
4693 auto Args = parseMacroCall();
4694 InExpansion = OldInExpansion;
4695 assert(Line->Tokens.front().Tok == ID);
4696 // And remember the unexpanded macro call tokens.
4697 auto UnexpandedLine = std::move(Line);
4698 // Reset to the old line.
4699 Line = std::move(PreCall);
4700
4701 LLVM_DEBUG({
4702 llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
4703 if (Args) {
4704 llvm::dbgs() << "(";
4705 for (const auto &Arg : Args.value())
4706 for (const auto &T : Arg)
4707 llvm::dbgs() << T->TokenText << " ";
4708 llvm::dbgs() << ")";
4709 }
4710 llvm::dbgs() << "\n";
4711 });
4712 if (Macros.objectLike(ID->TokenText) && Args &&
4713 !Macros.hasArity(ID->TokenText, Args->size())) {
4714 // The macro is either
4715 // - object-like, but we got argumnets, or
4716 // - overloaded to be both object-like and function-like, but none of
4717 // the function-like arities match the number of arguments.
4718 // Thus, expand as object-like macro.
4719 LLVM_DEBUG(llvm::dbgs()
4720 << "Macro \"" << ID->TokenText
4721 << "\" not overloaded for arity " << Args->size()
4722 << "or not function-like, using object-like overload.");
4723 Args.reset();
4724 UnexpandedLine->Tokens.resize(1);
4725 Tokens->setPosition(Position);
4726 nextToken();
4727 assert(!Args && Macros.objectLike(ID->TokenText));
4728 }
4729 if ((!Args && Macros.objectLike(ID->TokenText)) ||
4730 (Args && Macros.hasArity(ID->TokenText, Args->size()))) {
4731 // Next, we insert the expanded tokens in the token stream at the
4732 // current position, and continue parsing.
4733 Unexpanded[ID] = std::move(UnexpandedLine);
4734 SmallVector<FormatToken *, 8> Expansion =
4735 Macros.expand(ID, std::move(Args));
4736 if (!Expansion.empty())
4737 FormatTok = Tokens->insertTokens(Expansion);
4738
4739 LLVM_DEBUG({
4740 llvm::dbgs() << "Expanded: ";
4741 for (const auto &T : Expansion)
4742 llvm::dbgs() << T->TokenText << " ";
4743 llvm::dbgs() << "\n";
4744 });
4745 } else {
4746 LLVM_DEBUG({
4747 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
4748 << "\", because it was used ";
4749 if (Args)
4750 llvm::dbgs() << "with " << Args->size();
4751 else
4752 llvm::dbgs() << "without";
4753 llvm::dbgs() << " arguments, which doesn't match any definition.\n";
4754 });
4755 Tokens->setPosition(Position);
4756 FormatTok = ID;
4757 }
4758 }
4759
4760 if (FormatTok->isNot(tok::comment)) {
4761 distributeComments(Comments, FormatTok);
4762 Comments.clear();
4763 return;
4764 }
4765
4766 Comments.push_back(FormatTok);
4767 } while (!eof());
4768
4769 distributeComments(Comments, nullptr);
4770 Comments.clear();
4771}
4772
4773namespace {
4774template <typename Iterator>
4775void pushTokens(Iterator Begin, Iterator End,
4777 for (auto I = Begin; I != End; ++I) {
4778 Into.push_back(I->Tok);
4779 for (const auto &Child : I->Children)
4780 pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
4781 }
4782}
4783} // namespace
4784
4785std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
4786UnwrappedLineParser::parseMacroCall() {
4787 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
4788 assert(Line->Tokens.empty());
4789 nextToken();
4790 if (FormatTok->isNot(tok::l_paren))
4791 return Args;
4792 unsigned Position = Tokens->getPosition();
4793 FormatToken *Tok = FormatTok;
4794 nextToken();
4795 Args.emplace();
4796 auto ArgStart = std::prev(Line->Tokens.end());
4797
4798 int Parens = 0;
4799 do {
4800 switch (FormatTok->Tok.getKind()) {
4801 case tok::l_paren:
4802 ++Parens;
4803 nextToken();
4804 break;
4805 case tok::r_paren: {
4806 if (Parens > 0) {
4807 --Parens;
4808 nextToken();
4809 break;
4810 }
4811 Args->push_back({});
4812 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4813 nextToken();
4814 return Args;
4815 }
4816 case tok::comma: {
4817 if (Parens > 0) {
4818 nextToken();
4819 break;
4820 }
4821 Args->push_back({});
4822 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4823 nextToken();
4824 ArgStart = std::prev(Line->Tokens.end());
4825 break;
4826 }
4827 default:
4828 nextToken();
4829 break;
4830 }
4831 } while (!eof());
4832 Line->Tokens.resize(1);
4833 Tokens->setPosition(Position);
4834 FormatTok = Tok;
4835 return {};
4836}
4837
4838void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4839 Line->Tokens.push_back(UnwrappedLineNode(Tok));
4840 if (MustBreakBeforeNextToken) {
4841 Line->Tokens.back().Tok->MustBreakBefore = true;
4842 MustBreakBeforeNextToken = false;
4843 }
4844}
4845
4846} // end namespace format
4847} // end namespace clang
DynTypedNode Node
static char ID
Definition: Arena.cpp:163
This file contains FormatTokenLexer, which tokenizes a source file into a token stream suitable for C...
This file defines the FormatTokenSource interface, which provides a token stream as well as the abili...
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
StringRef Text
Definition: Format.cpp:2937
This file contains the main building blocks of macro support in clang-format.
This file implements a token annotator, i.e.
Defines the clang::TokenKind enum and support functions.
SourceLocation Begin
StateNode * Previous
ContinuationIndenter * Indenter
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
do v
Definition: arm_acle.h:76
tok::PPKeywordKind getPPKeywordID() const
Return the preprocessor keyword ID for this identifier.
Implements an efficient mapping from strings to IdentifierInfo nodes.
Parser - This implements a parser for the C family of languages.
Definition: Parser.h:53
This class handles loading and caching of source files into memory.
Token - This structure provides full information about a lexed token.
Definition: Token.h:35
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:186
bool isAnyIdentifier() const
Return true if this is a raw identifier (when lexing in raw mode) or a non-keyword identifier (when l...
Definition: Token.h:109
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:115
void setKind(tok::TokenKind K)
Definition: Token.h:94
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
Definition: Lexer.cpp:66
tok::TokenKind getKind() const
Definition: Token.h:93
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const
Definition: Token.h:100
bool isNot(tok::TokenKind K) const
Definition: Token.h:99
void setIdentifierInfo(IdentifierInfo *II)
Definition: Token.h:195
CompoundStatementIndenter(UnwrappedLineParser *Parser, const FormatStyle &Style, unsigned &LineLevel)
CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, bool WrapBrace, bool IndentBrace)
virtual FormatToken * peekNextToken(bool SkipComment=false)=0
virtual unsigned getPosition()=0
virtual FormatToken * getPreviousToken()=0
virtual FormatToken * setPosition(unsigned Position)=0
virtual FormatToken * getNextToken()=0
bool defined(llvm::StringRef Name) const
Returns whether any macro Name is defined, regardless of overloads.
llvm::SmallVector< FormatToken *, 8 > expand(FormatToken *ID, std::optional< ArgsList > OptionalArgs) const
Returns the expanded stream of format tokens for ID, where each element in Args is a positional argum...
bool objectLike(llvm::StringRef Name) const
Returns whether there is an object-like overload, i.e.
bool hasArity(llvm::StringRef Name, unsigned Arity) const
Returns whether macro Name provides an overload with the given arity.
ScopedLineState(UnwrappedLineParser &Parser, bool SwitchToPreprocessorLines=false)
Interface for users of the UnwrappedLineParser to receive the parsed lines.
virtual void consumeUnwrappedLine(const UnwrappedLine &Line)=0
UnwrappedLineParser(SourceManager &SourceMgr, const FormatStyle &Style, const AdditionalKeywords &Keywords, unsigned FirstStartColumn, ArrayRef< FormatToken * > Tokens, UnwrappedLineConsumer &Callback, llvm::SpecificBumpPtrAllocator< FormatToken > &Allocator, IdentifierTable &IdentTable)
unsigned kind
All of the diagnostics that can be emitted by the frontend.
Definition: DiagnosticIDs.h:66
static void hash_combine(std::size_t &seed, const T &v)
static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
bool continuesLineComment(const FormatToken &FormatTok, const FormatToken *Previous, const FormatToken *MinColumnToken)
Definition: FormatToken.h:1814
static bool continuesLineCommentSection(const FormatToken &FormatTok, const UnwrappedLine &Line, const llvm::Regex &CommentPragmasRegex)
static bool isCOperatorFollowingVar(tok::TokenKind kind)
static bool tokenCanStartNewLine(const FormatToken &Tok)
static bool isC78Type(const FormatToken &Tok)
bool isLineComment(const FormatToken &FormatTok)
Definition: FormatToken.h:1807
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
static bool ShouldBreakBeforeBrace(const FormatStyle &Style, const FormatToken &InitialToken)
static void markOptionalBraces(FormatToken *LeftBrace)
static bool mustBeJSIdent(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
static bool isIIFE(const UnwrappedLine &Line, const AdditionalKeywords &Keywords)
static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, const FormatToken *FuncName)
static bool isGoogScope(const UnwrappedLine &Line)
static FormatToken * getLastNonComment(const UnwrappedLine &Line)
TokenType
Determines the semantic type of a syntactic token, e.g.
Definition: FormatToken.h:176
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
#define false
Definition: stdbool.h:22
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:952
bool isVerilogEnd(const FormatToken &Tok) const
Returns whether Tok is a Verilog keyword that closes a block.
Definition: FormatToken.h:1732
bool isVerilogBegin(const FormatToken &Tok) const
Returns whether Tok is a Verilog keyword that opens a block.
Definition: FormatToken.h:1725
bool isVerilogStructuredProcedure(const FormatToken &Tok) const
Returns whether Tok is a Verilog keyword that starts a structured procedure like 'always'.
Definition: FormatToken.h:1770
bool isVerilogHierarchy(const FormatToken &Tok) const
Returns whether Tok is a Verilog keyword that opens a module, etc.
Definition: FormatToken.h:1744
bool isVerilogPPDirective(const FormatToken &Tok) const
Returns whether Tok is a Verilog preprocessor directive.
Definition: FormatToken.h:1698
IdentifierInfo * kw_internal_ident_after_define
Definition: FormatToken.h:1349
bool isVerilogIdentifier(const FormatToken &Tok) const
Definition: FormatToken.h:1662
bool AfterClass
Wrap class definitions.
Definition: Format.h:1190
bool AfterStruct
Wrap struct definitions.
Definition: Format.h:1257
bool AfterUnion
Wrap union definitions.
Definition: Format.h:1271
bool AfterEnum
Wrap enum definitions.
Definition: Format.h:1205
bool IndentBraces
Indent the wrapped braces themselves.
Definition: Format.h:1348
bool AfterObjCDeclaration
Wrap ObjC definitions (interfaces, implementations...).
Definition: Format.h:1243
bool AfterNamespace
Wrap namespace definitions.
Definition: Format.h:1237
BraceWrappingAfterControlStatementStyle AfterControlStatement
Wrap control statements (if/for/while/switch/..).
Definition: Format.h:1193
bool AfterFunction
Wrap function definitions.
Definition: Format.h:1221
bool AfterExternBlock
Wrap extern blocks.
Definition: Format.h:1285
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
@ LK_Java
Should be used for Java.
Definition: Format.h:2919
@ LK_TableGen
Should be used for TableGen code.
Definition: Format.h:2930
@ LK_Proto
Should be used for Protocol Buffers (https://developers.google.com/protocol-buffers/).
Definition: Format.h:2928
@ LK_TextProto
Should be used for Protocol Buffer messages in text format (https://developers.google....
Definition: Format.h:2933
unsigned IndentWidth
The number of columns to use for indentation.
Definition: Format.h:2618
bool IndentCaseLabels
Indent case labels one level from the switch statement.
Definition: Format.h:2490
PPDirectiveIndentStyle IndentPPDirectives
The preprocessor directive indenting style to use.
Definition: Format.h:2582
bool RemoveSemicolon
Remove semicolons after the closing brace of a non-empty function.
Definition: Format.h:3605
@ IEBS_AfterExternBlock
Backwards compatible with AfterExternBlock's indenting.
Definition: Format.h:2528
@ IEBS_Indent
Indents extern blocks.
Definition: Format.h:2542
bool IndentCaseBlocks
Indent case label blocks one level from the case label.
Definition: Format.h:2471
bool InsertBraces
Insert braces after control statements (if, else, for, do, and while) in C++ unless the control statements are inside macro definitions or the braces would enclose preprocessor directives.
Definition: Format.h:2664
RemoveParenthesesStyle RemoveParentheses
Remove redundant parentheses.
Definition: Format.h:3588
LanguageKind Language
Language, this format style is targeted at.
Definition: Format.h:2948
bool RemoveBracesLLVM
Remove optional braces of control statements (if, else, for, and while) in C++ according to the LLVM coding style.
Definition: Format.h:3552
@ PPDIS_BeforeHash
Indents directives before the hash.
Definition: Format.h:2577
@ PPDIS_None
Does not indent any directives.
Definition: Format.h:2559
bool AllowShortEnumsOnASingleLine
Allow short enums on a single line.
Definition: Format.h:696
NamespaceIndentationKind NamespaceIndentation
The indentation used for namespaces.
Definition: Format.h:3089
BraceBreakingStyle BreakBeforeBraces
The brace breaking style to use.
Definition: Format.h:1941
bool isCSharp() const
Definition: Format.h:2940
@ BWACS_Always
Always wrap braces after a control statement.
Definition: Format.h:1154
@ BWACS_Never
Never wrap braces after a control statement.
Definition: Format.h:1133
@ BS_Whitesmiths
Like Allman but always indent braces and line up code with braces.
Definition: Format.h:1824
bool isProto() const
Definition: Format.h:2944
bool isVerilog() const
Definition: Format.h:2943
bool isJavaScript() const
Definition: Format.h:2942
bool IndentGotoLabels
Indent goto labels.
Definition: Format.h:2507
BraceWrappingFlags BraceWrapping
Control of individual brace wrapping cases.
Definition: Format.h:1402
@ RPS_Leave
Do not remove parentheses.
Definition: Format.h:3562
@ RPS_ReturnStatement
Also remove parentheses enclosing the expression in a return/co_return statement.
Definition: Format.h:3577
@ NI_All
Indent in all namespaces.
Definition: Format.h:3084
@ NI_Inner
Indent only in inner namespaces (nested in other namespaces).
Definition: Format.h:3074
bool IndentAccessModifiers
Specify whether access modifiers should have their own indentation level.
Definition: Format.h:2448
IndentExternBlockStyle IndentExternBlock
IndentExternBlockStyle is the type of indenting of extern blocks.
Definition: Format.h:2547
unsigned ColumnLimit
The column limit.
Definition: Format.h:2100
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:261
bool Optional
Is optional and can be removed.
Definition: FormatToken.h:536
bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const
Definition: FormatToken.h:619
bool isNot(T Kind) const
Definition: FormatToken.h:580
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:280
FormatToken * getPreviousNonComment() const
Returns the previous token ignoring comments.
Definition: FormatToken.h:760
bool isSimpleTypeSpecifier() const
Determine whether the token is a simple-type-specifier.
Definition: FormatToken.cpp:39
unsigned Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside), and we do not allow further formatting changes.
Definition: FormatToken.h:335
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:421
void setBlockKind(BraceBlockKind BBK)
Definition: FormatToken.h:351
bool isStringLiteral() const
Definition: FormatToken.h:613
bool isBinaryOperator() const
Definition: FormatToken.h:697
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:561
bool hasWhitespaceBefore() const
Returns true if the range of whitespace immediately preceding the Token is not empty.
Definition: FormatToken.h:748
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:573
TokenType getType() const
Returns the token's type, e.g. whether "<" is a template opener or a binary operator.
Definition: FormatToken.h:389
unsigned ClosesRequiresClause
true if this is the last token within requires clause.
Definition: FormatToken.h:338
FormatToken * MatchingParen
If this is a bracket, this points to the matching one.
Definition: FormatToken.h:518
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:521
void setFinalizedType(TokenType T)
Sets the type and also the finalized flag.
Definition: FormatToken.h:404
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no column limit.
static const size_t kInvalidIndex