clang  14.0.0git
UnwrappedLineParser.cpp
Go to the documentation of this file.
1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #include <algorithm>
22 
23 #define DEBUG_TYPE "format-parser"
24 
25 namespace clang {
26 namespace format {
27 
// Abstract interface for a positionable stream of FormatTokens.
// NOTE(review): the `class FormatTokenSource {` header line is not present in
// this extract; the class name is taken from the destructor below.
29 public:
30  virtual ~FormatTokenSource() {}
// Advances the stream and returns the token now current.
31  virtual FormatToken *getNextToken() = 0;
32 
// Returns a position value that can later be handed to setPosition().
33  virtual unsigned getPosition() = 0;
// Moves the stream to \p Position and returns the token at that position.
34  virtual FormatToken *setPosition(unsigned Position) = 0;
35 };
36 
37 namespace {
38 
39 class ScopedDeclarationState {
40 public:
41  ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
42  bool MustBeDeclaration)
43  : Line(Line), Stack(Stack) {
44  Line.MustBeDeclaration = MustBeDeclaration;
45  Stack.push_back(MustBeDeclaration);
46  }
47  ~ScopedDeclarationState() {
48  Stack.pop_back();
49  if (!Stack.empty())
50  Line.MustBeDeclaration = Stack.back();
51  else
52  Line.MustBeDeclaration = true;
53  }
54 
55 private:
56  UnwrappedLine &Line;
57  std::vector<bool> &Stack;
58 };
59 
60 static bool isLineComment(const FormatToken &FormatTok) {
61  return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
62 }
63 
64 // Checks if \p FormatTok is a line comment that continues the line comment
65 // \p Previous. The original column of \p MinColumnToken is used to determine
66 // whether \p FormatTok is indented enough to the right to continue \p Previous.
67 static bool continuesLineComment(const FormatToken &FormatTok,
68  const FormatToken *Previous,
69  const FormatToken *MinColumnToken) {
70  if (!Previous || !MinColumnToken)
71  return false;
72  unsigned MinContinueColumn =
73  MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
74  return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
75  isLineComment(*Previous) &&
76  FormatTok.OriginalColumn >= MinContinueColumn;
77 }
78 
79 class ScopedMacroState : public FormatTokenSource {
80 public:
81  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
82  FormatToken *&ResetToken)
83  : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
84  PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
85  Token(nullptr), PreviousToken(nullptr) {
86  FakeEOF.Tok.startToken();
87  FakeEOF.Tok.setKind(tok::eof);
88  TokenSource = this;
89  Line.Level = 0;
90  Line.InPPDirective = true;
91  }
92 
93  ~ScopedMacroState() override {
94  TokenSource = PreviousTokenSource;
95  ResetToken = Token;
96  Line.InPPDirective = false;
97  Line.Level = PreviousLineLevel;
98  }
99 
100  FormatToken *getNextToken() override {
101  // The \c UnwrappedLineParser guards against this by never calling
102  // \c getNextToken() after it has encountered the first eof token.
103  assert(!eof());
104  PreviousToken = Token;
105  Token = PreviousTokenSource->getNextToken();
106  if (eof())
107  return &FakeEOF;
108  return Token;
109  }
110 
111  unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
112 
113  FormatToken *setPosition(unsigned Position) override {
114  PreviousToken = nullptr;
115  Token = PreviousTokenSource->setPosition(Position);
116  return Token;
117  }
118 
119 private:
120  bool eof() {
121  return Token && Token->HasUnescapedNewline &&
122  !continuesLineComment(*Token, PreviousToken,
123  /*MinColumnToken=*/PreviousToken);
124  }
125 
126  FormatToken FakeEOF;
127  UnwrappedLine &Line;
128  FormatTokenSource *&TokenSource;
129  FormatToken *&ResetToken;
130  unsigned PreviousLineLevel;
131  FormatTokenSource *PreviousTokenSource;
132 
133  FormatToken *Token;
134  FormatToken *PreviousToken;
135 };
136 
137 } // end anonymous namespace
138 
// Scope guard that sets the parser's current line aside while nested content
// is parsed into a different line container.
// NOTE(review): the class header, the first line of the constructor signature,
// the destructor signature and one private member declaration are missing from
// this extract; parseChildBlock() instantiates this helper as
// `ScopedLineState`.
140 public:
// Constructor: remembers the parser's current line container, then redirects
// CurrentLines either to the preprocessor-directive list (when
// SwitchToPreprocessorLines is set) or, if the current line has tokens, to the
// children of its last token. The current line is saved and replaced by a
// fresh UnwrappedLine that inherits Level and InPPDirective.
142  bool SwitchToPreprocessorLines = false)
143  : Parser(Parser), OriginalLines(Parser.CurrentLines) {
144  if (SwitchToPreprocessorLines)
145  Parser.CurrentLines = &Parser.PreprocessorDirectives;
146  else if (!Parser.Line->Tokens.empty())
147  Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
148  PreBlockLine = std::move(Parser.Line);
149  Parser.Line = std::make_unique<UnwrappedLine>();
150  Parser.Line->Level = PreBlockLine->Level;
151  Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
152  }
153 
// Destructor body (signature line missing): emits any pending tokens as a
// line, restores the saved line and the original line container. If lines were
// being collected into PreprocessorDirectives, a break is requested before the
// next token.
155  if (!Parser.Line->Tokens.empty()) {
156  Parser.addUnwrappedLine();
157  }
158  assert(Parser.Line->Tokens.empty());
159  Parser.Line = std::move(PreBlockLine);
160  if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
161  Parser.MustBreakBeforeNextToken = true;
162  Parser.CurrentLines = OriginalLines;
163  }
164 
165 private:
167 
// The line that was current when this guard was constructed.
168  std::unique_ptr<UnwrappedLine> PreBlockLine;
// The line container that was current when this guard was constructed.
169  SmallVectorImpl<UnwrappedLine> *OriginalLines;
170 };
171 
// Scope guard that optionally wraps and/or indents before a compound
// statement's brace and restores the line level on destruction.
// NOTE(review): the class header and the first signature line of both
// constructors are missing from this extract.
173 public:
// Convenience constructor: delegates to the constructor below, taking the
// wrap/indent decisions from Style.BraceWrapping.AfterControlStatement and
// Style.BraceWrapping.IndentBraces.
175  const FormatStyle &Style, unsigned &LineLevel)
176  : CompoundStatementIndenter(Parser, LineLevel,
177  Style.BraceWrapping.AfterControlStatement,
178  Style.BraceWrapping.IndentBraces) {}
// Main constructor: saves the current level, emits the pending line when
// WrapBrace is set, and increments the level when IndentBrace is set.
180  bool WrapBrace, bool IndentBrace)
181  : LineLevel(LineLevel), OldLineLevel(LineLevel) {
182  if (WrapBrace)
183  Parser->addUnwrappedLine();
184  if (IndentBrace)
185  ++LineLevel;
186  }
// Restores the level that was saved at construction.
187  ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
188 
189 private:
// Reference to the level being adjusted, and its value at construction.
190  unsigned &LineLevel;
191  unsigned OldLineLevel;
192 };
193 
194 namespace {
195 
196 class IndexedTokenSource : public FormatTokenSource {
197 public:
198  IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
199  : Tokens(Tokens), Position(-1) {}
200 
201  FormatToken *getNextToken() override {
202  ++Position;
203  return Tokens[Position];
204  }
205 
206  unsigned getPosition() override {
207  assert(Position >= 0);
208  return Position;
209  }
210 
211  FormatToken *setPosition(unsigned P) override {
212  Position = P;
213  return Tokens[Position];
214  }
215 
216  void reset() { Position = -1; }
217 
218 private:
219  ArrayRef<FormatToken *> Tokens;
220  int Position;
221 };
222 
223 } // end anonymous namespace
224 
// UnwrappedLineParser constructor.
// NOTE(review): the first signature line and one parameter line are missing
// from this extract; judging by the initializer list the missing parameters
// are `Style` and `Tokens` - confirm against the header.
226  const AdditionalKeywords &Keywords,
227  unsigned FirstStartColumn,
229  UnwrappedLineConsumer &Callback)
230  : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
231  CurrentLines(&Lines), Style(Style), Keywords(Keywords),
// The member `Tokens` (the active token source) starts out null; the token
// array passed in is stored in `AllTokens`.
232  CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
233  Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
// Include-guard detection is rejected up front when preprocessor directive
// indentation is disabled.
234  IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
235  ? IG_Rejected
236  : IG_Inited),
237  IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
238 
239 void UnwrappedLineParser::reset() {
240  PPBranchLevel = -1;
241  IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
242  ? IG_Rejected
243  : IG_Inited;
244  IncludeGuardToken = nullptr;
245  Line.reset(new UnwrappedLine);
246  CommentsBeforeNextToken.clear();
247  FormatTok = nullptr;
248  MustBreakBeforeNextToken = false;
249  PreprocessorDirectives.clear();
250  CurrentLines = &Lines;
251  DeclarationScopeStack.clear();
252  PPStack.clear();
253  Line->FirstStartColumn = FirstStartColumn;
254 }
255 
// Body of the top-level parse routine.
// NOTE(review): the signature line is missing from this extract (presumably
// `void UnwrappedLineParser::parse() {`).
//
// Each iteration of the loop resets the parser, parses the whole token array
// from the start, and hands the resulting lines to the callback. The loop
// repeats while PPLevelBranchIndex is non-empty, bumping the innermost
// unfinished branch index between runs.
257  IndexedTokenSource TokenSource(AllTokens);
258  Line->FirstStartColumn = FirstStartColumn;
259  do {
260  LLVM_DEBUG(llvm::dbgs() << "----\n");
261  reset();
262  Tokens = &TokenSource;
263  TokenSource.reset();
264 
265  readToken();
266  parseFile();
267 
268  // If we found an include guard then all preprocessor directives (other than
269  // the guard) are over-indented by one.
270  if (IncludeGuard == IG_Found)
271  for (auto &Line : Lines)
272  if (Line.InPPDirective && Line.Level > 0)
273  --Line.Level;
274 
275  // Create line with eof token.
276  pushToken(FormatTok);
277  addUnwrappedLine();
278 
// Deliver every line produced by this run, then signal the end of the run.
279  for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
280  E = Lines.end();
281  I != E; ++I) {
282  Callback.consumeUnwrappedLine(*I);
283  }
284  Callback.finishRun();
285  Lines.clear();
// Drop trailing levels whose branch index has reached the last branch counted
// for that level ...
286  while (!PPLevelBranchIndex.empty() &&
287  PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
288  PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
289  PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
290  }
// ... and advance the innermost remaining level to its next branch, which the
// next run will then use.
291  if (!PPLevelBranchIndex.empty()) {
292  ++PPLevelBranchIndex.back();
293  assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
294  assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
295  }
296  } while (!PPLevelBranchIndex.empty());
297 }
298 
299 void UnwrappedLineParser::parseFile() {
300  // The top-level context in a file always has declarations, except for pre-
301  // processor directives and JavaScript files.
302  bool MustBeDeclaration =
303  !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
304  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
305  MustBeDeclaration);
306  if (Style.Language == FormatStyle::LK_TextProto)
307  parseBracedList();
308  else
309  parseLevel(/*HasOpeningBrace=*/false);
310  // Make sure to format the remaining tokens.
311  //
312  // LK_TextProto is special since its top-level is parsed as the body of a
313  // braced list, which does not necessarily have natural line separators such
314  // as a semicolon. Comments after the last entry that have been determined to
315  // not belong to that line, as in:
316  // key: value
317  // // endfile comment
318  // do not have a chance to be put on a line of their own until this point.
319  // Here we add this newline before end-of-file comments.
320  if (Style.Language == FormatStyle::LK_TextProto &&
321  !CommentsBeforeNextToken.empty())
322  addUnwrappedLine();
323  flushComments(true);
324  addUnwrappedLine();
325 }
326 
327 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
328  do {
329  switch (FormatTok->Tok.getKind()) {
330  case tok::l_brace:
331  return;
332  default:
333  if (FormatTok->is(Keywords.kw_where)) {
334  addUnwrappedLine();
335  nextToken();
336  parseCSharpGenericTypeConstraint();
337  break;
338  }
339  nextToken();
340  break;
341  }
342  } while (!eof());
343 }
344 
345 void UnwrappedLineParser::parseCSharpAttribute() {
346  int UnpairedSquareBrackets = 1;
347  do {
348  switch (FormatTok->Tok.getKind()) {
349  case tok::r_square:
350  nextToken();
351  --UnpairedSquareBrackets;
352  if (UnpairedSquareBrackets == 0) {
353  addUnwrappedLine();
354  return;
355  }
356  break;
357  case tok::l_square:
358  ++UnpairedSquareBrackets;
359  nextToken();
360  break;
361  default:
362  nextToken();
363  break;
364  }
365  } while (!eof());
366 }
367 
368 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
369  bool SwitchLabelEncountered = false;
370  do {
371  tok::TokenKind kind = FormatTok->Tok.getKind();
372  if (FormatTok->getType() == TT_MacroBlockBegin) {
373  kind = tok::l_brace;
374  } else if (FormatTok->getType() == TT_MacroBlockEnd) {
375  kind = tok::r_brace;
376  }
377 
378  switch (kind) {
379  case tok::comment:
380  nextToken();
381  addUnwrappedLine();
382  break;
383  case tok::l_brace:
384  // FIXME: Add parameter whether this can happen - if this happens, we must
385  // be in a non-declaration context.
386  if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
387  continue;
388  parseBlock(/*MustBeDeclaration=*/false);
389  addUnwrappedLine();
390  break;
391  case tok::r_brace:
392  if (HasOpeningBrace)
393  return;
394  nextToken();
395  addUnwrappedLine();
396  break;
397  case tok::kw_default: {
398  unsigned StoredPosition = Tokens->getPosition();
399  FormatToken *Next;
400  do {
401  Next = Tokens->getNextToken();
402  } while (Next && Next->is(tok::comment));
403  FormatTok = Tokens->setPosition(StoredPosition);
404  if (Next && Next->isNot(tok::colon)) {
405  // default not followed by ':' is not a case label; treat it like
406  // an identifier.
407  parseStructuralElement();
408  break;
409  }
410  // Else, if it is 'default:', fall through to the case handling.
411  LLVM_FALLTHROUGH;
412  }
413  case tok::kw_case:
414  if (Style.Language == FormatStyle::LK_JavaScript &&
415  Line->MustBeDeclaration) {
416  // A 'case: string' style field declaration.
417  parseStructuralElement();
418  break;
419  }
420  if (!SwitchLabelEncountered &&
421  (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
422  ++Line->Level;
423  SwitchLabelEncountered = true;
424  parseStructuralElement();
425  break;
426  case tok::l_square:
427  if (Style.isCSharp()) {
428  nextToken();
429  parseCSharpAttribute();
430  break;
431  }
432  LLVM_FALLTHROUGH;
433  default:
434  parseStructuralElement(!HasOpeningBrace);
435  break;
436  }
437  } while (!eof());
438 }
439 
// Classifies the '{' at the current position, and the braces nested inside
// it, as either blocks (BK_Block) or braced initializer lists (BK_BracedInit).
//
// Scans forward until the brace stack is empty or eof is reached. A pair still
// marked BK_Unknown when its '}' is seen is decided from the token that
// follows the '}'; statement-like tokens seen inside a pair mark it as a block
// earlier. The token position is restored before returning.
//
// \p ExpectClassBody: when true, a ';' directly after the outermost '}' does
// not by itself make that pair a braced list.
440 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
441  // We'll parse forward through the tokens until we hit
442  // a closing brace or eof - note that getNextToken() will
443  // parse macros, so this will magically work inside macro
444  // definitions, too.
445  unsigned StoredPosition = Tokens->getPosition();
446  FormatToken *Tok = FormatTok;
447  const FormatToken *PrevTok = Tok->Previous;
448  // Keep a stack of positions of lbrace tokens. We will
449  // update information about whether an lbrace starts a
450  // braced init list or a different block during the loop.
451  SmallVector<FormatToken *, 8> LBraceStack;
452  assert(Tok->Tok.is(tok::l_brace));
453  do {
454  // Get next non-comment token.
455  FormatToken *NextTok;
456  unsigned ReadTokens = 0;
457  do {
458  NextTok = Tokens->getNextToken();
459  ++ReadTokens;
460  } while (NextTok->is(tok::comment));
461 
462  switch (Tok->Tok.getKind()) {
463  case tok::l_brace:
464  if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
465  if (PrevTok->isOneOf(tok::colon, tok::less))
466  // A ':' indicates this code is in a type, or a braced list
467  // following a label in an object literal ({a: {b: 1}}).
468  // A '<' could be an object used in a comparison, but that is nonsense
469  // code (can never return true), so more likely it is a generic type
470  // argument (`X<{a: string; b: number}>`).
471  // The code below could be confused by semicolons between the
472  // individual members in a type member list, which would normally
473  // trigger BK_Block. In both cases, this must be parsed as an inline
474  // braced init.
475  Tok->setBlockKind(BK_BracedInit);
476  else if (PrevTok->is(tok::r_paren))
477  // `) { }` can only occur in function or method declarations in JS.
478  Tok->setBlockKind(BK_Block);
479  } else {
480  Tok->setBlockKind(BK_Unknown);
481  }
482  LBraceStack.push_back(Tok);
483  break;
484  case tok::r_brace:
485  if (LBraceStack.empty())
486  break;
487  if (LBraceStack.back()->is(BK_Unknown)) {
488  bool ProbablyBracedList = false;
489  if (Style.Language == FormatStyle::LK_Proto) {
490  ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
491  } else {
492  // Skip NextTok over preprocessor lines, otherwise we may not
493  // properly diagnose the block as a braced initializer
494  // if the comma separator appears after the pp directive.
495  while (NextTok->is(tok::hash)) {
496  ScopedMacroState MacroState(*Line, Tokens, NextTok);
497  do {
498  NextTok = Tokens->getNextToken();
499  ++ReadTokens;
500  } while (NextTok->isNot(tok::eof));
501  }
502 
503  // Using OriginalColumn to distinguish between ObjC methods and
504  // binary operators is a bit hacky.
505  bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
506  NextTok->OriginalColumn == 0;
507 
508  // If there is a comma, semicolon or right paren after the closing
509  // brace, we assume this is a braced initializer list. Note that
510  // regardless how we mark inner braces here, we will overwrite the
511  // BlockKind later if we parse a braced list (where all blocks
512  // inside are by default braced lists), or when we explicitly detect
513  // blocks (for example while parsing lambdas).
514  // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
515  // braced list in JS.
516  ProbablyBracedList =
// NOTE(review): the first operand of this expression (original line 517) is
// missing from this extract; the text below is incomplete as shown.
518  NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
519  Keywords.kw_as)) ||
520  (Style.isCpp() && NextTok->is(tok::l_paren)) ||
521  NextTok->isOneOf(tok::comma, tok::period, tok::colon,
522  tok::r_paren, tok::r_square, tok::l_brace,
523  tok::ellipsis) ||
524  (NextTok->is(tok::identifier) &&
525  !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
526  (NextTok->is(tok::semi) &&
527  (!ExpectClassBody || LBraceStack.size() != 1)) ||
528  (NextTok->isBinaryOperator() && !NextIsObjCMethod);
529  if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
530  // We can have an array subscript after a braced init
531  // list, but C++11 attributes are expected after blocks.
532  NextTok = Tokens->getNextToken();
533  ++ReadTokens;
534  ProbablyBracedList = NextTok->isNot(tok::l_square);
535  }
536  }
// Record the decision on both the closing brace and its matching opening
// brace.
537  if (ProbablyBracedList) {
538  Tok->setBlockKind(BK_BracedInit);
539  LBraceStack.back()->setBlockKind(BK_BracedInit);
540  } else {
541  Tok->setBlockKind(BK_Block);
542  LBraceStack.back()->setBlockKind(BK_Block);
543  }
544  }
545  LBraceStack.pop_back();
546  break;
547  case tok::identifier:
// Only identifiers typed as statement macros are treated like the
// statement-like tokens below.
548  if (!Tok->is(TT_StatementMacro))
549  break;
550  LLVM_FALLTHROUGH;
551  case tok::at:
552  case tok::semi:
553  case tok::kw_if:
554  case tok::kw_while:
555  case tok::kw_for:
556  case tok::kw_switch:
557  case tok::kw_try:
558  case tok::kw___try:
// Seeing one of these inside a still-undecided brace pair marks the pair as a
// block.
559  if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
560  LBraceStack.back()->setBlockKind(BK_Block);
561  break;
562  default:
563  break;
564  }
565  PrevTok = Tok;
566  Tok = NextTok;
567  } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
568 
569  // Assume other blocks for all unclosed opening braces.
570  for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
571  if (LBraceStack[i]->is(BK_Unknown))
572  LBraceStack[i]->setBlockKind(BK_Block);
573  }
574 
// Rewind so that regular parsing resumes at the token this scan started from.
575  FormatTok = Tokens->setPosition(StoredPosition);
576 }
577 
/// Mixes the std::hash of \p v into \p seed in place.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
583 
584 size_t UnwrappedLineParser::computePPHash() const {
585  size_t h = 0;
586  for (const auto &i : PPStack) {
587  hash_combine(h, size_t(i.Kind));
588  hash_combine(h, i.Line);
589  }
590  return h;
591 }
592 
// Parses a block that starts at the current '{' or macro-block-begin token,
// up to and including the matching closing token.
//
// \p MustBeDeclaration      declaration flag installed for the block's scope.
// \p AddLevels              number of levels the block's contents are
//                           indented by.
// \p MunchSemi              when true, a ';' directly after the closing brace
//                           is consumed as well.
// \p UnindentWhitesmithsBraces  when true, the level is decremented once after
//                           the opening line has been emitted.
//
// Returns early (without consuming a closing token) at eof or when the level
// did not end on the expected closing token.
593 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
594  bool MunchSemi,
595  bool UnindentWhitesmithsBraces) {
596  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
597  "'{' or macro block token expected");
598  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
599  FormatTok->setBlockKind(BK_Block);
600 
601  // For Whitesmiths mode, jump to the next level prior to skipping over the
602  // braces.
603  if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
604  ++Line->Level;
605 
// Snapshot of the preprocessor branch stack at the opening brace; compared
// with the snapshot taken at the closing brace below.
606  size_t PPStartHash = computePPHash();
607 
608  unsigned InitialLevel = Line->Level;
609  nextToken(/*LevelDifference=*/AddLevels);
610 
611  if (MacroBlock && FormatTok->is(tok::l_paren))
612  parseParens();
613 
614  size_t NbPreprocessorDirectives =
615  CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
616  addUnwrappedLine();
617  size_t OpeningLineIndex =
618  CurrentLines->empty()
// NOTE(review): the "true" operand of this conditional (original line 619) is
// missing from this extract; given the comparison further down it is
// presumably UnwrappedLine::kInvalidIndex - confirm.
620  : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
621 
622  // Whitesmiths is weird here. The brace needs to be indented for the namespace
623  // block, but the block itself may not be indented depending on the style
624  // settings. This allows the format to back up one level in those cases.
625  if (UnindentWhitesmithsBraces)
626  --Line->Level;
627 
628  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
629  MustBeDeclaration);
630  if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
631  Line->Level += AddLevels;
632  parseLevel(/*HasOpeningBrace=*/true);
633 
634  if (eof())
635  return;
636 
// The level ended on something other than the expected closing token: restore
// the level, mark the current token as a block and give up.
637  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
638  : !FormatTok->is(tok::r_brace)) {
639  Line->Level = InitialLevel;
640  FormatTok->setBlockKind(BK_Block);
641  return;
642  }
643 
644  size_t PPEndHash = computePPHash();
645 
646  // Munch the closing brace.
647  nextToken(/*LevelDifference=*/-AddLevels);
648 
649  if (MacroBlock && FormatTok->is(tok::l_paren))
650  parseParens();
651 
652  if (FormatTok->is(tok::arrow)) {
653  // Following the } we can find a trailing return type arrow
654  // as part of an implicit conversion constraint.
655  nextToken();
656  parseStructuralElement();
657  }
658 
659  if (MunchSemi && FormatTok->Tok.is(tok::semi))
660  nextToken();
661 
662  Line->Level = InitialLevel;
663 
// Link the opening and closing lines only when the preprocessor branch stack
// hashed the same at both braces.
664  if (PPStartHash == PPEndHash) {
665  Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
666  if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
667  // Update the opening line to add the forward reference as well
668  (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
669  CurrentLines->size() - 1;
670  }
671  }
672 }
673 
674 static bool isGoogScope(const UnwrappedLine &Line) {
675  // FIXME: Closure-library specific stuff should not be hard-coded but be
676  // configurable.
677  if (Line.Tokens.size() < 4)
678  return false;
679  auto I = Line.Tokens.begin();
680  if (I->Tok->TokenText != "goog")
681  return false;
682  ++I;
683  if (I->Tok->isNot(tok::period))
684  return false;
685  ++I;
686  if (I->Tok->TokenText != "scope")
687  return false;
688  ++I;
689  return I->Tok->is(tok::l_paren);
690 }
691 
692 static bool isIIFE(const UnwrappedLine &Line,
693  const AdditionalKeywords &Keywords) {
694  // Look for the start of an immediately invoked anonymous function.
695  // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
696  // This is commonly done in JavaScript to create a new, anonymous scope.
697  // Example: (function() { ... })()
698  if (Line.Tokens.size() < 3)
699  return false;
700  auto I = Line.Tokens.begin();
701  if (I->Tok->isNot(tok::l_paren))
702  return false;
703  ++I;
704  if (I->Tok->isNot(Keywords.kw_function))
705  return false;
706  ++I;
707  return I->Tok->is(tok::l_paren);
708 }
709 
710 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
711  const FormatToken &InitialToken) {
712  if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
713  return Style.BraceWrapping.AfterNamespace;
714  if (InitialToken.is(tok::kw_class))
715  return Style.BraceWrapping.AfterClass;
716  if (InitialToken.is(tok::kw_union))
717  return Style.BraceWrapping.AfterUnion;
718  if (InitialToken.is(tok::kw_struct))
719  return Style.BraceWrapping.AfterStruct;
720  return false;
721 }
722 
723 void UnwrappedLineParser::parseChildBlock() {
724  FormatTok->setBlockKind(BK_Block);
725  nextToken();
726  {
727  bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
728  (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
729  ScopedLineState LineState(*this);
730  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
731  /*MustBeDeclaration=*/false);
732  Line->Level += SkipIndent ? 0 : 1;
733  parseLevel(/*HasOpeningBrace=*/true);
734  flushComments(isOnNewLine(*FormatTok));
735  Line->Level -= SkipIndent ? 0 : 1;
736  }
737  nextToken();
738 }
739 
740 void UnwrappedLineParser::parsePPDirective() {
741  assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
742  ScopedMacroState MacroState(*Line, Tokens, FormatTok);
743 
744  nextToken();
745 
746  if (!FormatTok->Tok.getIdentifierInfo()) {
747  parsePPUnknown();
748  return;
749  }
750 
751  switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
752  case tok::pp_define:
753  parsePPDefine();
754  return;
755  case tok::pp_if:
756  parsePPIf(/*IfDef=*/false);
757  break;
758  case tok::pp_ifdef:
759  case tok::pp_ifndef:
760  parsePPIf(/*IfDef=*/true);
761  break;
762  case tok::pp_else:
763  parsePPElse();
764  break;
765  case tok::pp_elifdef:
766  case tok::pp_elifndef:
767  case tok::pp_elif:
768  parsePPElIf();
769  break;
770  case tok::pp_endif:
771  parsePPEndIf();
772  break;
773  default:
774  parsePPUnknown();
775  break;
776  }
777 }
778 
779 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
780  size_t Line = CurrentLines->size();
781  if (CurrentLines == &PreprocessorDirectives)
782  Line += Lines.size();
783 
784  if (Unreachable ||
785  (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
786  PPStack.push_back({PP_Unreachable, Line});
787  else
788  PPStack.push_back({PP_Conditional, Line});
789 }
790 
791 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
792  ++PPBranchLevel;
793  assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
794  if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
795  PPLevelBranchIndex.push_back(0);
796  PPLevelBranchCount.push_back(0);
797  }
798  PPChainBranchIndex.push(0);
799  bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
800  conditionalCompilationCondition(Unreachable || Skip);
801 }
802 
803 void UnwrappedLineParser::conditionalCompilationAlternative() {
804  if (!PPStack.empty())
805  PPStack.pop_back();
806  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
807  if (!PPChainBranchIndex.empty())
808  ++PPChainBranchIndex.top();
809  conditionalCompilationCondition(
810  PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
811  PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
812 }
813 
814 void UnwrappedLineParser::conditionalCompilationEnd() {
815  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
816  if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
817  if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
818  PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
819  }
820  }
821  // Guard against #endif's without #if.
822  if (PPBranchLevel > -1)
823  --PPBranchLevel;
824  if (!PPChainBranchIndex.empty())
825  PPChainBranchIndex.pop();
826  if (!PPStack.empty())
827  PPStack.pop_back();
828 }
829 
830 void UnwrappedLineParser::parsePPIf(bool IfDef) {
831  bool IfNDef = FormatTok->is(tok::pp_ifndef);
832  nextToken();
833  bool Unreachable = false;
834  if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
835  Unreachable = true;
836  if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
837  Unreachable = true;
838  conditionalCompilationStart(Unreachable);
839  FormatToken *IfCondition = FormatTok;
840  // If there's a #ifndef on the first line, and the only lines before it are
841  // comments, it could be an include guard.
842  bool MaybeIncludeGuard = IfNDef;
843  if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
844  for (auto &Line : Lines) {
845  if (!Line.Tokens.front().Tok->is(tok::comment)) {
846  MaybeIncludeGuard = false;
847  IncludeGuard = IG_Rejected;
848  break;
849  }
850  }
851  --PPBranchLevel;
852  parsePPUnknown();
853  ++PPBranchLevel;
854  if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
855  IncludeGuard = IG_IfNdefed;
856  IncludeGuardToken = IfCondition;
857  }
858 }
859 
860 void UnwrappedLineParser::parsePPElse() {
861  // If a potential include guard has an #else, it's not an include guard.
862  if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
863  IncludeGuard = IG_Rejected;
864  conditionalCompilationAlternative();
865  if (PPBranchLevel > -1)
866  --PPBranchLevel;
867  parsePPUnknown();
868  ++PPBranchLevel;
869 }
870 
871 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
872 
// Parses an #endif directive: closes the current conditional-compilation
// level, consumes the rest of the directive, and promotes a candidate include
// guard to IG_Found when this #endif is followed directly by eof.
873 void UnwrappedLineParser::parsePPEndIf() {
874  conditionalCompilationEnd();
875  parsePPUnknown();
876  // If the #endif of a potential include guard is the last thing in the file,
877  // then we found an include guard.
// Peek at the token at the token source's current position without advancing.
878  unsigned TokenPosition = Tokens->getPosition();
879  FormatToken *PeekNext = AllTokens[TokenPosition];
880  if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
881  PeekNext->is(tok::eof) &&
// NOTE(review): the last operand of this condition (original line 882) is
// missing from this extract; the condition is incomplete as shown.
883  IncludeGuard = IG_Found;
884 }
885 
// Parses a #define directive: the macro name, an optional parameter list, and
// then the macro body, which is parsed like a file one level deeper.
// Also advances include-guard detection when the defined name matches the
// name tested by a preceding candidate #ifndef.
886 void UnwrappedLineParser::parsePPDefine() {
887  nextToken();
888 
// A #define that is not followed by an identifier cannot be part of an include
// guard; treat the rest of the directive as unknown.
889  if (!FormatTok->Tok.getIdentifierInfo()) {
890  IncludeGuard = IG_Rejected;
891  IncludeGuardToken = nullptr;
892  parsePPUnknown();
893  return;
894  }
895 
896  if (IncludeGuard == IG_IfNdefed &&
897  IncludeGuardToken->TokenText == FormatTok->TokenText) {
898  IncludeGuard = IG_Defined;
899  IncludeGuardToken = nullptr;
// The candidate is rejected again if any line seen so far starts with
// something other than a comment or a '#'.
900  for (auto &Line : Lines) {
901  if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
902  IncludeGuard = IG_Rejected;
903  break;
904  }
905  }
906  }
907 
908  nextToken();
// A '(' with no whitespace before it is parsed as part of the macro name
// (its parameter list).
909  if (FormatTok->Tok.getKind() == tok::l_paren &&
910  FormatTok->WhitespaceRange.getBegin() ==
911  FormatTok->WhitespaceRange.getEnd()) {
912  parseParens();
913  }
// NOTE(review): original line 914 is missing from this extract; the statement
// below may be guarded by a condition that is not visible here.
915  Line->Level += PPBranchLevel + 1;
916  addUnwrappedLine();
917  ++Line->Level;
918 
919  // Errors during a preprocessor directive can only affect the layout of the
920  // preprocessor directive, and thus we ignore them. An alternative approach
921  // would be to use the same approach we use on the file level (no
922  // re-indentation if there was a structural error) within the macro
923  // definition.
924  parseFile();
925 }
926 
// Consumes the remaining tokens of the current directive and emits them as a
// single unwrapped line.
927 void UnwrappedLineParser::parsePPUnknown() {
// At least one token is always consumed, then everything up to eof.
928  do {
929  nextToken();
930  } while (!eof());
// NOTE(review): original line 931 is missing from this extract; the statement
// below may be guarded by a condition that is not visible here.
932  Line->Level += PPBranchLevel + 1;
933  addUnwrappedLine();
934 }
935 
936 // Here we exclude certain tokens that are not usually the first token in an
937 // unwrapped line. This is used in attempt to distinguish macro calls without
938 // trailing semicolons from other constructs split to several lines.
939 static bool tokenCanStartNewLine(const FormatToken &Tok) {
940  // Semicolon can be a null-statement, l_square can be a start of a macro or
941  // a C++11 attribute, but this doesn't seem to be common.
942  return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
943  Tok.isNot(TT_AttributeSquare) &&
944  // Tokens that can only be used as binary operators and a part of
945  // overloaded operator names.
946  Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
947  Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
948  Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
949  Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
950  Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
951  Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
952  Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
953  Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
954  Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
955  Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
956  Tok.isNot(tok::lesslessequal) &&
957  // Colon is used in labels, base class lists, initializer lists,
958  // range-based for loops, ternary operator, but should never be the
959  // first token in an unwrapped line.
960  Tok.isNot(tok::colon) &&
961  // 'noexcept' is a trailing annotation.
962  Tok.isNot(tok::kw_noexcept);
963 }
964 
965 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
966  const FormatToken *FormatTok) {
967  // FIXME: This returns true for C/C++ keywords like 'struct'.
968  return FormatTok->is(tok::identifier) &&
969  (FormatTok->Tok.getIdentifierInfo() == nullptr ||
970  !FormatTok->isOneOf(
971  Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
972  Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
973  Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
974  Keywords.kw_let, Keywords.kw_var, tok::kw_const,
975  Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
976  Keywords.kw_instanceof, Keywords.kw_interface,
977  Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
978 }
979 
980 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
981  const FormatToken *FormatTok) {
982  return FormatTok->Tok.isLiteral() ||
983  FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
984  mustBeJSIdent(Keywords, FormatTok);
985 }
986 
987 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
988 // when encountered after a value (see mustBeJSIdentOrValue).
989 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
990  const FormatToken *FormatTok) {
991  return FormatTok->isOneOf(
992  tok::kw_return, Keywords.kw_yield,
993  // conditionals
994  tok::kw_if, tok::kw_else,
995  // loops
996  tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
997  // switch/case
998  tok::kw_switch, tok::kw_case,
999  // exceptions
1000  tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1001  // declaration
1002  tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1003  Keywords.kw_async, Keywords.kw_function,
1004  // import/export
1005  Keywords.kw_import, tok::kw_export);
1006 }
1007 
1008 // Checks whether a token is a type in K&R C (aka C78).
1009 static bool isC78Type(const FormatToken &Tok) {
1010  return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1011  tok::kw_unsigned, tok::kw_float, tok::kw_double,
1012  tok::identifier);
1013 }
1014 
1015 // This function checks whether a token starts the first parameter declaration
1016 // in a K&R C (aka C78) function definition, e.g.:
1017 // int f(a, b)
1018 // short a, b;
1019 // {
1020 // return a + b;
1021 // }
1022 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1023  const FormatToken *FuncName) {
1024  assert(Tok);
1025  assert(Next);
1026  assert(FuncName);
1027 
1028  if (FuncName->isNot(tok::identifier))
1029  return false;
1030 
1031  const FormatToken *Prev = FuncName->Previous;
1032  if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1033  return false;
1034 
1035  if (!isC78Type(*Tok) &&
1036  !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
1037  return false;
1038 
1039  if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1040  return false;
1041 
1042  Tok = Tok->Previous;
1043  if (!Tok || Tok->isNot(tok::r_paren))
1044  return false;
1045 
1046  Tok = Tok->Previous;
1047  if (!Tok || Tok->isNot(tok::identifier))
1048  return false;
1049 
1050  return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1051 }
1052 
1053 // readTokenWithJavaScriptASI reads the next token and terminates the current
1054 // line if JavaScript Automatic Semicolon Insertion must
1055 // happen between the current token and the next token.
1056 //
1057 // This method is conservative - it cannot cover all edge cases of JavaScript,
1058 // but only aims to correctly handle certain well known cases. It *must not*
1059 // return true in speculative cases.
1060 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1061  FormatToken *Previous = FormatTok;
1062  readToken();
1063  FormatToken *Next = FormatTok;
1064 
1065  bool IsOnSameLine =
1066  CommentsBeforeNextToken.empty()
1067  ? Next->NewlinesBefore == 0
1068  : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1069  if (IsOnSameLine)
1070  return;
1071 
1072  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1073  bool PreviousStartsTemplateExpr =
1074  Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1075  if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1076  // If the line contains an '@' sign, the previous token might be an
1077  // annotation, which can precede another identifier/value.
1078  bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
1079  [](UnwrappedLineNode &LineNode) {
1080  return LineNode.Tok->is(tok::at);
1081  }) != Line->Tokens.end();
1082  if (HasAt)
1083  return;
1084  }
1085  if (Next->is(tok::exclaim) && PreviousMustBeValue)
1086  return addUnwrappedLine();
1087  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1088  bool NextEndsTemplateExpr =
1089  Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1090  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1091  (PreviousMustBeValue ||
1092  Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1093  tok::minusminus)))
1094  return addUnwrappedLine();
1095  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1096  isJSDeclOrStmt(Keywords, Next))
1097  return addUnwrappedLine();
1098 }
1099 
// Parses a single structural element (a statement, declaration, label, ...)
// that begins at the current token, emitting the unwrapped line(s) it forms.
//
// The first switch looks only at the leading token: constructs that have a
// dedicated parser (namespace, if, for/while, do, switch, case, try, ...) are
// handed off and the function returns. A `break` out of that switch falls
// through to the do/while loop, which consumes tokens one by one until
// something terminates the element (';', '}', a block, a label, a recognized
// macro use) or the input ends.
//
// \p IsTopLevel is consulted only in the '(' case of the loop, where it gates
// the K&R C parameter-declaration check.
1100 void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) {
1101  assert(!FormatTok->is(tok::l_brace));
      // TableGen: an include directive, optionally followed by a string
      // literal, forms a line of its own.
1102  if (Style.Language == FormatStyle::LK_TableGen &&
1103  FormatTok->is(tok::pp_include)) {
1104  nextToken();
1105  if (FormatTok->is(tok::string_literal))
1106  nextToken();
1107  addUnwrappedLine();
1108  return;
1109  }
      // Dispatch on the leading token of the element.
1110  switch (FormatTok->Tok.getKind()) {
1111  case tok::kw_asm:
1112  nextToken();
      // Braced inline asm: both braces are tagged TT_InlineASMBrace and every
      // token up to the first '}' is marked Finalized.
1113  if (FormatTok->is(tok::l_brace)) {
1114  FormatTok->setType(TT_InlineASMBrace);
1115  nextToken();
1116  while (FormatTok && FormatTok->isNot(tok::eof)) {
1117  if (FormatTok->is(tok::r_brace)) {
1118  FormatTok->setType(TT_InlineASMBrace);
1119  nextToken();
1120  addUnwrappedLine();
1121  break;
1122  }
1123  FormatTok->Finalized = true;
1124  nextToken();
1125  }
1126  }
1127  break;
1128  case tok::kw_namespace:
1129  parseNamespace();
1130  return;
1131  case tok::kw_public:
1132  case tok::kw_protected:
1133  case tok::kw_private:
      // In Java, JavaScript and C# the keyword is simply consumed; otherwise
      // it is parsed as an access specifier.
1134  if (Style.Language == FormatStyle::LK_Java ||
1135  Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
1136  nextToken();
1137  else
1138  parseAccessSpecifier();
1139  return;
1140  case tok::kw_if:
1141  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1142  // field/method declaration.
1143  break;
1144  parseIfThenElse();
1145  return;
1146  case tok::kw_for:
1147  case tok::kw_while:
1148  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1149  // field/method declaration.
1150  break;
1151  parseForOrWhileLoop();
1152  return;
1153  case tok::kw_do:
1154  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1155  // field/method declaration.
1156  break;
1157  parseDoWhile();
1158  return;
1159  case tok::kw_switch:
1160  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1161  // 'switch: string' field declaration.
1162  break;
1163  parseSwitch();
1164  return;
1165  case tok::kw_default:
1166  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1167  // 'default: string' field declaration.
1168  break;
1169  nextToken();
      // "default:" is a label; anything else continues as a declaration.
1170  if (FormatTok->is(tok::colon)) {
1171  parseLabel();
1172  return;
1173  }
1174  // e.g. "default void f() {}" in a Java interface.
1175  break;
1176  case tok::kw_case:
1177  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1178  // 'case: string' field declaration.
1179  break;
1180  parseCaseLabel();
1181  return;
1182  case tok::kw_try:
1183  case tok::kw___try:
1184  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1185  // field/method declaration.
1186  break;
1187  parseTryCatch();
1188  return;
1189  case tok::kw_extern:
1190  nextToken();
      // `extern "<string>" {`: parse the braced block as declarations. The
      // extra indentation level depends on IndentExternBlock and, when that
      // is unset (compares false), on BraceWrapping.AfterExternBlock.
1191  if (FormatTok->Tok.is(tok::string_literal)) {
1192  nextToken();
1193  if (FormatTok->Tok.is(tok::l_brace)) {
1194  if (!Style.IndentExternBlock) {
1195  if (Style.BraceWrapping.AfterExternBlock) {
1196  addUnwrappedLine();
1197  }
1198  unsigned AddLevels = Style.BraceWrapping.AfterExternBlock ? 1u : 0u;
1199  parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1200  } else {
1201  unsigned AddLevels =
1202  Style.IndentExternBlock == FormatStyle::IEBS_Indent ? 1u : 0u;
1203  parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1204  }
1205  addUnwrappedLine();
1206  return;
1207  }
1208  }
1209  break;
1210  case tok::kw_export:
1211  if (Style.Language == FormatStyle::LK_JavaScript) {
1212  parseJavaScriptEs6ImportExport();
1213  return;
1214  }
1215  if (!Style.isCpp())
1216  break;
1217  // Handle C++ "(inline|export) namespace".
1218  LLVM_FALLTHROUGH;
1219  case tok::kw_inline:
1220  nextToken();
1221  if (FormatTok->Tok.is(tok::kw_namespace)) {
1222  parseNamespace();
1223  return;
1224  }
1225  break;
1226  case tok::identifier:
      // Identifiers that were pre-classified as macros, or that act as
      // keywords in some languages, get dedicated handling.
1227  if (FormatTok->is(TT_ForEachMacro)) {
1228  parseForOrWhileLoop();
1229  return;
1230  }
1231  if (FormatTok->is(TT_MacroBlockBegin)) {
1232  parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1233  /*MunchSemi=*/false);
1234  return;
1235  }
1236  if (FormatTok->is(Keywords.kw_import)) {
1237  if (Style.Language == FormatStyle::LK_JavaScript) {
1238  parseJavaScriptEs6ImportExport();
1239  return;
1240  }
      // Proto: `import [public] "<string>" [;]` forms one line. If no string
      // literal follows, return without emitting a line here.
1241  if (Style.Language == FormatStyle::LK_Proto) {
1242  nextToken();
1243  if (FormatTok->is(tok::kw_public))
1244  nextToken();
1245  if (!FormatTok->is(tok::string_literal))
1246  return;
1247  nextToken();
1248  if (FormatTok->is(tok::semi))
1249  nextToken();
1250  addUnwrappedLine();
1251  return;
1252  }
1253  }
      // C++: signals/slots keywords followed by ':' form a line of their own.
      // Without the ':' the keyword stays consumed and parsing continues below.
1254  if (Style.isCpp() &&
1255  FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1256  Keywords.kw_slots, Keywords.kw_qslots)) {
1257  nextToken();
1258  if (FormatTok->is(tok::colon)) {
1259  nextToken();
1260  addUnwrappedLine();
1261  return;
1262  }
1263  }
1264  if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1265  parseStatementMacro();
1266  return;
1267  }
1268  if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1269  parseNamespace();
1270  return;
1271  }
1272  // In all other cases, parse the declaration.
1273  break;
1274  default:
1275  break;
1276  }
      // Generic scan: consume the rest of the element token by token.
      // Previous is re-read at the top of every iteration, i.e. it is the
      // token before the one being dispatched on.
1277  do {
1278  const FormatToken *Previous = FormatTok->Previous;
1279  switch (FormatTok->Tok.getKind()) {
1280  case tok::at:
1281  nextToken();
      // '@' followed by '{': parsed as a braced list.
1282  if (FormatTok->Tok.is(tok::l_brace)) {
1283  nextToken();
1284  parseBracedList();
1285  break;
1286  } else if (Style.Language == FormatStyle::LK_Java &&
1287  FormatTok->is(Keywords.kw_interface)) {
1288  nextToken();
1289  break;
1290  }
      // Objective-C '@' keywords.
1291  switch (FormatTok->Tok.getObjCKeywordID()) {
1292  case tok::objc_public:
1293  case tok::objc_protected:
1294  case tok::objc_package:
1295  case tok::objc_private:
1296  return parseAccessSpecifier();
1297  case tok::objc_interface:
1298  case tok::objc_implementation:
1299  return parseObjCInterfaceOrImplementation();
1300  case tok::objc_protocol:
1301  if (parseObjCProtocol())
1302  return;
1303  break;
1304  case tok::objc_end:
1305  return; // Handled by the caller.
1306  case tok::objc_optional:
1307  case tok::objc_required:
1308  nextToken();
1309  addUnwrappedLine();
1310  return;
1311  case tok::objc_autoreleasepool:
1312  nextToken();
1313  if (FormatTok->Tok.is(tok::l_brace)) {
      // NOTE(review): the listing's own numbering jumps from 1313 to 1316
      // here, so two lines appear to be missing - possibly a condition
      // guarding this addUnwrappedLine(). As shown, a line break is always
      // forced before the block. Confirm against the upstream file.
1316  addUnwrappedLine();
1317  parseBlock(/*MustBeDeclaration=*/false);
1318  }
1319  addUnwrappedLine();
1320  return;
1321  case tok::objc_synchronized:
1322  nextToken();
1323  if (FormatTok->Tok.is(tok::l_paren))
1324  // Skip synchronization object
1325  parseParens();
1326  if (FormatTok->Tok.is(tok::l_brace)) {
      // NOTE(review): the listing's own numbering jumps from 1326 to 1329
      // here as well; the same caveat as for @autoreleasepool applies.
1329  addUnwrappedLine();
1330  parseBlock(/*MustBeDeclaration=*/false);
1331  }
1332  addUnwrappedLine();
1333  return;
1334  case tok::objc_try:
1335  // This branch isn't strictly necessary (the kw_try case below would
1336  // do this too after the tok::at is parsed above). But be explicit.
1337  parseTryCatch();
1338  return;
1339  default:
1340  break;
1341  }
1342  break;
1343  case tok::kw_concept:
1344  parseConcept();
1345  break;
1346  case tok::kw_requires:
1347  parseRequires();
1348  break;
1349  case tok::kw_enum:
1350  // Ignore if this is part of "template <enum ...".
1351  if (Previous && Previous->is(tok::less)) {
1352  nextToken();
1353  break;
1354  }
1355 
1356  // parseEnum falls through and does not yet add an unwrapped line as an
1357  // enum definition can start a structural element.
1358  if (!parseEnum())
1359  break;
1360  // This only applies for C++.
1361  if (!Style.isCpp()) {
1362  addUnwrappedLine();
1363  return;
1364  }
1365  break;
1366  case tok::kw_typedef:
1367  nextToken();
      // `typedef NS_ENUM(...)` and its siblings are parsed as enums.
1368  if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1369  Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1370  Keywords.kw_CF_CLOSED_ENUM,
1371  Keywords.kw_NS_CLOSED_ENUM))
1372  parseEnum();
1373  break;
1374  case tok::kw_struct:
1375  case tok::kw_union:
1376  case tok::kw_class:
      // A true result from parseStructLike() ends this element.
1377  if (parseStructLike()) {
1378  return;
1379  }
1380  break;
1381  case tok::period:
1382  nextToken();
1383  // In Java, classes have an implicit static member "class".
1384  if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1385  FormatTok->is(tok::kw_class))
1386  nextToken();
1387  if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
1388  FormatTok->Tok.getIdentifierInfo())
1389  // JavaScript only has pseudo keywords, all keywords are allowed to
1390  // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1391  nextToken();
1392  break;
1393  case tok::semi:
      // A semicolon ends the element.
1394  nextToken();
1395  addUnwrappedLine();
1396  return;
1397  case tok::r_brace:
      // The '}' itself is not consumed here.
1398  addUnwrappedLine();
1399  return;
1400  case tok::l_paren: {
1401  parseParens();
1402  // Break the unwrapped line if a K&R C function definition has a parameter
1403  // declaration.
1404  if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
1405  break;
      // Position is used as the AllTokens index of the token following
      // FormatTok; Previous is the token that preceded the '('.
1406  const unsigned Position = Tokens->getPosition() + 1;
1407  assert(Position < AllTokens.size());
1408  if (isC78ParameterDecl(FormatTok, AllTokens[Position], Previous)) {
1409  addUnwrappedLine();
1410  return;
1411  }
1412  break;
1413  }
1414  case tok::kw_operator:
1415  nextToken();
      // Consume the operator symbol as well (e.g. the '+' of `operator+`).
1416  if (FormatTok->isBinaryOperator())
1417  nextToken();
1418  break;
1419  case tok::caret:
      // '^' optionally followed by an identifier/simple type, a parenthesized
      // list and a braced child block.
1420  nextToken();
1421  if (FormatTok->Tok.isAnyIdentifier() ||
1422  FormatTok->isSimpleTypeSpecifier())
1423  nextToken();
1424  if (FormatTok->is(tok::l_paren))
1425  parseParens();
1426  if (FormatTok->is(tok::l_brace))
1427  parseChildBlock();
1428  break;
1429  case tok::l_brace:
1430  if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1431  // A block outside of parentheses must be the last part of a
1432  // structural element.
1433  // FIXME: Figure out cases where this is not true, and add projections
1434  // for them (the one we know is missing are lambdas).
1435  if (Style.BraceWrapping.AfterFunction)
1436  addUnwrappedLine();
1437  FormatTok->setType(TT_FunctionLBrace);
1438  parseBlock(/*MustBeDeclaration=*/false);
1439  addUnwrappedLine();
1440  return;
1441  }
1442  // Otherwise this was a braced init list, and the structural
1443  // element continues.
1444  break;
1445  case tok::kw_try:
1446  if (Style.Language == FormatStyle::LK_JavaScript &&
1447  Line->MustBeDeclaration) {
1448  // field/method declaration.
1449  nextToken();
1450  break;
1451  }
1452  // We arrive here when parsing function-try blocks.
1453  if (Style.BraceWrapping.AfterFunction)
1454  addUnwrappedLine();
1455  parseTryCatch();
1456  return;
1457  case tok::identifier: {
      // C#: a `where` in a declaration starts a generic type constraint on a
      // new line.
1458  if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1459  Line->MustBeDeclaration) {
1460  addUnwrappedLine();
1461  parseCSharpGenericTypeConstraint();
1462  break;
1463  }
1464  if (FormatTok->is(TT_MacroBlockEnd)) {
1465  addUnwrappedLine();
1466  return;
1467  }
1468 
1469  // Function declarations (as opposed to function expressions) are parsed
1470  // on their own unwrapped line by continuing this loop. Function
1471  // expressions (functions that are not on their own line) must not create
1472  // a new unwrapped line, so they are special cased below.
1473  size_t TokenCount = Line->Tokens.size();
1474  if (Style.Language == FormatStyle::LK_JavaScript &&
1475  FormatTok->is(Keywords.kw_function) &&
1476  (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1477  Keywords.kw_async)))) {
1478  tryToParseJSFunction();
1479  break;
1480  }
1481  if ((Style.Language == FormatStyle::LK_JavaScript ||
1482  Style.Language == FormatStyle::LK_Java) &&
1483  FormatTok->is(Keywords.kw_interface)) {
1484  if (Style.Language == FormatStyle::LK_JavaScript) {
1485  // In JavaScript/TypeScript, "interface" can be used as a standalone
1486  // identifier, e.g. in `var interface = 1;`. If "interface" is
1487  // followed by another identifier, it is very like to be an actual
1488  // interface declaration.
      // Peek one token ahead, then rewind the token source.
1489  unsigned StoredPosition = Tokens->getPosition();
1490  FormatToken *Next = Tokens->getNextToken();
1491  FormatTok = Tokens->setPosition(StoredPosition);
1492  if (Next && !mustBeJSIdent(Keywords, Next)) {
1493  nextToken();
1494  break;
1495  }
1496  }
1497  parseRecord();
1498  addUnwrappedLine();
1499  return;
1500  }
1501 
      // "interface" in the remaining languages is handled like struct/class.
1502  if (FormatTok->is(Keywords.kw_interface)) {
1503  if (parseStructLike()) {
1504  return;
1505  }
1506  break;
1507  }
1508 
1509  if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1510  parseStatementMacro();
1511  return;
1512  }
1513 
1514  // See if the following token should start a new unwrapped line.
1515  StringRef Text = FormatTok->TokenText;
1516  nextToken();
1517 
1518  // JS doesn't have macros, and within classes colons indicate fields, not
1519  // labels.
1520  if (Style.Language == FormatStyle::LK_JavaScript)
1521  break;
1522 
      // The checks below apply only when the identifier just consumed is the
      // first token of the line, optionally preceded by one comment.
1523  TokenCount = Line->Tokens.size();
1524  if (TokenCount == 1 ||
1525  (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
      // `identifier:` outside of a declaration context is a label.
1526  if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1527  Line->Tokens.begin()->Tok->MustBreakBefore = true;
1528  parseLabel(!Style.IndentGotoLabels);
1529  return;
1530  }
1531  // Recognize function-like macro usages without trailing semicolon as
1532  // well as free-standing macros like Q_OBJECT.
1533  bool FunctionLike = FormatTok->is(tok::l_paren);
1534  if (FunctionLike)
1535  parseParens();
1536 
1537  bool FollowedByNewline =
1538  CommentsBeforeNextToken.empty()
1539  ? FormatTok->NewlinesBefore > 0
1540  : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1541 
      // Heuristic: a name without lower-case letters that is function-like or
      // at least 5 characters long, is followed by a line break and by a
      // token that may start a line, is taken for a macro use ending the line.
1542  if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1543  tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
1544  addUnwrappedLine();
1545  return;
1546  }
1547  }
1548  break;
1549  }
1550  case tok::equal:
1551  // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1552  // TT_FatArrow. They always start an expression or a child block if
1553  // followed by a curly brace.
1554  if (FormatTok->is(TT_FatArrow)) {
1555  nextToken();
1556  if (FormatTok->is(tok::l_brace)) {
1557  // C# may break after => if the next character is a newline.
1558  if (Style.isCSharp() && Style.BraceWrapping.AfterFunction == true) {
1559  // calling `addUnwrappedLine()` here causes odd parsing errors.
1560  FormatTok->MustBreakBefore = true;
1561  }
1562  parseChildBlock();
1563  }
1564  break;
1565  }
1566 
      // Plain '=': a following '{' (or '<' in Proto) opens a braced list.
1567  nextToken();
1568  if (FormatTok->Tok.is(tok::l_brace)) {
1569  // Block kind should probably be set to BK_BracedInit for any language.
1570  // C# needs this change to ensure that array initialisers and object
1571  // initialisers are indented the same way.
1572  if (Style.isCSharp())
1573  FormatTok->setBlockKind(BK_BracedInit);
1574  nextToken();
1575  parseBracedList();
1576  } else if (Style.Language == FormatStyle::LK_Proto &&
1577  FormatTok->Tok.is(tok::less)) {
1578  nextToken();
1579  parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1580  /*ClosingBraceKind=*/tok::greater);
1581  }
1582  break;
1583  case tok::l_square:
1584  parseSquare();
1585  break;
1586  case tok::kw_new:
1587  parseNew();
1588  break;
1589  default:
      // Any other token is simply part of the current element.
1590  nextToken();
1591  break;
1592  }
1593  } while (!eof());
1594 }
1595 
1596 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1597  assert(FormatTok->is(tok::l_brace));
1598  if (!Style.isCSharp())
1599  return false;
1600  // See if it's a property accessor.
1601  if (FormatTok->Previous->isNot(tok::identifier))
1602  return false;
1603 
1604  // See if we are inside a property accessor.
1605  //
1606  // Record the current tokenPosition so that we can advance and
1607  // reset the current token. `Next` is not set yet so we need
1608  // another way to advance along the token stream.
1609  unsigned int StoredPosition = Tokens->getPosition();
1610  FormatToken *Tok = Tokens->getNextToken();
1611 
1612  // A trivial property accessor is of the form:
1613  // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] }
1614  // Track these as they do not require line breaks to be introduced.
1615  bool HasGetOrSet = false;
1616  bool IsTrivialPropertyAccessor = true;
1617  while (!eof()) {
1618  if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1619  tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1620  Keywords.kw_set)) {
1621  if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set))
1622  HasGetOrSet = true;
1623  Tok = Tokens->getNextToken();
1624  continue;
1625  }
1626  if (Tok->isNot(tok::r_brace))
1627  IsTrivialPropertyAccessor = false;
1628  break;
1629  }
1630 
1631  if (!HasGetOrSet) {
1632  Tokens->setPosition(StoredPosition);
1633  return false;
1634  }
1635 
1636  // Try to parse the property accessor:
1637  // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1638  Tokens->setPosition(StoredPosition);
1639  if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction == true)
1640  addUnwrappedLine();
1641  nextToken();
1642  do {
1643  switch (FormatTok->Tok.getKind()) {
1644  case tok::r_brace:
1645  nextToken();
1646  if (FormatTok->is(tok::equal)) {
1647  while (!eof() && FormatTok->isNot(tok::semi))
1648  nextToken();
1649  nextToken();
1650  }
1651  addUnwrappedLine();
1652  return true;
1653  case tok::l_brace:
1654  ++Line->Level;
1655  parseBlock(/*MustBeDeclaration=*/true);
1656  addUnwrappedLine();
1657  --Line->Level;
1658  break;
1659  case tok::equal:
1660  if (FormatTok->is(TT_FatArrow)) {
1661  ++Line->Level;
1662  do {
1663  nextToken();
1664  } while (!eof() && FormatTok->isNot(tok::semi));
1665  nextToken();
1666  addUnwrappedLine();
1667  --Line->Level;
1668  break;
1669  }
1670  nextToken();
1671  break;
1672  default:
1673  if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) &&
1674  !IsTrivialPropertyAccessor) {
1675  // Non-trivial get/set needs to be on its own line.
1676  addUnwrappedLine();
1677  }
1678  nextToken();
1679  }
1680  } while (!eof());
1681 
1682  // Unreachable for well-formed code (paired '{' and '}').
1683  return true;
1684 }
1685 
1686 bool UnwrappedLineParser::tryToParseLambda() {
1687  if (!Style.isCpp()) {
1688  nextToken();
1689  return false;
1690  }
1691  assert(FormatTok->is(tok::l_square));
1692  FormatToken &LSquare = *FormatTok;
1693  if (!tryToParseLambdaIntroducer())
1694  return false;
1695 
1696  bool SeenArrow = false;
1697 
1698  while (FormatTok->isNot(tok::l_brace)) {
1699  if (FormatTok->isSimpleTypeSpecifier()) {
1700  nextToken();
1701  continue;
1702  }
1703  switch (FormatTok->Tok.getKind()) {
1704  case tok::l_brace:
1705  break;
1706  case tok::l_paren:
1707  parseParens();
1708  break;
1709  case tok::amp:
1710  case tok::star:
1711  case tok::kw_const:
1712  case tok::comma:
1713  case tok::less:
1714  case tok::greater:
1715  case tok::identifier:
1716  case tok::numeric_constant:
1717  case tok::coloncolon:
1718  case tok::kw_class:
1719  case tok::kw_mutable:
1720  case tok::kw_noexcept:
1721  case tok::kw_template:
1722  case tok::kw_typename:
1723  nextToken();
1724  break;
1725  // Specialization of a template with an integer parameter can contain
1726  // arithmetic, logical, comparison and ternary operators.
1727  //
1728  // FIXME: This also accepts sequences of operators that are not in the scope
1729  // of a template argument list.
1730  //
1731  // In a C++ lambda a template type can only occur after an arrow. We use
1732  // this as an heuristic to distinguish between Objective-C expressions
1733  // followed by an `a->b` expression, such as:
1734  // ([obj func:arg] + a->b)
1735  // Otherwise the code below would parse as a lambda.
1736  //
1737  // FIXME: This heuristic is incorrect for C++20 generic lambdas with
1738  // explicit template lists: []<bool b = true && false>(U &&u){}
1739  case tok::plus:
1740  case tok::minus:
1741  case tok::exclaim:
1742  case tok::tilde:
1743  case tok::slash:
1744  case tok::percent:
1745  case tok::lessless:
1746  case tok::pipe:
1747  case tok::pipepipe:
1748  case tok::ampamp:
1749  case tok::caret:
1750  case tok::equalequal:
1751  case tok::exclaimequal:
1752  case tok::greaterequal:
1753  case tok::lessequal:
1754  case tok::question:
1755  case tok::colon:
1756  case tok::ellipsis:
1757  case tok::kw_true:
1758  case tok::kw_false:
1759  if (SeenArrow) {
1760  nextToken();
1761  break;
1762  }
1763  return true;
1764  case tok::arrow:
1765  // This might or might not actually be a lambda arrow (this could be an
1766  // ObjC method invocation followed by a dereferencing arrow). We might
1767  // reset this back to TT_Unknown in TokenAnnotator.
1768  FormatTok->setType(TT_LambdaArrow);
1769  SeenArrow = true;
1770  nextToken();
1771  break;
1772  default:
1773  return true;
1774  }
1775  }
1776  FormatTok->setType(TT_LambdaLBrace);
1777  LSquare.setType(TT_LambdaLSquare);
1778  parseChildBlock();
1779  return true;
1780 }
1781 
1782 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1783  const FormatToken *Previous = FormatTok->Previous;
1784  if (Previous &&
1785  (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1786  tok::kw_delete, tok::l_square) ||
1787  FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1788  Previous->isSimpleTypeSpecifier())) {
1789  nextToken();
1790  return false;
1791  }
1792  nextToken();
1793  if (FormatTok->is(tok::l_square)) {
1794  return false;
1795  }
1796  parseSquare(/*LambdaIntroducer=*/true);
1797  return true;
1798 }
1799 
1800 void UnwrappedLineParser::tryToParseJSFunction() {
1801  assert(FormatTok->is(Keywords.kw_function) ||
1802  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1803  if (FormatTok->is(Keywords.kw_async))
1804  nextToken();
1805  // Consume "function".
1806  nextToken();
1807 
1808  // Consume * (generator function). Treat it like C++'s overloaded operators.
1809  if (FormatTok->is(tok::star)) {
1810  FormatTok->setType(TT_OverloadedOperator);
1811  nextToken();
1812  }
1813 
1814  // Consume function name.
1815  if (FormatTok->is(tok::identifier))
1816  nextToken();
1817 
1818  if (FormatTok->isNot(tok::l_paren))
1819  return;
1820 
1821  // Parse formal parameter list.
1822  parseParens();
1823 
1824  if (FormatTok->is(tok::colon)) {
1825  // Parse a type definition.
1826  nextToken();
1827 
1828  // Eat the type declaration. For braced inline object types, balance braces,
1829  // otherwise just parse until finding an l_brace for the function body.
1830  if (FormatTok->is(tok::l_brace))
1831  tryToParseBracedList();
1832  else
1833  while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1834  nextToken();
1835  }
1836 
1837  if (FormatTok->is(tok::semi))
1838  return;
1839 
1840  parseChildBlock();
1841 }
1842 
1843 bool UnwrappedLineParser::tryToParseBracedList() {
1844  if (FormatTok->is(BK_Unknown))
1845  calculateBraceTypes();
1846  assert(FormatTok->isNot(BK_Unknown));
1847  if (FormatTok->is(BK_Block))
1848  return false;
1849  nextToken();
1850  parseBracedList();
1851  return true;
1852 }
1853 
1854 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1855  bool IsEnum,
1856  tok::TokenKind ClosingBraceKind) {
1857  bool HasError = false;
1858 
1859  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1860  // replace this by using parseAssignmentExpression() inside.
1861  do {
1862  if (Style.isCSharp()) {
1863  // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1864  // TT_FatArrow. They always start an expression or a child block if
1865  // followed by a curly brace.
1866  if (FormatTok->is(TT_FatArrow)) {
1867  nextToken();
1868  if (FormatTok->is(tok::l_brace)) {
1869  // C# may break after => if the next character is a newline.
1870  if (Style.isCSharp() && Style.BraceWrapping.AfterFunction == true) {
1871  // calling `addUnwrappedLine()` here causes odd parsing errors.
1872  FormatTok->MustBreakBefore = true;
1873  }
1874  parseChildBlock();
1875  continue;
1876  }
1877  }
1878  }
1879  if (Style.Language == FormatStyle::LK_JavaScript) {
1880  if (FormatTok->is(Keywords.kw_function) ||
1881  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1882  tryToParseJSFunction();
1883  continue;
1884  }
1885  if (FormatTok->is(TT_FatArrow)) {
1886  nextToken();
1887  // Fat arrows can be followed by simple expressions or by child blocks
1888  // in curly braces.
1889  if (FormatTok->is(tok::l_brace)) {
1890  parseChildBlock();
1891  continue;
1892  }
1893  }
1894  if (FormatTok->is(tok::l_brace)) {
1895  // Could be a method inside of a braced list `{a() { return 1; }}`.
1896  if (tryToParseBracedList())
1897  continue;
1898  parseChildBlock();
1899  }
1900  }
1901  if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1902  if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
1903  addUnwrappedLine();
1904  nextToken();
1905  return !HasError;
1906  }
1907  switch (FormatTok->Tok.getKind()) {
1908  case tok::caret:
1909  nextToken();
1910  if (FormatTok->is(tok::l_brace)) {
1911  parseChildBlock();
1912  }
1913  break;
1914  case tok::l_square:
1915  if (Style.isCSharp())
1916  parseSquare();
1917  else
1918  tryToParseLambda();
1919  break;
1920  case tok::l_paren:
1921  parseParens();
1922  // JavaScript can just have free standing methods and getters/setters in
1923  // object literals. Detect them by a "{" following ")".
1924  if (Style.Language == FormatStyle::LK_JavaScript) {
1925  if (FormatTok->is(tok::l_brace))
1926  parseChildBlock();
1927  break;
1928  }
1929  break;
1930  case tok::l_brace:
1931  // Assume there are no blocks inside a braced init list apart
1932  // from the ones we explicitly parse out (like lambdas).
1933  FormatTok->setBlockKind(BK_BracedInit);
1934  nextToken();
1935  parseBracedList();
1936  break;
1937  case tok::less:
1938  if (Style.Language == FormatStyle::LK_Proto) {
1939  nextToken();
1940  parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1941  /*ClosingBraceKind=*/tok::greater);
1942  } else {
1943  nextToken();
1944  }
1945  break;
1946  case tok::semi:
1947  // JavaScript (or more precisely TypeScript) can have semicolons in braced
1948  // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1949  // used for error recovery if we have otherwise determined that this is
1950  // a braced list.
1951  if (Style.Language == FormatStyle::LK_JavaScript) {
1952  nextToken();
1953  break;
1954  }
1955  HasError = true;
1956  if (!ContinueOnSemicolons)
1957  return !HasError;
1958  nextToken();
1959  break;
1960  case tok::comma:
1961  nextToken();
1962  if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
1963  addUnwrappedLine();
1964  break;
1965  default:
1966  nextToken();
1967  break;
1968  }
1969  } while (!eof());
1970  return false;
1971 }
1972 
1973 void UnwrappedLineParser::parseParens() {
1974  assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1975  nextToken();
1976  do {
1977  switch (FormatTok->Tok.getKind()) {
1978  case tok::l_paren:
1979  parseParens();
1980  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1981  parseChildBlock();
1982  break;
1983  case tok::r_paren:
1984  nextToken();
1985  return;
1986  case tok::r_brace:
1987  // A "}" inside parenthesis is an error if there wasn't a matching "{".
1988  return;
1989  case tok::l_square:
1990  tryToParseLambda();
1991  break;
1992  case tok::l_brace:
1993  if (!tryToParseBracedList())
1994  parseChildBlock();
1995  break;
1996  case tok::at:
1997  nextToken();
1998  if (FormatTok->Tok.is(tok::l_brace)) {
1999  nextToken();
2000  parseBracedList();
2001  }
2002  break;
2003  case tok::equal:
2004  if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2005  parseStructuralElement();
2006  else
2007  nextToken();
2008  break;
2009  case tok::kw_class:
2010  if (Style.Language == FormatStyle::LK_JavaScript)
2011  parseRecord(/*ParseAsExpr=*/true);
2012  else
2013  nextToken();
2014  break;
2015  case tok::identifier:
2016  if (Style.Language == FormatStyle::LK_JavaScript &&
2017  (FormatTok->is(Keywords.kw_function) ||
2018  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
2019  tryToParseJSFunction();
2020  else
2021  nextToken();
2022  break;
2023  default:
2024  nextToken();
2025  break;
2026  }
2027  } while (!eof());
2028 }
2029 
2030 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2031  if (!LambdaIntroducer) {
2032  assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
2033  if (tryToParseLambda())
2034  return;
2035  }
2036  do {
2037  switch (FormatTok->Tok.getKind()) {
2038  case tok::l_paren:
2039  parseParens();
2040  break;
2041  case tok::r_square:
2042  nextToken();
2043  return;
2044  case tok::r_brace:
2045  // A "}" inside parenthesis is an error if there wasn't a matching "{".
2046  return;
2047  case tok::l_square:
2048  parseSquare();
2049  break;
2050  case tok::l_brace: {
2051  if (!tryToParseBracedList())
2052  parseChildBlock();
2053  break;
2054  }
2055  case tok::at:
2056  nextToken();
2057  if (FormatTok->Tok.is(tok::l_brace)) {
2058  nextToken();
2059  parseBracedList();
2060  }
2061  break;
2062  default:
2063  nextToken();
2064  break;
2065  }
2066  } while (!eof());
2067 }
2068 
2069 void UnwrappedLineParser::parseIfThenElse() {
2070  assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
2071  nextToken();
2072  if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
2073  nextToken();
2074  if (FormatTok->Tok.is(tok::l_paren))
2075  parseParens();
2076  // handle [[likely]] / [[unlikely]]
2077  if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute())
2078  parseSquare();
2079  bool NeedsUnwrappedLine = false;
2080  if (FormatTok->Tok.is(tok::l_brace)) {
2081  CompoundStatementIndenter Indenter(this, Style, Line->Level);
2082  parseBlock(/*MustBeDeclaration=*/false);
2083  if (Style.BraceWrapping.BeforeElse)
2084  addUnwrappedLine();
2085  else
2086  NeedsUnwrappedLine = true;
2087  } else {
2088  addUnwrappedLine();
2089  ++Line->Level;
2090  parseStructuralElement();
2091  --Line->Level;
2092  }
2093  if (FormatTok->Tok.is(tok::kw_else)) {
2094  nextToken();
2095  // handle [[likely]] / [[unlikely]]
2096  if (FormatTok->Tok.is(tok::l_square) && tryToParseSimpleAttribute())
2097  parseSquare();
2098  if (FormatTok->Tok.is(tok::l_brace)) {
2099  CompoundStatementIndenter Indenter(this, Style, Line->Level);
2100  parseBlock(/*MustBeDeclaration=*/false);
2101  addUnwrappedLine();
2102  } else if (FormatTok->Tok.is(tok::kw_if)) {
2103  FormatToken *Previous = AllTokens[Tokens->getPosition() - 1];
2104  bool PrecededByComment = Previous->is(tok::comment);
2105  if (PrecededByComment) {
2106  addUnwrappedLine();
2107  ++Line->Level;
2108  }
2109  parseIfThenElse();
2110  if (PrecededByComment)
2111  --Line->Level;
2112  } else {
2113  addUnwrappedLine();
2114  ++Line->Level;
2115  parseStructuralElement();
2116  if (FormatTok->is(tok::eof))
2117  addUnwrappedLine();
2118  --Line->Level;
2119  }
2120  } else if (NeedsUnwrappedLine) {
2121  addUnwrappedLine();
2122  }
2123 }
2124 
// Parses a 'try' / '__try' statement starting at the keyword: an optional
// function-try-block initializer list, an optional parenthesized resource
// list (Java), the try body, and then any number of handler clauses, each made
// of a keyword, an optional parenthesized part and a braced block.
//
// NeedsUnwrappedLine tracks whether the most recently parsed block still has
// to be terminated with addUnwrappedLine(); it is left pending when
// Style.BraceWrapping.BeforeCatch is off so a following handler keyword can
// stay on the closing brace's line.
//
// NOTE(review): the embedded listing numbers jump from 2182 to 2184 inside the
// handler-keyword condition below, so one source line appears to be missing
// from this copy and the parentheses of that condition do not balance as
// shown. Restore it from the upstream file before building.
2125 void UnwrappedLineParser::parseTryCatch() {
2126  assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2127  nextToken();
2128  bool NeedsUnwrappedLine = false;
2129  if (FormatTok->is(tok::colon)) {
2130  // We are in a function try block, what comes is an initializer list.
2131  nextToken();
2132 
2133  // In case identifiers were removed by clang-tidy, what might follow is
2134  // multiple commas in sequence - before the first identifier.
2135  while (FormatTok->is(tok::comma))
2136  nextToken();
2137 
2138  while (FormatTok->is(tok::identifier)) {
2139  nextToken();
2140  if (FormatTok->is(tok::l_paren))
2141  parseParens();
// Braced member initializer ("member{...}"): skip to the closing brace.
// NOTE(review): this inner loop has no end-of-input check; confirm what
// nextToken() does at eof, since a missing '}' could keep it from terminating.
2142  if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2143  FormatTok->is(tok::l_brace)) {
2144  do {
2145  nextToken();
2146  } while (!FormatTok->is(tok::r_brace));
2147  nextToken();
2148  }
2149 
2150  // In case identifiers were removed by clang-tidy, what might follow is
2151  // multiple commas in sequence - after the first identifier.
2152  while (FormatTok->is(tok::comma))
2153  nextToken();
2154  }
2155  }
2156  // Parse try with resource.
2157  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
2158  parseParens();
2159  }
2160  if (FormatTok->is(tok::l_brace)) {
2161  CompoundStatementIndenter Indenter(this, Style, Line->Level);
2162  parseBlock(/*MustBeDeclaration=*/false);
2163  if (Style.BraceWrapping.BeforeCatch) {
2164  addUnwrappedLine();
2165  } else {
2166  NeedsUnwrappedLine = true;
2167  }
2168  } else if (!FormatTok->is(tok::kw_catch)) {
2169  // The C++ standard requires a compound-statement after a try.
2170  // If there's none, we try to assume there's a structuralElement
2171  // and try to continue.
2172  addUnwrappedLine();
2173  ++Line->Level;
2174  parseStructuralElement();
2175  --Line->Level;
2176  }
// Handler loop: each iteration consumes one handler clause and its block; the
// loop ends at the first token that does not start a handler.
2177  while (1) {
2178  if (FormatTok->is(tok::at))
2179  nextToken();
2180  if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2181  tok::kw___finally) ||
2182  ((Style.Language == FormatStyle::LK_Java ||
2184  FormatTok->is(Keywords.kw_finally)) ||
2185  (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
2186  FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
2187  break;
2188  nextToken();
// Skip the handler's head up to its opening brace; give up entirely if the
// statement ends (';', '}' or eof) before a block is found.
2189  while (FormatTok->isNot(tok::l_brace)) {
2190  if (FormatTok->is(tok::l_paren)) {
2191  parseParens();
2192  continue;
2193  }
2194  if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
2195  return;
2196  nextToken();
2197  }
2198  NeedsUnwrappedLine = false;
2199  CompoundStatementIndenter Indenter(this, Style, Line->Level);
2200  parseBlock(/*MustBeDeclaration=*/false);
2201  if (Style.BraceWrapping.BeforeCatch)
2202  addUnwrappedLine();
2203  else
2204  NeedsUnwrappedLine = true;
2205  }
2206  if (NeedsUnwrappedLine)
2207  addUnwrappedLine();
2208 }
2209 
// Parses a namespace introduced by 'namespace' or by a namespace macro
// (TT_NamespaceMacro). For the macro form the parenthesized arguments are
// consumed; otherwise the name (identifiers, '::', 'inline' and
// square-bracketed groups) is skipped. If a '{' follows, the body is parsed as
// a declaration block and a trailing ';' is swallowed. Nothing further is done
// when no '{' follows.
//
// NOTE(review): the embedded listing numbers skip 2232-2233 and 2239, so the
// initializers of AddLevels and ManageWhitesmithsBraces are incomplete in this
// copy and do not parse as shown. Restore them from the upstream file.
2210 void UnwrappedLineParser::parseNamespace() {
2211  assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2212  "'namespace' expected");
2213 
2214  const FormatToken &InitialToken = *FormatTok;
2215  nextToken();
2216  if (InitialToken.is(TT_NamespaceMacro)) {
2217  parseParens();
2218  } else {
2219  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2220  tok::l_square)) {
2221  if (FormatTok->is(tok::l_square))
2222  parseSquare();
2223  else
2224  nextToken();
2225  }
2226  }
2227  if (FormatTok->Tok.is(tok::l_brace)) {
2228  if (ShouldBreakBeforeBrace(Style, InitialToken))
2229  addUnwrappedLine();
2230 
// AddLevels is 1 when the namespace body is indented and 0 otherwise; it is
// passed to parseBlock() and also selects the LineLevel mode used below.
2231  unsigned AddLevels =
2234  DeclarationScopeStack.size() > 1)
2235  ? 1u
2236  : 0u;
2237  bool ManageWhitesmithsBraces =
2238  AddLevels == 0u &&
2240 
2241  // If we're in Whitesmiths mode, indent the brace if we're not indenting
2242  // the whole block.
2243  if (ManageWhitesmithsBraces)
2244  ++Line->Level;
2245 
2246  parseBlock(/*MustBeDeclaration=*/true, AddLevels,
2247  /*MunchSemi=*/true,
2248  /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);
2249 
2250  // Munch the semicolon after a namespace. This is more common than one would
2251  // think. Putting the semicolon into its own line is very ugly.
2252  if (FormatTok->Tok.is(tok::semi))
2253  nextToken();
2254 
2255  addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2256 
// Undo the extra brace indentation applied before parseBlock().
2257  if (ManageWhitesmithsBraces)
2258  --Line->Level;
2259  }
2260  // FIXME: Add error handling.
2261 }
2262 
2263 void UnwrappedLineParser::parseNew() {
2264  assert(FormatTok->is(tok::kw_new) && "'new' expected");
2265  nextToken();
2266 
2267  if (Style.isCSharp()) {
2268  do {
2269  if (FormatTok->is(tok::l_brace))
2270  parseBracedList();
2271 
2272  if (FormatTok->isOneOf(tok::semi, tok::comma))
2273  return;
2274 
2275  nextToken();
2276  } while (!eof());
2277  }
2278 
2279  if (Style.Language != FormatStyle::LK_Java)
2280  return;
2281 
2282  // In Java, we can parse everything up to the parens, which aren't optional.
2283  do {
2284  // There should not be a ;, { or } before the new's open paren.
2285  if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2286  return;
2287 
2288  // Consume the parens.
2289  if (FormatTok->is(tok::l_paren)) {
2290  parseParens();
2291 
2292  // If there is a class body of an anonymous class, consume that as child.
2293  if (FormatTok->is(tok::l_brace))
2294  parseChildBlock();
2295  return;
2296  }
2297  nextToken();
2298  } while (!eof());
2299 }
2300 
2301 void UnwrappedLineParser::parseForOrWhileLoop() {
2302  assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2303  "'for', 'while' or foreach macro expected");
2304  nextToken();
2305  // JS' for await ( ...
2306  if (Style.Language == FormatStyle::LK_JavaScript &&
2307  FormatTok->is(Keywords.kw_await))
2308  nextToken();
2309  if (FormatTok->Tok.is(tok::l_paren))
2310  parseParens();
2311  if (FormatTok->Tok.is(tok::l_brace)) {
2312  CompoundStatementIndenter Indenter(this, Style, Line->Level);
2313  parseBlock(/*MustBeDeclaration=*/false);
2314  addUnwrappedLine();
2315  } else {
2316  addUnwrappedLine();
2317  ++Line->Level;
2318  parseStructuralElement();
2319  --Line->Level;
2320  }
2321 }
2322 
// Parses a do-while statement starting at the 'do' keyword: the body (braced
// block or single statement one level deeper), followed by the trailing
// 'while', which is handed to parseStructuralElement(). If no 'while' follows
// the body, the current line is ended and parsing of the statement stops.
//
// NOTE(review): the embedded listing numbers skip 2346, directly below the
// Whitesmiths comment. As shown, "++Line->Level" runs unconditionally, which
// does not match that comment; a guarding condition appears to be missing
// from this copy. Restore it from the upstream file.
2323 void UnwrappedLineParser::parseDoWhile() {
2324  assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
2325  nextToken();
2326  if (FormatTok->Tok.is(tok::l_brace)) {
2327  CompoundStatementIndenter Indenter(this, Style, Line->Level);
2328  parseBlock(/*MustBeDeclaration=*/false);
// Only break after '}' when 'while' is configured to start its own line.
2329  if (Style.BraceWrapping.BeforeWhile)
2330  addUnwrappedLine();
2331  } else {
2332  addUnwrappedLine();
2333  ++Line->Level;
2334  parseStructuralElement();
2335  --Line->Level;
2336  }
2337 
2338  // FIXME: Add error handling.
2339  if (!FormatTok->Tok.is(tok::kw_while)) {
2340  addUnwrappedLine();
2341  return;
2342  }
2343 
2344  // If in Whitesmiths mode, the line with the while() needs to be indented
2345  // to the same level as the block.
2347  ++Line->Level;
2348 
2349  nextToken();
2350  parseStructuralElement();
2351 }
2352 
// Parses what follows a label. The current token (the label's last token, as
// far as the callers visible here show) is consumed first. The label line is
// emitted one level shallower than the surrounding code, or at level 0 when
// \p LeftAlignLabel is set, and the previous level is restored afterwards.
// A '{' directly after the label is parsed as a block attached to the label
// when Style.IndentCaseBlocks is off and no comments are pending.
//
// NOTE(review): the embedded listing numbers skip 2365, 2369-2370 and 2373, so
// the CompoundStatementIndenter arguments and the 'break' handling are
// incomplete in this copy and the braces do not balance as shown. Restore the
// missing lines from the upstream file before building.
2353 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2354  nextToken();
// Remember the level so it can be restored once the label line is emitted.
2355  unsigned OldLineLevel = Line->Level;
2356  if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2357  --Line->Level;
2358  if (LeftAlignLabel)
2359  Line->Level = 0;
2360 
2361  if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2362  FormatTok->Tok.is(tok::l_brace)) {
2363 
2364  CompoundStatementIndenter Indenter(this, Line->Level,
2366  Style.BraceWrapping.IndentBraces);
2367  parseBlock(/*MustBeDeclaration=*/false);
2368  if (FormatTok->Tok.is(tok::kw_break)) {
2371  addUnwrappedLine();
2372  if (!Style.IndentCaseBlocks &&
2374  Line->Level++;
2375  }
2376  }
2377  parseStructuralElement();
2378  }
2379  addUnwrappedLine();
2380  } else {
2381  if (FormatTok->is(tok::semi))
2382  nextToken();
2383  addUnwrappedLine();
2384  }
2385  Line->Level = OldLineLevel;
// Anything other than a '{' after the label is parsed as the labelled
// statement on its own line.
2386  if (FormatTok->isNot(tok::l_brace)) {
2387  parseStructuralElement();
2388  addUnwrappedLine();
2389  }
2390 }
2391 
2392 void UnwrappedLineParser::parseCaseLabel() {
2393  assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2394 
2395  // FIXME: fix handling of complex expressions here.
2396  do {
2397  nextToken();
2398  } while (!eof() && !FormatTok->Tok.is(tok::colon));
2399  parseLabel();
2400 }
2401 
2402 void UnwrappedLineParser::parseSwitch() {
2403  assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2404  nextToken();
2405  if (FormatTok->Tok.is(tok::l_paren))
2406  parseParens();
2407  if (FormatTok->Tok.is(tok::l_brace)) {
2408  CompoundStatementIndenter Indenter(this, Style, Line->Level);
2409  parseBlock(/*MustBeDeclaration=*/false);
2410  addUnwrappedLine();
2411  } else {
2412  addUnwrappedLine();
2413  ++Line->Level;
2414  parseStructuralElement();
2415  --Line->Level;
2416  }
2417 }
2418 
2419 void UnwrappedLineParser::parseAccessSpecifier() {
2420  nextToken();
2421  // Understand Qt's slots.
2422  if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2423  nextToken();
2424  // Otherwise, we don't know what it is, and we'd better keep the next token.
2425  if (FormatTok->Tok.is(tok::colon))
2426  nextToken();
2427  addUnwrappedLine();
2428 }
2429 
2430 void UnwrappedLineParser::parseConcept() {
2431  assert(FormatTok->Tok.is(tok::kw_concept) && "'concept' expected");
2432  nextToken();
2433  if (!FormatTok->Tok.is(tok::identifier))
2434  return;
2435  nextToken();
2436  if (!FormatTok->Tok.is(tok::equal))
2437  return;
2438  nextToken();
2439  if (FormatTok->Tok.is(tok::kw_requires)) {
2440  nextToken();
2441  parseRequiresExpression(Line->Level);
2442  } else {
2443  parseConstraintExpression(Line->Level);
2444  }
2445 }
2446 
2447 void UnwrappedLineParser::parseRequiresExpression(unsigned int OriginalLevel) {
2448  // requires (R range)
2449  if (FormatTok->Tok.is(tok::l_paren)) {
2450  parseParens();
2451  if (Style.IndentRequires && OriginalLevel != Line->Level) {
2452  addUnwrappedLine();
2453  --Line->Level;
2454  }
2455  }
2456 
2457  if (FormatTok->Tok.is(tok::l_brace)) {
2458  if (Style.BraceWrapping.AfterFunction)
2459  addUnwrappedLine();
2460  FormatTok->setType(TT_FunctionLBrace);
2461  parseBlock(/*MustBeDeclaration=*/false);
2462  addUnwrappedLine();
2463  } else {
2464  parseConstraintExpression(OriginalLevel);
2465  }
2466 }
2467 
2468 void UnwrappedLineParser::parseConstraintExpression(
2469  unsigned int OriginalLevel) {
2470  // requires Id<T> && Id<T> || Id<T>
2471  while (
2472  FormatTok->isOneOf(tok::identifier, tok::kw_requires, tok::coloncolon)) {
2473  nextToken();
2474  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::less,
2475  tok::greater, tok::comma, tok::ellipsis)) {
2476  if (FormatTok->Tok.is(tok::less)) {
2477  parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2478  /*ClosingBraceKind=*/tok::greater);
2479  continue;
2480  }
2481  nextToken();
2482  }
2483  if (FormatTok->Tok.is(tok::kw_requires)) {
2484  parseRequiresExpression(OriginalLevel);
2485  }
2486  if (FormatTok->Tok.is(tok::less)) {
2487  parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2488  /*ClosingBraceKind=*/tok::greater);
2489  }
2490 
2491  if (FormatTok->Tok.is(tok::l_paren)) {
2492  parseParens();
2493  }
2494  if (FormatTok->Tok.is(tok::l_brace)) {
2495  if (Style.BraceWrapping.AfterFunction)
2496  addUnwrappedLine();
2497  FormatTok->setType(TT_FunctionLBrace);
2498  parseBlock(/*MustBeDeclaration=*/false);
2499  }
2500  if (FormatTok->Tok.is(tok::semi)) {
2501  // Eat any trailing semi.
2502  nextToken();
2503  addUnwrappedLine();
2504  }
2505  if (FormatTok->Tok.is(tok::colon)) {
2506  return;
2507  }
2508  if (!FormatTok->Tok.isOneOf(tok::ampamp, tok::pipepipe)) {
2509  if (FormatTok->Previous &&
2510  !FormatTok->Previous->isOneOf(tok::identifier, tok::kw_requires,
2511  tok::coloncolon)) {
2512  addUnwrappedLine();
2513  }
2514  if (Style.IndentRequires && OriginalLevel != Line->Level) {
2515  --Line->Level;
2516  }
2517  break;
2518  } else {
2519  FormatTok->setType(TT_ConstraintJunctions);
2520  }
2521 
2522  nextToken();
2523  }
2524 }
2525 
2526 void UnwrappedLineParser::parseRequires() {
2527  assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected");
2528 
2529  unsigned OriginalLevel = Line->Level;
2530  if (FormatTok->Previous && FormatTok->Previous->is(tok::greater)) {
2531  addUnwrappedLine();
2532  if (Style.IndentRequires) {
2533  Line->Level++;
2534  }
2535  }
2536  nextToken();
2537 
2538  parseRequiresExpression(OriginalLevel);
2539 }
2540 
2541 bool UnwrappedLineParser::parseEnum() {
2542  // Won't be 'enum' for NS_ENUMs.
2543  if (FormatTok->Tok.is(tok::kw_enum))
2544  nextToken();
2545 
2546  const FormatToken &InitialToken = *FormatTok;
2547 
2548  // In TypeScript, "enum" can also be used as property name, e.g. in interface
2549  // declarations. An "enum" keyword followed by a colon would be a syntax
2550  // error and thus assume it is just an identifier.
2551  if (Style.Language == FormatStyle::LK_JavaScript &&
2552  FormatTok->isOneOf(tok::colon, tok::question))
2553  return false;
2554 
2555  // In protobuf, "enum" can be used as a field name.
2556  if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2557  return false;
2558 
2559  // Eat up enum class ...
2560  if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2561  nextToken();
2562 
2563  while (FormatTok->Tok.getIdentifierInfo() ||
2564  FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2565  tok::greater, tok::comma, tok::question)) {
2566  nextToken();
2567  // We can have macros or attributes in between 'enum' and the enum name.
2568  if (FormatTok->is(tok::l_paren))
2569  parseParens();
2570  if (FormatTok->is(tok::identifier)) {
2571  nextToken();
2572  // If there are two identifiers in a row, this is likely an elaborate
2573  // return type. In Java, this can be "implements", etc.
2574  if (Style.isCpp() && FormatTok->is(tok::identifier))
2575  return false;
2576  }
2577  }
2578 
2579  // Just a declaration or something is wrong.
2580  if (FormatTok->isNot(tok::l_brace))
2581  return true;
2582  FormatTok->setBlockKind(BK_Block);
2583 
2584  if (Style.Language == FormatStyle::LK_Java) {
2585  // Java enums are different.
2586  parseJavaEnumBody();
2587  return true;
2588  }
2589  if (Style.Language == FormatStyle::LK_Proto) {
2590  parseBlock(/*MustBeDeclaration=*/true);
2591  return true;
2592  }
2593 
2594  if (!Style.AllowShortEnumsOnASingleLine &&
2595  ShouldBreakBeforeBrace(Style, InitialToken))
2596  addUnwrappedLine();
2597  // Parse enum body.
2598  nextToken();
2599  if (!Style.AllowShortEnumsOnASingleLine) {
2600  addUnwrappedLine();
2601  Line->Level += 1;
2602  }
2603  bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
2604  /*IsEnum=*/true);
2605  if (!Style.AllowShortEnumsOnASingleLine)
2606  Line->Level -= 1;
2607  if (HasError) {
2608  if (FormatTok->is(tok::semi))
2609  nextToken();
2610  addUnwrappedLine();
2611  }
2612  return true;
2613 
2614  // There is no addUnwrappedLine() here so that we fall through to parsing a
2615  // structural element afterwards. Thus, in "enum A {} n, m;",
2616  // "} n, m;" will end up in one unwrapped line.
2617 }
2618 
2619 bool UnwrappedLineParser::parseStructLike() {
2620  // parseRecord falls through and does not yet add an unwrapped line as a
2621  // record declaration or definition can start a structural element.
2622  parseRecord();
2623  // This does not apply to Java, JavaScript and C#.
2624  if (Style.Language == FormatStyle::LK_Java ||
2625  Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
2626  if (FormatTok->is(tok::semi))
2627  nextToken();
2628  addUnwrappedLine();
2629  return true;
2630  }
2631  return false;
2632 }
2633 
2634 namespace {
2635 // A class used to set and restore the Token position when peeking
2636 // ahead in the token source.
2637 class ScopedTokenPosition {
2638  unsigned StoredPosition;
2639  FormatTokenSource *Tokens;
2640 
2641 public:
2642  ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
2643  assert(Tokens && "Tokens expected to not be null");
2644  StoredPosition = Tokens->getPosition();
2645  }
2646 
2647  ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
2648 };
2649 } // namespace
2650 
2651 // Look to see if we have [[ by looking ahead, if
2652 // its not then rewind to the original position.
2653 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
2654  ScopedTokenPosition AutoPosition(Tokens);
2655  FormatToken *Tok = Tokens->getNextToken();
2656  // We already read the first [ check for the second.
2657  if (Tok && !Tok->is(tok::l_square)) {
2658  return false;
2659  }
2660  // Double check that the attribute is just something
2661  // fairly simple.
2662  while (Tok) {
2663  if (Tok->is(tok::r_square)) {
2664  break;
2665  }
2666  Tok = Tokens->getNextToken();
2667  }
2668  Tok = Tokens->getNextToken();
2669  if (Tok && !Tok->is(tok::r_square)) {
2670  return false;
2671  }
2672  Tok = Tokens->getNextToken();
2673  if (Tok && Tok->is(tok::semi)) {
2674  return false;
2675  }
2676  return true;
2677 }
2678 
2679 void UnwrappedLineParser::parseJavaEnumBody() {
2680  // Determine whether the enum is simple, i.e. does not have a semicolon or
2681  // constants with class bodies. Simple enums can be formatted like braced
2682  // lists, contracted to a single line, etc.
2683  unsigned StoredPosition = Tokens->getPosition();
2684  bool IsSimple = true;
2685  FormatToken *Tok = Tokens->getNextToken();
2686  while (Tok) {
2687  if (Tok->is(tok::r_brace))
2688  break;
2689  if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2690  IsSimple = false;
2691  break;
2692  }
2693  // FIXME: This will also mark enums with braces in the arguments to enum
2694  // constants as "not simple". This is probably fine in practice, though.
2695  Tok = Tokens->getNextToken();
2696  }
2697  FormatTok = Tokens->setPosition(StoredPosition);
2698 
2699  if (IsSimple) {
2700  nextToken();
2701  parseBracedList();
2702  addUnwrappedLine();
2703  return;
2704  }
2705 
2706  // Parse the body of a more complex enum.
2707  // First add a line for everything up to the "{".
2708  nextToken();
2709  addUnwrappedLine();
2710  ++Line->Level;
2711 
2712  // Parse the enum constants.
2713  while (FormatTok) {
2714  if (FormatTok->is(tok::l_brace)) {
2715  // Parse the constant's class body.
2716  parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
2717  /*MunchSemi=*/false);
2718  } else if (FormatTok->is(tok::l_paren)) {
2719  parseParens();
2720  } else if (FormatTok->is(tok::comma)) {
2721  nextToken();
2722  addUnwrappedLine();
2723  } else if (FormatTok->is(tok::semi)) {
2724  nextToken();
2725  addUnwrappedLine();
2726  break;
2727  } else if (FormatTok->is(tok::r_brace)) {
2728  addUnwrappedLine();
2729  break;
2730  } else {
2731  nextToken();
2732  }
2733  }
2734 
2735  // Parse the class body after the enum's ";" if any.
2736  parseLevel(/*HasOpeningBrace=*/true);
2737  nextToken();
2738  --Line->Level;
2739  addUnwrappedLine();
2740 }
2741 
// Parses a record (class-like) declaration or definition, starting at its
// introducing keyword: the keyword, the (possibly qualified, attributed or
// macro-decorated) name, an optional base/template part introduced by ':' or
// '<', and finally the body if a '{' is found.
//
// \p ParseAsExpr: when true the body is parsed with parseChildBlock() instead
// of parseBlock(); the caller visible in this file that passes true is the
// JavaScript 'class' case in parseParens().
//
// Returns early, without parsing a body, if a ';' is reached while scanning
// the base/template part.
//
// NOTE(review): the embedded listing numbers jump from 2752 to 2754 inside the
// first loop condition, so one source line appears to be missing from this
// copy and the parentheses of that condition do not balance as shown. Restore
// it from the upstream file before building.
2742 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2743  const FormatToken &InitialToken = *FormatTok;
2744  nextToken();
2745 
2746  // The actual identifier can be a nested name specifier, and in macros
2747  // it is often token-pasted.
2748  // An [[attribute]] can be before the identifier.
2749  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2750  tok::kw___attribute, tok::kw___declspec,
2751  tok::kw_alignas, tok::l_square, tok::r_square) ||
2752  ((Style.Language == FormatStyle::LK_Java ||
2754  FormatTok->isOneOf(tok::period, tok::comma))) {
2755  if (Style.Language == FormatStyle::LK_JavaScript &&
2756  FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2757  // JavaScript/TypeScript supports inline object types in
2758  // extends/implements positions:
2759  // class Foo implements {bar: number} { }
2760  nextToken();
2761  if (FormatTok->is(tok::l_brace)) {
2762  tryToParseBracedList();
2763  continue;
2764  }
2765  }
// An identifier whose text differs from its upper-cased form is taken to be
// a real name rather than a macro.
2766  bool IsNonMacroIdentifier =
2767  FormatTok->is(tok::identifier) &&
2768  FormatTok->TokenText != FormatTok->TokenText.upper();
2769  nextToken();
2770  // We can have macros or attributes in between 'class' and the class name.
2771  if (!IsNonMacroIdentifier) {
2772  if (FormatTok->Tok.is(tok::l_paren)) {
2773  parseParens();
2774  } else if (FormatTok->is(TT_AttributeSquare)) {
2775  parseSquare();
2776  // Consume the closing TT_AttributeSquare.
2777  if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
2778  nextToken();
2779  }
2780  }
2781  }
2782 
2783  // Note that parsing away template declarations here leads to incorrectly
2784  // accepting function declarations as record declarations.
2785  // In general, we cannot solve this problem. Consider:
2786  // class A<int> B() {}
2787  // which can be a function definition or a class definition when B() is a
2788  // macro. If we find enough real-world cases where this is a problem, we
2789  // can parse for the 'template' keyword in the beginning of the statement,
2790  // and thus rule out the record production in case there is no template
2791  // (this would still leave us with an ambiguity between template function
2792  // and class declarations).
2793  if (FormatTok->isOneOf(tok::colon, tok::less)) {
2794  while (!eof()) {
// A '{' that does not parse as a braced list ends the scan; it is handled as
// the record body below.
2795  if (FormatTok->is(tok::l_brace)) {
2796  calculateBraceTypes(/*ExpectClassBody=*/true);
2797  if (!tryToParseBracedList())
2798  break;
2799  }
2800  if (FormatTok->Tok.is(tok::semi))
2801  return;
2802  if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
2803  addUnwrappedLine();
2804  nextToken();
2805  parseCSharpGenericTypeConstraint();
2806  break;
2807  }
2808  nextToken();
2809  }
2810  }
2811  if (FormatTok->Tok.is(tok::l_brace)) {
2812  if (ParseAsExpr) {
2813  parseChildBlock();
2814  } else {
2815  if (ShouldBreakBeforeBrace(Style, InitialToken))
2816  addUnwrappedLine();
2817 
// Two levels are added for the body when Style.IndentAccessModifiers is set,
// one otherwise.
2818  unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
2819  parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
2820  }
2821  }
2822  // There is no addUnwrappedLine() here so that we fall through to parsing a
2823  // structural element afterwards. Thus, in "class A {} n, m;",
2824  // "} n, m;" will end up in one unwrapped line.
2825 }
2826 
2827 void UnwrappedLineParser::parseObjCMethod() {
2828  assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2829  "'(' or identifier expected.");
2830  do {
2831  if (FormatTok->Tok.is(tok::semi)) {
2832  nextToken();
2833  addUnwrappedLine();
2834  return;
2835  } else if (FormatTok->Tok.is(tok::l_brace)) {
2836  if (Style.BraceWrapping.AfterFunction)
2837  addUnwrappedLine();
2838  parseBlock(/*MustBeDeclaration=*/false);
2839  addUnwrappedLine();
2840  return;
2841  } else {
2842  nextToken();
2843  }
2844  } while (!eof());
2845 }
2846 
2847 void UnwrappedLineParser::parseObjCProtocolList() {
2848  assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2849  do {
2850  nextToken();
2851  // Early exit in case someone forgot a close angle.
2852  if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2853  FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2854  return;
2855  } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2856  nextToken(); // Skip '>'.
2857 }
2858 
2859 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2860  do {
2861  if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2862  nextToken();
2863  addUnwrappedLine();
2864  break;
2865  }
2866  if (FormatTok->is(tok::l_brace)) {
2867  parseBlock(/*MustBeDeclaration=*/false);
2868  // In ObjC interfaces, nothing should be following the "}".
2869  addUnwrappedLine();
2870  } else if (FormatTok->is(tok::r_brace)) {
2871  // Ignore stray "}". parseStructuralElement doesn't consume them.
2872  nextToken();
2873  addUnwrappedLine();
2874  } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2875  nextToken();
2876  parseObjCMethod();
2877  } else {
2878  parseStructuralElement();
2879  }
2880  } while (!eof());
2881 }
2882 
// Parses an Objective-C @interface or @implementation starting at the
// keyword: the name, optional lightweight generics, then either a base class
// (with its own optional generics) or a parenthesized category, an optional
// protocol list, an optional braced block, and finally everything up to the
// closing @end.
//
// NOTE(review): the embedded listing numbers skip 2909, directly after the
// "{" check below. As shown, addUnwrappedLine() runs unconditionally before
// the block is parsed; a guarding condition appears to be missing from this
// copy. Restore it from the upstream file.
2883 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2884  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2885  FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2886  nextToken();
2887  nextToken(); // interface name
2888 
2889  // @interface can be followed by a lightweight generic
2890  // specialization list, then either a base class or a category.
2891  if (FormatTok->Tok.is(tok::less)) {
2892  parseObjCLightweightGenerics();
2893  }
2894  if (FormatTok->Tok.is(tok::colon)) {
2895  nextToken();
2896  nextToken(); // base class name
2897  // The base class can also have lightweight generics applied to it.
2898  if (FormatTok->Tok.is(tok::less)) {
2899  parseObjCLightweightGenerics();
2900  }
2901  } else if (FormatTok->Tok.is(tok::l_paren))
2902  // Skip category, if present.
2903  parseParens();
2904 
2905  if (FormatTok->Tok.is(tok::less))
2906  parseObjCProtocolList();
2907 
// Optional braced block following the header (the instance variables, per the
// comment further down).
2908  if (FormatTok->Tok.is(tok::l_brace)) {
2910  addUnwrappedLine();
2911  parseBlock(/*MustBeDeclaration=*/true);
2912  }
2913 
2914  // With instance variables, this puts '}' on its own line. Without instance
2915  // variables, this ends the @interface line.
2916  addUnwrappedLine();
2917 
2918  parseObjCUntilAtEnd();
2919 }
2920 
2921 void UnwrappedLineParser::parseObjCLightweightGenerics() {
2922  assert(FormatTok->Tok.is(tok::less));
2923  // Unlike protocol lists, generic parameterizations support
2924  // nested angles:
2925  //
2926  // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2927  // NSObject <NSCopying, NSSecureCoding>
2928  //
2929  // so we need to count how many open angles we have left.
2930  unsigned NumOpenAngles = 1;
2931  do {
2932  nextToken();
2933  // Early exit in case someone forgot a close angle.
2934  if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2935  FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2936  break;
2937  if (FormatTok->Tok.is(tok::less))
2938  ++NumOpenAngles;
2939  else if (FormatTok->Tok.is(tok::greater)) {
2940  assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2941  --NumOpenAngles;
2942  }
2943  } while (!eof() && NumOpenAngles != 0);
2944  nextToken(); // Skip '>'.
2945 }
2946 
2947 // Returns true for the declaration/definition form of @protocol,
2948 // false for the expression form.
2949 bool UnwrappedLineParser::parseObjCProtocol() {
2950  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2951  nextToken();
2952 
2953  if (FormatTok->is(tok::l_paren))
2954  // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2955  return false;
2956 
2957  // The definition/declaration form,
2958  // @protocol Foo
2959  // - (int)someMethod;
2960  // @end
2961 
2962  nextToken(); // protocol name
2963 
2964  if (FormatTok->Tok.is(tok::less))
2965  parseObjCProtocolList();
2966 
2967  // Check for protocol declaration.
2968  if (FormatTok->Tok.is(tok::semi)) {
2969  nextToken();
2970  addUnwrappedLine();
2971  return true;
2972  }
2973 
2974  addUnwrappedLine();
2975  parseObjCUntilAtEnd();
2976  return true;
2977 }
2978 
2979 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2980  bool IsImport = FormatTok->is(Keywords.kw_import);
2981  assert(IsImport || FormatTok->is(tok::kw_export));
2982  nextToken();
2983 
2984  // Consume the "default" in "export default class/function".
2985  if (FormatTok->is(tok::kw_default))
2986  nextToken();
2987 
2988  // Consume "async function", "function" and "default function", so that these
2989  // get parsed as free-standing JS functions, i.e. do not require a trailing
2990  // semicolon.
2991  if (FormatTok->is(Keywords.kw_async))
2992  nextToken();
2993  if (FormatTok->is(Keywords.kw_function)) {
2994  nextToken();
2995  return;
2996  }
2997 
2998  // For imports, `export *`, `export {...}`, consume the rest of the line up
2999  // to the terminating `;`. For everything else, just return and continue
3000  // parsing the structural element, i.e. the declaration or expression for
3001  // `export default`.
3002  if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
3003  !FormatTok->isStringLiteral())
3004  return;
3005 
3006  while (!eof()) {
3007  if (FormatTok->is(tok::semi))
3008  return;
3009  if (Line->Tokens.empty()) {
3010  // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3011  // import statement should terminate.
3012  return;
3013  }
3014  if (FormatTok->is(tok::l_brace)) {
3015  FormatTok->setBlockKind(BK_Block);
3016  nextToken();
3017  parseBracedList();
3018  } else {
3019  nextToken();
3020  }
3021  }
3022 }
3023 
3024 void UnwrappedLineParser::parseStatementMacro() {
3025  nextToken();
3026  if (FormatTok->is(tok::l_paren))
3027  parseParens();
3028  if (FormatTok->is(tok::semi))
3029  nextToken();
3030  addUnwrappedLine();
3031 }
3032 
3033 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3034  StringRef Prefix = "") {
3035  llvm::dbgs() << Prefix << "Line(" << Line.Level
3036  << ", FSC=" << Line.FirstStartColumn << ")"
3037  << (Line.InPPDirective ? " MACRO" : "") << ": ";
3038  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
3039  E = Line.Tokens.end();
3040  I != E; ++I) {
3041  llvm::dbgs() << I->Tok->Tok.getName() << "["
3042  << "T=" << (unsigned)I->Tok->getType()
3043  << ", OC=" << I->Tok->OriginalColumn << "] ";
3044  }
3045  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
3046  E = Line.Tokens.end();
3047  I != E; ++I) {
3048  const UnwrappedLineNode &Node = *I;
3050  I = Node.Children.begin(),
3051  E = Node.Children.end();
3052  I != E; ++I) {
3053  printDebugInfo(*I, "\nChild: ");
3054  }
3055  }
3056  llvm::dbgs() << "\n";
3057 }
3058 
3059 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
3060  if (Line->Tokens.empty())
3061  return;
3062  LLVM_DEBUG({
3063  if (CurrentLines == &Lines)
3064  printDebugInfo(*Line);
3065  });
3066 
3067  // If this line closes a block when in Whitesmiths mode, remember that
3068  // information so that the level can be decreased after the line is added.
3069  // This has to happen after the addition of the line since the line itself
3070  // needs to be indented.
3071  bool ClosesWhitesmithsBlock =
3072  Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
3074 
3075  CurrentLines->push_back(std::move(*Line));
3076  Line->Tokens.clear();
3077  Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
3078  Line->FirstStartColumn = 0;
3079 
3080  if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
3081  --Line->Level;
3082  if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
3083  CurrentLines->append(
3084  std::make_move_iterator(PreprocessorDirectives.begin()),
3085  std::make_move_iterator(PreprocessorDirectives.end()));
3086  PreprocessorDirectives.clear();
3087  }
3088  // Disconnect the current token from the last token on the previous line.
3089  FormatTok->Previous = nullptr;
3090 }
3091 
3092 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
3093 
3094 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
3095  return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
3096  FormatTok.NewlinesBefore > 0;
3097 }
3098 
3099 // Checks if \p FormatTok is a line comment that continues the line comment
3100 // section on \p Line.
3101 static bool
3103  const UnwrappedLine &Line,
3104  const llvm::Regex &CommentPragmasRegex) {
3105  if (Line.Tokens.empty())
3106  return false;
3107 
3108  StringRef IndentContent = FormatTok.TokenText;
3109  if (FormatTok.TokenText.startswith("//") ||
3110  FormatTok.TokenText.startswith("/*"))
3111  IndentContent = FormatTok.TokenText.substr(2);
3112  if (CommentPragmasRegex.match(IndentContent))
3113  return false;
3114 
3115  // If Line starts with a line comment, then FormatTok continues the comment
3116  // section if its original column is greater or equal to the original start
3117  // column of the line.
3118  //
3119  // Define the min column token of a line as follows: if a line ends in '{' or
3120  // contains a '{' followed by a line comment, then the min column token is
3121  // that '{'. Otherwise, the min column token of the line is the first token of
3122  // the line.
3123  //
3124  // If Line starts with a token other than a line comment, then FormatTok
3125  // continues the comment section if its original column is greater than the
3126  // original start column of the min column token of the line.
3127  //
3128  // For example, the second line comment continues the first in these cases:
3129  //
3130  // // first line
3131  // // second line
3132  //
3133  // and:
3134  //
3135  // // first line
3136  // // second line
3137  //
3138  // and:
3139  //
3140  // int i; // first line
3141  // // second line
3142  //
3143  // and:
3144  //
3145  // do { // first line
3146  // // second line
3147  // int i;
3148  // } while (true);
3149  //
3150  // and:
3151  //
3152  // enum {
3153  // a, // first line
3154  // // second line
3155  // b
3156  // };
3157  //
3158  // The second line comment doesn't continue the first in these cases:
3159  //
3160  // // first line
3161  // // second line
3162  //
3163  // and:
3164  //
3165  // int i; // first line
3166  // // second line
3167  //
3168  // and:
3169  //
3170  // do { // first line
3171  // // second line
3172  // int i;
3173  // } while (true);
3174  //
3175  // and:
3176  //
3177  // enum {
3178  // a, // first line
3179  // // second line
3180  // };
3181  const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
3182 
3183  // Scan for '{//'. If found, use the column of '{' as a min column for line
3184  // comment section continuation.
3185  const FormatToken *PreviousToken = nullptr;
3186  for (const UnwrappedLineNode &Node : Line.Tokens) {
3187  if (PreviousToken && PreviousToken->is(tok::l_brace) &&
3188  isLineComment(*Node.Tok)) {
3189  MinColumnToken = PreviousToken;
3190  break;
3191  }
3192  PreviousToken = Node.Tok;
3193 
3194  // Grab the last newline preceding a token in this unwrapped line.
3195  if (Node.Tok->NewlinesBefore > 0) {
3196  MinColumnToken = Node.Tok;
3197  }
3198  }
3199  if (PreviousToken && PreviousToken->is(tok::l_brace)) {
3200  MinColumnToken = PreviousToken;
3201  }
3202 
3203  return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
3204  MinColumnToken);
3205 }
3206 
3207 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
3208  bool JustComments = Line->Tokens.empty();
3210  I = CommentsBeforeNextToken.begin(),
3211  E = CommentsBeforeNextToken.end();
3212  I != E; ++I) {
3213  // Line comments that belong to the same line comment section are put on the
3214  // same line since later we might want to reflow content between them.
3215  // Additional fine-grained breaking of line comment sections is controlled
3216  // by the class BreakableLineCommentSection in case it is desirable to keep
3217  // several line comment sections in the same unwrapped line.
3218  //
3219  // FIXME: Consider putting separate line comment sections as children to the
3220  // unwrapped line instead.
3221  (*I)->ContinuesLineCommentSection =
3222  continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
3223  if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
3224  addUnwrappedLine();
3225  pushToken(*I);
3226  }
3227  if (NewlineBeforeNext && JustComments)
3228  addUnwrappedLine();
3229  CommentsBeforeNextToken.clear();
3230 }
3231 
3232 void UnwrappedLineParser::nextToken(int LevelDifference) {
3233  if (eof())
3234  return;
3235  flushComments(isOnNewLine(*FormatTok));
3236  pushToken(FormatTok);
3237  FormatToken *Previous = FormatTok;
3238  if (Style.Language != FormatStyle::LK_JavaScript)
3239  readToken(LevelDifference);
3240  else
3241  readTokenWithJavaScriptASI();
3242  FormatTok->Previous = Previous;
3243 }
3244 
3245 void UnwrappedLineParser::distributeComments(
3246  const SmallVectorImpl<FormatToken *> &Comments,
3247  const FormatToken *NextTok) {
3248  // Whether or not a line comment token continues a line is controlled by
3249  // the method continuesLineCommentSection, with the following caveat:
3250  //
3251  // Define a trail of Comments to be a nonempty proper postfix of Comments such
3252  // that each comment line from the trail is aligned with the next token, if
3253  // the next token exists. If a trail exists, the beginning of the maximal
3254  // trail is marked as a start of a new comment section.
3255  //
3256  // For example in this code:
3257  //
3258  // int a; // line about a
3259  // // line 1 about b
3260  // // line 2 about b
3261  // int b;
3262  //
3263  // the two lines about b form a maximal trail, so there are two sections, the
3264  // first one consisting of the single comment "// line about a" and the
3265  // second one consisting of the next two comments.
3266  if (Comments.empty())
3267  return;
3268  bool ShouldPushCommentsInCurrentLine = true;
3269  bool HasTrailAlignedWithNextToken = false;
3270  unsigned StartOfTrailAlignedWithNextToken = 0;
3271  if (NextTok) {
3272  // We are skipping the first element intentionally.
3273  for (unsigned i = Comments.size() - 1; i > 0; --i) {
3274  if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
3275  HasTrailAlignedWithNextToken = true;
3276  StartOfTrailAlignedWithNextToken = i;
3277  }
3278  }
3279  }
3280  for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
3281  FormatToken *FormatTok = Comments[i];
3282  if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
3283  FormatTok->ContinuesLineCommentSection = false;
3284  } else {
3285  FormatTok->ContinuesLineCommentSection =
3286  continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
3287  }
3288  if (!FormatTok->ContinuesLineCommentSection &&
3289  (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
3290  ShouldPushCommentsInCurrentLine = false;
3291  }
3292  if (ShouldPushCommentsInCurrentLine) {
3293  pushToken(FormatTok);
3294  } else {
3295  CommentsBeforeNextToken.push_back(FormatTok);
3296  }
3297  }
3298 }
3299 
3300 void UnwrappedLineParser::readToken(int LevelDifference) {
3301  SmallVector<FormatToken *, 1> Comments;
3302  do {
3303  FormatTok = Tokens->getNextToken();
3304  assert(FormatTok);
3305  while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
3306  (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
3307  distributeComments(Comments, FormatTok);
3308  Comments.clear();
3309  // If there is an unfinished unwrapped line, we flush the preprocessor
3310  // directives only after that unwrapped line was finished later.
3311  bool SwitchToPreprocessorLines = !Line->Tokens.empty();
3312  ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
3313  assert((LevelDifference >= 0 ||
3314  static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
3315  "LevelDifference makes Line->Level negative");
3316  Line->Level += LevelDifference;
3317  // Comments stored before the preprocessor directive need to be output
3318  // before the preprocessor directive, at the same level as the
3319  // preprocessor directive, as we consider them to apply to the directive.
3321  PPBranchLevel > 0)
3322  Line->Level += PPBranchLevel;
3323  flushComments(isOnNewLine(*FormatTok));
3324  parsePPDirective();
3325  }
3326  while (FormatTok->getType() == TT_ConflictStart ||
3327  FormatTok->getType() == TT_ConflictEnd ||
3328  FormatTok->getType() == TT_ConflictAlternative) {
3329  if (FormatTok->getType() == TT_ConflictStart) {
3330  conditionalCompilationStart(/*Unreachable=*/false);
3331  } else if (FormatTok->getType() == TT_ConflictAlternative) {
3332  conditionalCompilationAlternative();
3333  } else if (FormatTok->getType() == TT_ConflictEnd) {
3334  conditionalCompilationEnd();
3335  }
3336  FormatTok = Tokens->getNextToken();
3337  FormatTok->MustBreakBefore = true;
3338  }
3339 
3340  if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
3341  !Line->InPPDirective) {
3342  continue;
3343  }
3344 
3345  if (!FormatTok->Tok.is(tok::comment)) {
3346  distributeComments(Comments, FormatTok);
3347  Comments.clear();
3348  return;
3349  }
3350 
3351  Comments.push_back(FormatTok);
3352  } while (!eof());
3353 
3354  distributeComments(Comments, nullptr);
3355  Comments.clear();
3356 }
3357 
3358 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
3359  Line->Tokens.push_back(UnwrappedLineNode(Tok));
3360  if (MustBreakBeforeNextToken) {
3361  Line->Tokens.back().Tok->MustBreakBefore = true;
3362  MustBreakBeforeNextToken = false;
3363  }
3364 }
3365 
3366 } // end namespace format
3367 } // end namespace clang
clang::format::CompoundStatementIndenter
Definition: UnwrappedLineParser.cpp:172
clang::minimize_source_to_dependency_directives::pp_ifdef
@ pp_ifdef
Definition: DependencyDirectivesSourceMinimizer.h:44
clang::format::AdditionalKeywords::kw_implements
IdentifierInfo * kw_implements
Definition: FormatToken.h:997
clang::format::ScopedLineState
Definition: UnwrappedLineParser.cpp:139
clang::format::FormatTokenSource::getNextToken
virtual FormatToken * getNextToken()=0
clang::format::FormatStyle::BraceWrappingFlags::AfterStruct
bool AfterStruct
Wrap struct definitions.
Definition: Format.h:1622
clang::minimize_source_to_dependency_directives::pp_elifdef
@ pp_elifdef
Definition: DependencyDirectivesSourceMinimizer.h:47
clang::format::AdditionalKeywords::kw_NS_OPTIONS
IdentifierInfo * kw_NS_OPTIONS
Definition: FormatToken.h:967
clang::format::AdditionalKeywords::kw_abstract
IdentifierInfo * kw_abstract
Definition: FormatToken.h:994
clang::format::AdditionalKeywords::kw_CF_ENUM
IdentifierInfo * kw_CF_ENUM
Definition: FormatToken.h:963
clang::format::isJSDeclOrStmt
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
Definition: UnwrappedLineParser.cpp:989
clang::format::ScopedLineState::~ScopedLineState
~ScopedLineState()
Definition: UnwrappedLineParser.cpp:154
clang::format::mustBeJSIdent
static bool mustBeJSIdent(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
Definition: UnwrappedLineParser.cpp:965
clang::format::FormatStyle::BraceWrappingFlags::AfterControlStatement
BraceWrappingAfterControlStatementStyle AfterControlStatement
Wrap control statements (if/for/while/switch/..).
Definition: Format.h:1560
clang::format::AdditionalKeywords::kw_internal
IdentifierInfo * kw_internal
Definition: FormatToken.h:1033
clang::format::AdditionalKeywords::kw_instanceof
IdentifierInfo * kw_instanceof
Definition: FormatToken.h:998
clang::format::BK_Unknown
@ BK_Unknown
Definition: FormatToken.h:136
clang::format::FormatStyle::AllowShortEnumsOnASingleLine
bool AllowShortEnumsOnASingleLine
Allow short enums on a single line.
Definition: Format.h:558
clang::format::FormatStyle::IndentAccessModifiers
bool IndentAccessModifiers
Specify whether access modifiers should have their own indentation level.
Definition: Format.h:2368
clang::format::AdditionalKeywords::kw_from
IdentifierInfo * kw_from
Definition: FormatToken.h:978
clang::format::UnwrappedLineParser::parse
void parse()
Definition: UnwrappedLineParser.cpp:256
clang::Token::getIdentifierInfo
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:179
clang::format::UnwrappedLine::kInvalidIndex
static const size_t kInvalidIndex
Definition: UnwrappedLineParser.h:61
clang::format::AnnotatedLine::Level
unsigned Level
Definition: TokenAnnotator.h:129
clang::format::AdditionalKeywords::kw_is
IdentifierInfo * kw_is
Definition: FormatToken.h:983
clang::format::FormatStyle
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:59
clang::minimize_source_to_dependency_directives::pp_ifndef
@ pp_ifndef
Definition: DependencyDirectivesSourceMinimizer.h:45
clang::Parser
Parser - This implements a parser for the C family of languages.
Definition: Parser.h:60
clang::format::FormatToken::TokenText
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:227
clang::format::AdditionalKeywords::kw_CF_CLOSED_ENUM
IdentifierInfo * kw_CF_CLOSED_ENUM
Definition: FormatToken.h:962
clang::format::FormatStyle::IndentCaseLabels
bool IndentCaseLabels
Indent case labels one level from the switch statement.
Definition: Format.h:2387
clang::format::FormatToken
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:210
clang::format::AdditionalKeywords
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:841
clang::format::FormatToken::isNot
bool isNot(T Kind) const
Definition: FormatToken.h:471
clang::minimize_source_to_dependency_directives::pp_define
@ pp_define
Definition: DependencyDirectivesSourceMinimizer.h:37
clang::format::FormatStyle::IndentCaseBlocks
bool IndentCaseBlocks
Indent case label blocks one level from the case label.
Definition: Format.h:2410
clang::format::UnwrappedLine
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...
Definition: UnwrappedLineParser.h:36
clang::format::AdditionalKeywords::kw_as
IdentifierInfo * kw_as
Definition: FormatToken.h:973
clang::format::AdditionalKeywords::kw_signals
IdentifierInfo * kw_signals
Definition: FormatToken.h:1017
clang::format::FormatStyle::LK_TableGen
@ LK_TableGen
Should be used for TableGen code.
Definition: Format.h:2664
clang::format::FormatStyle::BraceWrappingFlags::AfterFunction
bool AfterFunction
Wrap function definitions.
Definition: Format.h:1588
clang::format::BK_BracedInit
@ BK_BracedInit
Definition: FormatToken.h:136
clang::format::UnwrappedLineParser
Definition: UnwrappedLineParser.h:75
clang::format::AnnotatedLine::FirstStartColumn
unsigned FirstStartColumn
Definition: TokenAnnotator.h:148
clang::format::FormatTokenSource::~FormatTokenSource
virtual ~FormatTokenSource()
Definition: UnwrappedLineParser.cpp:30
clang::format::FormatStyle::BraceWrappingFlags::AfterExternBlock
bool AfterExternBlock
Wrap extern blocks.
Definition: Format.h:1650
size_t
__SIZE_TYPE__ size_t
The unsigned integer type of the result of the sizeof operator.
Definition: opencl-c-base.h:102
clang::minimize_source_to_dependency_directives::pp_if
@ pp_if
Definition: DependencyDirectivesSourceMinimizer.h:43
clang::format::AdditionalKeywords::kw_import
IdentifierInfo * kw_import
Definition: FormatToken.h:981
clang::format::UnwrappedLineParser::CompoundStatementIndenter
friend class CompoundStatementIndenter
Definition: UnwrappedLineParser.h:297
clang::format::FormatStyle::LK_Proto
@ LK_Proto
Should be used for Protocol Buffers (https://developers.google.com/protocol-buffers/).
Definition: Format.h:2662
clang::format::AdditionalKeywords::kw_await
IdentifierInfo * kw_await
Definition: FormatToken.h:975
clang::format::FormatStyle::BraceWrappingFlags::AfterUnion
bool AfterUnion
Wrap union definitions.
Definition: Format.h:1636
clang::format::AdditionalKeywords::kw_extends
IdentifierInfo * kw_extends
Definition: FormatToken.h:996
clang::format::AdditionalKeywords::kw_async
IdentifierInfo * kw_async
Definition: FormatToken.h:974
clang::format::FormatToken::Previous
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:430
clang::format::FormatStyle::BraceWrappingFlags::BeforeElse
bool BeforeElse
Wrap before else.
Definition: Format.h:1680
clang::format::CompoundStatementIndenter::~CompoundStatementIndenter
~CompoundStatementIndenter()
Definition: UnwrappedLineParser.cpp:187
clang::format::AdditionalKeywords::kw_qslots
IdentifierInfo * kw_qslots
Definition: FormatToken.h:1020
clang::format::FormatStyle::NI_All
@ NI_All
Indent in all namespaces.
Definition: Format.h:2788
clang::format::AdditionalKeywords::kw_NS_ENUM
IdentifierInfo * kw_NS_ENUM
Definition: FormatToken.h:966
Node
DynTypedNode Node
Definition: ASTMatchFinder.cpp:67
clang::format::FormatToken::Tok
Token Tok
The Token.
Definition: FormatToken.h:221
clang::format::UnwrappedLineConsumer
Definition: UnwrappedLineParser.h:66
clang::format::AdditionalKeywords::kw_throws
IdentifierInfo * kw_throws
Definition: FormatToken.h:1003
clang::format::ShouldBreakBeforeBrace
static bool ShouldBreakBeforeBrace(const FormatStyle &Style, const FormatToken &InitialToken)
Definition: UnwrappedLineParser.cpp:710
clang::format::FormatStyle::LK_TextProto
@ LK_TextProto
Should be used for Protocol Buffer messages in text format (https://developers.google....
Definition: Format.h:2667
clang::format::continuesLineCommentSection
static bool continuesLineCommentSection(const FormatToken &FormatTok, const UnwrappedLine &Line, const llvm::Regex &CommentPragmasRegex)
Definition: UnwrappedLineParser.cpp:3102
clang::format::FormatStyle::IndentExternBlock
IndentExternBlockStyle IndentExternBlock
IndentExternBlockStyle is the type of indenting of extern blocks.
Definition: Format.h:2502
clang::format::AdditionalKeywords::kw_in
IdentifierInfo * kw_in
Definition: FormatToken.h:960
clang::format::CompoundStatementIndenter::CompoundStatementIndenter
CompoundStatementIndenter(UnwrappedLineParser *Parser, const FormatStyle &Style, unsigned &LineLevel)
Definition: UnwrappedLineParser.cpp:174
clang::format::AdditionalKeywords::kw_var
IdentifierInfo * kw_var
Definition: FormatToken.h:990
clang::format::FormatStyle::NamespaceIndentation
NamespaceIndentationKind NamespaceIndentation
The indentation used for namespaces.
Definition: Format.h:2793
clang::format::AdditionalKeywords::kw_of
IdentifierInfo * kw_of
Definition: FormatToken.h:961
clang::format::FormatStyle::BreakBeforeBraces
BraceBreakingStyle BreakBeforeBraces
The brace breaking style to use.
Definition: Format.h:1488
clang::format::isGoogScope
static bool isGoogScope(const UnwrappedLine &Line)
Definition: UnwrappedLineParser.cpp:674
clang::format::FormatStyle::BraceWrappingFlags::AfterObjCDeclaration
bool AfterObjCDeclaration
Wrap ObjC definitions (interfaces, implementations...).
Definition: Format.h:1608
clang::format::FormatStyle::BWACS_Always
@ BWACS_Always
Always wrap braces after a control statement.
Definition: Format.h:1521
clang::format::AdditionalKeywords::kw_finally
IdentifierInfo * kw_finally
Definition: FormatToken.h:977
clang::format::AdditionalKeywords::kw_override
IdentifierInfo * kw_override
Definition: FormatToken.h:959
clang::format::BK_Block
@ BK_Block
Definition: FormatToken.h:136
clang::minimize_source_to_dependency_directives::pp_include
@ pp_include
Definition: DependencyDirectivesSourceMinimizer.h:35
clang::minimize_source_to_dependency_directives::pp_elifndef
@ pp_elifndef
Definition: DependencyDirectivesSourceMinimizer.h:48
clang::format::FormatStyle::BraceWrappingFlags::IndentBraces
bool IndentBraces
Indent the wrapped braces themselves.
Definition: Format.h:1713
clang::format::FormatTokenSource::setPosition
virtual FormatToken * setPosition(unsigned Position)=0
clang::format::AnnotatedLine::InPPDirective
bool InPPDirective
Definition: TokenAnnotator.h:132
clang::format::FormatStyle::IndentGotoLabels
bool IndentGotoLabels
Indent goto labels.
Definition: Format.h:2427
Line
const AnnotatedLine * Line
Definition: UsingDeclarationsSorter.cpp:68
clang::format::CompoundStatementIndenter::CompoundStatementIndenter
CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, bool WrapBrace, bool IndentBrace)
Definition: UnwrappedLineParser.cpp:179
clang::format::FormatStyle::LK_JavaScript
@ LK_JavaScript
Should be used for JavaScript.
Definition: Format.h:2655
clang::minimize_source_to_dependency_directives::pp_else
@ pp_else
Definition: DependencyDirectivesSourceMinimizer.h:49
clang::format::FormatStyle::LK_Java
@ LK_Java
Should be used for Java.
Definition: Format.h:2653
clang::format::hash_combine
static void hash_combine(std::size_t &seed, const T &v)
Definition: UnwrappedLineParser.cpp:579
clang::format::FormatStyle::BraceWrappingFlags::BeforeCatch
bool BeforeCatch
Wrap before catch.
Definition: Format.h:1666
clang::Token::isLiteral
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:115
UnwrappedLineParser.h
clang::format::FormatStyle::BraceWrapping
BraceWrappingFlags BraceWrapping
Control of individual brace wrapping cases.
Definition: Format.h:1763
clang::format::UnwrappedLineParser::ScopedLineState
friend class ScopedLineState
Definition: UnwrappedLineParser.h:296
clang::format::FormatStyle::BS_Whitesmiths
@ BS_Whitesmiths
Like Allman but always indent braces and line up code with braces.
Definition: Format.h:1371
clang::tok::TokenKind
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
clang::format::FormatStyle::isCSharp
bool isCSharp() const
Definition: Format.h:2670
P
StringRef P
Definition: ASTMatchersInternal.cpp:563
false
#define false
Definition: stdbool.h:17
clang::format::FormatStyle::Language
LanguageKind Language
Language, this format style is targeted at.
Definition: Format.h:2675
clang::format::AdditionalKeywords::kw_let
IdentifierInfo * kw_let
Definition: FormatToken.h:984
llvm::ArrayRef
Definition: LLVM.h:34
clang::format::AdditionalKeywords::kw_get
IdentifierInfo * kw_get
Definition: FormatToken.h:980
clang::format::FormatToken::is
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:452
clang::format::AdditionalKeywords::kw_set
IdentifierInfo * kw_set
Definition: FormatToken.h:987
clang::format::FormatToken::isOneOf
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:464
clang::format::ScopedLineState::ScopedLineState
ScopedLineState(UnwrappedLineParser &Parser, bool SwitchToPreprocessorLines=false)
Definition: UnwrappedLineParser.cpp:141
clang::format::FormatStyle::NI_Inner
@ NI_Inner
Indent only in inner namespaces (nested in other namespaces).
Definition: Format.h:2778
clang::format::AdditionalKeywords::kw_function
IdentifierInfo * kw_function
Definition: FormatToken.h:979
clang::format::FormatStyle::IndentRequires
bool IndentRequires
Indent the requires clause in a template.
Definition: Format.h:2521
clang::format::UnwrappedLineConsumer::consumeUnwrappedLine
virtual void consumeUnwrappedLine(const UnwrappedLine &Line)=0
clang::format::FormatTokenSource::getPosition
virtual unsigned getPosition()=0
clang::format::UnwrappedLineNode
Definition: UnwrappedLineParser.h:300
clang::format::AdditionalKeywords::kw_interface
IdentifierInfo * kw_interface
Definition: FormatToken.h:999
clang::format::FormatStyle::BraceWrappingFlags::AfterClass
bool AfterClass
Wrap class definitions.
Definition: Format.h:1557
clang
Definition: CalledOnceCheck.h:17
Text
StringRef Text
Definition: Format.cpp:2334
clang::format::UnwrappedLineConsumer::finishRun
virtual void finishRun()=0
clang::format::AdditionalKeywords::kw_yield
IdentifierInfo * kw_yield
Definition: FormatToken.h:991
clang::format::AdditionalKeywords::kw___except
IdentifierInfo * kw___except
Definition: FormatToken.h:968
clang::format::FormatStyle::PPDIS_None
@ PPDIS_None
Does not indent any directives.
Definition: Format.h:2439
clang::prec::Level
Level
Definition: OperatorPrecedence.h:26
clang::format::FormatStyle::BraceWrappingFlags::AfterNamespace
bool AfterNamespace
Wrap namespace definitions.
Definition: Format.h:1604
clang::format::FormatStyle::isCpp
bool isCpp() const
Definition: Format.h:2669
clang::format::AdditionalKeywords::kw_NS_CLOSED_ENUM
IdentifierInfo * kw_NS_CLOSED_ENUM
Definition: FormatToken.h:965
unsigned
clang::minimize_source_to_dependency_directives::pp_endif
@ pp_endif
Definition: DependencyDirectivesSourceMinimizer.h:50
clang::format::isC78Type
static bool isC78Type(const FormatToken &Tok)
Definition: UnwrappedLineParser.cpp:1009
clang::format::AdditionalKeywords::kw_CF_OPTIONS
IdentifierInfo * kw_CF_OPTIONS
Definition: FormatToken.h:964
clang::format::printDebugInfo
static LLVM_ATTRIBUTE_UNUSED void printDebugInfo(const UnwrappedLine &Line, StringRef Prefix="")
Definition: UnwrappedLineParser.cpp:3033
clang::format::FormatStyle::BraceWrappingFlags::BeforeWhile
bool BeforeWhile
Wrap before while.
Definition: Format.h:1711
FormatToken.h
v
do v
Definition: arm_acle.h:76
clang::format::FormatStyle::IEBS_Indent
@ IEBS_Indent
Indents extern blocks.
Definition: Format.h:2497
clang::format::AdditionalKeywords::kw_qsignals
IdentifierInfo * kw_qsignals
Definition: FormatToken.h:1018
clang::format::AdditionalKeywords::kw_where
IdentifierInfo * kw_where
Definition: FormatToken.h:1053
clang::comments::tok::eof
@ eof
Definition: CommentLexer.h:33
llvm::SmallVectorImpl
Definition: LLVM.h:39
clang::format::mustBeJSIdentOrValue
static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
Definition: UnwrappedLineParser.cpp:980
Indenter
ContinuationIndenter * Indenter
Definition: UnwrappedLineFormatter.cpp:883
clang::format::FormatStyle::IndentPPDirectives
PPDirectiveIndentStyle IndentPPDirectives
The preprocessor directive indenting style to use.
Definition: Format.h:2462
Previous
StateNode * Previous
Definition: UnwrappedLineFormatter.cpp:988
clang::format::tokenCanStartNewLine
static bool tokenCanStartNewLine(const FormatToken &Tok)
Definition: UnwrappedLineParser.cpp:939
clang::format::isIIFE
static bool isIIFE(const UnwrappedLine &Line, const AdditionalKeywords &Keywords)
Definition: UnwrappedLineParser.cpp:692
clang::format::AdditionalKeywords::kw_slots
IdentifierInfo * kw_slots
Definition: FormatToken.h:1019
clang::diag::kind
unsigned kind
All of the diagnostics that can be emitted by the frontend.
Definition: DiagnosticIDs.h:62
clang::format::FormatStyle::BraceWrappingFlags::AfterCaseLabel
bool AfterCaseLabel
Wrap case labels.
Definition: Format.h:1547
clang::minimize_source_to_dependency_directives::pp_elif
@ pp_elif
Definition: DependencyDirectivesSourceMinimizer.h:46
clang::format::UnwrappedLineParser::UnwrappedLineParser
UnwrappedLineParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, unsigned FirstStartColumn, ArrayRef< FormatToken * > Tokens, UnwrappedLineConsumer &Callback)
Definition: UnwrappedLineParser.cpp:225
clang::format::FormatStyle::PPDIS_BeforeHash
@ PPDIS_BeforeHash
Indents directives before the hash.
Definition: Format.h:2457
clang::format::isC78ParameterDecl
static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, const FormatToken *FuncName)
Definition: UnwrappedLineParser.cpp:1022
clang::format::FormatTokenSource
Definition: UnwrappedLineParser.cpp:28