clang  13.0.0git
UnwrappedLineParser.cpp
Go to the documentation of this file.
1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #include <algorithm>
22 
23 #define DEBUG_TYPE "format-parser"
24 
25 namespace clang {
26 namespace format {
27 
29 public:
30  virtual ~FormatTokenSource() {}
31  virtual FormatToken *getNextToken() = 0;
32 
33  virtual unsigned getPosition() = 0;
34  virtual FormatToken *setPosition(unsigned Position) = 0;
35 };
36 
37 namespace {
38 
39 class ScopedDeclarationState {
40 public:
41  ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
42  bool MustBeDeclaration)
43  : Line(Line), Stack(Stack) {
44  Line.MustBeDeclaration = MustBeDeclaration;
45  Stack.push_back(MustBeDeclaration);
46  }
47  ~ScopedDeclarationState() {
48  Stack.pop_back();
49  if (!Stack.empty())
50  Line.MustBeDeclaration = Stack.back();
51  else
52  Line.MustBeDeclaration = true;
53  }
54 
55 private:
56  UnwrappedLine &Line;
57  std::vector<bool> &Stack;
58 };
59 
60 static bool isLineComment(const FormatToken &FormatTok) {
61  return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
62 }
63 
64 // Checks if \p FormatTok is a line comment that continues the line comment
65 // \p Previous. The original column of \p MinColumnToken is used to determine
66 // whether \p FormatTok is indented enough to the right to continue \p Previous.
67 static bool continuesLineComment(const FormatToken &FormatTok,
68  const FormatToken *Previous,
69  const FormatToken *MinColumnToken) {
70  if (!Previous || !MinColumnToken)
71  return false;
72  unsigned MinContinueColumn =
73  MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
74  return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
75  isLineComment(*Previous) &&
76  FormatTok.OriginalColumn >= MinContinueColumn;
77 }
78 
79 class ScopedMacroState : public FormatTokenSource {
80 public:
81  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
82  FormatToken *&ResetToken)
83  : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
84  PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
85  Token(nullptr), PreviousToken(nullptr) {
86  FakeEOF.Tok.startToken();
87  FakeEOF.Tok.setKind(tok::eof);
88  TokenSource = this;
89  Line.Level = 0;
90  Line.InPPDirective = true;
91  }
92 
93  ~ScopedMacroState() override {
94  TokenSource = PreviousTokenSource;
95  ResetToken = Token;
96  Line.InPPDirective = false;
97  Line.Level = PreviousLineLevel;
98  }
99 
100  FormatToken *getNextToken() override {
101  // The \c UnwrappedLineParser guards against this by never calling
102  // \c getNextToken() after it has encountered the first eof token.
103  assert(!eof());
104  PreviousToken = Token;
105  Token = PreviousTokenSource->getNextToken();
106  if (eof())
107  return &FakeEOF;
108  return Token;
109  }
110 
111  unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
112 
113  FormatToken *setPosition(unsigned Position) override {
114  PreviousToken = nullptr;
115  Token = PreviousTokenSource->setPosition(Position);
116  return Token;
117  }
118 
119 private:
120  bool eof() {
121  return Token && Token->HasUnescapedNewline &&
122  !continuesLineComment(*Token, PreviousToken,
123  /*MinColumnToken=*/PreviousToken);
124  }
125 
126  FormatToken FakeEOF;
127  UnwrappedLine &Line;
128  FormatTokenSource *&TokenSource;
129  FormatToken *&ResetToken;
130  unsigned PreviousLineLevel;
131  FormatTokenSource *PreviousTokenSource;
132 
133  FormatToken *Token;
134  FormatToken *PreviousToken;
135 };
136 
137 } // end anonymous namespace
138 
140 public:
142  bool SwitchToPreprocessorLines = false)
143  : Parser(Parser), OriginalLines(Parser.CurrentLines) {
144  if (SwitchToPreprocessorLines)
145  Parser.CurrentLines = &Parser.PreprocessorDirectives;
146  else if (!Parser.Line->Tokens.empty())
147  Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
148  PreBlockLine = std::move(Parser.Line);
149  Parser.Line = std::make_unique<UnwrappedLine>();
150  Parser.Line->Level = PreBlockLine->Level;
151  Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
152  }
153 
155  if (!Parser.Line->Tokens.empty()) {
156  Parser.addUnwrappedLine();
157  }
158  assert(Parser.Line->Tokens.empty());
159  Parser.Line = std::move(PreBlockLine);
160  if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
161  Parser.MustBreakBeforeNextToken = true;
162  Parser.CurrentLines = OriginalLines;
163  }
164 
165 private:
167 
168  std::unique_ptr<UnwrappedLine> PreBlockLine;
169  SmallVectorImpl<UnwrappedLine> *OriginalLines;
170 };
171 
173 public:
175  const FormatStyle &Style, unsigned &LineLevel)
176  : CompoundStatementIndenter(Parser, LineLevel,
177  Style.BraceWrapping.AfterControlStatement,
178  Style.BraceWrapping.IndentBraces) {}
180  bool WrapBrace, bool IndentBrace)
181  : LineLevel(LineLevel), OldLineLevel(LineLevel) {
182  if (WrapBrace)
183  Parser->addUnwrappedLine();
184  if (IndentBrace)
185  ++LineLevel;
186  }
187  ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
188 
189 private:
190  unsigned &LineLevel;
191  unsigned OldLineLevel;
192 };
193 
194 namespace {
195 
196 class IndexedTokenSource : public FormatTokenSource {
197 public:
198  IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
199  : Tokens(Tokens), Position(-1) {}
200 
201  FormatToken *getNextToken() override {
202  ++Position;
203  return Tokens[Position];
204  }
205 
206  unsigned getPosition() override {
207  assert(Position >= 0);
208  return Position;
209  }
210 
211  FormatToken *setPosition(unsigned P) override {
212  Position = P;
213  return Tokens[Position];
214  }
215 
216  void reset() { Position = -1; }
217 
218 private:
219  ArrayRef<FormatToken *> Tokens;
220  int Position;
221 };
222 
223 } // end anonymous namespace
224 
226  const AdditionalKeywords &Keywords,
227  unsigned FirstStartColumn,
229  UnwrappedLineConsumer &Callback)
230  : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
231  CurrentLines(&Lines), Style(Style), Keywords(Keywords),
232  CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
233  Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
234  IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
235  ? IG_Rejected
236  : IG_Inited),
237  IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
238 
239 void UnwrappedLineParser::reset() {
240  PPBranchLevel = -1;
241  IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
242  ? IG_Rejected
243  : IG_Inited;
244  IncludeGuardToken = nullptr;
245  Line.reset(new UnwrappedLine);
246  CommentsBeforeNextToken.clear();
247  FormatTok = nullptr;
248  MustBreakBeforeNextToken = false;
249  PreprocessorDirectives.clear();
250  CurrentLines = &Lines;
251  DeclarationScopeStack.clear();
252  PPStack.clear();
253  Line->FirstStartColumn = FirstStartColumn;
254 }
255 
// NOTE(review): the line carrying this function's signature is absent from
// this listing (the embedded numbering jumps from 255 to 257); restore it from
// the upstream file before compiling.
//
// Drives one or more complete parsing runs over AllTokens. Each run resets the
// parser, parses the file, appends a final line holding the eof token and
// passes all collected lines to Callback. The run is repeated for as long as
// PPLevelBranchIndex is non-empty after the bookkeeping at the end of the
// loop body.
257  IndexedTokenSource TokenSource(AllTokens);
258  Line->FirstStartColumn = FirstStartColumn;
259  do {
260  LLVM_DEBUG(llvm::dbgs() << "----\n");
 // Start every run from a clean state and from the first token.
261  reset();
262  Tokens = &TokenSource;
263  TokenSource.reset();
264 
265  readToken();
266  parseFile();
267 
268  // If we found an include guard then all preprocessor directives (other than
269  // the guard) are over-indented by one.
270  if (IncludeGuard == IG_Found)
271  for (auto &Line : Lines)
272  if (Line.InPPDirective && Line.Level > 0)
273  --Line.Level;
274 
275  // Create line with eof token.
276  pushToken(FormatTok);
277  addUnwrappedLine();
278 
 // Hand the lines of this run to the consumer, then discard them.
279  for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
280  E = Lines.end();
281  I != E; ++I) {
282  Callback.consumeUnwrappedLine(*I);
283  }
284  Callback.finishRun();
285  Lines.clear();
 // Drop trailing levels whose branch index has reached the last counted
 // branch; they have nothing further to visit.
286  while (!PPLevelBranchIndex.empty() &&
287  PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
288  PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
289  PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
290  }
 // Select the next branch of the innermost remaining level for the next run.
291  if (!PPLevelBranchIndex.empty()) {
292  ++PPLevelBranchIndex.back();
293  assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
294  assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
295  }
296  } while (!PPLevelBranchIndex.empty());
297 }
298 
299 void UnwrappedLineParser::parseFile() {
300  // The top-level context in a file always has declarations, except for pre-
301  // processor directives and JavaScript files.
302  bool MustBeDeclaration =
303  !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
304  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
305  MustBeDeclaration);
306  if (Style.Language == FormatStyle::LK_TextProto)
307  parseBracedList();
308  else
309  parseLevel(/*HasOpeningBrace=*/false);
310  // Make sure to format the remaining tokens.
311  //
312  // LK_TextProto is special since its top-level is parsed as the body of a
313  // braced list, which does not necessarily have natural line separators such
314  // as a semicolon. Comments after the last entry that have been determined to
315  // not belong to that line, as in:
316  // key: value
317  // // endfile comment
318  // do not have a chance to be put on a line of their own until this point.
319  // Here we add this newline before end-of-file comments.
320  if (Style.Language == FormatStyle::LK_TextProto &&
321  !CommentsBeforeNextToken.empty())
322  addUnwrappedLine();
323  flushComments(true);
324  addUnwrappedLine();
325 }
326 
327 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
328  do {
329  switch (FormatTok->Tok.getKind()) {
330  case tok::l_brace:
331  return;
332  default:
333  if (FormatTok->is(Keywords.kw_where)) {
334  addUnwrappedLine();
335  nextToken();
336  parseCSharpGenericTypeConstraint();
337  break;
338  }
339  nextToken();
340  break;
341  }
342  } while (!eof());
343 }
344 
345 void UnwrappedLineParser::parseCSharpAttribute() {
346  int UnpairedSquareBrackets = 1;
347  do {
348  switch (FormatTok->Tok.getKind()) {
349  case tok::r_square:
350  nextToken();
351  --UnpairedSquareBrackets;
352  if (UnpairedSquareBrackets == 0) {
353  addUnwrappedLine();
354  return;
355  }
356  break;
357  case tok::l_square:
358  ++UnpairedSquareBrackets;
359  nextToken();
360  break;
361  default:
362  nextToken();
363  break;
364  }
365  } while (!eof());
366 }
367 
// Parses a sequence of structural elements until eof or, when
// HasOpeningBrace is true, until the closing brace that ends this level
// (which is left unconsumed for the caller).
368 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
 // Ensures the extra indentation for case labels is applied only once.
369  bool SwitchLabelEncountered = false;
370  do {
 // Macro block begin/end tokens are dispatched as if they were braces.
371  tok::TokenKind kind = FormatTok->Tok.getKind();
372  if (FormatTok->getType() == TT_MacroBlockBegin) {
373  kind = tok::l_brace;
374  } else if (FormatTok->getType() == TT_MacroBlockEnd) {
375  kind = tok::r_brace;
376  }
377 
378  switch (kind) {
379  case tok::comment:
380  nextToken();
381  addUnwrappedLine();
382  break;
383  case tok::l_brace:
384  // FIXME: Add parameter whether this can happen - if this happens, we must
385  // be in a non-declaration context.
386  if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
387  continue;
388  parseBlock(/*MustBeDeclaration=*/false);
389  addUnwrappedLine();
390  break;
391  case tok::r_brace:
 // With an opening brace this '}' closes the level; otherwise it is
 // consumed as a line of its own.
392  if (HasOpeningBrace)
393  return;
394  nextToken();
395  addUnwrappedLine();
396  break;
397  case tok::kw_default: {
 // Peek past comments to see what follows 'default', then rewind.
398  unsigned StoredPosition = Tokens->getPosition();
399  FormatToken *Next;
400  do {
401  Next = Tokens->getNextToken();
402  } while (Next && Next->is(tok::comment));
403  FormatTok = Tokens->setPosition(StoredPosition);
404  if (Next && Next->isNot(tok::colon)) {
405  // default not followed by ':' is not a case label; treat it like
406  // an identifier.
407  parseStructuralElement();
408  break;
409  }
410  // Else, if it is 'default:', fall through to the case handling.
411  LLVM_FALLTHROUGH;
412  }
413  case tok::kw_case:
414  if (Style.Language == FormatStyle::LK_JavaScript &&
415  Line->MustBeDeclaration) {
416  // A 'case: string' style field declaration.
417  parseStructuralElement();
418  break;
419  }
 // The first label of this level raises the line level when case labels
 // are indented, or inside a preprocessor directive at level 1.
420  if (!SwitchLabelEncountered &&
421  (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
422  ++Line->Level;
423  SwitchLabelEncountered = true;
424  parseStructuralElement();
425  break;
426  case tok::l_square:
 // In C# a leading '[' is handed to the attribute parser; other
 // languages fall through to the generic handling.
427  if (Style.isCSharp()) {
428  nextToken();
429  parseCSharpAttribute();
430  break;
431  }
432  LLVM_FALLTHROUGH;
433  default:
434  parseStructuralElement();
435  break;
436  }
437  } while (!eof());
438 }
439 
// Looks ahead from the current '{' and assigns a block kind (BK_Block or
// BK_BracedInit) to the braces it encounters, based on the tokens around each
// closing brace. The scan ends at eof or once the brace stack is empty; the
// token source is then rewound to where it was on entry.
//
// ExpectClassBody: when true, a ';' after the closing brace of the outermost
// brace pair does not count as evidence for a braced init list.
440 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
441  // We'll parse forward through the tokens until we hit
442  // a closing brace or eof - note that getNextToken() will
443  // parse macros, so this will magically work inside macro
444  // definitions, too.
445  unsigned StoredPosition = Tokens->getPosition();
446  FormatToken *Tok = FormatTok;
447  const FormatToken *PrevTok = Tok->Previous;
448  // Keep a stack of positions of lbrace tokens. We will
449  // update information about whether an lbrace starts a
450  // braced init list or a different block during the loop.
451  SmallVector<FormatToken *, 8> LBraceStack;
452  assert(Tok->Tok.is(tok::l_brace));
453  do {
454  // Get next non-comment token.
455  FormatToken *NextTok;
 // NOTE(review): ReadTokens is only incremented within this function; no
 // read of it is visible here.
456  unsigned ReadTokens = 0;
457  do {
458  NextTok = Tokens->getNextToken();
459  ++ReadTokens;
460  } while (NextTok->is(tok::comment));
461 
462  switch (Tok->Tok.getKind()) {
463  case tok::l_brace:
464  if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
465  if (PrevTok->isOneOf(tok::colon, tok::less))
466  // A ':' indicates this code is in a type, or a braced list
467  // following a label in an object literal ({a: {b: 1}}).
468  // A '<' could be an object used in a comparison, but that is nonsense
469  // code (can never return true), so more likely it is a generic type
470  // argument (`X<{a: string; b: number}>`).
471  // The code below could be confused by semicolons between the
472  // individual members in a type member list, which would normally
473  // trigger BK_Block. In both cases, this must be parsed as an inline
474  // braced init.
475  Tok->setBlockKind(BK_BracedInit);
476  else if (PrevTok->is(tok::r_paren))
477  // `) { }` can only occur in function or method declarations in JS.
478  Tok->setBlockKind(BK_Block);
479  } else {
480  Tok->setBlockKind(BK_Unknown);
481  }
482  LBraceStack.push_back(Tok);
483  break;
484  case tok::r_brace:
485  if (LBraceStack.empty())
486  break;
 // Only decide for brace pairs whose kind has not been settled yet.
487  if (LBraceStack.back()->is(BK_Unknown)) {
488  bool ProbablyBracedList = false;
489  if (Style.Language == FormatStyle::LK_Proto) {
490  ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
491  } else {
492  // Using OriginalColumn to distinguish between ObjC methods and
493  // binary operators is a bit hacky.
494  bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
495  NextTok->OriginalColumn == 0;
496 
497  // If there is a comma, semicolon or right paren after the closing
498  // brace, we assume this is a braced initializer list. Note that
499  // regardless how we mark inner braces here, we will overwrite the
500  // BlockKind later if we parse a braced list (where all blocks
501  // inside are by default braced lists), or when we explicitly detect
502  // blocks (for example while parsing lambdas).
503  // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
504  // braced list in JS.
505  ProbablyBracedList =
506  (Style.Language == FormatStyle::LK_JavaScript &&
507  NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
508  Keywords.kw_as)) ||
509  (Style.isCpp() && NextTok->is(tok::l_paren)) ||
510  NextTok->isOneOf(tok::comma, tok::period, tok::colon,
511  tok::r_paren, tok::r_square, tok::l_brace,
512  tok::ellipsis) ||
513  (NextTok->is(tok::identifier) &&
514  !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
515  (NextTok->is(tok::semi) &&
516  (!ExpectClassBody || LBraceStack.size() != 1)) ||
517  (NextTok->isBinaryOperator() && !NextIsObjCMethod);
518  if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
519  // We can have an array subscript after a braced init
520  // list, but C++11 attributes are expected after blocks.
521  NextTok = Tokens->getNextToken();
522  ++ReadTokens;
523  ProbablyBracedList = NextTok->isNot(tok::l_square);
524  }
525  }
 // Mark both the closing brace and its matching opening brace.
526  if (ProbablyBracedList) {
527  Tok->setBlockKind(BK_BracedInit);
528  LBraceStack.back()->setBlockKind(BK_BracedInit);
529  } else {
530  Tok->setBlockKind(BK_Block);
531  LBraceStack.back()->setBlockKind(BK_Block);
532  }
533  }
534  LBraceStack.pop_back();
535  break;
536  case tok::identifier:
 // Plain identifiers are ignored; statement macros are handled like the
 // statement-indicating tokens below.
537  if (!Tok->is(TT_StatementMacro))
538  break;
539  LLVM_FALLTHROUGH;
540  case tok::at:
541  case tok::semi:
542  case tok::kw_if:
543  case tok::kw_while:
544  case tok::kw_for:
545  case tok::kw_switch:
546  case tok::kw_try:
547  case tok::kw___try:
 // Any of these tokens inside a still-undecided brace marks it as a block.
548  if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
549  LBraceStack.back()->setBlockKind(BK_Block);
550  break;
551  default:
552  break;
553  }
554  PrevTok = Tok;
555  Tok = NextTok;
556  } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
557 
558  // Assume other blocks for all unclosed opening braces.
559  for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
560  if (LBraceStack[i]->is(BK_Unknown))
561  LBraceStack[i]->setBlockKind(BK_Block);
562  }
563 
 // Undo the lookahead: restore the token source and the current token.
564  FormatTok = Tokens->setPosition(StoredPosition);
565 }
566 
/// Folds the \c std::hash of \p v into \p seed.
///
/// \param seed running hash value, updated in place.
/// \param v    value whose hash is mixed into \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= mixed;
}
572 
573 size_t UnwrappedLineParser::computePPHash() const {
574  size_t h = 0;
575  for (const auto &i : PPStack) {
576  hash_combine(h, size_t(i.Kind));
577  hash_combine(h, i.Line);
578  }
579  return h;
580 }
581 
582 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
583  bool MunchSemi,
584  bool UnindentWhitesmithsBraces) {
585  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
586  "'{' or macro block token expected");
587  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
588  FormatTok->setBlockKind(BK_Block);
589 
590  // For Whitesmiths mode, jump to the next level prior to skipping over the
591  // braces.
592  if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
593  ++Line->Level;
594 
595  size_t PPStartHash = computePPHash();
596 
597  unsigned InitialLevel = Line->Level;
598  nextToken(/*LevelDifference=*/AddLevels);
599 
600  if (MacroBlock && FormatTok->is(tok::l_paren))
601  parseParens();
602 
603  size_t NbPreprocessorDirectives =
604  CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
605  addUnwrappedLine();
606  size_t OpeningLineIndex =
607  CurrentLines->empty()
609  : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
610 
611  // Whitesmiths is weird here. The brace needs to be indented for the namespace
612  // block, but the block itself may not be indented depending on the style
613  // settings. This allows the format to back up one level in those cases.
614  if (UnindentWhitesmithsBraces)
615  --Line->Level;
616 
617  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
618  MustBeDeclaration);
619  if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
620  Line->Level += AddLevels;
621  parseLevel(/*HasOpeningBrace=*/true);
622 
623  if (eof())
624  return;
625 
626  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
627  : !FormatTok->is(tok::r_brace)) {
628  Line->Level = InitialLevel;
629  FormatTok->setBlockKind(BK_Block);
630  return;
631  }
632 
633  size_t PPEndHash = computePPHash();
634 
635  // Munch the closing brace.
636  nextToken(/*LevelDifference=*/-AddLevels);
637 
638  if (MacroBlock && FormatTok->is(tok::l_paren))
639  parseParens();
640 
641  if (FormatTok->is(tok::arrow)) {
642  // Following the } we can find a trailing return type arrow
643  // as part of an implicit conversion constraint.
644  nextToken();
645  parseStructuralElement();
646  }
647 
648  if (MunchSemi && FormatTok->Tok.is(tok::semi))
649  nextToken();
650 
651  Line->Level = InitialLevel;
652 
653  if (PPStartHash == PPEndHash) {
654  Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
655  if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
656  // Update the opening line to add the forward reference as well
657  (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
658  CurrentLines->size() - 1;
659  }
660  }
661 }
662 
663 static bool isGoogScope(const UnwrappedLine &Line) {
664  // FIXME: Closure-library specific stuff should not be hard-coded but be
665  // configurable.
666  if (Line.Tokens.size() < 4)
667  return false;
668  auto I = Line.Tokens.begin();
669  if (I->Tok->TokenText != "goog")
670  return false;
671  ++I;
672  if (I->Tok->isNot(tok::period))
673  return false;
674  ++I;
675  if (I->Tok->TokenText != "scope")
676  return false;
677  ++I;
678  return I->Tok->is(tok::l_paren);
679 }
680 
681 static bool isIIFE(const UnwrappedLine &Line,
682  const AdditionalKeywords &Keywords) {
683  // Look for the start of an immediately invoked anonymous function.
684  // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
685  // This is commonly done in JavaScript to create a new, anonymous scope.
686  // Example: (function() { ... })()
687  if (Line.Tokens.size() < 3)
688  return false;
689  auto I = Line.Tokens.begin();
690  if (I->Tok->isNot(tok::l_paren))
691  return false;
692  ++I;
693  if (I->Tok->isNot(Keywords.kw_function))
694  return false;
695  ++I;
696  return I->Tok->is(tok::l_paren);
697 }
698 
699 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
700  const FormatToken &InitialToken) {
701  if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
702  return Style.BraceWrapping.AfterNamespace;
703  if (InitialToken.is(tok::kw_class))
704  return Style.BraceWrapping.AfterClass;
705  if (InitialToken.is(tok::kw_union))
706  return Style.BraceWrapping.AfterUnion;
707  if (InitialToken.is(tok::kw_struct))
708  return Style.BraceWrapping.AfterStruct;
709  return false;
710 }
711 
712 void UnwrappedLineParser::parseChildBlock() {
713  FormatTok->setBlockKind(BK_Block);
714  nextToken();
715  {
716  bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
717  (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
718  ScopedLineState LineState(*this);
719  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
720  /*MustBeDeclaration=*/false);
721  Line->Level += SkipIndent ? 0 : 1;
722  parseLevel(/*HasOpeningBrace=*/true);
723  flushComments(isOnNewLine(*FormatTok));
724  Line->Level -= SkipIndent ? 0 : 1;
725  }
726  nextToken();
727 }
728 
729 void UnwrappedLineParser::parsePPDirective() {
730  assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
731  ScopedMacroState MacroState(*Line, Tokens, FormatTok);
732 
733  nextToken();
734 
735  if (!FormatTok->Tok.getIdentifierInfo()) {
736  parsePPUnknown();
737  return;
738  }
739 
740  switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
741  case tok::pp_define:
742  parsePPDefine();
743  return;
744  case tok::pp_if:
745  parsePPIf(/*IfDef=*/false);
746  break;
747  case tok::pp_ifdef:
748  case tok::pp_ifndef:
749  parsePPIf(/*IfDef=*/true);
750  break;
751  case tok::pp_else:
752  parsePPElse();
753  break;
754  case tok::pp_elif:
755  parsePPElIf();
756  break;
757  case tok::pp_endif:
758  parsePPEndIf();
759  break;
760  default:
761  parsePPUnknown();
762  break;
763  }
764 }
765 
766 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
767  size_t Line = CurrentLines->size();
768  if (CurrentLines == &PreprocessorDirectives)
769  Line += Lines.size();
770 
771  if (Unreachable ||
772  (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
773  PPStack.push_back({PP_Unreachable, Line});
774  else
775  PPStack.push_back({PP_Conditional, Line});
776 }
777 
778 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
779  ++PPBranchLevel;
780  assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
781  if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
782  PPLevelBranchIndex.push_back(0);
783  PPLevelBranchCount.push_back(0);
784  }
785  PPChainBranchIndex.push(0);
786  bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
787  conditionalCompilationCondition(Unreachable || Skip);
788 }
789 
790 void UnwrappedLineParser::conditionalCompilationAlternative() {
791  if (!PPStack.empty())
792  PPStack.pop_back();
793  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
794  if (!PPChainBranchIndex.empty())
795  ++PPChainBranchIndex.top();
796  conditionalCompilationCondition(
797  PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
798  PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
799 }
800 
801 void UnwrappedLineParser::conditionalCompilationEnd() {
802  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
803  if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
804  if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
805  PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
806  }
807  }
808  // Guard against #endif's without #if.
809  if (PPBranchLevel > -1)
810  --PPBranchLevel;
811  if (!PPChainBranchIndex.empty())
812  PPChainBranchIndex.pop();
813  if (!PPStack.empty())
814  PPStack.pop_back();
815 }
816 
817 void UnwrappedLineParser::parsePPIf(bool IfDef) {
818  bool IfNDef = FormatTok->is(tok::pp_ifndef);
819  nextToken();
820  bool Unreachable = false;
821  if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
822  Unreachable = true;
823  if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
824  Unreachable = true;
825  conditionalCompilationStart(Unreachable);
826  FormatToken *IfCondition = FormatTok;
827  // If there's a #ifndef on the first line, and the only lines before it are
828  // comments, it could be an include guard.
829  bool MaybeIncludeGuard = IfNDef;
830  if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
831  for (auto &Line : Lines) {
832  if (!Line.Tokens.front().Tok->is(tok::comment)) {
833  MaybeIncludeGuard = false;
834  IncludeGuard = IG_Rejected;
835  break;
836  }
837  }
838  --PPBranchLevel;
839  parsePPUnknown();
840  ++PPBranchLevel;
841  if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
842  IncludeGuard = IG_IfNdefed;
843  IncludeGuardToken = IfCondition;
844  }
845 }
846 
847 void UnwrappedLineParser::parsePPElse() {
848  // If a potential include guard has an #else, it's not an include guard.
849  if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
850  IncludeGuard = IG_Rejected;
851  conditionalCompilationAlternative();
852  if (PPBranchLevel > -1)
853  --PPBranchLevel;
854  parsePPUnknown();
855  ++PPBranchLevel;
856 }
857 
858 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
859 
860 void UnwrappedLineParser::parsePPEndIf() {
861  conditionalCompilationEnd();
862  parsePPUnknown();
863  // If the #endif of a potential include guard is the last thing in the file,
864  // then we found an include guard.
865  unsigned TokenPosition = Tokens->getPosition();
866  FormatToken *PeekNext = AllTokens[TokenPosition];
867  if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
868  PeekNext->is(tok::eof) &&
869  Style.IndentPPDirectives != FormatStyle::PPDIS_None)
870  IncludeGuard = IG_Found;
871 }
872 
873 void UnwrappedLineParser::parsePPDefine() {
874  nextToken();
875 
876  if (!FormatTok->Tok.getIdentifierInfo()) {
877  IncludeGuard = IG_Rejected;
878  IncludeGuardToken = nullptr;
879  parsePPUnknown();
880  return;
881  }
882 
883  if (IncludeGuard == IG_IfNdefed &&
884  IncludeGuardToken->TokenText == FormatTok->TokenText) {
885  IncludeGuard = IG_Defined;
886  IncludeGuardToken = nullptr;
887  for (auto &Line : Lines) {
888  if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
889  IncludeGuard = IG_Rejected;
890  break;
891  }
892  }
893  }
894 
895  nextToken();
896  if (FormatTok->Tok.getKind() == tok::l_paren &&
897  FormatTok->WhitespaceRange.getBegin() ==
898  FormatTok->WhitespaceRange.getEnd()) {
899  parseParens();
900  }
901  if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
902  Line->Level += PPBranchLevel + 1;
903  addUnwrappedLine();
904  ++Line->Level;
905 
906  // Errors during a preprocessor directive can only affect the layout of the
907  // preprocessor directive, and thus we ignore them. An alternative approach
908  // would be to use the same approach we use on the file level (no
909  // re-indentation if there was a structural error) within the macro
910  // definition.
911  parseFile();
912 }
913 
914 void UnwrappedLineParser::parsePPUnknown() {
915  do {
916  nextToken();
917  } while (!eof());
918  if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
919  Line->Level += PPBranchLevel + 1;
920  addUnwrappedLine();
921 }
922 
923 // Here we exclude certain tokens that are not usually the first token in an
924 // unwrapped line. This is used in attempt to distinguish macro calls without
925 // trailing semicolons from other constructs split to several lines.
926 static bool tokenCanStartNewLine(const FormatToken &Tok) {
927  // Semicolon can be a null-statement, l_square can be a start of a macro or
928  // a C++11 attribute, but this doesn't seem to be common.
929  return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
930  Tok.isNot(TT_AttributeSquare) &&
931  // Tokens that can only be used as binary operators and a part of
932  // overloaded operator names.
933  Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
934  Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
935  Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
936  Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
937  Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
938  Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
939  Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
940  Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
941  Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
942  Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
943  Tok.isNot(tok::lesslessequal) &&
944  // Colon is used in labels, base class lists, initializer lists,
945  // range-based for loops, ternary operator, but should never be the
946  // first token in an unwrapped line.
947  Tok.isNot(tok::colon) &&
948  // 'noexcept' is a trailing annotation.
949  Tok.isNot(tok::kw_noexcept);
950 }
951 
952 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
953  const FormatToken *FormatTok) {
954  // FIXME: This returns true for C/C++ keywords like 'struct'.
955  return FormatTok->is(tok::identifier) &&
956  (FormatTok->Tok.getIdentifierInfo() == nullptr ||
957  !FormatTok->isOneOf(
958  Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
959  Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
960  Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
961  Keywords.kw_let, Keywords.kw_var, tok::kw_const,
962  Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
963  Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
964  Keywords.kw_from));
965 }
966 
967 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
968  const FormatToken *FormatTok) {
969  return FormatTok->Tok.isLiteral() ||
970  FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
971  mustBeJSIdent(Keywords, FormatTok);
972 }
973 
974 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
975 // when encountered after a value (see mustBeJSIdentOrValue).
976 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
977  const FormatToken *FormatTok) {
978  return FormatTok->isOneOf(
979  tok::kw_return, Keywords.kw_yield,
980  // conditionals
981  tok::kw_if, tok::kw_else,
982  // loops
983  tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
984  // switch/case
985  tok::kw_switch, tok::kw_case,
986  // exceptions
987  tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
988  // declaration
989  tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
990  Keywords.kw_async, Keywords.kw_function,
991  // import/export
992  Keywords.kw_import, tok::kw_export);
993 }
994 
995 // readTokenWithJavaScriptASI reads the next token and terminates the current
996 // line if JavaScript Automatic Semicolon Insertion must
997 // happen between the current token and the next token.
998 //
999 // This method is conservative - it cannot cover all edge cases of JavaScript,
1000 // but only aims to correctly handle certain well known cases. It *must not*
1001 // return true in speculative cases.
1002 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1003  FormatToken *Previous = FormatTok;
1004  readToken();
1005  FormatToken *Next = FormatTok;
1006 
1007  bool IsOnSameLine =
1008  CommentsBeforeNextToken.empty()
1009  ? Next->NewlinesBefore == 0
1010  : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1011  if (IsOnSameLine)
1012  return;
1013 
1014  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1015  bool PreviousStartsTemplateExpr =
1016  Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1017  if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1018  // If the line contains an '@' sign, the previous token might be an
1019  // annotation, which can precede another identifier/value.
1020  bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
1021  [](UnwrappedLineNode &LineNode) {
1022  return LineNode.Tok->is(tok::at);
1023  }) != Line->Tokens.end();
1024  if (HasAt)
1025  return;
1026  }
1027  if (Next->is(tok::exclaim) && PreviousMustBeValue)
1028  return addUnwrappedLine();
1029  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1030  bool NextEndsTemplateExpr =
1031  Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1032  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1033  (PreviousMustBeValue ||
1034  Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1035  tok::minusminus)))
1036  return addUnwrappedLine();
1037  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1038  isJSDeclOrStmt(Keywords, Next))
1039  return addUnwrappedLine();
1040 }
1041 
// Parses a single structural element (a statement, declaration, label or
// similar construct) beginning at the current token, which must not be '{'.
//
// The leading switch hands constructs that can be recognized from their first
// token to dedicated parsers and returns. Everything else reaches the
// do/while loop further down, which consumes tokens until something that ends
// the element is seen (for example ';', '}' or a block) or the input ends.
1042 void UnwrappedLineParser::parseStructuralElement() {
1043  assert(!FormatTok->is(tok::l_brace));
      // A TableGen 'include' with an optional string literal forms its own line.
1044  if (Style.Language == FormatStyle::LK_TableGen &&
1045  FormatTok->is(tok::pp_include)) {
1046  nextToken();
1047  if (FormatTok->is(tok::string_literal))
1048  nextToken();
1049  addUnwrappedLine();
1050  return;
1051  }
      // Dispatch on the first token. Cases that 'break' continue with the
      // generic token loop below; cases that 'return' are fully handled here.
1052  switch (FormatTok->Tok.getKind()) {
1053  case tok::kw_asm:
1054  nextToken();
1055  if (FormatTok->is(tok::l_brace)) {
1056  FormatTok->setType(TT_InlineASMBrace);
1057  nextToken();
      // Mark every token up to the matching '}' as finalized.
1058  while (FormatTok && FormatTok->isNot(tok::eof)) {
1059  if (FormatTok->is(tok::r_brace)) {
1060  FormatTok->setType(TT_InlineASMBrace);
1061  nextToken();
1062  addUnwrappedLine();
1063  break;
1064  }
1065  FormatTok->Finalized = true;
1066  nextToken();
1067  }
1068  }
1069  break;
1070  case tok::kw_namespace:
1071  parseNamespace();
1072  return;
1073  case tok::kw_public:
1074  case tok::kw_protected:
1075  case tok::kw_private:
      // In Java, JavaScript and C# these are consumed as ordinary tokens;
      // otherwise they are parsed as access specifiers.
1076  if (Style.Language == FormatStyle::LK_Java ||
1077  Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
1078  nextToken();
1079  else
1080  parseAccessSpecifier();
1081  return;
1082  case tok::kw_if:
1083  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1084  // field/method declaration.
1085  break;
1086  parseIfThenElse();
1087  return;
1088  case tok::kw_for:
1089  case tok::kw_while:
1090  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1091  // field/method declaration.
1092  break;
1093  parseForOrWhileLoop();
1094  return;
1095  case tok::kw_do:
1096  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1097  // field/method declaration.
1098  break;
1099  parseDoWhile();
1100  return;
1101  case tok::kw_switch:
1102  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1103  // 'switch: string' field declaration.
1104  break;
1105  parseSwitch();
1106  return;
1107  case tok::kw_default:
1108  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1109  // 'default: string' field declaration.
1110  break;
1111  nextToken();
1112  if (FormatTok->is(tok::colon)) {
1113  parseLabel();
1114  return;
1115  }
1116  // e.g. "default void f() {}" in a Java interface.
1117  break;
1118  case tok::kw_case:
1119  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1120  // 'case: string' field declaration.
1121  break;
1122  parseCaseLabel();
1123  return;
1124  case tok::kw_try:
1125  case tok::kw___try:
1126  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1127  // field/method declaration.
1128  break;
1129  parseTryCatch();
1130  return;
1131  case tok::kw_extern:
1132  nextToken();
      // Only 'extern "<string>" {' is handled as a block here; any other use of
      // 'extern' continues with the generic token loop.
1133  if (FormatTok->Tok.is(tok::string_literal)) {
1134  nextToken();
1135  if (FormatTok->Tok.is(tok::l_brace)) {
1136  if (!Style.IndentExternBlock) {
1137  if (Style.BraceWrapping.AfterExternBlock) {
1138  addUnwrappedLine();
1139  }
1140  unsigned AddLevels = Style.BraceWrapping.AfterExternBlock ? 1u : 0u;
1141  parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1142  } else {
1143  unsigned AddLevels =
1144  Style.IndentExternBlock == FormatStyle::IEBS_Indent ? 1u : 0u;
1145  parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1146  }
1147  addUnwrappedLine();
1148  return;
1149  }
1150  }
1151  break;
1152  case tok::kw_export:
1153  if (Style.Language == FormatStyle::LK_JavaScript) {
1154  parseJavaScriptEs6ImportExport();
1155  return;
1156  }
1157  if (!Style.isCpp())
1158  break;
1159  // Handle C++ "(inline|export) namespace".
1160  LLVM_FALLTHROUGH;
1161  case tok::kw_inline:
1162  nextToken();
1163  if (FormatTok->Tok.is(tok::kw_namespace)) {
1164  parseNamespace();
1165  return;
1166  }
1167  break;
1168  case tok::identifier:
1169  if (FormatTok->is(TT_ForEachMacro)) {
1170  parseForOrWhileLoop();
1171  return;
1172  }
1173  if (FormatTok->is(TT_MacroBlockBegin)) {
1174  parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1175  /*MunchSemi=*/false);
1176  return;
1177  }
1178  if (FormatTok->is(Keywords.kw_import)) {
1179  if (Style.Language == FormatStyle::LK_JavaScript) {
1180  parseJavaScriptEs6ImportExport();
1181  return;
1182  }
1183  if (Style.Language == FormatStyle::LK_Proto) {
      // Proto: 'import' ['public'] <string literal> [';'].
1184  nextToken();
1185  if (FormatTok->is(tok::kw_public))
1186  nextToken();
1187  if (!FormatTok->is(tok::string_literal))
1188  return;
1189  nextToken();
1190  if (FormatTok->is(tok::semi))
1191  nextToken();
1192  addUnwrappedLine();
1193  return;
1194  }
1195  }
      // 'signals', 'Q_SIGNALS', 'slots' or 'Q_SLOTS' directly followed by ':'
      // ends the line right after the colon.
1196  if (Style.isCpp() &&
1197  FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1198  Keywords.kw_slots, Keywords.kw_qslots)) {
1199  nextToken();
1200  if (FormatTok->is(tok::colon)) {
1201  nextToken();
1202  addUnwrappedLine();
1203  return;
1204  }
1205  }
1206  if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1207  parseStatementMacro();
1208  return;
1209  }
1210  if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1211  parseNamespace();
1212  return;
1213  }
1214  // In all other cases, parse the declaration.
1215  break;
1216  default:
1217  break;
1218  }
      // Generic path: consume the element token by token. Each case either
      // consumes at least the construct it recognizes and breaks to continue
      // the loop, or finishes the element and returns.
1219  do {
1220  const FormatToken *Previous = FormatTok->Previous;
1221  switch (FormatTok->Tok.getKind()) {
1222  case tok::at:
1223  nextToken();
1224  if (FormatTok->Tok.is(tok::l_brace)) {
1225  nextToken();
1226  parseBracedList();
1227  break;
1228  } else if (Style.Language == FormatStyle::LK_Java &&
1229  FormatTok->is(Keywords.kw_interface)) {
1230  nextToken();
1231  break;
1232  }
      // Otherwise check for an Objective-C '@' keyword.
1233  switch (FormatTok->Tok.getObjCKeywordID()) {
1234  case tok::objc_public:
1235  case tok::objc_protected:
1236  case tok::objc_package:
1237  case tok::objc_private:
1238  return parseAccessSpecifier();
1239  case tok::objc_interface:
1240  case tok::objc_implementation:
1241  return parseObjCInterfaceOrImplementation();
1242  case tok::objc_protocol:
1243  if (parseObjCProtocol())
1244  return;
1245  break;
1246  case tok::objc_end:
1247  return; // Handled by the caller.
1248  case tok::objc_optional:
1249  case tok::objc_required:
1250  nextToken();
1251  addUnwrappedLine();
1252  return;
1253  case tok::objc_autoreleasepool:
1254  nextToken();
1255  if (FormatTok->Tok.is(tok::l_brace)) {
1256  if (Style.BraceWrapping.AfterControlStatement ==
1257  FormatStyle::BWACS_Always)
1258  addUnwrappedLine();
1259  parseBlock(/*MustBeDeclaration=*/false);
1260  }
1261  addUnwrappedLine();
1262  return;
1263  case tok::objc_synchronized:
1264  nextToken();
1265  if (FormatTok->Tok.is(tok::l_paren))
1266  // Skip synchronization object
1267  parseParens();
1268  if (FormatTok->Tok.is(tok::l_brace)) {
1269  if (Style.BraceWrapping.AfterControlStatement ==
1270  FormatStyle::BWACS_Always)
1271  addUnwrappedLine();
1272  parseBlock(/*MustBeDeclaration=*/false);
1273  }
1274  addUnwrappedLine();
1275  return;
1276  case tok::objc_try:
1277  // This branch isn't strictly necessary (the kw_try case below would
1278  // do this too after the tok::at is parsed above). But be explicit.
1279  parseTryCatch();
1280  return;
1281  default:
1282  break;
1283  }
1284  break;
1285  case tok::kw_concept:
1286  parseConcept();
1287  break;
1288  case tok::kw_requires:
1289  parseRequires();
1290  break;
1291  case tok::kw_enum:
1292  // Ignore if this is part of "template <enum ...".
1293  if (Previous && Previous->is(tok::less)) {
1294  nextToken();
1295  break;
1296  }
1297 
1298  // parseEnum falls through and does not yet add an unwrapped line as an
1299  // enum definition can start a structural element.
1300  if (!parseEnum())
1301  break;
1302  // This only applies for C++.
1303  if (!Style.isCpp()) {
1304  addUnwrappedLine();
1305  return;
1306  }
1307  break;
1308  case tok::kw_typedef:
1309  nextToken();
      // 'typedef' followed by one of the NS_/CF_ enum macros is parsed as an
      // enum.
1310  if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1311  Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1312  Keywords.kw_CF_CLOSED_ENUM,
1313  Keywords.kw_NS_CLOSED_ENUM))
1314  parseEnum();
1315  break;
1316  case tok::kw_struct:
1317  case tok::kw_union:
1318  case tok::kw_class:
1319  if (parseStructLike()) {
1320  return;
1321  }
1322  break;
1323  case tok::period:
1324  nextToken();
1325  // In Java, classes have an implicit static member "class".
1326  if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1327  FormatTok->is(tok::kw_class))
1328  nextToken();
1329  if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
1330  FormatTok->Tok.getIdentifierInfo())
1331  // JavaScript only has pseudo keywords, all keywords are allowed to
1332  // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1333  nextToken();
1334  break;
1335  case tok::semi:
1336  nextToken();
1337  addUnwrappedLine();
1338  return;
1339  case tok::r_brace:
1340  addUnwrappedLine();
1341  return;
1342  case tok::l_paren:
1343  parseParens();
1344  break;
1345  case tok::kw_operator:
1346  nextToken();
1347  if (FormatTok->isBinaryOperator())
1348  nextToken();
1349  break;
1350  case tok::caret:
      // '^' optionally followed by a type or identifier, a parenthesized list
      // and a braced child block.
1351  nextToken();
1352  if (FormatTok->Tok.isAnyIdentifier() ||
1353  FormatTok->isSimpleTypeSpecifier())
1354  nextToken();
1355  if (FormatTok->is(tok::l_paren))
1356  parseParens();
1357  if (FormatTok->is(tok::l_brace))
1358  parseChildBlock();
1359  break;
1360  case tok::l_brace:
1361  if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1362  // A block outside of parentheses must be the last part of a
1363  // structural element.
1364  // FIXME: Figure out cases where this is not true, and add projections
1365  // for them (the one we know is missing are lambdas).
1366  if (Style.BraceWrapping.AfterFunction)
1367  addUnwrappedLine();
1368  FormatTok->setType(TT_FunctionLBrace);
1369  parseBlock(/*MustBeDeclaration=*/false);
1370  addUnwrappedLine();
1371  return;
1372  }
1373  // Otherwise this was a braced init list, and the structural
1374  // element continues.
1375  break;
1376  case tok::kw_try:
1377  if (Style.Language == FormatStyle::LK_JavaScript &&
1378  Line->MustBeDeclaration) {
1379  // field/method declaration.
1380  nextToken();
1381  break;
1382  }
1383  // We arrive here when parsing function-try blocks.
1384  if (Style.BraceWrapping.AfterFunction)
1385  addUnwrappedLine();
1386  parseTryCatch();
1387  return;
1388  case tok::identifier: {
1389  if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1390  Line->MustBeDeclaration) {
1391  addUnwrappedLine();
1392  parseCSharpGenericTypeConstraint();
1393  break;
1394  }
1395  if (FormatTok->is(TT_MacroBlockEnd)) {
1396  addUnwrappedLine();
1397  return;
1398  }
1399 
1400  // Function declarations (as opposed to function expressions) are parsed
1401  // on their own unwrapped line by continuing this loop. Function
1402  // expressions (functions that are not on their own line) must not create
1403  // a new unwrapped line, so they are special cased below.
1404  size_t TokenCount = Line->Tokens.size();
1405  if (Style.Language == FormatStyle::LK_JavaScript &&
1406  FormatTok->is(Keywords.kw_function) &&
1407  (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1408  Keywords.kw_async)))) {
1409  tryToParseJSFunction();
1410  break;
1411  }
1412  if ((Style.Language == FormatStyle::LK_JavaScript ||
1413  Style.Language == FormatStyle::LK_Java) &&
1414  FormatTok->is(Keywords.kw_interface)) {
1415  if (Style.Language == FormatStyle::LK_JavaScript) {
1416  // In JavaScript/TypeScript, "interface" can be used as a standalone
1417  // identifier, e.g. in `var interface = 1;`. If "interface" is
1418  // followed by another identifier, it is very likely to be an actual
1419  // interface declaration.
      // Peek at the next token without consuming it: remember the position,
      // read one token, then rewind.
1420  unsigned StoredPosition = Tokens->getPosition();
1421  FormatToken *Next = Tokens->getNextToken();
1422  FormatTok = Tokens->setPosition(StoredPosition);
1423  if (Next && !mustBeJSIdent(Keywords, Next)) {
1424  nextToken();
1425  break;
1426  }
1427  }
1428  parseRecord();
1429  addUnwrappedLine();
1430  return;
1431  }
1432 
1433  if (FormatTok->is(Keywords.kw_interface)) {
1434  if (parseStructLike()) {
1435  return;
1436  }
1437  break;
1438  }
1439 
1440  if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1441  parseStatementMacro();
1442  return;
1443  }
1444 
1445  // See if the following token should start a new unwrapped line.
      // Text keeps the identifier's spelling for the all-uppercase check below.
1446  StringRef Text = FormatTok->TokenText;
1447  nextToken();
1448 
1449  // JS doesn't have macros, and within classes colons indicate fields, not
1450  // labels.
1451  if (Style.Language == FormatStyle::LK_JavaScript)
1452  break;
1453 
      // Only an identifier that is the first token of the line (optionally
      // preceded by one comment) is considered as a label or macro.
1454  TokenCount = Line->Tokens.size();
1455  if (TokenCount == 1 ||
1456  (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
1457  if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1458  Line->Tokens.begin()->Tok->MustBreakBefore = true;
1459  parseLabel(!Style.IndentGotoLabels);
1460  return;
1461  }
1462  // Recognize function-like macro usages without trailing semicolon as
1463  // well as free-standing macros like Q_OBJECT.
1464  bool FunctionLike = FormatTok->is(tok::l_paren);
1465  if (FunctionLike)
1466  parseParens();
1467 
1468  bool FollowedByNewline =
1469  CommentsBeforeNextToken.empty()
1470  ? FormatTok->NewlinesBefore > 0
1471  : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1472 
      // Treat it as a macro line only if it is followed by a newline, is
      // function-like or at least five characters long, is spelled entirely in
      // upper case and the next token may start a new line.
1473  if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1474  tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
1475  addUnwrappedLine();
1476  return;
1477  }
1478  }
1479  break;
1480  }
1481  case tok::equal:
1482  // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1483  // TT_FatArrow. They always start an expression or a child block if
1484  // followed by a curly.
1485  if (FormatTok->is(TT_FatArrow)) {
1486  nextToken();
1487  if (FormatTok->is(tok::l_brace)) {
1488  // C# may break after => if the next character is a newline.
1489  if (Style.isCSharp() && Style.BraceWrapping.AfterFunction == true) {
1490  // calling `addUnwrappedLine()` here causes odd parsing errors.
1491  FormatTok->MustBreakBefore = true;
1492  }
1493  parseChildBlock();
1494  }
1495  break;
1496  }
1497 
1498  nextToken();
1499  if (FormatTok->Tok.is(tok::l_brace)) {
1500  // Block kind should probably be set to BK_BracedInit for any language.
1501  // C# needs this change to ensure that array initialisers and object
1502  // initialisers are indented the same way.
1503  if (Style.isCSharp())
1504  FormatTok->setBlockKind(BK_BracedInit);
1505  nextToken();
1506  parseBracedList();
1507  } else if (Style.Language == FormatStyle::LK_Proto &&
1508  FormatTok->Tok.is(tok::less)) {
1509  nextToken();
1510  parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1511  /*ClosingBraceKind=*/tok::greater);
1512  }
1513  break;
1514  case tok::l_square:
1515  parseSquare();
1516  break;
1517  case tok::kw_new:
1518  parseNew();
1519  break;
1520  default:
1521  nextToken();
1522  break;
1523  }
1524  } while (!eof());
1525 }
1526 
// Tries to parse a C# property accessor block starting at the current '{'.
//
// Returns false when the style is not C#, when the token before the '{' is
// not an identifier, or when the lookahead finds no 'get'/'set' keyword (the
// token source position is restored in that case). Otherwise the accessor is
// parsed and true is returned.
1527 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1528  assert(FormatTok->is(tok::l_brace));
1529  if (!Style.isCSharp())
1530  return false;
1531  // See if it's a property accessor.
      // NOTE(review): Previous is dereferenced without a null check; presumably
      // it is always set when a '{' is reached here - confirm.
1532  if (FormatTok->Previous->isNot(tok::identifier))
1533  return false;
1534 
1535  // See if we are inside a property accessor.
1536  //
1537  // Record the current tokenPosition so that we can advance and
1538  // reset the current token. `Next` is not set yet so we need
1539  // another way to advance along the token stream.
1540  unsigned int StoredPosition = Tokens->getPosition();
1541  FormatToken *Tok = Tokens->getNextToken();
1542 
1543  // A trivial property accessor is of the form:
1544  // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] }
1545  // Track these as they do not require line breaks to be introduced.
1546  bool HasGetOrSet = false;
1547  bool IsTrivialPropertyAccessor = true;
      // Lookahead only: Tok is advanced through the token source while FormatTok
      // is left alone. The scan stops at the first token that is not ';', an
      // access specifier, 'internal', 'get' or 'set'.
      // NOTE(review): the loop condition calls eof(), which presumably inspects
      // FormatTok rather than Tok; termination then relies on the break below -
      // confirm.
1548  while (!eof()) {
1549  if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1550  tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1551  Keywords.kw_set)) {
1552  if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set))
1553  HasGetOrSet = true;
1554  Tok = Tokens->getNextToken();
1555  continue;
1556  }
      // Anything other than the closing '}' means the accessor has a body or
      // other content, i.e. it is not trivial.
1557  if (Tok->isNot(tok::r_brace))
1558  IsTrivialPropertyAccessor = false;
1559  break;
1560  }
1561 
1562  if (!HasGetOrSet) {
1563  Tokens->setPosition(StoredPosition);
1564  return false;
1565  }
1566 
1567  // Try to parse the property accessor:
1568  // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1569  Tokens->setPosition(StoredPosition);
1570  if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction == true)
1571  addUnwrappedLine();
1572  nextToken();
1573  do {
1574  switch (FormatTok->Tok.getKind()) {
1575  case tok::r_brace:
      // End of the accessor block. A following '= ...' is consumed up to and
      // including its ';' before the line is finished.
1576  nextToken();
1577  if (FormatTok->is(tok::equal)) {
1578  while (!eof() && FormatTok->isNot(tok::semi))
1579  nextToken();
1580  nextToken();
1581  }
1582  addUnwrappedLine();
1583  return true;
1584  case tok::l_brace:
      // A braced accessor body is parsed as a block one level deeper.
1585  ++Line->Level;
1586  parseBlock(/*MustBeDeclaration=*/true);
1587  addUnwrappedLine();
1588  --Line->Level;
1589  break;
1590  case tok::equal:
      // A fat arrow ('=>') body runs up to and including the next ';'.
1591  if (FormatTok->is(TT_FatArrow)) {
1592  ++Line->Level;
1593  do {
1594  nextToken();
1595  } while (!eof() && FormatTok->isNot(tok::semi));
1596  nextToken();
1597  addUnwrappedLine();
1598  --Line->Level;
1599  break;
1600  }
1601  nextToken();
1602  break;
1603  default:
1604  if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) &&
1605  !IsTrivialPropertyAccessor) {
1606  // Non-trivial get/set needs to be on its own line.
1607  addUnwrappedLine();
1608  }
1609  nextToken();
1610  }
1611  } while (!eof());
1612 
1613  // Unreachable for well-formed code (paired '{' and '}').
1614  return true;
1615 }
1616 
1617 bool UnwrappedLineParser::tryToParseLambda() {
1618  if (!Style.isCpp()) {
1619  nextToken();
1620  return false;
1621  }
1622  assert(FormatTok->is(tok::l_square));
1623  FormatToken &LSquare = *FormatTok;
1624  if (!tryToParseLambdaIntroducer())
1625  return false;
1626 
1627  bool SeenArrow = false;
1628 
1629  while (FormatTok->isNot(tok::l_brace)) {
1630  if (FormatTok->isSimpleTypeSpecifier()) {
1631  nextToken();
1632  continue;
1633  }
1634  switch (FormatTok->Tok.getKind()) {
1635  case tok::l_brace:
1636  break;
1637  case tok::l_paren:
1638  parseParens();
1639  break;
1640  case tok::amp:
1641  case tok::star:
1642  case tok::kw_const:
1643  case tok::comma:
1644  case tok::less:
1645  case tok::greater:
1646  case tok::identifier:
1647  case tok::numeric_constant:
1648  case tok::coloncolon:
1649  case tok::kw_class:
1650  case tok::kw_mutable:
1651  case tok::kw_noexcept:
1652  case tok::kw_template:
1653  case tok::kw_typename:
1654  nextToken();
1655  break;
1656  // Specialization of a template with an integer parameter can contain
1657  // arithmetic, logical, comparison and ternary operators.
1658  //
1659  // FIXME: This also accepts sequences of operators that are not in the scope
1660  // of a template argument list.
1661  //
1662  // In a C++ lambda a template type can only occur after an arrow. We use
1663  // this as an heuristic to distinguish between Objective-C expressions
1664  // followed by an `a->b` expression, such as:
1665  // ([obj func:arg] + a->b)
1666  // Otherwise the code below would parse as a lambda.
1667  //
1668  // FIXME: This heuristic is incorrect for C++20 generic lambdas with
1669  // explicit template lists: []<bool b = true && false>(U &&u){}
1670  case tok::plus:
1671  case tok::minus:
1672  case tok::exclaim:
1673  case tok::tilde:
1674  case tok::slash:
1675  case tok::percent:
1676  case tok::lessless:
1677  case tok::pipe:
1678  case tok::pipepipe:
1679  case tok::ampamp:
1680  case tok::caret:
1681  case tok::equalequal:
1682  case tok::exclaimequal:
1683  case tok::greaterequal:
1684  case tok::lessequal:
1685  case tok::question:
1686  case tok::colon:
1687  case tok::ellipsis:
1688  case tok::kw_true:
1689  case tok::kw_false:
1690  if (SeenArrow) {
1691  nextToken();
1692  break;
1693  }
1694  return true;
1695  case tok::arrow:
1696  // This might or might not actually be a lambda arrow (this could be an
1697  // ObjC method invocation followed by a dereferencing arrow). We might
1698  // reset this back to TT_Unknown in TokenAnnotator.
1699  FormatTok->setType(TT_LambdaArrow);
1700  SeenArrow = true;
1701  nextToken();
1702  break;
1703  default:
1704  return true;
1705  }
1706  }
1707  FormatTok->setType(TT_LambdaLBrace);
1708  LSquare.setType(TT_LambdaLSquare);
1709  parseChildBlock();
1710  return true;
1711 }
1712 
1713 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1714  const FormatToken *Previous = FormatTok->Previous;
1715  if (Previous &&
1716  (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1717  tok::kw_delete, tok::l_square) ||
1718  FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1719  Previous->isSimpleTypeSpecifier())) {
1720  nextToken();
1721  return false;
1722  }
1723  nextToken();
1724  if (FormatTok->is(tok::l_square)) {
1725  return false;
1726  }
1727  parseSquare(/*LambdaIntroducer=*/true);
1728  return true;
1729 }
1730 
1731 void UnwrappedLineParser::tryToParseJSFunction() {
1732  assert(FormatTok->is(Keywords.kw_function) ||
1733  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1734  if (FormatTok->is(Keywords.kw_async))
1735  nextToken();
1736  // Consume "function".
1737  nextToken();
1738 
1739  // Consume * (generator function). Treat it like C++'s overloaded operators.
1740  if (FormatTok->is(tok::star)) {
1741  FormatTok->setType(TT_OverloadedOperator);
1742  nextToken();
1743  }
1744 
1745  // Consume function name.
1746  if (FormatTok->is(tok::identifier))
1747  nextToken();
1748 
1749  if (FormatTok->isNot(tok::l_paren))
1750  return;
1751 
1752  // Parse formal parameter list.
1753  parseParens();
1754 
1755  if (FormatTok->is(tok::colon)) {
1756  // Parse a type definition.
1757  nextToken();
1758 
1759  // Eat the type declaration. For braced inline object types, balance braces,
1760  // otherwise just parse until finding an l_brace for the function body.
1761  if (FormatTok->is(tok::l_brace))
1762  tryToParseBracedList();
1763  else
1764  while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1765  nextToken();
1766  }
1767 
1768  if (FormatTok->is(tok::semi))
1769  return;
1770 
1771  parseChildBlock();
1772 }
1773 
1774 bool UnwrappedLineParser::tryToParseBracedList() {
1775  if (FormatTok->is(BK_Unknown))
1776  calculateBraceTypes();
1777  assert(FormatTok->isNot(BK_Unknown));
1778  if (FormatTok->is(BK_Block))
1779  return false;
1780  nextToken();
1781  parseBracedList();
1782  return true;
1783 }
1784 
// Parses the contents of a braced list up to and including the token of kind
// \p ClosingBraceKind; the opening token has already been consumed.
//
// \p ContinueOnSemicolons: when false, a ';' (outside JavaScript) aborts
// parsing immediately; when true, the ';' is recorded as an error and parsing
// continues.
// \p IsEnum: when true and short enums are not allowed on a single line, an
// unwrapped line is added after each ',' and before the closing token.
//
// Returns true if the closing token was reached without a ';' error, false if
// a ';' was seen (outside JavaScript) or the input ended first.
1785 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1786  bool IsEnum,
1787  tok::TokenKind ClosingBraceKind) {
1788  bool HasError = false;
1789 
1790  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1791  // replace this by using parseAssignmentExpression() inside.
1792  do {
1793  if (Style.isCSharp()) {
1794  if (FormatTok->is(TT_FatArrow)) {
1795  nextToken();
1796  // Fat arrows can be followed by simple expressions or by child blocks
1797  // in curly braces.
1798  if (FormatTok->is(tok::l_brace)) {
1799  parseChildBlock();
1800  continue;
1801  }
1802  }
1803  }
1804  if (Style.Language == FormatStyle::LK_JavaScript) {
1805  if (FormatTok->is(Keywords.kw_function) ||
1806  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1807  tryToParseJSFunction();
1808  continue;
1809  }
1810  if (FormatTok->is(TT_FatArrow)) {
1811  nextToken();
1812  // Fat arrows can be followed by simple expressions or by child blocks
1813  // in curly braces.
1814  if (FormatTok->is(tok::l_brace)) {
1815  parseChildBlock();
1816  continue;
1817  }
1818  }
1819  if (FormatTok->is(tok::l_brace)) {
1820  // Could be a method inside of a braced list `{a() { return 1; }}`.
1821  if (tryToParseBracedList())
1822  continue;
1823  parseChildBlock();
1824  }
1825  }
      // Reaching the closing token ends the list; it is consumed here.
1826  if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1827  if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
1828  addUnwrappedLine();
1829  nextToken();
1830  return !HasError;
1831  }
1832  switch (FormatTok->Tok.getKind()) {
1833  case tok::caret:
1834  nextToken();
1835  if (FormatTok->is(tok::l_brace)) {
1836  parseChildBlock();
1837  }
1838  break;
1839  case tok::l_square:
1840  if (Style.isCSharp())
1841  parseSquare();
1842  else
1843  tryToParseLambda();
1844  break;
1845  case tok::l_paren:
1846  parseParens();
1847  // JavaScript can just have free standing methods and getters/setters in
1848  // object literals. Detect them by a "{" following ")".
1849  if (Style.Language == FormatStyle::LK_JavaScript) {
1850  if (FormatTok->is(tok::l_brace))
1851  parseChildBlock();
1852  break;
1853  }
1854  break;
1855  case tok::l_brace:
1856  // Assume there are no blocks inside a braced init list apart
1857  // from the ones we explicitly parse out (like lambdas).
1858  FormatTok->setBlockKind(BK_BracedInit);
1859  nextToken();
1860  parseBracedList();
1861  break;
1862  case tok::less:
      // In Proto, '<' opens a nested list that is closed by '>'.
1863  if (Style.Language == FormatStyle::LK_Proto) {
1864  nextToken();
1865  parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1866  /*ClosingBraceKind=*/tok::greater);
1867  } else {
1868  nextToken();
1869  }
1870  break;
1871  case tok::semi:
1872  // JavaScript (or more precisely TypeScript) can have semicolons in braced
1873  // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1874  // used for error recovery if we have otherwise determined that this is
1875  // a braced list.
1876  if (Style.Language == FormatStyle::LK_JavaScript) {
1877  nextToken();
1878  break;
1879  }
      // HasError was just set, so the early return below yields false; the ';'
      // itself is left unconsumed in that case.
1880  HasError = true;
1881  if (!ContinueOnSemicolons)
1882  return !HasError;
1883  nextToken();
1884  break;
1885  case tok::comma:
1886  nextToken();
1887  if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
1888  addUnwrappedLine();
1889  break;
1890  default:
1891  nextToken();
1892  break;
1893  }
1894  } while (!eof());
      // Input ended before the closing token was found.
1895  return false;
1896 }
1897 
1898 void UnwrappedLineParser::parseParens() {
1899  assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1900  nextToken();
1901  do {
1902  switch (FormatTok->Tok.getKind()) {
1903  case tok::l_paren:
1904  parseParens();
1905  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1906  parseChildBlock();
1907  break;
1908  case tok::r_paren:
1909  nextToken();
1910  return;
1911  case tok::r_brace:
1912  // A "}" inside parenthesis is an error if there wasn't a matching "{".
1913  return;
1914  case tok::l_square:
1915  tryToParseLambda();
1916  break;
1917  case tok::l_brace:
1918  if (!tryToParseBracedList())
1919  parseChildBlock();
1920  break;
1921  case tok::at:
1922  nextToken();
1923  if (FormatTok->Tok.is(tok::l_brace)) {
1924  nextToken();
1925  parseBracedList();
1926  }
1927  break;
1928  case tok::kw_class:
1929  if (Style.Language == FormatStyle::LK_JavaScript)
1930  parseRecord(/*ParseAsExpr=*/true);
1931  else
1932  nextToken();
1933  break;
1934  case tok::identifier:
1935  if (Style.Language == FormatStyle::LK_JavaScript &&
1936  (FormatTok->is(Keywords.kw_function) ||
1937  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1938  tryToParseJSFunction();
1939  else
1940  nextToken();
1941  break;
1942  default:
1943  nextToken();
1944  break;
1945  }
1946  } while (!eof());
1947 }
1948 
1949 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1950  if (!LambdaIntroducer) {
1951  assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1952  if (tryToParseLambda())
1953  return;
1954  }
1955  do {
1956  switch (FormatTok->Tok.getKind()) {
1957  case tok::l_paren:
1958  parseParens();
1959  break;
1960  case tok::r_square:
1961  nextToken();
1962  return;
1963  case tok::r_brace:
1964  // A "}" inside parenthesis is an error if there wasn't a matching "{".
1965  return;
1966  case tok::l_square:
1967  parseSquare();
1968  break;
1969  case tok::l_brace: {
1970  if (!tryToParseBracedList())
1971  parseChildBlock();
1972  break;
1973  }
1974  case tok::at:
1975  nextToken();
1976  if (FormatTok->Tok.is(tok::l_brace)) {
1977  nextToken();
1978  parseBracedList();
1979  }
1980  break;
1981  default:
1982  nextToken();
1983  break;
1984  }
1985  } while (!eof());
1986 }
1987 
1988 void UnwrappedLineParser::parseIfThenElse() {
1989  assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1990  nextToken();
1991  if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
1992  nextToken();
1993  if (FormatTok->Tok.is(tok::l_paren))
1994  parseParens();
1995  // handle [[likely]] / [[unlikely]]
1996  if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute())
1997  parseSquare();
1998  bool NeedsUnwrappedLine = false;
1999  if (FormatTok->Tok.is(tok::l_brace)) {
2000  CompoundStatementIndenter Indenter(this, Style, Line->Level);
2001  parseBlock(/*MustBeDeclaration=*/false);
2002  if (Style.BraceWrapping.BeforeElse)
2003  addUnwrappedLine();
2004  else
2005  NeedsUnwrappedLine = true;
2006  } else {
2007  addUnwrappedLine();
2008  ++Line->Level;
2009  parseStructuralElement();
2010  --Line->Level;
2011  }
2012  if (FormatTok->Tok.is(tok::kw_else)) {
2013  nextToken();
2014  // handle [[likely]] / [[unlikely]]
2015  if (FormatTok->Tok.is(tok::l_square) && tryToParseSimpleAttribute())
2016  parseSquare();
2017  if (FormatTok->Tok.is(tok::l_brace)) {
2018  CompoundStatementIndenter Indenter(this, Style, Line->Level);
2019  parseBlock(/*MustBeDeclaration=*/false);
2020  addUnwrappedLine();
2021  } else if (FormatTok->Tok.is(tok::kw_if)) {
2022  parseIfThenElse();
2023  } else {
2024  addUnwrappedLine();
2025  ++Line->Level;
2026  parseStructuralElement();
2027  if (FormatTok->is(tok::eof))
2028  addUnwrappedLine();
2029  --Line->Level;
2030  }
2031  } else if (NeedsUnwrappedLine) {
2032  addUnwrappedLine();
2033  }
2034 }
2035 
2036 void UnwrappedLineParser::parseTryCatch() {
2037  assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2038  nextToken();
2039  bool NeedsUnwrappedLine = false;
2040  if (FormatTok->is(tok::colon)) {
2041  // We are in a function try block, what comes is an initializer list.
2042  nextToken();
2043 
2044  // In case identifiers were removed by clang-tidy, what might follow is
2045  // multiple commas in sequence - before the first identifier.
2046  while (FormatTok->is(tok::comma))
2047  nextToken();
2048 
2049  while (FormatTok->is(tok::identifier)) {
2050  nextToken();
2051  if (FormatTok->is(tok::l_paren))
2052  parseParens();
2053  if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2054  FormatTok->is(tok::l_brace)) {
2055  do {
2056  nextToken();
2057  } while (!FormatTok->is(tok::r_brace));
2058  nextToken();
2059  }
2060 
2061  // In case identifiers were removed by clang-tidy, what might follow is
2062  // multiple commas in sequence - after the first identifier.
2063  while (FormatTok->is(tok::comma))
2064  nextToken();
2065  }
2066  }
2067  // Parse try with resource.
2068  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
2069  parseParens();
2070  }
2071  if (FormatTok->is(tok::l_brace)) {
2072  CompoundStatementIndenter Indenter(this, Style, Line->Level);
2073  parseBlock(/*MustBeDeclaration=*/false);
2074  if (Style.BraceWrapping.BeforeCatch) {
2075  addUnwrappedLine();
2076  } else {
2077  NeedsUnwrappedLine = true;
2078  }
2079  } else if (!FormatTok->is(tok::kw_catch)) {
2080  // The C++ standard requires a compound-statement after a try.
2081  // If there's none, we try to assume there's a structuralElement
2082  // and try to continue.
2083  addUnwrappedLine();
2084  ++Line->Level;
2085  parseStructuralElement();
2086  --Line->Level;
2087  }
2088  while (1) {
2089  if (FormatTok->is(tok::at))
2090  nextToken();
2091  if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2092  tok::kw___finally) ||
2093  ((Style.Language == FormatStyle::LK_Java ||
2094  Style.Language == FormatStyle::LK_JavaScript) &&
2095  FormatTok->is(Keywords.kw_finally)) ||
2096  (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
2097  FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
2098  break;
2099  nextToken();
2100  while (FormatTok->isNot(tok::l_brace)) {
2101  if (FormatTok->is(tok::l_paren)) {
2102  parseParens();
2103  continue;
2104  }
2105  if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
2106  return;
2107  nextToken();
2108  }
2109  NeedsUnwrappedLine = false;
2110  CompoundStatementIndenter Indenter(this, Style, Line->Level);
2111  parseBlock(/*MustBeDeclaration=*/false);
2112  if (Style.BraceWrapping.BeforeCatch)
2113  addUnwrappedLine();
2114  else
2115  NeedsUnwrappedLine = true;
2116  }
2117  if (NeedsUnwrappedLine)
2118  addUnwrappedLine();
2119 }
2120 
2121 void UnwrappedLineParser::parseNamespace() {
2122  assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2123  "'namespace' expected");
2124 
2125  const FormatToken &InitialToken = *FormatTok;
2126  nextToken();
2127  if (InitialToken.is(TT_NamespaceMacro)) {
2128  parseParens();
2129  } else {
2130  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2131  tok::l_square)) {
2132  if (FormatTok->is(tok::l_square))
2133  parseSquare();
2134  else
2135  nextToken();
2136  }
2137  }
2138  if (FormatTok->Tok.is(tok::l_brace)) {
2139  if (ShouldBreakBeforeBrace(Style, InitialToken))
2140  addUnwrappedLine();
2141 
2142  unsigned AddLevels =
2143  Style.NamespaceIndentation == FormatStyle::NI_All ||
2144  (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2145  DeclarationScopeStack.size() > 1)
2146  ? 1u
2147  : 0u;
2148  bool ManageWhitesmithsBraces =
2149  AddLevels == 0u &&
2150  Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2151 
2152  // If we're in Whitesmiths mode, indent the brace if we're not indenting
2153  // the whole block.
2154  if (ManageWhitesmithsBraces)
2155  ++Line->Level;
2156 
2157  parseBlock(/*MustBeDeclaration=*/true, AddLevels,
2158  /*MunchSemi=*/true,
2159  /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);
2160 
2161  // Munch the semicolon after a namespace. This is more common than one would
2162  // think. Putting the semicolon into its own line is very ugly.
2163  if (FormatTok->Tok.is(tok::semi))
2164  nextToken();
2165 
2166  addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2167 
2168  if (ManageWhitesmithsBraces)
2169  --Line->Level;
2170  }
2171  // FIXME: Add error handling.
2172 }
2173 
2174 void UnwrappedLineParser::parseNew() {
2175  assert(FormatTok->is(tok::kw_new) && "'new' expected");
2176  nextToken();
2177 
2178  if (Style.isCSharp()) {
2179  do {
2180  if (FormatTok->is(tok::l_brace))
2181  parseBracedList();
2182 
2183  if (FormatTok->isOneOf(tok::semi, tok::comma))
2184  return;
2185 
2186  nextToken();
2187  } while (!eof());
2188  }
2189 
2190  if (Style.Language != FormatStyle::LK_Java)
2191  return;
2192 
2193  // In Java, we can parse everything up to the parens, which aren't optional.
2194  do {
2195  // There should not be a ;, { or } before the new's open paren.
2196  if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2197  return;
2198 
2199  // Consume the parens.
2200  if (FormatTok->is(tok::l_paren)) {
2201  parseParens();
2202 
2203  // If there is a class body of an anonymous class, consume that as child.
2204  if (FormatTok->is(tok::l_brace))
2205  parseChildBlock();
2206  return;
2207  }
2208  nextToken();
2209  } while (!eof());
2210 }
2211 
2212 void UnwrappedLineParser::parseForOrWhileLoop() {
2213  assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2214  "'for', 'while' or foreach macro expected");
2215  nextToken();
2216  // JS' for await ( ...
2217  if (Style.Language == FormatStyle::LK_JavaScript &&
2218  FormatTok->is(Keywords.kw_await))
2219  nextToken();
2220  if (FormatTok->Tok.is(tok::l_paren))
2221  parseParens();
2222  if (FormatTok->Tok.is(tok::l_brace)) {
2223  CompoundStatementIndenter Indenter(this, Style, Line->Level);
2224  parseBlock(/*MustBeDeclaration=*/false);
2225  addUnwrappedLine();
2226  } else {
2227  addUnwrappedLine();
2228  ++Line->Level;
2229  parseStructuralElement();
2230  --Line->Level;
2231  }
2232 }
2233 
2234 void UnwrappedLineParser::parseDoWhile() {
2235  assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
2236  nextToken();
2237  if (FormatTok->Tok.is(tok::l_brace)) {
2238  CompoundStatementIndenter Indenter(this, Style, Line->Level);
2239  parseBlock(/*MustBeDeclaration=*/false);
2240  if (Style.BraceWrapping.BeforeWhile)
2241  addUnwrappedLine();
2242  } else {
2243  addUnwrappedLine();
2244  ++Line->Level;
2245  parseStructuralElement();
2246  --Line->Level;
2247  }
2248 
2249  // FIXME: Add error handling.
2250  if (!FormatTok->Tok.is(tok::kw_while)) {
2251  addUnwrappedLine();
2252  return;
2253  }
2254 
2255  // If in Whitesmiths mode, the line with the while() needs to be indented
2256  // to the same level as the block.
2257  if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2258  ++Line->Level;
2259 
2260  nextToken();
2261  parseStructuralElement();
2262 }
2263 
2264 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2265  nextToken();
2266  unsigned OldLineLevel = Line->Level;
2267  if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2268  --Line->Level;
2269  if (LeftAlignLabel)
2270  Line->Level = 0;
2271 
2272  if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2273  FormatTok->Tok.is(tok::l_brace)) {
2274 
2275  CompoundStatementIndenter Indenter(this, Line->Level,
2276  Style.BraceWrapping.AfterCaseLabel,
2277  Style.BraceWrapping.IndentBraces);
2278  parseBlock(/*MustBeDeclaration=*/false);
2279  if (FormatTok->Tok.is(tok::kw_break)) {
2280  if (Style.BraceWrapping.AfterControlStatement ==
2281  FormatStyle::BWACS_Always) {
2282  addUnwrappedLine();
2283  if (!Style.IndentCaseBlocks &&
2284  Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
2285  Line->Level++;
2286  }
2287  }
2288  parseStructuralElement();
2289  }
2290  addUnwrappedLine();
2291  } else {
2292  if (FormatTok->is(tok::semi))
2293  nextToken();
2294  addUnwrappedLine();
2295  }
2296  Line->Level = OldLineLevel;
2297  if (FormatTok->isNot(tok::l_brace)) {
2298  parseStructuralElement();
2299  addUnwrappedLine();
2300  }
2301 }
2302 
2303 void UnwrappedLineParser::parseCaseLabel() {
2304  assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2305 
2306  // FIXME: fix handling of complex expressions here.
2307  do {
2308  nextToken();
2309  } while (!eof() && !FormatTok->Tok.is(tok::colon));
2310  parseLabel();
2311 }
2312 
2313 void UnwrappedLineParser::parseSwitch() {
2314  assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2315  nextToken();
2316  if (FormatTok->Tok.is(tok::l_paren))
2317  parseParens();
2318  if (FormatTok->Tok.is(tok::l_brace)) {
2319  CompoundStatementIndenter Indenter(this, Style, Line->Level);
2320  parseBlock(/*MustBeDeclaration=*/false);
2321  addUnwrappedLine();
2322  } else {
2323  addUnwrappedLine();
2324  ++Line->Level;
2325  parseStructuralElement();
2326  --Line->Level;
2327  }
2328 }
2329 
2330 void UnwrappedLineParser::parseAccessSpecifier() {
2331  nextToken();
2332  // Understand Qt's slots.
2333  if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2334  nextToken();
2335  // Otherwise, we don't know what it is, and we'd better keep the next token.
2336  if (FormatTok->Tok.is(tok::colon))
2337  nextToken();
2338  addUnwrappedLine();
2339 }
2340 
2341 void UnwrappedLineParser::parseConcept() {
2342  assert(FormatTok->Tok.is(tok::kw_concept) && "'concept' expected");
2343  nextToken();
2344  if (!FormatTok->Tok.is(tok::identifier))
2345  return;
2346  nextToken();
2347  if (!FormatTok->Tok.is(tok::equal))
2348  return;
2349  nextToken();
2350  if (FormatTok->Tok.is(tok::kw_requires)) {
2351  nextToken();
2352  parseRequiresExpression(Line->Level);
2353  } else {
2354  parseConstraintExpression(Line->Level);
2355  }
2356 }
2357 
2358 void UnwrappedLineParser::parseRequiresExpression(unsigned int OriginalLevel) {
2359  // requires (R range)
2360  if (FormatTok->Tok.is(tok::l_paren)) {
2361  parseParens();
2362  if (Style.IndentRequires && OriginalLevel != Line->Level) {
2363  addUnwrappedLine();
2364  --Line->Level;
2365  }
2366  }
2367 
2368  if (FormatTok->Tok.is(tok::l_brace)) {
2369  if (Style.BraceWrapping.AfterFunction)
2370  addUnwrappedLine();
2371  FormatTok->setType(TT_FunctionLBrace);
2372  parseBlock(/*MustBeDeclaration=*/false);
2373  addUnwrappedLine();
2374  } else {
2375  parseConstraintExpression(OriginalLevel);
2376  }
2377 }
2378 
2379 void UnwrappedLineParser::parseConstraintExpression(
2380  unsigned int OriginalLevel) {
2381  // requires Id<T> && Id<T> || Id<T>
2382  while (
2383  FormatTok->isOneOf(tok::identifier, tok::kw_requires, tok::coloncolon)) {
2384  nextToken();
2385  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::less,
2386  tok::greater, tok::comma, tok::ellipsis)) {
2387  if (FormatTok->Tok.is(tok::less)) {
2388  parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2389  /*ClosingBraceKind=*/tok::greater);
2390  continue;
2391  }
2392  nextToken();
2393  }
2394  if (FormatTok->Tok.is(tok::kw_requires)) {
2395  parseRequiresExpression(OriginalLevel);
2396  }
2397  if (FormatTok->Tok.is(tok::less)) {
2398  parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2399  /*ClosingBraceKind=*/tok::greater);
2400  }
2401 
2402  if (FormatTok->Tok.is(tok::l_paren)) {
2403  parseParens();
2404  }
2405  if (FormatTok->Tok.is(tok::l_brace)) {
2406  if (Style.BraceWrapping.AfterFunction)
2407  addUnwrappedLine();
2408  FormatTok->setType(TT_FunctionLBrace);
2409  parseBlock(/*MustBeDeclaration=*/false);
2410  }
2411  if (FormatTok->Tok.is(tok::semi)) {
2412  // Eat any trailing semi.
2413  nextToken();
2414  addUnwrappedLine();
2415  }
2416  if (FormatTok->Tok.is(tok::colon)) {
2417  return;
2418  }
2419  if (!FormatTok->Tok.isOneOf(tok::ampamp, tok::pipepipe)) {
2420  if (FormatTok->Previous &&
2421  !FormatTok->Previous->isOneOf(tok::identifier, tok::kw_requires,
2422  tok::coloncolon)) {
2423  addUnwrappedLine();
2424  }
2425  if (Style.IndentRequires && OriginalLevel != Line->Level) {
2426  --Line->Level;
2427  }
2428  break;
2429  } else {
2430  FormatTok->setType(TT_ConstraintJunctions);
2431  }
2432 
2433  nextToken();
2434  }
2435 }
2436 
2437 void UnwrappedLineParser::parseRequires() {
2438  assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected");
2439 
2440  unsigned OriginalLevel = Line->Level;
2441  if (FormatTok->Previous && FormatTok->Previous->is(tok::greater)) {
2442  addUnwrappedLine();
2443  if (Style.IndentRequires) {
2444  Line->Level++;
2445  }
2446  }
2447  nextToken();
2448 
2449  parseRequiresExpression(OriginalLevel);
2450 }
2451 
2452 bool UnwrappedLineParser::parseEnum() {
2453  // Won't be 'enum' for NS_ENUMs.
2454  if (FormatTok->Tok.is(tok::kw_enum))
2455  nextToken();
2456 
2457  // In TypeScript, "enum" can also be used as property name, e.g. in interface
2458  // declarations. An "enum" keyword followed by a colon would be a syntax
2459  // error and thus assume it is just an identifier.
2460  if (Style.Language == FormatStyle::LK_JavaScript &&
2461  FormatTok->isOneOf(tok::colon, tok::question))
2462  return false;
2463 
2464  // In protobuf, "enum" can be used as a field name.
2465  if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2466  return false;
2467 
2468  // Eat up enum class ...
2469  if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2470  nextToken();
2471 
2472  while (FormatTok->Tok.getIdentifierInfo() ||
2473  FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2474  tok::greater, tok::comma, tok::question)) {
2475  nextToken();
2476  // We can have macros or attributes in between 'enum' and the enum name.
2477  if (FormatTok->is(tok::l_paren))
2478  parseParens();
2479  if (FormatTok->is(tok::identifier)) {
2480  nextToken();
2481  // If there are two identifiers in a row, this is likely an elaborate
2482  // return type. In Java, this can be "implements", etc.
2483  if (Style.isCpp() && FormatTok->is(tok::identifier))
2484  return false;
2485  }
2486  }
2487 
2488  // Just a declaration or something is wrong.
2489  if (FormatTok->isNot(tok::l_brace))
2490  return true;
2491  FormatTok->setBlockKind(BK_Block);
2492 
2493  if (Style.Language == FormatStyle::LK_Java) {
2494  // Java enums are different.
2495  parseJavaEnumBody();
2496  return true;
2497  }
2498  if (Style.Language == FormatStyle::LK_Proto) {
2499  parseBlock(/*MustBeDeclaration=*/true);
2500  return true;
2501  }
2502 
2503  if (!Style.AllowShortEnumsOnASingleLine)
2504  addUnwrappedLine();
2505  // Parse enum body.
2506  nextToken();
2507  if (!Style.AllowShortEnumsOnASingleLine) {
2508  addUnwrappedLine();
2509  Line->Level += 1;
2510  }
2511  bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
2512  /*IsEnum=*/true);
2513  if (!Style.AllowShortEnumsOnASingleLine)
2514  Line->Level -= 1;
2515  if (HasError) {
2516  if (FormatTok->is(tok::semi))
2517  nextToken();
2518  addUnwrappedLine();
2519  }
2520  return true;
2521 
2522  // There is no addUnwrappedLine() here so that we fall through to parsing a
2523  // structural element afterwards. Thus, in "enum A {} n, m;",
2524  // "} n, m;" will end up in one unwrapped line.
2525 }
2526 
2527 bool UnwrappedLineParser::parseStructLike() {
2528  // parseRecord falls through and does not yet add an unwrapped line as a
2529  // record declaration or definition can start a structural element.
2530  parseRecord();
2531  // This does not apply to Java, JavaScript and C#.
2532  if (Style.Language == FormatStyle::LK_Java ||
2533  Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
2534  if (FormatTok->is(tok::semi))
2535  nextToken();
2536  addUnwrappedLine();
2537  return true;
2538  }
2539  return false;
2540 }
2541 
2542 namespace {
2543 // A class used to set and restore the Token position when peeking
2544 // ahead in the token source.
2545 class ScopedTokenPosition {
2546  unsigned StoredPosition;
2547  FormatTokenSource *Tokens;
2548 
2549 public:
2550  ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
2551  assert(Tokens && "Tokens expected to not be null");
2552  StoredPosition = Tokens->getPosition();
2553  }
2554 
2555  ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
2556 };
2557 } // namespace
2558 
2559 // Look to see if we have [[ by looking ahead, if
2560 // its not then rewind to the original position.
2561 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
2562  ScopedTokenPosition AutoPosition(Tokens);
2563  FormatToken *Tok = Tokens->getNextToken();
2564  // We already read the first [ check for the second.
2565  if (Tok && !Tok->is(tok::l_square)) {
2566  return false;
2567  }
2568  // Double check that the attribute is just something
2569  // fairly simple.
2570  while (Tok) {
2571  if (Tok->is(tok::r_square)) {
2572  break;
2573  }
2574  Tok = Tokens->getNextToken();
2575  }
2576  Tok = Tokens->getNextToken();
2577  if (Tok && !Tok->is(tok::r_square)) {
2578  return false;
2579  }
2580  Tok = Tokens->getNextToken();
2581  if (Tok && Tok->is(tok::semi)) {
2582  return false;
2583  }
2584  return true;
2585 }
2586 
2587 void UnwrappedLineParser::parseJavaEnumBody() {
2588  // Determine whether the enum is simple, i.e. does not have a semicolon or
2589  // constants with class bodies. Simple enums can be formatted like braced
2590  // lists, contracted to a single line, etc.
2591  unsigned StoredPosition = Tokens->getPosition();
2592  bool IsSimple = true;
2593  FormatToken *Tok = Tokens->getNextToken();
2594  while (Tok) {
2595  if (Tok->is(tok::r_brace))
2596  break;
2597  if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2598  IsSimple = false;
2599  break;
2600  }
2601  // FIXME: This will also mark enums with braces in the arguments to enum
2602  // constants as "not simple". This is probably fine in practice, though.
2603  Tok = Tokens->getNextToken();
2604  }
2605  FormatTok = Tokens->setPosition(StoredPosition);
2606 
2607  if (IsSimple) {
2608  nextToken();
2609  parseBracedList();
2610  addUnwrappedLine();
2611  return;
2612  }
2613 
2614  // Parse the body of a more complex enum.
2615  // First add a line for everything up to the "{".
2616  nextToken();
2617  addUnwrappedLine();
2618  ++Line->Level;
2619 
2620  // Parse the enum constants.
2621  while (FormatTok) {
2622  if (FormatTok->is(tok::l_brace)) {
2623  // Parse the constant's class body.
2624  parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
2625  /*MunchSemi=*/false);
2626  } else if (FormatTok->is(tok::l_paren)) {
2627  parseParens();
2628  } else if (FormatTok->is(tok::comma)) {
2629  nextToken();
2630  addUnwrappedLine();
2631  } else if (FormatTok->is(tok::semi)) {
2632  nextToken();
2633  addUnwrappedLine();
2634  break;
2635  } else if (FormatTok->is(tok::r_brace)) {
2636  addUnwrappedLine();
2637  break;
2638  } else {
2639  nextToken();
2640  }
2641  }
2642 
2643  // Parse the class body after the enum's ";" if any.
2644  parseLevel(/*HasOpeningBrace=*/true);
2645  nextToken();
2646  --Line->Level;
2647  addUnwrappedLine();
2648 }
2649 
2650 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2651  const FormatToken &InitialToken = *FormatTok;
2652  nextToken();
2653 
2654  // The actual identifier can be a nested name specifier, and in macros
2655  // it is often token-pasted.
2656  // An [[attribute]] can be before the identifier.
2657  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2658  tok::kw___attribute, tok::kw___declspec,
2659  tok::kw_alignas, tok::l_square, tok::r_square) ||
2660  ((Style.Language == FormatStyle::LK_Java ||
2661  Style.Language == FormatStyle::LK_JavaScript) &&
2662  FormatTok->isOneOf(tok::period, tok::comma))) {
2663  if (Style.Language == FormatStyle::LK_JavaScript &&
2664  FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2665  // JavaScript/TypeScript supports inline object types in
2666  // extends/implements positions:
2667  // class Foo implements {bar: number} { }
2668  nextToken();
2669  if (FormatTok->is(tok::l_brace)) {
2670  tryToParseBracedList();
2671  continue;
2672  }
2673  }
2674  bool IsNonMacroIdentifier =
2675  FormatTok->is(tok::identifier) &&
2676  FormatTok->TokenText != FormatTok->TokenText.upper();
2677  nextToken();
2678  // We can have macros or attributes in between 'class' and the class name.
2679  if (!IsNonMacroIdentifier) {
2680  if (FormatTok->Tok.is(tok::l_paren)) {
2681  parseParens();
2682  } else if (FormatTok->is(TT_AttributeSquare)) {
2683  parseSquare();
2684  // Consume the closing TT_AttributeSquare.
2685  if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
2686  nextToken();
2687  }
2688  }
2689  }
2690 
2691  // Note that parsing away template declarations here leads to incorrectly
2692  // accepting function declarations as record declarations.
2693  // In general, we cannot solve this problem. Consider:
2694  // class A<int> B() {}
2695  // which can be a function definition or a class definition when B() is a
2696  // macro. If we find enough real-world cases where this is a problem, we
2697  // can parse for the 'template' keyword in the beginning of the statement,
2698  // and thus rule out the record production in case there is no template
2699  // (this would still leave us with an ambiguity between template function
2700  // and class declarations).
2701  if (FormatTok->isOneOf(tok::colon, tok::less)) {
2702  while (!eof()) {
2703  if (FormatTok->is(tok::l_brace)) {
2704  calculateBraceTypes(/*ExpectClassBody=*/true);
2705  if (!tryToParseBracedList())
2706  break;
2707  }
2708  if (FormatTok->Tok.is(tok::semi))
2709  return;
2710  if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
2711  addUnwrappedLine();
2712  nextToken();
2713  parseCSharpGenericTypeConstraint();
2714  break;
2715  }
2716  nextToken();
2717  }
2718  }
2719  if (FormatTok->Tok.is(tok::l_brace)) {
2720  if (ParseAsExpr) {
2721  parseChildBlock();
2722  } else {
2723  if (ShouldBreakBeforeBrace(Style, InitialToken))
2724  addUnwrappedLine();
2725 
2726  unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
2727  parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
2728  }
2729  }
2730  // There is no addUnwrappedLine() here so that we fall through to parsing a
2731  // structural element afterwards. Thus, in "class A {} n, m;",
2732  // "} n, m;" will end up in one unwrapped line.
2733 }
2734 
2735 void UnwrappedLineParser::parseObjCMethod() {
2736  assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2737  "'(' or identifier expected.");
2738  do {
2739  if (FormatTok->Tok.is(tok::semi)) {
2740  nextToken();
2741  addUnwrappedLine();
2742  return;
2743  } else if (FormatTok->Tok.is(tok::l_brace)) {
2744  if (Style.BraceWrapping.AfterFunction)
2745  addUnwrappedLine();
2746  parseBlock(/*MustBeDeclaration=*/false);
2747  addUnwrappedLine();
2748  return;
2749  } else {
2750  nextToken();
2751  }
2752  } while (!eof());
2753 }
2754 
2755 void UnwrappedLineParser::parseObjCProtocolList() {
2756  assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2757  do {
2758  nextToken();
2759  // Early exit in case someone forgot a close angle.
2760  if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2761  FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2762  return;
2763  } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2764  nextToken(); // Skip '>'.
2765 }
2766 
2767 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2768  do {
2769  if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2770  nextToken();
2771  addUnwrappedLine();
2772  break;
2773  }
2774  if (FormatTok->is(tok::l_brace)) {
2775  parseBlock(/*MustBeDeclaration=*/false);
2776  // In ObjC interfaces, nothing should be following the "}".
2777  addUnwrappedLine();
2778  } else if (FormatTok->is(tok::r_brace)) {
2779  // Ignore stray "}". parseStructuralElement doesn't consume them.
2780  nextToken();
2781  addUnwrappedLine();
2782  } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2783  nextToken();
2784  parseObjCMethod();
2785  } else {
2786  parseStructuralElement();
2787  }
2788  } while (!eof());
2789 }
2790 
2791 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2792  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2793  FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2794  nextToken();
2795  nextToken(); // interface name
2796 
2797  // @interface can be followed by a lightweight generic
2798  // specialization list, then either a base class or a category.
2799  if (FormatTok->Tok.is(tok::less)) {
2800  parseObjCLightweightGenerics();
2801  }
2802  if (FormatTok->Tok.is(tok::colon)) {
2803  nextToken();
2804  nextToken(); // base class name
2805  // The base class can also have lightweight generics applied to it.
2806  if (FormatTok->Tok.is(tok::less)) {
2807  parseObjCLightweightGenerics();
2808  }
2809  } else if (FormatTok->Tok.is(tok::l_paren))
2810  // Skip category, if present.
2811  parseParens();
2812 
2813  if (FormatTok->Tok.is(tok::less))
2814  parseObjCProtocolList();
2815 
2816  if (FormatTok->Tok.is(tok::l_brace)) {
2817  if (Style.BraceWrapping.AfterObjCDeclaration)
2818  addUnwrappedLine();
2819  parseBlock(/*MustBeDeclaration=*/true);
2820  }
2821 
2822  // With instance variables, this puts '}' on its own line. Without instance
2823  // variables, this ends the @interface line.
2824  addUnwrappedLine();
2825 
2826  parseObjCUntilAtEnd();
2827 }
2828 
2829 void UnwrappedLineParser::parseObjCLightweightGenerics() {
2830  assert(FormatTok->Tok.is(tok::less));
2831  // Unlike protocol lists, generic parameterizations support
2832  // nested angles:
2833  //
2834  // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2835  // NSObject <NSCopying, NSSecureCoding>
2836  //
2837  // so we need to count how many open angles we have left.
2838  unsigned NumOpenAngles = 1;
2839  do {
2840  nextToken();
2841  // Early exit in case someone forgot a close angle.
2842  if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2843  FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2844  break;
2845  if (FormatTok->Tok.is(tok::less))
2846  ++NumOpenAngles;
2847  else if (FormatTok->Tok.is(tok::greater)) {
2848  assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2849  --NumOpenAngles;
2850  }
2851  } while (!eof() && NumOpenAngles != 0);
2852  nextToken(); // Skip '>'.
2853 }
2854 
2855 // Returns true for the declaration/definition form of @protocol,
2856 // false for the expression form.
2857 bool UnwrappedLineParser::parseObjCProtocol() {
2858  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2859  nextToken();
2860 
2861  if (FormatTok->is(tok::l_paren))
2862  // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2863  return false;
2864 
2865  // The definition/declaration form,
2866  // @protocol Foo
2867  // - (int)someMethod;
2868  // @end
2869 
2870  nextToken(); // protocol name
2871 
2872  if (FormatTok->Tok.is(tok::less))
2873  parseObjCProtocolList();
2874 
2875  // Check for protocol declaration.
2876  if (FormatTok->Tok.is(tok::semi)) {
2877  nextToken();
2878  addUnwrappedLine();
2879  return true;
2880  }
2881 
2882  addUnwrappedLine();
2883  parseObjCUntilAtEnd();
2884  return true;
2885 }
2886 
2887 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2888  bool IsImport = FormatTok->is(Keywords.kw_import);
2889  assert(IsImport || FormatTok->is(tok::kw_export));
2890  nextToken();
2891 
2892  // Consume the "default" in "export default class/function".
2893  if (FormatTok->is(tok::kw_default))
2894  nextToken();
2895 
2896  // Consume "async function", "function" and "default function", so that these
2897  // get parsed as free-standing JS functions, i.e. do not require a trailing
2898  // semicolon.
2899  if (FormatTok->is(Keywords.kw_async))
2900  nextToken();
2901  if (FormatTok->is(Keywords.kw_function)) {
2902  nextToken();
2903  return;
2904  }
2905 
2906  // For imports, `export *`, `export {...}`, consume the rest of the line up
2907  // to the terminating `;`. For everything else, just return and continue
2908  // parsing the structural element, i.e. the declaration or expression for
2909  // `export default`.
2910  if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2911  !FormatTok->isStringLiteral())
2912  return;
2913 
2914  while (!eof()) {
2915  if (FormatTok->is(tok::semi))
2916  return;
2917  if (Line->Tokens.empty()) {
2918  // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2919  // import statement should terminate.
2920  return;
2921  }
2922  if (FormatTok->is(tok::l_brace)) {
2923  FormatTok->setBlockKind(BK_Block);
2924  nextToken();
2925  parseBracedList();
2926  } else {
2927  nextToken();
2928  }
2929  }
2930 }
2931 
2932 void UnwrappedLineParser::parseStatementMacro() {
2933  nextToken();
2934  if (FormatTok->is(tok::l_paren))
2935  parseParens();
2936  if (FormatTok->is(tok::semi))
2937  nextToken();
2938  addUnwrappedLine();
2939 }
2940 
2941 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2942  StringRef Prefix = "") {
2943  llvm::dbgs() << Prefix << "Line(" << Line.Level
2944  << ", FSC=" << Line.FirstStartColumn << ")"
2945  << (Line.InPPDirective ? " MACRO" : "") << ": ";
2946  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2947  E = Line.Tokens.end();
2948  I != E; ++I) {
2949  llvm::dbgs() << I->Tok->Tok.getName() << "["
2950  << "T=" << (unsigned)I->Tok->getType()
2951  << ", OC=" << I->Tok->OriginalColumn << "] ";
2952  }
2953  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2954  E = Line.Tokens.end();
2955  I != E; ++I) {
2956  const UnwrappedLineNode &Node = *I;
2958  I = Node.Children.begin(),
2959  E = Node.Children.end();
2960  I != E; ++I) {
2961  printDebugInfo(*I, "\nChild: ");
2962  }
2963  }
2964  llvm::dbgs() << "\n";
2965 }
2966 
2967 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
2968  if (Line->Tokens.empty())
2969  return;
2970  LLVM_DEBUG({
2971  if (CurrentLines == &Lines)
2972  printDebugInfo(*Line);
2973  });
2974 
2975  // If this line closes a block when in Whitesmiths mode, remember that
2976  // information so that the level can be decreased after the line is added.
2977  // This has to happen after the addition of the line since the line itself
2978  // needs to be indented.
2979  bool ClosesWhitesmithsBlock =
2980  Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
2981  Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2982 
2983  CurrentLines->push_back(std::move(*Line));
2984  Line->Tokens.clear();
2985  Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2986  Line->FirstStartColumn = 0;
2987 
2988  if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
2989  --Line->Level;
2990  if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2991  CurrentLines->append(
2992  std::make_move_iterator(PreprocessorDirectives.begin()),
2993  std::make_move_iterator(PreprocessorDirectives.end()));
2994  PreprocessorDirectives.clear();
2995  }
2996  // Disconnect the current token from the last token on the previous line.
2997  FormatTok->Previous = nullptr;
2998 }
2999 
3000 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
3001 
3002 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
3003  return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
3004  FormatTok.NewlinesBefore > 0;
3005 }
3006 
3007 // Checks if \p FormatTok is a line comment that continues the line comment
3008 // section on \p Line.
3009 static bool
3011  const UnwrappedLine &Line,
3012  const llvm::Regex &CommentPragmasRegex) {
3013  if (Line.Tokens.empty())
3014  return false;
3015 
3016  StringRef IndentContent = FormatTok.TokenText;
3017  if (FormatTok.TokenText.startswith("//") ||
3018  FormatTok.TokenText.startswith("/*"))
3019  IndentContent = FormatTok.TokenText.substr(2);
3020  if (CommentPragmasRegex.match(IndentContent))
3021  return false;
3022 
3023  // If Line starts with a line comment, then FormatTok continues the comment
3024  // section if its original column is greater or equal to the original start
3025  // column of the line.
3026  //
3027  // Define the min column token of a line as follows: if a line ends in '{' or
3028  // contains a '{' followed by a line comment, then the min column token is
3029  // that '{'. Otherwise, the min column token of the line is the first token of
3030  // the line.
3031  //
3032  // If Line starts with a token other than a line comment, then FormatTok
3033  // continues the comment section if its original column is greater than the
3034  // original start column of the min column token of the line.
3035  //
3036  // For example, the second line comment continues the first in these cases:
3037  //
3038  // // first line
3039  // // second line
3040  //
3041  // and:
3042  //
3043  // // first line
3044  // // second line
3045  //
3046  // and:
3047  //
3048  // int i; // first line
3049  // // second line
3050  //
3051  // and:
3052  //
3053  // do { // first line
3054  // // second line
3055  // int i;
3056  // } while (true);
3057  //
3058  // and:
3059  //
3060  // enum {
3061  // a, // first line
3062  // // second line
3063  // b
3064  // };
3065  //
3066  // The second line comment doesn't continue the first in these cases:
3067  //
3068  // // first line
3069  // // second line
3070  //
3071  // and:
3072  //
3073  // int i; // first line
3074  // // second line
3075  //
3076  // and:
3077  //
3078  // do { // first line
3079  // // second line
3080  // int i;
3081  // } while (true);
3082  //
3083  // and:
3084  //
3085  // enum {
3086  // a, // first line
3087  // // second line
3088  // };
3089  const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
3090 
3091  // Scan for '{//'. If found, use the column of '{' as a min column for line
3092  // comment section continuation.
3093  const FormatToken *PreviousToken = nullptr;
3094  for (const UnwrappedLineNode &Node : Line.Tokens) {
3095  if (PreviousToken && PreviousToken->is(tok::l_brace) &&
3096  isLineComment(*Node.Tok)) {
3097  MinColumnToken = PreviousToken;
3098  break;
3099  }
3100  PreviousToken = Node.Tok;
3101 
3102  // Grab the last newline preceding a token in this unwrapped line.
3103  if (Node.Tok->NewlinesBefore > 0) {
3104  MinColumnToken = Node.Tok;
3105  }
3106  }
3107  if (PreviousToken && PreviousToken->is(tok::l_brace)) {
3108  MinColumnToken = PreviousToken;
3109  }
3110 
3111  return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
3112  MinColumnToken);
3113 }
3114 
3115 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
3116  bool JustComments = Line->Tokens.empty();
3118  I = CommentsBeforeNextToken.begin(),
3119  E = CommentsBeforeNextToken.end();
3120  I != E; ++I) {
3121  // Line comments that belong to the same line comment section are put on the
3122  // same line since later we might want to reflow content between them.
3123  // Additional fine-grained breaking of line comment sections is controlled
3124  // by the class BreakableLineCommentSection in case it is desirable to keep
3125  // several line comment sections in the same unwrapped line.
3126  //
3127  // FIXME: Consider putting separate line comment sections as children to the
3128  // unwrapped line instead.
3129  (*I)->ContinuesLineCommentSection =
3130  continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
3131  if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
3132  addUnwrappedLine();
3133  pushToken(*I);
3134  }
3135  if (NewlineBeforeNext && JustComments)
3136  addUnwrappedLine();
3137  CommentsBeforeNextToken.clear();
3138 }
3139 
3140 void UnwrappedLineParser::nextToken(int LevelDifference) {
3141  if (eof())
3142  return;
3143  flushComments(isOnNewLine(*FormatTok));
3144  pushToken(FormatTok);
3145  FormatToken *Previous = FormatTok;
3146  if (Style.Language != FormatStyle::LK_JavaScript)
3147  readToken(LevelDifference);
3148  else
3149  readTokenWithJavaScriptASI();
3150  FormatTok->Previous = Previous;
3151 }
3152 
3153 void UnwrappedLineParser::distributeComments(
3154  const SmallVectorImpl<FormatToken *> &Comments,
3155  const FormatToken *NextTok) {
3156  // Whether or not a line comment token continues a line is controlled by
3157  // the method continuesLineCommentSection, with the following caveat:
3158  //
3159  // Define a trail of Comments to be a nonempty proper postfix of Comments such
3160  // that each comment line from the trail is aligned with the next token, if
3161  // the next token exists. If a trail exists, the beginning of the maximal
3162  // trail is marked as a start of a new comment section.
3163  //
3164  // For example in this code:
3165  //
3166  // int a; // line about a
3167  // // line 1 about b
3168  // // line 2 about b
3169  // int b;
3170  //
3171  // the two lines about b form a maximal trail, so there are two sections, the
3172  // first one consisting of the single comment "// line about a" and the
3173  // second one consisting of the next two comments.
3174  if (Comments.empty())
3175  return;
3176  bool ShouldPushCommentsInCurrentLine = true;
3177  bool HasTrailAlignedWithNextToken = false;
3178  unsigned StartOfTrailAlignedWithNextToken = 0;
3179  if (NextTok) {
3180  // We are skipping the first element intentionally.
3181  for (unsigned i = Comments.size() - 1; i > 0; --i) {
3182  if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
3183  HasTrailAlignedWithNextToken = true;
3184  StartOfTrailAlignedWithNextToken = i;
3185  }
3186  }
3187  }
3188  for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
3189  FormatToken *FormatTok = Comments[i];
3190  if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
3191  FormatTok->ContinuesLineCommentSection = false;
3192  } else {
3193  FormatTok->ContinuesLineCommentSection =
3194  continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
3195  }
3196  if (!FormatTok->ContinuesLineCommentSection &&
3197  (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
3198  ShouldPushCommentsInCurrentLine = false;
3199  }
3200  if (ShouldPushCommentsInCurrentLine) {
3201  pushToken(FormatTok);
3202  } else {
3203  CommentsBeforeNextToken.push_back(FormatTok);
3204  }
3205  }
3206 }
3207 
3208 void UnwrappedLineParser::readToken(int LevelDifference) {
3209  SmallVector<FormatToken *, 1> Comments;
3210  do {
3211  FormatTok = Tokens->getNextToken();
3212  assert(FormatTok);
3213  while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
3214  (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
3215  distributeComments(Comments, FormatTok);
3216  Comments.clear();
3217  // If there is an unfinished unwrapped line, we flush the preprocessor
3218  // directives only after that unwrapped line was finished later.
3219  bool SwitchToPreprocessorLines = !Line->Tokens.empty();
3220  ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
3221  assert((LevelDifference >= 0 ||
3222  static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
3223  "LevelDifference makes Line->Level negative");
3224  Line->Level += LevelDifference;
3225  // Comments stored before the preprocessor directive need to be output
3226  // before the preprocessor directive, at the same level as the
3227  // preprocessor directive, as we consider them to apply to the directive.
3228  if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
3229  PPBranchLevel > 0)
3230  Line->Level += PPBranchLevel;
3231  flushComments(isOnNewLine(*FormatTok));
3232  parsePPDirective();
3233  }
3234  while (FormatTok->getType() == TT_ConflictStart ||
3235  FormatTok->getType() == TT_ConflictEnd ||
3236  FormatTok->getType() == TT_ConflictAlternative) {
3237  if (FormatTok->getType() == TT_ConflictStart) {
3238  conditionalCompilationStart(/*Unreachable=*/false);
3239  } else if (FormatTok->getType() == TT_ConflictAlternative) {
3240  conditionalCompilationAlternative();
3241  } else if (FormatTok->getType() == TT_ConflictEnd) {
3242  conditionalCompilationEnd();
3243  }
3244  FormatTok = Tokens->getNextToken();
3245  FormatTok->MustBreakBefore = true;
3246  }
3247 
3248  if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
3249  !Line->InPPDirective) {
3250  continue;
3251  }
3252 
3253  if (!FormatTok->Tok.is(tok::comment)) {
3254  distributeComments(Comments, FormatTok);
3255  Comments.clear();
3256  return;
3257  }
3258 
3259  Comments.push_back(FormatTok);
3260  } while (!eof());
3261 
3262  distributeComments(Comments, nullptr);
3263  Comments.clear();
3264 }
3265 
3266 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
3267  Line->Tokens.push_back(UnwrappedLineNode(Tok));
3268  if (MustBreakBeforeNextToken) {
3269  Line->Tokens.back().Tok->MustBreakBefore = true;
3270  MustBreakBeforeNextToken = false;
3271  }
3272 }
3273 
3274 } // end namespace format
3275 } // end namespace clang
clang::format::CompoundStatementIndenter
Definition: UnwrappedLineParser.cpp:172
clang::minimize_source_to_dependency_directives::pp_ifdef
@ pp_ifdef
Definition: DependencyDirectivesSourceMinimizer.h:44
clang::format::AdditionalKeywords::kw_implements
IdentifierInfo * kw_implements
Definition: FormatToken.h:987
clang::format::ScopedLineState
Definition: UnwrappedLineParser.cpp:139
clang::format::FormatTokenSource::getNextToken
virtual FormatToken * getNextToken()=0
clang::format::AdditionalKeywords::kw_NS_OPTIONS
IdentifierInfo * kw_NS_OPTIONS
Definition: FormatToken.h:957
clang::format::AdditionalKeywords::kw_abstract
IdentifierInfo * kw_abstract
Definition: FormatToken.h:984
clang::format::AdditionalKeywords::kw_CF_ENUM
IdentifierInfo * kw_CF_ENUM
Definition: FormatToken.h:953
clang::format::isJSDeclOrStmt
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
Definition: UnwrappedLineParser.cpp:976
clang::format::ScopedLineState::~ScopedLineState
~ScopedLineState()
Definition: UnwrappedLineParser.cpp:154
clang::format::mustBeJSIdent
static bool mustBeJSIdent(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
Definition: UnwrappedLineParser.cpp:952
clang::format::AdditionalKeywords::kw_internal
IdentifierInfo * kw_internal
Definition: FormatToken.h:1023
clang::format::AdditionalKeywords::kw_instanceof
IdentifierInfo * kw_instanceof
Definition: FormatToken.h:988
clang::format::BK_Unknown
@ BK_Unknown
Definition: FormatToken.h:135
clang::format::AdditionalKeywords::kw_from
IdentifierInfo * kw_from
Definition: FormatToken.h:968
clang::format::UnwrappedLineParser::parse
void parse()
Definition: UnwrappedLineParser.cpp:256
clang::Token::getIdentifierInfo
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:179
clang::format::UnwrappedLine::kInvalidIndex
static const size_t kInvalidIndex
Definition: UnwrappedLineParser.h:61
clang::format::AnnotatedLine::Level
unsigned Level
Definition: TokenAnnotator.h:128
clang::format::AdditionalKeywords::kw_is
IdentifierInfo * kw_is
Definition: FormatToken.h:973
clang::format::FormatStyle
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
clang::minimize_source_to_dependency_directives::pp_ifndef
@ pp_ifndef
Definition: DependencyDirectivesSourceMinimizer.h:45
clang::Parser
Parser - This implements a parser for the C family of languages.
Definition: Parser.h:60
clang::format::FormatToken::TokenText
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:226
clang::format::AdditionalKeywords::kw_CF_CLOSED_ENUM
IdentifierInfo * kw_CF_CLOSED_ENUM
Definition: FormatToken.h:952
clang::format::FormatToken
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:209
clang::format::AdditionalKeywords
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:831
clang::format::FormatToken::isNot
bool isNot(T Kind) const
Definition: FormatToken.h:461
clang::minimize_source_to_dependency_directives::pp_define
@ pp_define
Definition: DependencyDirectivesSourceMinimizer.h:37
clang::format::UnwrappedLine
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...
Definition: UnwrappedLineParser.h:36
clang::format::AdditionalKeywords::kw_as
IdentifierInfo * kw_as
Definition: FormatToken.h:963
clang::format::AdditionalKeywords::kw_signals
IdentifierInfo * kw_signals
Definition: FormatToken.h:1007
clang::format::BK_BracedInit
@ BK_BracedInit
Definition: FormatToken.h:135
clang::format::UnwrappedLineParser
Definition: UnwrappedLineParser.h:75
clang::format::AnnotatedLine::FirstStartColumn
unsigned FirstStartColumn
Definition: TokenAnnotator.h:147
clang::format::FormatTokenSource::~FormatTokenSource
virtual ~FormatTokenSource()
Definition: UnwrappedLineParser.cpp:30
size_t
__SIZE_TYPE__ size_t
The unsigned integer type of the result of the sizeof operator.
Definition: opencl-c-base.h:70
clang::minimize_source_to_dependency_directives::pp_if
@ pp_if
Definition: DependencyDirectivesSourceMinimizer.h:43
clang::format::AdditionalKeywords::kw_import
IdentifierInfo * kw_import
Definition: FormatToken.h:971
clang::format::UnwrappedLineParser::CompoundStatementIndenter
friend class CompoundStatementIndenter
Definition: UnwrappedLineParser.h:297
clang::format::AdditionalKeywords::kw_await
IdentifierInfo * kw_await
Definition: FormatToken.h:965
clang::format::AdditionalKeywords::kw_extends
IdentifierInfo * kw_extends
Definition: FormatToken.h:986
clang::format::AdditionalKeywords::kw_async
IdentifierInfo * kw_async
Definition: FormatToken.h:964
clang::format::CompoundStatementIndenter::~CompoundStatementIndenter
~CompoundStatementIndenter()
Definition: UnwrappedLineParser.cpp:187
clang::format::AdditionalKeywords::kw_qslots
IdentifierInfo * kw_qslots
Definition: FormatToken.h:1010
clang::format::AdditionalKeywords::kw_NS_ENUM
IdentifierInfo * kw_NS_ENUM
Definition: FormatToken.h:956
Node
DynTypedNode Node
Definition: ASTMatchFinder.cpp:67
clang::format::FormatToken::Tok
Token Tok
The Token.
Definition: FormatToken.h:220
clang::format::UnwrappedLineConsumer
Definition: UnwrappedLineParser.h:66
clang::format::AdditionalKeywords::kw_throws
IdentifierInfo * kw_throws
Definition: FormatToken.h:993
clang::format::ShouldBreakBeforeBrace
static bool ShouldBreakBeforeBrace(const FormatStyle &Style, const FormatToken &InitialToken)
Definition: UnwrappedLineParser.cpp:699
clang::format::continuesLineCommentSection
static bool continuesLineCommentSection(const FormatToken &FormatTok, const UnwrappedLine &Line, const llvm::Regex &CommentPragmasRegex)
Definition: UnwrappedLineParser.cpp:3010
clang::format::AdditionalKeywords::kw_in
IdentifierInfo * kw_in
Definition: FormatToken.h:950
clang::format::CompoundStatementIndenter::CompoundStatementIndenter
CompoundStatementIndenter(UnwrappedLineParser *Parser, const FormatStyle &Style, unsigned &LineLevel)
Definition: UnwrappedLineParser.cpp:174
clang::format::AdditionalKeywords::kw_var
IdentifierInfo * kw_var
Definition: FormatToken.h:980
clang::format::AdditionalKeywords::kw_of
IdentifierInfo * kw_of
Definition: FormatToken.h:951
clang::format::isGoogScope
static bool isGoogScope(const UnwrappedLine &Line)
Definition: UnwrappedLineParser.cpp:663
clang::format::AdditionalKeywords::kw_finally
IdentifierInfo * kw_finally
Definition: FormatToken.h:967
clang::format::BK_Block
@ BK_Block
Definition: FormatToken.h:135
clang::minimize_source_to_dependency_directives::pp_include
@ pp_include
Definition: DependencyDirectivesSourceMinimizer.h:35
clang::format::FormatTokenSource::setPosition
virtual FormatToken * setPosition(unsigned Position)=0
clang::format::AnnotatedLine::InPPDirective
bool InPPDirective
Definition: TokenAnnotator.h:131
Line
const AnnotatedLine * Line
Definition: UsingDeclarationsSorter.cpp:68
clang::format::CompoundStatementIndenter::CompoundStatementIndenter
CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, bool WrapBrace, bool IndentBrace)
Definition: UnwrappedLineParser.cpp:179
clang::minimize_source_to_dependency_directives::pp_else
@ pp_else
Definition: DependencyDirectivesSourceMinimizer.h:47
clang::format::hash_combine
static void hash_combine(std::size_t &seed, const T &v)
Definition: UnwrappedLineParser.cpp:568
clang::Token::isLiteral
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:115
UnwrappedLineParser.h
clang::format::UnwrappedLineParser::ScopedLineState
friend class ScopedLineState
Definition: UnwrappedLineParser.h:296
clang::tok::TokenKind
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
P
StringRef P
Definition: ASTMatchersInternal.cpp:563
false
#define false
Definition: stdbool.h:17
clang::format::AdditionalKeywords::kw_let
IdentifierInfo * kw_let
Definition: FormatToken.h:974
llvm::ArrayRef
Definition: LLVM.h:34
clang::format::AdditionalKeywords::kw_get
IdentifierInfo * kw_get
Definition: FormatToken.h:970
clang::format::FormatToken::is
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:442
clang::format::AdditionalKeywords::kw_set
IdentifierInfo * kw_set
Definition: FormatToken.h:977
clang::format::FormatToken::isOneOf
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:454
clang::format::ScopedLineState::ScopedLineState
ScopedLineState(UnwrappedLineParser &Parser, bool SwitchToPreprocessorLines=false)
Definition: UnwrappedLineParser.cpp:141
clang::format::AdditionalKeywords::kw_function
IdentifierInfo * kw_function
Definition: FormatToken.h:969
clang::format::UnwrappedLineConsumer::consumeUnwrappedLine
virtual void consumeUnwrappedLine(const UnwrappedLine &Line)=0
clang::format::FormatTokenSource::getPosition
virtual unsigned getPosition()=0
clang::format::UnwrappedLineNode
Definition: UnwrappedLineParser.h:300
clang::format::AdditionalKeywords::kw_interface
IdentifierInfo * kw_interface
Definition: FormatToken.h:989
clang
Definition: CalledOnceCheck.h:17
Text
StringRef Text
Definition: Format.cpp:2177
clang::format::UnwrappedLineConsumer::finishRun
virtual void finishRun()=0
clang::format::AdditionalKeywords::kw_yield
IdentifierInfo * kw_yield
Definition: FormatToken.h:981
clang::format::AdditionalKeywords::kw___except
IdentifierInfo * kw___except
Definition: FormatToken.h:958
clang::prec::Level
Level
Definition: OperatorPrecedence.h:26
clang::format::AdditionalKeywords::kw_NS_CLOSED_ENUM
IdentifierInfo * kw_NS_CLOSED_ENUM
Definition: FormatToken.h:955
unsigned
clang::minimize_source_to_dependency_directives::pp_endif
@ pp_endif
Definition: DependencyDirectivesSourceMinimizer.h:48
clang::format::AdditionalKeywords::kw_CF_OPTIONS
IdentifierInfo * kw_CF_OPTIONS
Definition: FormatToken.h:954
clang::format::printDebugInfo
static LLVM_ATTRIBUTE_UNUSED void printDebugInfo(const UnwrappedLine &Line, StringRef Prefix="")
Definition: UnwrappedLineParser.cpp:2941
FormatToken.h
v
do v
Definition: arm_acle.h:76
clang::format::AdditionalKeywords::kw_qsignals
IdentifierInfo * kw_qsignals
Definition: FormatToken.h:1008
clang::format::AdditionalKeywords::kw_where
IdentifierInfo * kw_where
Definition: FormatToken.h:1043
clang::comments::tok::eof
@ eof
Definition: CommentLexer.h:33
llvm::SmallVectorImpl
Definition: LLVM.h:39
clang::format::mustBeJSIdentOrValue
static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
Definition: UnwrappedLineParser.cpp:967
Indenter
ContinuationIndenter * Indenter
Definition: UnwrappedLineFormatter.cpp:868
Previous
StateNode * Previous
Definition: UnwrappedLineFormatter.cpp:973
clang::format::tokenCanStartNewLine
static bool tokenCanStartNewLine(const FormatToken &Tok)
Definition: UnwrappedLineParser.cpp:926
clang::format::isIIFE
static bool isIIFE(const UnwrappedLine &Line, const AdditionalKeywords &Keywords)
Definition: UnwrappedLineParser.cpp:681
clang::format::AdditionalKeywords::kw_slots
IdentifierInfo * kw_slots
Definition: FormatToken.h:1009
clang::diag::kind
unsigned kind
All of the diagnostics that can be emitted by the frontend.
Definition: DiagnosticIDs.h:60
clang::minimize_source_to_dependency_directives::pp_elif
@ pp_elif
Definition: DependencyDirectivesSourceMinimizer.h:46
clang::format::UnwrappedLineParser::UnwrappedLineParser
UnwrappedLineParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, unsigned FirstStartColumn, ArrayRef< FormatToken * > Tokens, UnwrappedLineConsumer &Callback)
Definition: UnwrappedLineParser.cpp:225
clang::format::FormatTokenSource
Definition: UnwrappedLineParser.cpp:28