clang  8.0.0svn
UnwrappedLineParser.cpp
Go to the documentation of this file.
1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #include <algorithm>
22 
23 #define DEBUG_TYPE "format-parser"
24 
25 namespace clang {
26 namespace format {
27 
// NOTE(review): the listing jumps from number 27 to 29 here, so the line that
// opens this class (presumably `class FormatTokenSource {`) is absent from this
// copy - confirm against the upstream file.
//
// Abstract interface of a token stream whose read position can be queried and
// moved. All operations are pure virtual; the implementations visible in this
// file are ScopedMacroState and IndexedTokenSource.
29 public:
30  virtual ~FormatTokenSource() {}
    // Advances the source and returns the token it yields.
31  virtual FormatToken *getNextToken() = 0;
32 
    // Returns the current read position. Callers in this file store the value
    // and later hand it back to setPosition() to undo a look-ahead.
33  virtual unsigned getPosition() = 0;
    // Moves the read position to \p Position and returns a token pointer
    // (IndexedTokenSource returns the token stored at that position).
34  virtual FormatToken *setPosition(unsigned Position) = 0;
35 };
36 
37 namespace {
38 
// Scope guard that pushes a "must be declaration" flag for the lifetime of the
// object and restores the enclosing scope's flag on destruction.
39 class ScopedDeclarationState {
40 public:
    // Writes \p MustBeDeclaration into \p Line and pushes it onto \p Stack.
41  ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
42  bool MustBeDeclaration)
43  : Line(Line), Stack(Stack) {
44  Line.MustBeDeclaration = MustBeDeclaration;
45  Stack.push_back(MustBeDeclaration);
46  }
    // Pops the flag pushed by the constructor. The line then takes the flag of
    // the enclosing scope, or true when no scope remains on the stack.
47  ~ScopedDeclarationState() {
48  Stack.pop_back();
49  if (!Stack.empty())
50  Line.MustBeDeclaration = Stack.back();
51  else
52  Line.MustBeDeclaration = true;
53  }
54 
55 private:
    // NOTE(review): listing line 56 is absent from this copy. The constructor
    // initialises a member named `Line`, so the missing line presumably
    // declares it (likely `UnwrappedLine &Line;`) - confirm against upstream.
57  std::vector<bool> &Stack;
58 };
59 
60 static bool isLineComment(const FormatToken &FormatTok) {
61  return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
62 }
63 
64 // Checks if \p FormatTok is a line comment that continues the line comment
65 // \p Previous. The original column of \p MinColumnToken is used to determine
66 // whether \p FormatTok is indented enough to the right to continue \p Previous.
67 static bool continuesLineComment(const FormatToken &FormatTok,
68  const FormatToken *Previous,
69  const FormatToken *MinColumnToken) {
70  if (!Previous || !MinColumnToken)
71  return false;
72  unsigned MinContinueColumn =
73  MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
74  return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
75  isLineComment(*Previous) &&
76  FormatTok.OriginalColumn >= MinContinueColumn;
77 }
78 
// Token source that is installed while a preprocessor directive is parsed.
// It forwards to the token source that was active before it and hands out a
// fake eof token as soon as eof() below decides that the directive has ended.
79 class ScopedMacroState : public FormatTokenSource {
80 public:
    // Remembers the caller's line level and token source, then installs this
    // object as the token source, sets the line level to 0 and marks the line
    // as being inside a preprocessor directive. The fake eof token is prepared
    // here as well.
81  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
82  FormatToken *&ResetToken)
83  : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
84  PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
85  Token(nullptr), PreviousToken(nullptr) {
86  FakeEOF.Tok.startToken();
87  FakeEOF.Tok.setKind(tok::eof);
88  TokenSource = this;
89  Line.Level = 0;
90  Line.InPPDirective = true;
91  }
92 
    // Undoes the constructor: reinstalls the previous token source, writes the
    // last token read through this object into ResetToken, clears the
    // in-directive flag and restores the saved line level.
93  ~ScopedMacroState() override {
94  TokenSource = PreviousTokenSource;
95  ResetToken = Token;
96  Line.InPPDirective = false;
97  Line.Level = PreviousLineLevel;
98  }
99 
    // Pulls the next token from the previous source. Returns the fake eof
    // token instead when that token satisfies eof().
100  FormatToken *getNextToken() override {
101  // The \c UnwrappedLineParser guards against this by never calling
102  // \c getNextToken() after it has encountered the first eof token.
103  assert(!eof());
104  PreviousToken = Token;
105  Token = PreviousTokenSource->getNextToken();
106  if (eof())
107  return &FakeEOF;
108  return Token;
109  }
110 
    // Position queries are delegated to the previous source.
111  unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
112 
    // Repositions the previous source and forgets the previously seen token.
113  FormatToken *setPosition(unsigned Position) override {
114  PreviousToken = nullptr;
115  Token = PreviousTokenSource->setPosition(Position);
116  return Token;
117  }
118 
119 private:
    // True when the current token is preceded by an unescaped newline and does
    // not continue a line comment started by the previous token.
120  bool eof() {
121  return Token && Token->HasUnescapedNewline &&
122  !continuesLineComment(*Token, PreviousToken,
123  /*MinColumnToken=*/PreviousToken);
124  }
125 
126  FormatToken FakeEOF;
    // NOTE(review): listing line 127 is absent from this copy. The constructor
    // initialises a member named `Line`, so it presumably declares that member
    // - confirm against upstream.
128  FormatTokenSource *&TokenSource;
129  FormatToken *&ResetToken;
130  unsigned PreviousLineLevel;
131  FormatTokenSource *PreviousTokenSource;
132 
    // NOTE(review): listing line 133 is absent from this copy. The member
    // `Token` used throughout the class is not declared in the visible text,
    // so it presumably lives on that line - confirm against upstream.
134  FormatToken *PreviousToken;
135 };
136 
137 } // end anonymous namespace
138 
// NOTE(review): listing lines 139, 141, 154 and 166 are absent from this copy.
// Judging by the visible text they are presumably the class header, the first
// line of the constructor signature, the destructor header and the declaration
// of the `Parser` member. The class name does not appear in the visible text;
// parseChildBlock() instantiates a `ScopedLineState` with matching arguments -
// confirm all of this against the upstream file.
//
// Scope guard that parks the parser's current line and lets the parser work on
// a fresh one; the parked line is put back when the guard goes away.
140 public:
    // Constructor (first signature line missing, see note above). Chooses
    // where lines completed during the scope are collected: the preprocessor
    // directive list when SwitchToPreprocessorLines is set, otherwise the
    // children of the last token of the current line (if that line has
    // tokens). The current line is then moved aside and replaced by a new one
    // that inherits its Level and InPPDirective values.
142  bool SwitchToPreprocessorLines = false)
143  : Parser(Parser), OriginalLines(Parser.CurrentLines) {
144  if (SwitchToPreprocessorLines)
145  Parser.CurrentLines = &Parser.PreprocessorDirectives;
146  else if (!Parser.Line->Tokens.empty())
147  Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
148  PreBlockLine = std::move(Parser.Line);
149  Parser.Line = llvm::make_unique<UnwrappedLine>();
150  Parser.Line->Level = PreBlockLine->Level;
151  Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
152  }
153 
    // Presumably the destructor body (its header line is missing). Emits the
    // line under construction if it holds tokens, puts the parked line back,
    // requests a break before the next token when lines were being collected
    // as preprocessor directives, and restores the original line container.
155  if (!Parser.Line->Tokens.empty()) {
156  Parser.addUnwrappedLine();
157  }
158  assert(Parser.Line->Tokens.empty());
159  Parser.Line = std::move(PreBlockLine);
160  if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
161  Parser.MustBreakBeforeNextToken = true;
162  Parser.CurrentLines = OriginalLines;
163  }
164 
165 private:
167 
    // The line that was current when the guard was created.
168  std::unique_ptr<UnwrappedLine> PreBlockLine;
    // The line container that was current when the guard was created.
169  SmallVectorImpl<UnwrappedLine> *OriginalLines;
170 };
171 
// NOTE(review): listing lines 172, 174 and 177 are absent from this copy.
// 172 and 174 are presumably the class header and the first line of the
// constructor signature (which must introduce the `Parser` pointer used
// below). 177 sits directly in front of the addUnwrappedLine() call and may
// well be a condition guarding it - confirm against the upstream file before
// relying on that call being unconditional.
//
// Scope guard around a compound statement: it may bump the referenced line
// level on construction and always restores the saved level on destruction.
173 public:
175  const FormatStyle &Style, unsigned &LineLevel)
176  : LineLevel(LineLevel), OldLineLevel(LineLevel) {
178  Parser->addUnwrappedLine();
    // With BraceWrapping.IndentBraces the level is raised by one for the
    // lifetime of this object.
179  if (Style.BraceWrapping.IndentBraces)
180  ++LineLevel;
181  }
    // Puts back the level captured by the constructor.
182  ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
183 
184 private:
185  unsigned &LineLevel;
186  unsigned OldLineLevel;
187 };
188 
189 namespace {
190 
// FormatTokenSource backed by an array of tokens and an integer cursor.
191 class IndexedTokenSource : public FormatTokenSource {
192 public:
    // The cursor starts at -1, i.e. in front of the first token.
193  IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
194  : Tokens(Tokens), Position(-1) {}
195 
    // Advances the cursor and returns the token it now refers to. No bounds
    // check is performed here.
196  FormatToken *getNextToken() override {
197  ++Position;
198  return Tokens[Position];
199  }
200 
    // Returns the cursor; asserts that at least one token has been read (or a
    // position has been set) since construction or reset().
201  unsigned getPosition() override {
202  assert(Position >= 0);
203  return Position;
204  }
205 
    // Moves the cursor to \p P and returns the token stored there.
206  FormatToken *setPosition(unsigned P) override {
207  Position = P;
208  return Tokens[Position];
209  }
210 
    // Rewinds the cursor to its initial "before the first token" state.
211  void reset() { Position = -1; }
212 
213 private:
    // NOTE(review): listing line 214 is absent from this copy. The `Tokens`
    // member initialised by the constructor is not declared in the visible
    // text, so it presumably lives on that line - confirm against upstream.
215  int Position;
216 };
217 
218 } // end anonymous namespace
219 
// NOTE(review): listing lines 220 and 223 are absent from this copy. They
// presumably hold the start of the constructor signature and one further
// parameter; the initialiser list below uses `Style` and a `Tokens` argument
// that the visible parameters do not introduce - confirm against upstream.
//
// Constructor: starts with a fresh UnwrappedLine, collects lines into `Lines`,
// leaves the active token source unset (nullptr) and begins outside of any
// preprocessor branch (PPBranchLevel == -1).
221  const AdditionalKeywords &Keywords,
222  unsigned FirstStartColumn,
224  UnwrappedLineConsumer &Callback)
225  : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
226  CurrentLines(&Lines), Style(Style), Keywords(Keywords),
227  CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
228  Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
    // Include-guard detection starts out rejected when preprocessor
    // directives are not indented (PPDIS_None) and armed otherwise.
229  IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
230  ? IG_Rejected
231  : IG_Inited),
232  IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
233 
234 void UnwrappedLineParser::reset() {
235  PPBranchLevel = -1;
236  IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
237  ? IG_Rejected
238  : IG_Inited;
239  IncludeGuardToken = nullptr;
240  Line.reset(new UnwrappedLine);
241  CommentsBeforeNextToken.clear();
242  FormatTok = nullptr;
243  MustBreakBeforeNextToken = false;
244  PreprocessorDirectives.clear();
245  CurrentLines = &Lines;
246  DeclarationScopeStack.clear();
247  PPStack.clear();
248  Line->FirstStartColumn = FirstStartColumn;
249 }
250 
// NOTE(review): listing line 251 is absent from this copy; it presumably holds
// the header of the function whose body follows (likely
// `void UnwrappedLineParser::parse() {`) - confirm against upstream.
//
// Runs the parser over all tokens, possibly several times: every iteration of
// the loop below resets the parser, parses the whole token stream, hands the
// resulting lines to the callback and then advances PPLevelBranchIndex. The
// loop ends once PPLevelBranchIndex has been emptied.
252  IndexedTokenSource TokenSource(AllTokens);
253  Line->FirstStartColumn = FirstStartColumn;
254  do {
255  LLVM_DEBUG(llvm::dbgs() << "----\n");
256  reset();
257  Tokens = &TokenSource;
258  TokenSource.reset();
259 
260  readToken();
261  parseFile();
262 
263  // If we found an include guard then all preprocessor directives (other than
264  // the guard) are over-indented by one.
265  if (IncludeGuard == IG_Found)
266  for (auto &Line : Lines)
267  if (Line.InPPDirective && Line.Level > 0)
268  --Line.Level;
269 
270  // Create line with eof token.
271  pushToken(FormatTok);
272  addUnwrappedLine();
273 
    // Deliver the lines of this run to the consumer, then drop them.
274  for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
275  E = Lines.end();
276  I != E; ++I) {
277  Callback.consumeUnwrappedLine(*I);
278  }
279  Callback.finishRun();
280  Lines.clear();
    // Drop trailing levels whose last branch has just been processed ...
281  while (!PPLevelBranchIndex.empty() &&
282  PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
283  PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
284  PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
285  }
    // ... and select the next branch on the deepest remaining level.
286  if (!PPLevelBranchIndex.empty()) {
287  ++PPLevelBranchIndex.back();
288  assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
289  assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
290  }
291  } while (!PPLevelBranchIndex.empty());
292 }
293 
294 void UnwrappedLineParser::parseFile() {
295  // The top-level context in a file always has declarations, except for pre-
296  // processor directives and JavaScript files.
297  bool MustBeDeclaration =
298  !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
299  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
300  MustBeDeclaration);
301  if (Style.Language == FormatStyle::LK_TextProto)
302  parseBracedList();
303  else
304  parseLevel(/*HasOpeningBrace=*/false);
305  // Make sure to format the remaining tokens.
306  //
307  // LK_TextProto is special since its top-level is parsed as the body of a
308  // braced list, which does not necessarily have natural line separators such
309  // as a semicolon. Comments after the last entry that have been determined to
310  // not belong to that line, as in:
311  // key: value
312  // // endfile comment
313  // do not have a chance to be put on a line of their own until this point.
314  // Here we add this newline before end-of-file comments.
315  if (Style.Language == FormatStyle::LK_TextProto &&
316  !CommentsBeforeNextToken.empty())
317  addUnwrappedLine();
318  flushComments(true);
319  addUnwrappedLine();
320 }
321 
322 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
323  bool SwitchLabelEncountered = false;
324  do {
325  tok::TokenKind kind = FormatTok->Tok.getKind();
326  if (FormatTok->Type == TT_MacroBlockBegin) {
327  kind = tok::l_brace;
328  } else if (FormatTok->Type == TT_MacroBlockEnd) {
329  kind = tok::r_brace;
330  }
331 
332  switch (kind) {
333  case tok::comment:
334  nextToken();
335  addUnwrappedLine();
336  break;
337  case tok::l_brace:
338  // FIXME: Add parameter whether this can happen - if this happens, we must
339  // be in a non-declaration context.
340  if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
341  continue;
342  parseBlock(/*MustBeDeclaration=*/false);
343  addUnwrappedLine();
344  break;
345  case tok::r_brace:
346  if (HasOpeningBrace)
347  return;
348  nextToken();
349  addUnwrappedLine();
350  break;
351  case tok::kw_default: {
352  unsigned StoredPosition = Tokens->getPosition();
353  FormatToken *Next;
354  do {
355  Next = Tokens->getNextToken();
356  } while (Next && Next->is(tok::comment));
357  FormatTok = Tokens->setPosition(StoredPosition);
358  if (Next && Next->isNot(tok::colon)) {
359  // default not followed by ':' is not a case label; treat it like
360  // an identifier.
361  parseStructuralElement();
362  break;
363  }
364  // Else, if it is 'default:', fall through to the case handling.
365  LLVM_FALLTHROUGH;
366  }
367  case tok::kw_case:
368  if (Style.Language == FormatStyle::LK_JavaScript &&
369  Line->MustBeDeclaration) {
370  // A 'case: string' style field declaration.
371  parseStructuralElement();
372  break;
373  }
374  if (!SwitchLabelEncountered &&
375  (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
376  ++Line->Level;
377  SwitchLabelEncountered = true;
378  parseStructuralElement();
379  break;
380  default:
381  parseStructuralElement();
382  break;
383  }
384  } while (!eof());
385 }
386 
// Looks ahead from the current '{' and assigns a BlockKind (BK_Block or
// BK_BracedInit) to the brace tokens it passes, based on the tokens seen inside
// and right after each brace pair. The token position is restored before the
// function returns.
//
// \p ExpectClassBody: when true, a ';' after the closing brace of the
// outermost scanned brace pair is not taken as evidence for a braced init list.
387 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
388  // We'll parse forward through the tokens until we hit
389  // a closing brace or eof - note that getNextToken() will
390  // parse macros, so this will magically work inside macro
391  // definitions, too.
392  unsigned StoredPosition = Tokens->getPosition();
393  FormatToken *Tok = FormatTok;
394  const FormatToken *PrevTok = Tok->Previous;
395  // Keep a stack of positions of lbrace tokens. We will
396  // update information about whether an lbrace starts a
397  // braced init list or a different block during the loop.
398  SmallVector<FormatToken *, 8> LBraceStack;
399  assert(Tok->Tok.is(tok::l_brace));
400  do {
401  // Get next non-comment token.
402  FormatToken *NextTok;
    // NOTE(review): ReadTokens is incremented below but never read in the
    // visible text.
403  unsigned ReadTokens = 0;
404  do {
405  NextTok = Tokens->getNextToken();
406  ++ReadTokens;
407  } while (NextTok->is(tok::comment));
408 
    // Tok is the token being classified; NextTok is the first non-comment
    // token after it.
409  switch (Tok->Tok.getKind()) {
410  case tok::l_brace:
411  if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
412  if (PrevTok->isOneOf(tok::colon, tok::less))
413  // A ':' indicates this code is in a type, or a braced list
414  // following a label in an object literal ({a: {b: 1}}).
415  // A '<' could be an object used in a comparison, but that is nonsense
416  // code (can never return true), so more likely it is a generic type
417  // argument (`X<{a: string; b: number}>`).
418  // The code below could be confused by semicolons between the
419  // individual members in a type member list, which would normally
420  // trigger BK_Block. In both cases, this must be parsed as an inline
421  // braced init.
422  Tok->BlockKind = BK_BracedInit;
423  else if (PrevTok->is(tok::r_paren))
424  // `) { }` can only occur in function or method declarations in JS.
425  Tok->BlockKind = BK_Block;
426  } else {
427  Tok->BlockKind = BK_Unknown;
428  }
429  LBraceStack.push_back(Tok);
430  break;
431  case tok::r_brace:
432  if (LBraceStack.empty())
433  break;
    // Only a matching '{' that is still undecided gets classified here.
434  if (LBraceStack.back()->BlockKind == BK_Unknown) {
435  bool ProbablyBracedList = false;
436  if (Style.Language == FormatStyle::LK_Proto) {
437  ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
438  } else {
439  // Using OriginalColumn to distinguish between ObjC methods and
440  // binary operators is a bit hacky.
441  bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
442  NextTok->OriginalColumn == 0;
443 
444  // If there is a comma, semicolon or right paren after the closing
445  // brace, we assume this is a braced initializer list. Note that
446  // regardless how we mark inner braces here, we will overwrite the
447  // BlockKind later if we parse a braced list (where all blocks
448  // inside are by default braced lists), or when we explicitly detect
449  // blocks (for example while parsing lambdas).
450  // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
451  // braced list in JS.
452  ProbablyBracedList =
    // NOTE(review): listing line 453 is absent from this copy. The
    // parentheses of the next two lines are unbalanced without it, so it
    // presumably opens a parenthesised condition combined with the
    // keyword test below - confirm against upstream.
454  NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
455  Keywords.kw_as)) ||
456  (Style.isCpp() && NextTok->is(tok::l_paren)) ||
457  NextTok->isOneOf(tok::comma, tok::period, tok::colon,
458  tok::r_paren, tok::r_square, tok::l_brace,
459  tok::ellipsis) ||
460  (NextTok->is(tok::identifier) &&
461  !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
462  (NextTok->is(tok::semi) &&
463  (!ExpectClassBody || LBraceStack.size() != 1)) ||
464  (NextTok->isBinaryOperator() && !NextIsObjCMethod);
465  if (NextTok->is(tok::l_square)) {
466  // We can have an array subscript after a braced init
467  // list, but C++11 attributes are expected after blocks.
468  NextTok = Tokens->getNextToken();
469  ++ReadTokens;
470  ProbablyBracedList = NextTok->isNot(tok::l_square);
471  }
472  }
    // The closing brace and its matching opening brace get the same kind.
473  if (ProbablyBracedList) {
474  Tok->BlockKind = BK_BracedInit;
475  LBraceStack.back()->BlockKind = BK_BracedInit;
476  } else {
477  Tok->BlockKind = BK_Block;
478  LBraceStack.back()->BlockKind = BK_Block;
479  }
480  }
481  LBraceStack.pop_back();
482  break;
483  case tok::identifier:
    // Identifiers matter only when they are statement macros.
484  if (!Tok->is(TT_StatementMacro))
485  break;
486  LLVM_FALLTHROUGH;
    // Any of the following tokens marks the innermost undecided brace as the
    // start of a block.
487  case tok::at:
488  case tok::semi:
489  case tok::kw_if:
490  case tok::kw_while:
491  case tok::kw_for:
492  case tok::kw_switch:
493  case tok::kw_try:
494  case tok::kw___try:
495  if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
496  LBraceStack.back()->BlockKind = BK_Block;
497  break;
498  default:
499  break;
500  }
501  PrevTok = Tok;
502  Tok = NextTok;
503  } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
504 
505  // Assume other blocks for all unclosed opening braces.
506  for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
507  if (LBraceStack[i]->BlockKind == BK_Unknown)
508  LBraceStack[i]->BlockKind = BK_Block;
509  }
510 
    // Rewind to where the look-ahead started.
511  FormatTok = Tokens->setPosition(StoredPosition);
512 }
513 
/// Folds the std::hash value of \p v into the running hash \p seed.
///
/// \param seed accumulated hash; updated in place.
/// \param v    value whose std::hash<T> result is mixed in.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
519 
520 size_t UnwrappedLineParser::computePPHash() const {
521  size_t h = 0;
522  for (const auto &i : PPStack) {
523  hash_combine(h, size_t(i.Kind));
524  hash_combine(h, i.Line);
525  }
526  return h;
527 }
528 
// Parses a block that starts at the current token, which must be '{' or a
// macro-block-begin token, up to and including its closing token.
//
// \p MustBeDeclaration is the declaration flag installed for the block body.
// \p AddLevel: when true the body is parsed one level deeper than the block.
// \p MunchSemi: when true a ';' directly after the closing token is consumed.
529 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
530  bool MunchSemi) {
531  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
532  "'{' or macro block token expected");
533  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
534  FormatTok->BlockKind = BK_Block;
535 
    // Hash of the preprocessor branch stack at the opening token; compared
    // with the hash at the closing token further down.
536  size_t PPStartHash = computePPHash();
537 
538  unsigned InitialLevel = Line->Level;
539  nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
540 
    // A macro block opener may be followed by a parenthesised argument list.
541  if (MacroBlock && FormatTok->is(tok::l_paren))
542  parseParens();
543 
544  size_t NbPreprocessorDirectives =
545  CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
546  addUnwrappedLine();
547  size_t OpeningLineIndex =
548  CurrentLines->empty()
    // NOTE(review): listing line 549 is absent from this copy. It presumably
    // holds the '?' arm of this conditional expression; the value is later
    // compared against UnwrappedLine::kInvalidIndex - confirm against
    // upstream.
550  : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
551 
552  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
553  MustBeDeclaration);
554  if (AddLevel)
555  ++Line->Level;
556  parseLevel(/*HasOpeningBrace=*/true);
557 
558  if (eof())
559  return;
560 
    // parseLevel() stopped on something other than the expected closing
    // token: restore the level, mark the token as a block and give up.
561  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
562  : !FormatTok->is(tok::r_brace)) {
563  Line->Level = InitialLevel;
564  FormatTok->BlockKind = BK_Block;
565  return;
566  }
567 
568  size_t PPEndHash = computePPHash();
569 
570  // Munch the closing brace.
571  nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
572 
573  if (MacroBlock && FormatTok->is(tok::l_paren))
574  parseParens();
575 
576  if (MunchSemi && FormatTok->Tok.is(tok::semi))
577  nextToken();
578  Line->Level = InitialLevel;
579 
    // Opening and closing lines are linked only when the preprocessor branch
    // stack hashed to the same value at both ends of the block.
580  if (PPStartHash == PPEndHash) {
581  Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
582  if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
583  // Update the opening line to add the forward reference as well
584  (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
585  CurrentLines->size() - 1;
586  }
587  }
588 }
589 
590 static bool isGoogScope(const UnwrappedLine &Line) {
591  // FIXME: Closure-library specific stuff should not be hard-coded but be
592  // configurable.
593  if (Line.Tokens.size() < 4)
594  return false;
595  auto I = Line.Tokens.begin();
596  if (I->Tok->TokenText != "goog")
597  return false;
598  ++I;
599  if (I->Tok->isNot(tok::period))
600  return false;
601  ++I;
602  if (I->Tok->TokenText != "scope")
603  return false;
604  ++I;
605  return I->Tok->is(tok::l_paren);
606 }
607 
608 static bool isIIFE(const UnwrappedLine &Line,
609  const AdditionalKeywords &Keywords) {
610  // Look for the start of an immediately invoked anonymous function.
611  // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
612  // This is commonly done in JavaScript to create a new, anonymous scope.
613  // Example: (function() { ... })()
614  if (Line.Tokens.size() < 3)
615  return false;
616  auto I = Line.Tokens.begin();
617  if (I->Tok->isNot(tok::l_paren))
618  return false;
619  ++I;
620  if (I->Tok->isNot(Keywords.kw_function))
621  return false;
622  ++I;
623  return I->Tok->is(tok::l_paren);
624 }
625 
626 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
627  const FormatToken &InitialToken) {
628  if (InitialToken.is(tok::kw_namespace))
629  return Style.BraceWrapping.AfterNamespace;
630  if (InitialToken.is(tok::kw_class))
631  return Style.BraceWrapping.AfterClass;
632  if (InitialToken.is(tok::kw_union))
633  return Style.BraceWrapping.AfterUnion;
634  if (InitialToken.is(tok::kw_struct))
635  return Style.BraceWrapping.AfterStruct;
636  return false;
637 }
638 
639 void UnwrappedLineParser::parseChildBlock() {
640  FormatTok->BlockKind = BK_Block;
641  nextToken();
642  {
643  bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
644  (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
645  ScopedLineState LineState(*this);
646  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
647  /*MustBeDeclaration=*/false);
648  Line->Level += SkipIndent ? 0 : 1;
649  parseLevel(/*HasOpeningBrace=*/true);
650  flushComments(isOnNewLine(*FormatTok));
651  Line->Level -= SkipIndent ? 0 : 1;
652  }
653  nextToken();
654 }
655 
656 void UnwrappedLineParser::parsePPDirective() {
657  assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
658  ScopedMacroState MacroState(*Line, Tokens, FormatTok);
659  nextToken();
660 
661  if (!FormatTok->Tok.getIdentifierInfo()) {
662  parsePPUnknown();
663  return;
664  }
665 
666  switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
667  case tok::pp_define:
668  parsePPDefine();
669  return;
670  case tok::pp_if:
671  parsePPIf(/*IfDef=*/false);
672  break;
673  case tok::pp_ifdef:
674  case tok::pp_ifndef:
675  parsePPIf(/*IfDef=*/true);
676  break;
677  case tok::pp_else:
678  parsePPElse();
679  break;
680  case tok::pp_elif:
681  parsePPElIf();
682  break;
683  case tok::pp_endif:
684  parsePPEndIf();
685  break;
686  default:
687  parsePPUnknown();
688  break;
689  }
690 }
691 
692 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
693  size_t Line = CurrentLines->size();
694  if (CurrentLines == &PreprocessorDirectives)
695  Line += Lines.size();
696 
697  if (Unreachable ||
698  (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
699  PPStack.push_back({PP_Unreachable, Line});
700  else
701  PPStack.push_back({PP_Conditional, Line});
702 }
703 
704 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
705  ++PPBranchLevel;
706  assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
707  if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
708  PPLevelBranchIndex.push_back(0);
709  PPLevelBranchCount.push_back(0);
710  }
711  PPChainBranchIndex.push(0);
712  bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
713  conditionalCompilationCondition(Unreachable || Skip);
714 }
715 
716 void UnwrappedLineParser::conditionalCompilationAlternative() {
717  if (!PPStack.empty())
718  PPStack.pop_back();
719  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
720  if (!PPChainBranchIndex.empty())
721  ++PPChainBranchIndex.top();
722  conditionalCompilationCondition(
723  PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
724  PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
725 }
726 
727 void UnwrappedLineParser::conditionalCompilationEnd() {
728  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
729  if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
730  if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
731  PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
732  }
733  }
734  // Guard against #endif's without #if.
735  if (PPBranchLevel > -1)
736  --PPBranchLevel;
737  if (!PPChainBranchIndex.empty())
738  PPChainBranchIndex.pop();
739  if (!PPStack.empty())
740  PPStack.pop_back();
741 }
742 
743 void UnwrappedLineParser::parsePPIf(bool IfDef) {
744  bool IfNDef = FormatTok->is(tok::pp_ifndef);
745  nextToken();
746  bool Unreachable = false;
747  if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
748  Unreachable = true;
749  if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
750  Unreachable = true;
751  conditionalCompilationStart(Unreachable);
752  FormatToken *IfCondition = FormatTok;
753  // If there's a #ifndef on the first line, and the only lines before it are
754  // comments, it could be an include guard.
755  bool MaybeIncludeGuard = IfNDef;
756  if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
757  for (auto &Line : Lines) {
758  if (!Line.Tokens.front().Tok->is(tok::comment)) {
759  MaybeIncludeGuard = false;
760  IncludeGuard = IG_Rejected;
761  break;
762  }
763  }
764  --PPBranchLevel;
765  parsePPUnknown();
766  ++PPBranchLevel;
767  if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
768  IncludeGuard = IG_IfNdefed;
769  IncludeGuardToken = IfCondition;
770  }
771 }
772 
773 void UnwrappedLineParser::parsePPElse() {
774  // If a potential include guard has an #else, it's not an include guard.
775  if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
776  IncludeGuard = IG_Rejected;
777  conditionalCompilationAlternative();
778  if (PPBranchLevel > -1)
779  --PPBranchLevel;
780  parsePPUnknown();
781  ++PPBranchLevel;
782 }
783 
784 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
785 
// Parses an '#endif' directive: closes the current conditional-compilation
// chain, consumes the rest of the directive and then checks whether a pending
// include-guard candidate can be confirmed.
786 void UnwrappedLineParser::parsePPEndIf() {
787  conditionalCompilationEnd();
788  parsePPUnknown();
789  // If the #endif of a potential include guard is the last thing in the file,
790  // then we found an include guard.
    // Peek at the token stored at the current position without consuming it.
791  unsigned TokenPosition = Tokens->getPosition();
792  FormatToken *PeekNext = AllTokens[TokenPosition];
793  if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
794  PeekNext->is(tok::eof) &&
    // NOTE(review): listing line 795 is absent from this copy. The condition
    // above ends in '&&', so a final operand and the closing parenthesis of
    // the `if` are presumably on that line - confirm against upstream.
796  IncludeGuard = IG_Found;
797 }
798 
// Parses a '#define' directive: the macro name, an optional parameter list and
// then the macro body, which is parsed like a file of its own.
799 void UnwrappedLineParser::parsePPDefine() {
800  nextToken();
801 
    // Anything but an identifier after '#define' rules out an include guard;
    // the rest of the directive is consumed without further analysis.
802  if (FormatTok->Tok.getKind() != tok::identifier) {
803  IncludeGuard = IG_Rejected;
804  IncludeGuardToken = nullptr;
805  parsePPUnknown();
806  return;
807  }
808 
    // A '#define' of the very name tested by the remembered '#ifndef' advances
    // include-guard detection, unless a line that starts with neither a
    // comment nor '#' has been seen already.
809  if (IncludeGuard == IG_IfNdefed &&
810  IncludeGuardToken->TokenText == FormatTok->TokenText) {
811  IncludeGuard = IG_Defined;
812  IncludeGuardToken = nullptr;
813  for (auto &Line : Lines) {
814  if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
815  IncludeGuard = IG_Rejected;
816  break;
817  }
818  }
819  }
820 
821  nextToken();
    // A '(' counts as the start of a parameter list only when its whitespace
    // range is empty, i.e. nothing separates it from the macro name.
822  if (FormatTok->Tok.getKind() == tok::l_paren &&
823  FormatTok->WhitespaceRange.getBegin() ==
824  FormatTok->WhitespaceRange.getEnd()) {
825  parseParens();
826  }
    // NOTE(review): listing line 827 is absent from this copy. It directly
    // precedes the level adjustment below and may well be a condition guarding
    // it - confirm against upstream before treating the adjustment as
    // unconditional.
828  Line->Level += PPBranchLevel + 1;
829  addUnwrappedLine();
830  ++Line->Level;
831 
832  // Errors during a preprocessor directive can only affect the layout of the
833  // preprocessor directive, and thus we ignore them. An alternative approach
834  // would be to use the same approach we use on the file level (no
835  // re-indentation if there was a structural error) within the macro
836  // definition.
837  parseFile();
838 }
839 
// Consumes tokens until eof() reports true and emits them as one unwrapped
// line. Besides unrecognised directives, the other directive handlers in this
// file call it to consume the rest of their directive.
840 void UnwrappedLineParser::parsePPUnknown() {
841  do {
842  nextToken();
843  } while (!eof());
    // NOTE(review): listing line 844 is absent from this copy. It directly
    // precedes the level adjustment below and may well be a condition guarding
    // it - confirm against upstream before treating the adjustment as
    // unconditional.
845  Line->Level += PPBranchLevel + 1;
846  addUnwrappedLine();
847 }
848 
849 // Here we blacklist certain tokens that are not usually the first token in an
850 // unwrapped line. This is used in attempt to distinguish macro calls without
851 // trailing semicolons from other constructs split to several lines.
852 static bool tokenCanStartNewLine(const clang::Token &Tok) {
853  // Semicolon can be a null-statement, l_square can be a start of a macro or
854  // a C++11 attribute, but this doesn't seem to be common.
855  return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
856  Tok.isNot(tok::l_square) &&
857  // Tokens that can only be used as binary operators and a part of
858  // overloaded operator names.
859  Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
860  Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
861  Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
862  Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
863  Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
864  Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
865  Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
866  Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
867  Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
868  Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
869  Tok.isNot(tok::lesslessequal) &&
870  // Colon is used in labels, base class lists, initializer lists,
871  // range-based for loops, ternary operator, but should never be the
872  // first token in an unwrapped line.
873  Tok.isNot(tok::colon) &&
874  // 'noexcept' is a trailing annotation.
875  Tok.isNot(tok::kw_noexcept);
876 }
877 
878 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
879  const FormatToken *FormatTok) {
880  // FIXME: This returns true for C/C++ keywords like 'struct'.
881  return FormatTok->is(tok::identifier) &&
882  (FormatTok->Tok.getIdentifierInfo() == nullptr ||
883  !FormatTok->isOneOf(
884  Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
885  Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
886  Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
887  Keywords.kw_let, Keywords.kw_var, tok::kw_const,
888  Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
889  Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
890  Keywords.kw_from));
891 }
892 
893 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
894  const FormatToken *FormatTok) {
895  return FormatTok->Tok.isLiteral() ||
896  FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
897  mustBeJSIdent(Keywords, FormatTok);
898 }
899 
900 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
901 // when encountered after a value (see mustBeJSIdentOrValue).
902 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
903  const FormatToken *FormatTok) {
904  return FormatTok->isOneOf(
905  tok::kw_return, Keywords.kw_yield,
906  // conditionals
907  tok::kw_if, tok::kw_else,
908  // loops
909  tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
910  // switch/case
911  tok::kw_switch, tok::kw_case,
912  // exceptions
913  tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
914  // declaration
915  tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
916  Keywords.kw_async, Keywords.kw_function,
917  // import/export
918  Keywords.kw_import, tok::kw_export);
919 }
920 
921 // readTokenWithJavaScriptASI reads the next token and terminates the current
922 // line if JavaScript Automatic Semicolon Insertion must
923 // happen between the current token and the next token.
924 //
925 // This method is conservative - it cannot cover all edge cases of JavaScript,
926 // but only aims to correctly handle certain well known cases. It *must not*
927 // return true in speculative cases.
928 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
929  FormatToken *Previous = FormatTok;
930  readToken();
931  FormatToken *Next = FormatTok;
932 
933  bool IsOnSameLine =
934  CommentsBeforeNextToken.empty()
935  ? Next->NewlinesBefore == 0
936  : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
937  if (IsOnSameLine)
938  return;
939 
940  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
941  bool PreviousStartsTemplateExpr =
942  Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
943  if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
944  // If the line contains an '@' sign, the previous token might be an
945  // annotation, which can precede another identifier/value.
946  bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
947  [](UnwrappedLineNode &LineNode) {
948  return LineNode.Tok->is(tok::at);
949  }) != Line->Tokens.end();
950  if (HasAt)
951  return;
952  }
953  if (Next->is(tok::exclaim) && PreviousMustBeValue)
954  return addUnwrappedLine();
955  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
956  bool NextEndsTemplateExpr =
957  Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
958  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
959  (PreviousMustBeValue ||
960  Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
961  tok::minusminus)))
962  return addUnwrappedLine();
963  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
964  isJSDeclOrStmt(Keywords, Next))
965  return addUnwrappedLine();
966 }
967 
// Parses one structural element (roughly: one statement or declaration)
// starting at the current token.
//
// The first switch dispatches on the leading token. Keywords that have a
// dedicated parser (namespace, if, for/while, do, switch, case, try, ...) are
// handed off and the function returns; every other token breaks out into the
// generic loop below. That loop consumes tokens until something terminates the
// element (a ';', a '}', a function body block, ...) or eof() is reached.
//
// NOTE(review): the embedded listing numbers skip 1003, 1161, 1173, 1219 and
// 1335, so this copy appears to be missing five source lines. The affected
// conditions are incomplete as shown; restore them from the upstream file
// before relying on this text.
968 void UnwrappedLineParser::parseStructuralElement() {
969  assert(!FormatTok->is(tok::l_brace));
970  if (Style.Language == FormatStyle::LK_TableGen &&
971  FormatTok->is(tok::pp_include)) {
972  nextToken();
973  if (FormatTok->is(tok::string_literal))
974  nextToken();
975  addUnwrappedLine();
976  return;
977  }
978  switch (FormatTok->Tok.getKind()) {
979  case tok::kw_asm:
980  nextToken();
981  if (FormatTok->is(tok::l_brace)) {
982  FormatTok->Type = TT_InlineASMBrace;
983  nextToken();
// Mark every token inside the asm braces as Finalized until the closing
// brace (or end of input) is found.
984  while (FormatTok && FormatTok->isNot(tok::eof)) {
985  if (FormatTok->is(tok::r_brace)) {
986  FormatTok->Type = TT_InlineASMBrace;
987  nextToken();
988  addUnwrappedLine();
989  break;
990  }
991  FormatTok->Finalized = true;
992  nextToken();
993  }
994  }
995  break;
996  case tok::kw_namespace:
997  parseNamespace();
998  return;
999  case tok::kw_public:
1000  case tok::kw_protected:
1001  case tok::kw_private:
1002  if (Style.Language == FormatStyle::LK_Java ||
// NOTE(review): listing line 1003 is missing here; the '||' above has no
// right-hand operand in this copy.
1004  nextToken();
1005  else
1006  parseAccessSpecifier();
1007  return;
1008  case tok::kw_if:
1009  parseIfThenElse();
1010  return;
1011  case tok::kw_for:
1012  case tok::kw_while:
1013  parseForOrWhileLoop();
1014  return;
1015  case tok::kw_do:
1016  parseDoWhile();
1017  return;
1018  case tok::kw_switch:
1019  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1020  // 'switch: string' field declaration.
1021  break;
1022  parseSwitch();
1023  return;
1024  case tok::kw_default:
1025  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1026  // 'default: string' field declaration.
1027  break;
1028  nextToken();
1029  if (FormatTok->is(tok::colon)) {
1030  parseLabel();
1031  return;
1032  }
1033  // e.g. "default void f() {}" in a Java interface.
1034  break;
1035  case tok::kw_case:
1036  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1037  // 'case: string' field declaration.
1038  break;
1039  parseCaseLabel();
1040  return;
1041  case tok::kw_try:
1042  case tok::kw___try:
1043  parseTryCatch();
1044  return;
1045  case tok::kw_extern:
1046  nextToken();
// An extern followed by a string literal and '{' is parsed as a declaration
// block; anything else continues as an ordinary declaration.
1047  if (FormatTok->Tok.is(tok::string_literal)) {
1048  nextToken();
1049  if (FormatTok->Tok.is(tok::l_brace)) {
1050  if (Style.BraceWrapping.AfterExternBlock) {
1051  addUnwrappedLine();
1052  parseBlock(/*MustBeDeclaration=*/true);
1053  } else {
1054  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
1055  }
1056  addUnwrappedLine();
1057  return;
1058  }
1059  }
1060  break;
1061  case tok::kw_export:
1062  if (Style.Language == FormatStyle::LK_JavaScript) {
1063  parseJavaScriptEs6ImportExport();
1064  return;
1065  }
1066  if (!Style.isCpp())
1067  break;
1068  // Handle C++ "(inline|export) namespace".
1069  LLVM_FALLTHROUGH;
1070  case tok::kw_inline:
1071  nextToken();
1072  if (FormatTok->Tok.is(tok::kw_namespace)) {
1073  parseNamespace();
1074  return;
1075  }
1076  break;
1077  case tok::identifier:
1078  if (FormatTok->is(TT_ForEachMacro)) {
1079  parseForOrWhileLoop();
1080  return;
1081  }
1082  if (FormatTok->is(TT_MacroBlockBegin)) {
1083  parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
1084  /*MunchSemi=*/false);
1085  return;
1086  }
1087  if (FormatTok->is(Keywords.kw_import)) {
1088  if (Style.Language == FormatStyle::LK_JavaScript) {
1089  parseJavaScriptEs6ImportExport();
1090  return;
1091  }
1092  if (Style.Language == FormatStyle::LK_Proto) {
// Proto import: "import [public] "file" [;]" becomes one line. If no string
// literal follows, return without emitting a line.
1093  nextToken();
1094  if (FormatTok->is(tok::kw_public))
1095  nextToken();
1096  if (!FormatTok->is(tok::string_literal))
1097  return;
1098  nextToken();
1099  if (FormatTok->is(tok::semi))
1100  nextToken();
1101  addUnwrappedLine();
1102  return;
1103  }
1104  }
1105  if (Style.isCpp() &&
1106  FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1107  Keywords.kw_slots, Keywords.kw_qslots)) {
1108  nextToken();
1109  if (FormatTok->is(tok::colon)) {
1110  nextToken();
1111  addUnwrappedLine();
1112  return;
1113  }
1114  }
1115  if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1116  parseStatementMacro();
1117  return;
1118  }
1119  // In all other cases, parse the declaration.
1120  break;
1121  default:
1122  break;
1123  }
// Generic loop: consume the rest of the element token by token.
1124  do {
1125  const FormatToken *Previous = FormatTok->Previous;
1126  switch (FormatTok->Tok.getKind()) {
1127  case tok::at:
1128  nextToken();
1129  if (FormatTok->Tok.is(tok::l_brace)) {
1130  nextToken();
1131  parseBracedList();
1132  break;
1133  } else if (Style.Language == FormatStyle::LK_Java &&
1134  FormatTok->is(Keywords.kw_interface)) {
1135  nextToken();
1136  break;
1137  }
1138  switch (FormatTok->Tok.getObjCKeywordID()) {
1139  case tok::objc_public:
1140  case tok::objc_protected:
1141  case tok::objc_package:
1142  case tok::objc_private:
1143  return parseAccessSpecifier();
1144  case tok::objc_interface:
1145  case tok::objc_implementation:
1146  return parseObjCInterfaceOrImplementation();
1147  case tok::objc_protocol:
1148  if (parseObjCProtocol())
1149  return;
1150  break;
1151  case tok::objc_end:
1152  return; // Handled by the caller.
1153  case tok::objc_optional:
1154  case tok::objc_required:
1155  nextToken();
1156  addUnwrappedLine();
1157  return;
1158  case tok::objc_autoreleasepool:
1159  nextToken();
1160  if (FormatTok->Tok.is(tok::l_brace)) {
// NOTE(review): listing line 1161 is missing here; presumably it guarded the
// addUnwrappedLine() below with a brace-wrapping option. Confirm upstream.
1162  addUnwrappedLine();
1163  parseBlock(/*MustBeDeclaration=*/false);
1164  }
1165  addUnwrappedLine();
1166  return;
1167  case tok::objc_synchronized:
1168  nextToken();
1169  if (FormatTok->Tok.is(tok::l_paren))
1170  // Skip synchronization object
1171  parseParens();
1172  if (FormatTok->Tok.is(tok::l_brace)) {
// NOTE(review): listing line 1173 is missing here; presumably it guarded the
// addUnwrappedLine() below with a brace-wrapping option. Confirm upstream.
1174  addUnwrappedLine();
1175  parseBlock(/*MustBeDeclaration=*/false);
1176  }
1177  addUnwrappedLine();
1178  return;
1179  case tok::objc_try:
1180  // This branch isn't strictly necessary (the kw_try case below would
1181  // do this too after the tok::at is parsed above). But be explicit.
1182  parseTryCatch();
1183  return;
1184  default:
1185  break;
1186  }
1187  break;
1188  case tok::kw_enum:
1189  // Ignore if this is part of "template <enum ...".
1190  if (Previous && Previous->is(tok::less)) {
1191  nextToken();
1192  break;
1193  }
1194 
1195  // parseEnum falls through and does not yet add an unwrapped line as an
1196  // enum definition can start a structural element.
1197  if (!parseEnum())
1198  break;
1199  // This only applies for C++.
1200  if (!Style.isCpp()) {
1201  addUnwrappedLine();
1202  return;
1203  }
1204  break;
1205  case tok::kw_typedef:
1206  nextToken();
1207  if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1208  Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
1209  parseEnum();
1210  break;
1211  case tok::kw_struct:
1212  case tok::kw_union:
1213  case tok::kw_class:
1214  // parseRecord falls through and does not yet add an unwrapped line as a
1215  // record declaration or definition can start a structural element.
1216  parseRecord();
1217  // This does not apply for Java and JavaScript.
1218  if (Style.Language == FormatStyle::LK_Java ||
// NOTE(review): listing line 1219 is missing here; the '||' above has no
// right-hand operand and the block closed at 1224 has no opening brace in
// this copy.
1220  if (FormatTok->is(tok::semi))
1221  nextToken();
1222  addUnwrappedLine();
1223  return;
1224  }
1225  break;
1226  case tok::period:
1227  nextToken();
1228  // In Java, classes have an implicit static member "class".
1229  if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1230  FormatTok->is(tok::kw_class))
1231  nextToken();
1232  if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
1233  FormatTok->Tok.getIdentifierInfo())
1234  // JavaScript only has pseudo keywords, all keywords are allowed to
1235  // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1236  nextToken();
1237  break;
1238  case tok::semi:
1239  nextToken();
1240  addUnwrappedLine();
1241  return;
1242  case tok::r_brace:
1243  addUnwrappedLine();
1244  return;
1245  case tok::l_paren:
1246  parseParens();
1247  break;
1248  case tok::kw_operator:
1249  nextToken();
1250  if (FormatTok->isBinaryOperator())
1251  nextToken();
1252  break;
1253  case tok::caret:
1254  nextToken();
1255  if (FormatTok->Tok.isAnyIdentifier() ||
1256  FormatTok->isSimpleTypeSpecifier())
1257  nextToken();
1258  if (FormatTok->is(tok::l_paren))
1259  parseParens();
1260  if (FormatTok->is(tok::l_brace))
1261  parseChildBlock();
1262  break;
1263  case tok::l_brace:
1264  if (!tryToParseBracedList()) {
1265  // A block outside of parentheses must be the last part of a
1266  // structural element.
1267  // FIXME: Figure out cases where this is not true, and add projections
1268  // for them (the one we know is missing are lambdas).
1269  if (Style.BraceWrapping.AfterFunction)
1270  addUnwrappedLine();
1271  FormatTok->Type = TT_FunctionLBrace;
1272  parseBlock(/*MustBeDeclaration=*/false);
1273  addUnwrappedLine();
1274  return;
1275  }
1276  // Otherwise this was a braced init list, and the structural
1277  // element continues.
1278  break;
1279  case tok::kw_try:
1280  // We arrive here when parsing function-try blocks.
1281  if (Style.BraceWrapping.AfterFunction)
1282  addUnwrappedLine();
1283  parseTryCatch();
1284  return;
1285  case tok::identifier: {
1286  if (FormatTok->is(TT_MacroBlockEnd)) {
1287  addUnwrappedLine();
1288  return;
1289  }
1290 
1291  // Function declarations (as opposed to function expressions) are parsed
1292  // on their own unwrapped line by continuing this loop. Function
1293  // expressions (functions that are not on their own line) must not create
1294  // a new unwrapped line, so they are special cased below.
1295  size_t TokenCount = Line->Tokens.size();
1296  if (Style.Language == FormatStyle::LK_JavaScript &&
1297  FormatTok->is(Keywords.kw_function) &&
1298  (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1299  Keywords.kw_async)))) {
1300  tryToParseJSFunction();
1301  break;
1302  }
1303  if ((Style.Language == FormatStyle::LK_JavaScript ||
1304  Style.Language == FormatStyle::LK_Java) &&
1305  FormatTok->is(Keywords.kw_interface)) {
1306  if (Style.Language == FormatStyle::LK_JavaScript) {
1307  // In JavaScript/TypeScript, "interface" can be used as a standalone
1308  // identifier, e.g. in `var interface = 1;`. If "interface" is
1309  // followed by another identifier, it is very likely to be an actual
1310  // interface declaration.
// Peek at the next token, then rewind the token source to where it was.
1311  unsigned StoredPosition = Tokens->getPosition();
1312  FormatToken *Next = Tokens->getNextToken();
1313  FormatTok = Tokens->setPosition(StoredPosition);
1314  if (Next && !mustBeJSIdent(Keywords, Next)) {
1315  nextToken();
1316  break;
1317  }
1318  }
1319  parseRecord();
1320  addUnwrappedLine();
1321  return;
1322  }
1323 
1324  if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1325  parseStatementMacro();
1326  return;
1327  }
1328 
1329  // See if the following token should start a new unwrapped line.
1330  StringRef Text = FormatTok->TokenText;
1331  nextToken();
1332  if (Line->Tokens.size() == 1 &&
1333  // JS doesn't have macros, and within classes colons indicate fields,
1334  // not labels.
// NOTE(review): listing line 1335 is missing here; the '&&' above has no
// right-hand operand and the block closed at 1357 has no opening brace in
// this copy. Judging by the comment above, it presumably excluded JavaScript.
1336  if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1337  Line->Tokens.begin()->Tok->MustBreakBefore = true;
1338  parseLabel();
1339  return;
1340  }
1341  // Recognize function-like macro usages without trailing semicolon as
1342  // well as free-standing macros like Q_OBJECT.
1343  bool FunctionLike = FormatTok->is(tok::l_paren);
1344  if (FunctionLike)
1345  parseParens();
1346 
1347  bool FollowedByNewline =
1348  CommentsBeforeNextToken.empty()
1349  ? FormatTok->NewlinesBefore > 0
1350  : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1351 
// Treat the identifier as a macro only if it is all upper case, is
// function-like or at least five characters long, and is followed by a newline.
1352  if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1353  tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
1354  addUnwrappedLine();
1355  return;
1356  }
1357  }
1358  break;
1359  }
1360  case tok::equal:
1361  // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1362  // TT_JsFatArrow. They always start an expression or a child block if
1363  // followed by a curly.
1364  if (FormatTok->is(TT_JsFatArrow)) {
1365  nextToken();
1366  if (FormatTok->is(tok::l_brace))
1367  parseChildBlock();
1368  break;
1369  }
1370 
1371  nextToken();
1372  if (FormatTok->Tok.is(tok::l_brace)) {
1373  nextToken();
1374  parseBracedList();
1375  } else if (Style.Language == FormatStyle::LK_Proto &&
1376  FormatTok->Tok.is(tok::less)) {
1377  nextToken();
1378  parseBracedList(/*ContinueOnSemicolons=*/false,
1379  /*ClosingBraceKind=*/tok::greater);
1380  }
1381  break;
1382  case tok::l_square:
1383  parseSquare();
1384  break;
1385  case tok::kw_new:
1386  parseNew();
1387  break;
1388  default:
1389  nextToken();
1390  break;
1391  }
1392  } while (!eof());
1393 }
1394 
1395 bool UnwrappedLineParser::tryToParseLambda() {
1396  if (!Style.isCpp()) {
1397  nextToken();
1398  return false;
1399  }
1400  assert(FormatTok->is(tok::l_square));
1401  FormatToken &LSquare = *FormatTok;
1402  if (!tryToParseLambdaIntroducer())
1403  return false;
1404 
1405  while (FormatTok->isNot(tok::l_brace)) {
1406  if (FormatTok->isSimpleTypeSpecifier()) {
1407  nextToken();
1408  continue;
1409  }
1410  switch (FormatTok->Tok.getKind()) {
1411  case tok::l_brace:
1412  break;
1413  case tok::l_paren:
1414  parseParens();
1415  break;
1416  case tok::amp:
1417  case tok::star:
1418  case tok::kw_const:
1419  case tok::comma:
1420  case tok::less:
1421  case tok::greater:
1422  case tok::identifier:
1423  case tok::numeric_constant:
1424  case tok::coloncolon:
1425  case tok::kw_mutable:
1426  nextToken();
1427  break;
1428  case tok::arrow:
1429  FormatTok->Type = TT_LambdaArrow;
1430  nextToken();
1431  break;
1432  default:
1433  return true;
1434  }
1435  }
1436  LSquare.Type = TT_LambdaLSquare;
1437  parseChildBlock();
1438  return true;
1439 }
1440 
1441 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1442  const FormatToken *Previous = FormatTok->Previous;
1443  if (Previous &&
1444  (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1445  tok::kw_delete, tok::l_square) ||
1446  FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1447  Previous->isSimpleTypeSpecifier())) {
1448  nextToken();
1449  return false;
1450  }
1451  nextToken();
1452  if (FormatTok->is(tok::l_square)) {
1453  return false;
1454  }
1455  parseSquare(/*LambdaIntroducer=*/true);
1456  return true;
1457 }
1458 
1459 void UnwrappedLineParser::tryToParseJSFunction() {
1460  assert(FormatTok->is(Keywords.kw_function) ||
1461  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1462  if (FormatTok->is(Keywords.kw_async))
1463  nextToken();
1464  // Consume "function".
1465  nextToken();
1466 
1467  // Consume * (generator function). Treat it like C++'s overloaded operators.
1468  if (FormatTok->is(tok::star)) {
1469  FormatTok->Type = TT_OverloadedOperator;
1470  nextToken();
1471  }
1472 
1473  // Consume function name.
1474  if (FormatTok->is(tok::identifier))
1475  nextToken();
1476 
1477  if (FormatTok->isNot(tok::l_paren))
1478  return;
1479 
1480  // Parse formal parameter list.
1481  parseParens();
1482 
1483  if (FormatTok->is(tok::colon)) {
1484  // Parse a type definition.
1485  nextToken();
1486 
1487  // Eat the type declaration. For braced inline object types, balance braces,
1488  // otherwise just parse until finding an l_brace for the function body.
1489  if (FormatTok->is(tok::l_brace))
1490  tryToParseBracedList();
1491  else
1492  while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1493  nextToken();
1494  }
1495 
1496  if (FormatTok->is(tok::semi))
1497  return;
1498 
1499  parseChildBlock();
1500 }
1501 
1502 bool UnwrappedLineParser::tryToParseBracedList() {
1503  if (FormatTok->BlockKind == BK_Unknown)
1504  calculateBraceTypes();
1505  assert(FormatTok->BlockKind != BK_Unknown);
1506  if (FormatTok->BlockKind == BK_Block)
1507  return false;
1508  nextToken();
1509  parseBracedList();
1510  return true;
1511 }
1512 
1513 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1514  tok::TokenKind ClosingBraceKind) {
1515  bool HasError = false;
1516 
1517  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1518  // replace this by using parseAssigmentExpression() inside.
1519  do {
1520  if (Style.Language == FormatStyle::LK_JavaScript) {
1521  if (FormatTok->is(Keywords.kw_function) ||
1522  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1523  tryToParseJSFunction();
1524  continue;
1525  }
1526  if (FormatTok->is(TT_JsFatArrow)) {
1527  nextToken();
1528  // Fat arrows can be followed by simple expressions or by child blocks
1529  // in curly braces.
1530  if (FormatTok->is(tok::l_brace)) {
1531  parseChildBlock();
1532  continue;
1533  }
1534  }
1535  if (FormatTok->is(tok::l_brace)) {
1536  // Could be a method inside of a braced list `{a() { return 1; }}`.
1537  if (tryToParseBracedList())
1538  continue;
1539  parseChildBlock();
1540  }
1541  }
1542  if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1543  nextToken();
1544  return !HasError;
1545  }
1546  switch (FormatTok->Tok.getKind()) {
1547  case tok::caret:
1548  nextToken();
1549  if (FormatTok->is(tok::l_brace)) {
1550  parseChildBlock();
1551  }
1552  break;
1553  case tok::l_square:
1554  tryToParseLambda();
1555  break;
1556  case tok::l_paren:
1557  parseParens();
1558  // JavaScript can just have free standing methods and getters/setters in
1559  // object literals. Detect them by a "{" following ")".
1560  if (Style.Language == FormatStyle::LK_JavaScript) {
1561  if (FormatTok->is(tok::l_brace))
1562  parseChildBlock();
1563  break;
1564  }
1565  break;
1566  case tok::l_brace:
1567  // Assume there are no blocks inside a braced init list apart
1568  // from the ones we explicitly parse out (like lambdas).
1569  FormatTok->BlockKind = BK_BracedInit;
1570  nextToken();
1571  parseBracedList();
1572  break;
1573  case tok::less:
1574  if (Style.Language == FormatStyle::LK_Proto) {
1575  nextToken();
1576  parseBracedList(/*ContinueOnSemicolons=*/false,
1577  /*ClosingBraceKind=*/tok::greater);
1578  } else {
1579  nextToken();
1580  }
1581  break;
1582  case tok::semi:
1583  // JavaScript (or more precisely TypeScript) can have semicolons in braced
1584  // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1585  // used for error recovery if we have otherwise determined that this is
1586  // a braced list.
1587  if (Style.Language == FormatStyle::LK_JavaScript) {
1588  nextToken();
1589  break;
1590  }
1591  HasError = true;
1592  if (!ContinueOnSemicolons)
1593  return !HasError;
1594  nextToken();
1595  break;
1596  case tok::comma:
1597  nextToken();
1598  break;
1599  default:
1600  nextToken();
1601  break;
1602  }
1603  } while (!eof());
1604  return false;
1605 }
1606 
1607 void UnwrappedLineParser::parseParens() {
1608  assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1609  nextToken();
1610  do {
1611  switch (FormatTok->Tok.getKind()) {
1612  case tok::l_paren:
1613  parseParens();
1614  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1615  parseChildBlock();
1616  break;
1617  case tok::r_paren:
1618  nextToken();
1619  return;
1620  case tok::r_brace:
1621  // A "}" inside parenthesis is an error if there wasn't a matching "{".
1622  return;
1623  case tok::l_square:
1624  tryToParseLambda();
1625  break;
1626  case tok::l_brace:
1627  if (!tryToParseBracedList())
1628  parseChildBlock();
1629  break;
1630  case tok::at:
1631  nextToken();
1632  if (FormatTok->Tok.is(tok::l_brace)) {
1633  nextToken();
1634  parseBracedList();
1635  }
1636  break;
1637  case tok::kw_class:
1638  if (Style.Language == FormatStyle::LK_JavaScript)
1639  parseRecord(/*ParseAsExpr=*/true);
1640  else
1641  nextToken();
1642  break;
1643  case tok::identifier:
1644  if (Style.Language == FormatStyle::LK_JavaScript &&
1645  (FormatTok->is(Keywords.kw_function) ||
1646  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1647  tryToParseJSFunction();
1648  else
1649  nextToken();
1650  break;
1651  default:
1652  nextToken();
1653  break;
1654  }
1655  } while (!eof());
1656 }
1657 
1658 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1659  if (!LambdaIntroducer) {
1660  assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1661  if (tryToParseLambda())
1662  return;
1663  }
1664  do {
1665  switch (FormatTok->Tok.getKind()) {
1666  case tok::l_paren:
1667  parseParens();
1668  break;
1669  case tok::r_square:
1670  nextToken();
1671  return;
1672  case tok::r_brace:
1673  // A "}" inside parenthesis is an error if there wasn't a matching "{".
1674  return;
1675  case tok::l_square:
1676  parseSquare();
1677  break;
1678  case tok::l_brace: {
1679  if (!tryToParseBracedList())
1680  parseChildBlock();
1681  break;
1682  }
1683  case tok::at:
1684  nextToken();
1685  if (FormatTok->Tok.is(tok::l_brace)) {
1686  nextToken();
1687  parseBracedList();
1688  }
1689  break;
1690  default:
1691  nextToken();
1692  break;
1693  }
1694  } while (!eof());
1695 }
1696 
1697 void UnwrappedLineParser::parseIfThenElse() {
1698  assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1699  nextToken();
1700  if (FormatTok->Tok.is(tok::kw_constexpr))
1701  nextToken();
1702  if (FormatTok->Tok.is(tok::l_paren))
1703  parseParens();
1704  bool NeedsUnwrappedLine = false;
1705  if (FormatTok->Tok.is(tok::l_brace)) {
1706  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1707  parseBlock(/*MustBeDeclaration=*/false);
1708  if (Style.BraceWrapping.BeforeElse)
1709  addUnwrappedLine();
1710  else
1711  NeedsUnwrappedLine = true;
1712  } else {
1713  addUnwrappedLine();
1714  ++Line->Level;
1715  parseStructuralElement();
1716  --Line->Level;
1717  }
1718  if (FormatTok->Tok.is(tok::kw_else)) {
1719  nextToken();
1720  if (FormatTok->Tok.is(tok::l_brace)) {
1721  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1722  parseBlock(/*MustBeDeclaration=*/false);
1723  addUnwrappedLine();
1724  } else if (FormatTok->Tok.is(tok::kw_if)) {
1725  parseIfThenElse();
1726  } else {
1727  addUnwrappedLine();
1728  ++Line->Level;
1729  parseStructuralElement();
1730  if (FormatTok->is(tok::eof))
1731  addUnwrappedLine();
1732  --Line->Level;
1733  }
1734  } else if (NeedsUnwrappedLine) {
1735  addUnwrappedLine();
1736  }
1737 }
1738 
1739 void UnwrappedLineParser::parseTryCatch() {
1740  assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1741  nextToken();
1742  bool NeedsUnwrappedLine = false;
1743  if (FormatTok->is(tok::colon)) {
1744  // We are in a function try block, what comes is an initializer list.
1745  nextToken();
1746  while (FormatTok->is(tok::identifier)) {
1747  nextToken();
1748  if (FormatTok->is(tok::l_paren))
1749  parseParens();
1750  if (FormatTok->is(tok::comma))
1751  nextToken();
1752  }
1753  }
1754  // Parse try with resource.
1755  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1756  parseParens();
1757  }
1758  if (FormatTok->is(tok::l_brace)) {
1759  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1760  parseBlock(/*MustBeDeclaration=*/false);
1761  if (Style.BraceWrapping.BeforeCatch) {
1762  addUnwrappedLine();
1763  } else {
1764  NeedsUnwrappedLine = true;
1765  }
1766  } else if (!FormatTok->is(tok::kw_catch)) {
1767  // The C++ standard requires a compound-statement after a try.
1768  // If there's none, we try to assume there's a structuralElement
1769  // and try to continue.
1770  addUnwrappedLine();
1771  ++Line->Level;
1772  parseStructuralElement();
1773  --Line->Level;
1774  }
1775  while (1) {
1776  if (FormatTok->is(tok::at))
1777  nextToken();
1778  if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1779  tok::kw___finally) ||
1780  ((Style.Language == FormatStyle::LK_Java ||
1782  FormatTok->is(Keywords.kw_finally)) ||
1783  (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1784  FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1785  break;
1786  nextToken();
1787  while (FormatTok->isNot(tok::l_brace)) {
1788  if (FormatTok->is(tok::l_paren)) {
1789  parseParens();
1790  continue;
1791  }
1792  if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1793  return;
1794  nextToken();
1795  }
1796  NeedsUnwrappedLine = false;
1797  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1798  parseBlock(/*MustBeDeclaration=*/false);
1799  if (Style.BraceWrapping.BeforeCatch)
1800  addUnwrappedLine();
1801  else
1802  NeedsUnwrappedLine = true;
1803  }
1804  if (NeedsUnwrappedLine)
1805  addUnwrappedLine();
1806 }
1807 
1808 void UnwrappedLineParser::parseNamespace() {
1809  assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1810 
1811  const FormatToken &InitialToken = *FormatTok;
1812  nextToken();
1813  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1814  nextToken();
1815  if (FormatTok->Tok.is(tok::l_brace)) {
1816  if (ShouldBreakBeforeBrace(Style, InitialToken))
1817  addUnwrappedLine();
1818 
1819  bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1821  DeclarationScopeStack.size() > 1);
1822  parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1823  // Munch the semicolon after a namespace. This is more common than one would
1824  // think. Puttin the semicolon into its own line is very ugly.
1825  if (FormatTok->Tok.is(tok::semi))
1826  nextToken();
1827  addUnwrappedLine();
1828  }
1829  // FIXME: Add error handling.
1830 }
1831 
1832 void UnwrappedLineParser::parseNew() {
1833  assert(FormatTok->is(tok::kw_new) && "'new' expected");
1834  nextToken();
1835  if (Style.Language != FormatStyle::LK_Java)
1836  return;
1837 
1838  // In Java, we can parse everything up to the parens, which aren't optional.
1839  do {
1840  // There should not be a ;, { or } before the new's open paren.
1841  if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1842  return;
1843 
1844  // Consume the parens.
1845  if (FormatTok->is(tok::l_paren)) {
1846  parseParens();
1847 
1848  // If there is a class body of an anonymous class, consume that as child.
1849  if (FormatTok->is(tok::l_brace))
1850  parseChildBlock();
1851  return;
1852  }
1853  nextToken();
1854  } while (!eof());
1855 }
1856 
1857 void UnwrappedLineParser::parseForOrWhileLoop() {
1858  assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1859  "'for', 'while' or foreach macro expected");
1860  nextToken();
1861  // JS' for await ( ...
1862  if (Style.Language == FormatStyle::LK_JavaScript &&
1863  FormatTok->is(Keywords.kw_await))
1864  nextToken();
1865  if (FormatTok->Tok.is(tok::l_paren))
1866  parseParens();
1867  if (FormatTok->Tok.is(tok::l_brace)) {
1868  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1869  parseBlock(/*MustBeDeclaration=*/false);
1870  addUnwrappedLine();
1871  } else {
1872  addUnwrappedLine();
1873  ++Line->Level;
1874  parseStructuralElement();
1875  --Line->Level;
1876  }
1877 }
1878 
1879 void UnwrappedLineParser::parseDoWhile() {
1880  assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1881  nextToken();
1882  if (FormatTok->Tok.is(tok::l_brace)) {
1883  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1884  parseBlock(/*MustBeDeclaration=*/false);
1885  if (Style.BraceWrapping.IndentBraces)
1886  addUnwrappedLine();
1887  } else {
1888  addUnwrappedLine();
1889  ++Line->Level;
1890  parseStructuralElement();
1891  --Line->Level;
1892  }
1893 
1894  // FIXME: Add error handling.
1895  if (!FormatTok->Tok.is(tok::kw_while)) {
1896  addUnwrappedLine();
1897  return;
1898  }
1899 
1900  nextToken();
1901  parseStructuralElement();
1902 }
1903 
// Finishes a label line and parses what follows the label.
//
// The callers visible in this file invoke this with the label's ':' as the
// current token, so the initial nextToken() consumes it. The label line is
// emitted one indentation level further out (never below level 0, and not
// below level 1 inside a preprocessor directive); the previous level is
// restored afterwards.
//
// NOTE(review): the embedded listing numbers skip 1913, so one source line
// appears to be missing inside the kw_break branch below; presumably it made
// the addUnwrappedLine() there conditional. Confirm against the upstream file.
1904 void UnwrappedLineParser::parseLabel() {
1905  nextToken();
1906  unsigned OldLineLevel = Line->Level;
1907  if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1908  --Line->Level;
// A '{' directly after the label (with no comment in between) is parsed as a
// block belonging to the label.
1909  if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1910  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1911  parseBlock(/*MustBeDeclaration=*/false);
// A "break" right after the block is parsed here as well.
1912  if (FormatTok->Tok.is(tok::kw_break)) {
1914  addUnwrappedLine();
1915  parseStructuralElement();
1916  }
1917  addUnwrappedLine();
1918  } else {
1919  if (FormatTok->is(tok::semi))
1920  nextToken();
1921  addUnwrappedLine();
1922  }
1923  Line->Level = OldLineLevel;
// Parse the element that follows the label, unless it starts with '{'.
1924  if (FormatTok->isNot(tok::l_brace)) {
1925  parseStructuralElement();
1926  addUnwrappedLine();
1927  }
1928 }
1929 
1930 void UnwrappedLineParser::parseCaseLabel() {
1931  assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1932  // FIXME: fix handling of complex expressions here.
1933  do {
1934  nextToken();
1935  } while (!eof() && !FormatTok->Tok.is(tok::colon));
1936  parseLabel();
1937 }
1938 
1939 void UnwrappedLineParser::parseSwitch() {
1940  assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1941  nextToken();
1942  if (FormatTok->Tok.is(tok::l_paren))
1943  parseParens();
1944  if (FormatTok->Tok.is(tok::l_brace)) {
1945  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1946  parseBlock(/*MustBeDeclaration=*/false);
1947  addUnwrappedLine();
1948  } else {
1949  addUnwrappedLine();
1950  ++Line->Level;
1951  parseStructuralElement();
1952  --Line->Level;
1953  }
1954 }
1955 
1956 void UnwrappedLineParser::parseAccessSpecifier() {
1957  nextToken();
1958  // Understand Qt's slots.
1959  if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1960  nextToken();
1961  // Otherwise, we don't know what it is, and we'd better keep the next token.
1962  if (FormatTok->Tok.is(tok::colon))
1963  nextToken();
1964  addUnwrappedLine();
1965 }
1966 
1967 bool UnwrappedLineParser::parseEnum() {
1968  // Won't be 'enum' for NS_ENUMs.
1969  if (FormatTok->Tok.is(tok::kw_enum))
1970  nextToken();
1971 
1972  // In TypeScript, "enum" can also be used as property name, e.g. in interface
1973  // declarations. An "enum" keyword followed by a colon would be a syntax
1974  // error and thus assume it is just an identifier.
1975  if (Style.Language == FormatStyle::LK_JavaScript &&
1976  FormatTok->isOneOf(tok::colon, tok::question))
1977  return false;
1978 
1979  // Eat up enum class ...
1980  if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1981  nextToken();
1982 
1983  while (FormatTok->Tok.getIdentifierInfo() ||
1984  FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1985  tok::greater, tok::comma, tok::question)) {
1986  nextToken();
1987  // We can have macros or attributes in between 'enum' and the enum name.
1988  if (FormatTok->is(tok::l_paren))
1989  parseParens();
1990  if (FormatTok->is(tok::identifier)) {
1991  nextToken();
1992  // If there are two identifiers in a row, this is likely an elaborate
1993  // return type. In Java, this can be "implements", etc.
1994  if (Style.isCpp() && FormatTok->is(tok::identifier))
1995  return false;
1996  }
1997  }
1998 
1999  // Just a declaration or something is wrong.
2000  if (FormatTok->isNot(tok::l_brace))
2001  return true;
2002  FormatTok->BlockKind = BK_Block;
2003 
2004  if (Style.Language == FormatStyle::LK_Java) {
2005  // Java enums are different.
2006  parseJavaEnumBody();
2007  return true;
2008  }
2009  if (Style.Language == FormatStyle::LK_Proto) {
2010  parseBlock(/*MustBeDeclaration=*/true);
2011  return true;
2012  }
2013 
2014  // Parse enum body.
2015  nextToken();
2016  bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
2017  if (HasError) {
2018  if (FormatTok->is(tok::semi))
2019  nextToken();
2020  addUnwrappedLine();
2021  }
2022  return true;
2023 
2024  // There is no addUnwrappedLine() here so that we fall through to parsing a
2025  // structural element afterwards. Thus, in "enum A {} n, m;",
2026  // "} n, m;" will end up in one unwrapped line.
2027 }
2028 
2029 void UnwrappedLineParser::parseJavaEnumBody() {
2030  // Determine whether the enum is simple, i.e. does not have a semicolon or
2031  // constants with class bodies. Simple enums can be formatted like braced
2032  // lists, contracted to a single line, etc.
2033  unsigned StoredPosition = Tokens->getPosition();
2034  bool IsSimple = true;
2035  FormatToken *Tok = Tokens->getNextToken();
2036  while (Tok) {
2037  if (Tok->is(tok::r_brace))
2038  break;
2039  if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2040  IsSimple = false;
2041  break;
2042  }
2043  // FIXME: This will also mark enums with braces in the arguments to enum
2044  // constants as "not simple". This is probably fine in practice, though.
2045  Tok = Tokens->getNextToken();
2046  }
2047  FormatTok = Tokens->setPosition(StoredPosition);
2048 
2049  if (IsSimple) {
2050  nextToken();
2051  parseBracedList();
2052  addUnwrappedLine();
2053  return;
2054  }
2055 
2056  // Parse the body of a more complex enum.
2057  // First add a line for everything up to the "{".
2058  nextToken();
2059  addUnwrappedLine();
2060  ++Line->Level;
2061 
2062  // Parse the enum constants.
2063  while (FormatTok) {
2064  if (FormatTok->is(tok::l_brace)) {
2065  // Parse the constant's class body.
2066  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2067  /*MunchSemi=*/false);
2068  } else if (FormatTok->is(tok::l_paren)) {
2069  parseParens();
2070  } else if (FormatTok->is(tok::comma)) {
2071  nextToken();
2072  addUnwrappedLine();
2073  } else if (FormatTok->is(tok::semi)) {
2074  nextToken();
2075  addUnwrappedLine();
2076  break;
2077  } else if (FormatTok->is(tok::r_brace)) {
2078  addUnwrappedLine();
2079  break;
2080  } else {
2081  nextToken();
2082  }
2083  }
2084 
2085  // Parse the class body after the enum's ";" if any.
2086  parseLevel(/*HasOpeningBrace=*/true);
2087  nextToken();
2088  --Line->Level;
2089  addUnwrappedLine();
2090 }
2091 
2092 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2093  const FormatToken &InitialToken = *FormatTok;
2094  nextToken();
2095 
2096  // The actual identifier can be a nested name specifier, and in macros
2097  // it is often token-pasted.
2098  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2099  tok::kw___attribute, tok::kw___declspec,
2100  tok::kw_alignas) ||
2101  ((Style.Language == FormatStyle::LK_Java ||
2103  FormatTok->isOneOf(tok::period, tok::comma))) {
2104  if (Style.Language == FormatStyle::LK_JavaScript &&
2105  FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2106  // JavaScript/TypeScript supports inline object types in
2107  // extends/implements positions:
2108  // class Foo implements {bar: number} { }
2109  nextToken();
2110  if (FormatTok->is(tok::l_brace)) {
2111  tryToParseBracedList();
2112  continue;
2113  }
2114  }
2115  bool IsNonMacroIdentifier =
2116  FormatTok->is(tok::identifier) &&
2117  FormatTok->TokenText != FormatTok->TokenText.upper();
2118  nextToken();
2119  // We can have macros or attributes in between 'class' and the class name.
2120  if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
2121  parseParens();
2122  }
2123 
2124  // Note that parsing away template declarations here leads to incorrectly
2125  // accepting function declarations as record declarations.
2126  // In general, we cannot solve this problem. Consider:
2127  // class A<int> B() {}
2128  // which can be a function definition or a class definition when B() is a
2129  // macro. If we find enough real-world cases where this is a problem, we
2130  // can parse for the 'template' keyword in the beginning of the statement,
2131  // and thus rule out the record production in case there is no template
2132  // (this would still leave us with an ambiguity between template function
2133  // and class declarations).
2134  if (FormatTok->isOneOf(tok::colon, tok::less)) {
2135  while (!eof()) {
2136  if (FormatTok->is(tok::l_brace)) {
2137  calculateBraceTypes(/*ExpectClassBody=*/true);
2138  if (!tryToParseBracedList())
2139  break;
2140  }
2141  if (FormatTok->Tok.is(tok::semi))
2142  return;
2143  nextToken();
2144  }
2145  }
2146  if (FormatTok->Tok.is(tok::l_brace)) {
2147  if (ParseAsExpr) {
2148  parseChildBlock();
2149  } else {
2150  if (ShouldBreakBeforeBrace(Style, InitialToken))
2151  addUnwrappedLine();
2152 
2153  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2154  /*MunchSemi=*/false);
2155  }
2156  }
2157  // There is no addUnwrappedLine() here so that we fall through to parsing a
2158  // structural element afterwards. Thus, in "class A {} n, m;",
2159  // "} n, m;" will end up in one unwrapped line.
2160 }
2161 
2162 void UnwrappedLineParser::parseObjCMethod() {
2163  assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2164  "'(' or identifier expected.");
2165  do {
2166  if (FormatTok->Tok.is(tok::semi)) {
2167  nextToken();
2168  addUnwrappedLine();
2169  return;
2170  } else if (FormatTok->Tok.is(tok::l_brace)) {
2171  if (Style.BraceWrapping.AfterFunction)
2172  addUnwrappedLine();
2173  parseBlock(/*MustBeDeclaration=*/false);
2174  addUnwrappedLine();
2175  return;
2176  } else {
2177  nextToken();
2178  }
2179  } while (!eof());
2180 }
2181 
2182 void UnwrappedLineParser::parseObjCProtocolList() {
2183  assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2184  do {
2185  nextToken();
2186  // Early exit in case someone forgot a close angle.
2187  if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2188  FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2189  return;
2190  } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2191  nextToken(); // Skip '>'.
2192 }
2193 
2194 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2195  do {
2196  if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2197  nextToken();
2198  addUnwrappedLine();
2199  break;
2200  }
2201  if (FormatTok->is(tok::l_brace)) {
2202  parseBlock(/*MustBeDeclaration=*/false);
2203  // In ObjC interfaces, nothing should be following the "}".
2204  addUnwrappedLine();
2205  } else if (FormatTok->is(tok::r_brace)) {
2206  // Ignore stray "}". parseStructuralElement doesn't consume them.
2207  nextToken();
2208  addUnwrappedLine();
2209  } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2210  nextToken();
2211  parseObjCMethod();
2212  } else {
2213  parseStructuralElement();
2214  }
2215  } while (!eof());
2216 }
2217 
2218 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2219  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2220  FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2221  nextToken();
2222  nextToken(); // interface name
2223 
2224  // @interface can be followed by a lightweight generic
2225  // specialization list, then either a base class or a category.
2226  if (FormatTok->Tok.is(tok::less)) {
2227  // Unlike protocol lists, generic parameterizations support
2228  // nested angles:
2229  //
2230  // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2231  // NSObject <NSCopying, NSSecureCoding>
2232  //
2233  // so we need to count how many open angles we have left.
2234  unsigned NumOpenAngles = 1;
2235  do {
2236  nextToken();
2237  // Early exit in case someone forgot a close angle.
2238  if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2239  FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2240  break;
2241  if (FormatTok->Tok.is(tok::less))
2242  ++NumOpenAngles;
2243  else if (FormatTok->Tok.is(tok::greater)) {
2244  assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2245  --NumOpenAngles;
2246  }
2247  } while (!eof() && NumOpenAngles != 0);
2248  nextToken(); // Skip '>'.
2249  }
2250  if (FormatTok->Tok.is(tok::colon)) {
2251  nextToken();
2252  nextToken(); // base class name
2253  } else if (FormatTok->Tok.is(tok::l_paren))
2254  // Skip category, if present.
2255  parseParens();
2256 
2257  if (FormatTok->Tok.is(tok::less))
2258  parseObjCProtocolList();
2259 
2260  if (FormatTok->Tok.is(tok::l_brace)) {
2262  addUnwrappedLine();
2263  parseBlock(/*MustBeDeclaration=*/true);
2264  }
2265 
2266  // With instance variables, this puts '}' on its own line. Without instance
2267  // variables, this ends the @interface line.
2268  addUnwrappedLine();
2269 
2270  parseObjCUntilAtEnd();
2271 }
2272 
2273 // Returns true for the declaration/definition form of @protocol,
2274 // false for the expression form.
2275 bool UnwrappedLineParser::parseObjCProtocol() {
2276  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2277  nextToken();
2278 
2279  if (FormatTok->is(tok::l_paren))
2280  // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2281  return false;
2282 
2283  // The definition/declaration form,
2284  // @protocol Foo
2285  // - (int)someMethod;
2286  // @end
2287 
2288  nextToken(); // protocol name
2289 
2290  if (FormatTok->Tok.is(tok::less))
2291  parseObjCProtocolList();
2292 
2293  // Check for protocol declaration.
2294  if (FormatTok->Tok.is(tok::semi)) {
2295  nextToken();
2296  addUnwrappedLine();
2297  return true;
2298  }
2299 
2300  addUnwrappedLine();
2301  parseObjCUntilAtEnd();
2302  return true;
2303 }
2304 
2305 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2306  bool IsImport = FormatTok->is(Keywords.kw_import);
2307  assert(IsImport || FormatTok->is(tok::kw_export));
2308  nextToken();
2309 
2310  // Consume the "default" in "export default class/function".
2311  if (FormatTok->is(tok::kw_default))
2312  nextToken();
2313 
2314  // Consume "async function", "function" and "default function", so that these
2315  // get parsed as free-standing JS functions, i.e. do not require a trailing
2316  // semicolon.
2317  if (FormatTok->is(Keywords.kw_async))
2318  nextToken();
2319  if (FormatTok->is(Keywords.kw_function)) {
2320  nextToken();
2321  return;
2322  }
2323 
2324  // For imports, `export *`, `export {...}`, consume the rest of the line up
2325  // to the terminating `;`. For everything else, just return and continue
2326  // parsing the structural element, i.e. the declaration or expression for
2327  // `export default`.
2328  if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2329  !FormatTok->isStringLiteral())
2330  return;
2331 
2332  while (!eof()) {
2333  if (FormatTok->is(tok::semi))
2334  return;
2335  if (Line->Tokens.empty()) {
2336  // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2337  // import statement should terminate.
2338  return;
2339  }
2340  if (FormatTok->is(tok::l_brace)) {
2341  FormatTok->BlockKind = BK_Block;
2342  nextToken();
2343  parseBracedList();
2344  } else {
2345  nextToken();
2346  }
2347  }
2348 }
2349 
2350 void UnwrappedLineParser::parseStatementMacro()
2351 {
2352  nextToken();
2353  if (FormatTok->is(tok::l_paren))
2354  parseParens();
2355  if (FormatTok->is(tok::semi))
2356  nextToken();
2357  addUnwrappedLine();
2358 }
2359 
2360 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2361  StringRef Prefix = "") {
2362  llvm::dbgs() << Prefix << "Line(" << Line.Level
2363  << ", FSC=" << Line.FirstStartColumn << ")"
2364  << (Line.InPPDirective ? " MACRO" : "") << ": ";
2365  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2366  E = Line.Tokens.end();
2367  I != E; ++I) {
2368  llvm::dbgs() << I->Tok->Tok.getName() << "["
2369  << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn
2370  << "] ";
2371  }
2372  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2373  E = Line.Tokens.end();
2374  I != E; ++I) {
2375  const UnwrappedLineNode &Node = *I;
2377  I = Node.Children.begin(),
2378  E = Node.Children.end();
2379  I != E; ++I) {
2380  printDebugInfo(*I, "\nChild: ");
2381  }
2382  }
2383  llvm::dbgs() << "\n";
2384 }
2385 
2386 void UnwrappedLineParser::addUnwrappedLine() {
2387  if (Line->Tokens.empty())
2388  return;
2389  LLVM_DEBUG({
2390  if (CurrentLines == &Lines)
2391  printDebugInfo(*Line);
2392  });
2393  CurrentLines->push_back(std::move(*Line));
2394  Line->Tokens.clear();
2395  Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2396  Line->FirstStartColumn = 0;
2397  if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2398  CurrentLines->append(
2399  std::make_move_iterator(PreprocessorDirectives.begin()),
2400  std::make_move_iterator(PreprocessorDirectives.end()));
2401  PreprocessorDirectives.clear();
2402  }
2403  // Disconnect the current token from the last token on the previous line.
2404  FormatTok->Previous = nullptr;
2405 }
2406 
2407 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2408 
2409 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2410  return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2411  FormatTok.NewlinesBefore > 0;
2412 }
2413 
2414 // Checks if \p FormatTok is a line comment that continues the line comment
2415 // section on \p Line.
2416 static bool continuesLineCommentSection(const FormatToken &FormatTok,
2417  const UnwrappedLine &Line,
2418  llvm::Regex &CommentPragmasRegex) {
2419  if (Line.Tokens.empty())
2420  return false;
2421 
2422  StringRef IndentContent = FormatTok.TokenText;
2423  if (FormatTok.TokenText.startswith("//") ||
2424  FormatTok.TokenText.startswith("/*"))
2425  IndentContent = FormatTok.TokenText.substr(2);
2426  if (CommentPragmasRegex.match(IndentContent))
2427  return false;
2428 
2429  // If Line starts with a line comment, then FormatTok continues the comment
2430  // section if its original column is greater or equal to the original start
2431  // column of the line.
2432  //
2433  // Define the min column token of a line as follows: if a line ends in '{' or
2434  // contains a '{' followed by a line comment, then the min column token is
2435  // that '{'. Otherwise, the min column token of the line is the first token of
2436  // the line.
2437  //
2438  // If Line starts with a token other than a line comment, then FormatTok
2439  // continues the comment section if its original column is greater than the
2440  // original start column of the min column token of the line.
2441  //
2442  // For example, the second line comment continues the first in these cases:
2443  //
2444  // // first line
2445  // // second line
2446  //
2447  // and:
2448  //
2449  // // first line
2450  // // second line
2451  //
2452  // and:
2453  //
2454  // int i; // first line
2455  // // second line
2456  //
2457  // and:
2458  //
2459  // do { // first line
2460  // // second line
2461  // int i;
2462  // } while (true);
2463  //
2464  // and:
2465  //
2466  // enum {
2467  // a, // first line
2468  // // second line
2469  // b
2470  // };
2471  //
2472  // The second line comment doesn't continue the first in these cases:
2473  //
2474  // // first line
2475  // // second line
2476  //
2477  // and:
2478  //
2479  // int i; // first line
2480  // // second line
2481  //
2482  // and:
2483  //
2484  // do { // first line
2485  // // second line
2486  // int i;
2487  // } while (true);
2488  //
2489  // and:
2490  //
2491  // enum {
2492  // a, // first line
2493  // // second line
2494  // };
2495  const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2496 
2497  // Scan for '{//'. If found, use the column of '{' as a min column for line
2498  // comment section continuation.
2499  const FormatToken *PreviousToken = nullptr;
2500  for (const UnwrappedLineNode &Node : Line.Tokens) {
2501  if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2502  isLineComment(*Node.Tok)) {
2503  MinColumnToken = PreviousToken;
2504  break;
2505  }
2506  PreviousToken = Node.Tok;
2507 
2508  // Grab the last newline preceding a token in this unwrapped line.
2509  if (Node.Tok->NewlinesBefore > 0) {
2510  MinColumnToken = Node.Tok;
2511  }
2512  }
2513  if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2514  MinColumnToken = PreviousToken;
2515  }
2516 
2517  return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2518  MinColumnToken);
2519 }
2520 
2521 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2522  bool JustComments = Line->Tokens.empty();
2524  I = CommentsBeforeNextToken.begin(),
2525  E = CommentsBeforeNextToken.end();
2526  I != E; ++I) {
2527  // Line comments that belong to the same line comment section are put on the
2528  // same line since later we might want to reflow content between them.
2529  // Additional fine-grained breaking of line comment sections is controlled
2530  // by the class BreakableLineCommentSection in case it is desirable to keep
2531  // several line comment sections in the same unwrapped line.
2532  //
2533  // FIXME: Consider putting separate line comment sections as children to the
2534  // unwrapped line instead.
2536  continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2537  if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2538  addUnwrappedLine();
2539  pushToken(*I);
2540  }
2541  if (NewlineBeforeNext && JustComments)
2542  addUnwrappedLine();
2543  CommentsBeforeNextToken.clear();
2544 }
2545 
2546 void UnwrappedLineParser::nextToken(int LevelDifference) {
2547  if (eof())
2548  return;
2549  flushComments(isOnNewLine(*FormatTok));
2550  pushToken(FormatTok);
2551  FormatToken *Previous = FormatTok;
2552  if (Style.Language != FormatStyle::LK_JavaScript)
2553  readToken(LevelDifference);
2554  else
2555  readTokenWithJavaScriptASI();
2556  FormatTok->Previous = Previous;
2557 }
2558 
2559 void UnwrappedLineParser::distributeComments(
2560  const SmallVectorImpl<FormatToken *> &Comments,
2561  const FormatToken *NextTok) {
2562  // Whether or not a line comment token continues a line is controlled by
2563  // the method continuesLineCommentSection, with the following caveat:
2564  //
2565  // Define a trail of Comments to be a nonempty proper postfix of Comments such
2566  // that each comment line from the trail is aligned with the next token, if
2567  // the next token exists. If a trail exists, the beginning of the maximal
2568  // trail is marked as a start of a new comment section.
2569  //
2570  // For example in this code:
2571  //
2572  // int a; // line about a
2573  // // line 1 about b
2574  // // line 2 about b
2575  // int b;
2576  //
2577  // the two lines about b form a maximal trail, so there are two sections, the
2578  // first one consisting of the single comment "// line about a" and the
2579  // second one consisting of the next two comments.
2580  if (Comments.empty())
2581  return;
2582  bool ShouldPushCommentsInCurrentLine = true;
2583  bool HasTrailAlignedWithNextToken = false;
2584  unsigned StartOfTrailAlignedWithNextToken = 0;
2585  if (NextTok) {
2586  // We are skipping the first element intentionally.
2587  for (unsigned i = Comments.size() - 1; i > 0; --i) {
2588  if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2589  HasTrailAlignedWithNextToken = true;
2590  StartOfTrailAlignedWithNextToken = i;
2591  }
2592  }
2593  }
2594  for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2595  FormatToken *FormatTok = Comments[i];
2596  if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
2597  FormatTok->ContinuesLineCommentSection = false;
2598  } else {
2599  FormatTok->ContinuesLineCommentSection =
2600  continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2601  }
2602  if (!FormatTok->ContinuesLineCommentSection &&
2603  (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2604  ShouldPushCommentsInCurrentLine = false;
2605  }
2606  if (ShouldPushCommentsInCurrentLine) {
2607  pushToken(FormatTok);
2608  } else {
2609  CommentsBeforeNextToken.push_back(FormatTok);
2610  }
2611  }
2612 }
2613 
2614 void UnwrappedLineParser::readToken(int LevelDifference) {
2616  do {
2617  FormatTok = Tokens->getNextToken();
2618  assert(FormatTok);
2619  while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2620  (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2621  distributeComments(Comments, FormatTok);
2622  Comments.clear();
2623  // If there is an unfinished unwrapped line, we flush the preprocessor
2624  // directives only after that unwrapped line was finished later.
2625  bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2626  ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2627  assert((LevelDifference >= 0 ||
2628  static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
2629  "LevelDifference makes Line->Level negative");
2630  Line->Level += LevelDifference;
2631  // Comments stored before the preprocessor directive need to be output
2632  // before the preprocessor directive, at the same level as the
2633  // preprocessor directive, as we consider them to apply to the directive.
2634  flushComments(isOnNewLine(*FormatTok));
2635  parsePPDirective();
2636  }
2637  while (FormatTok->Type == TT_ConflictStart ||
2638  FormatTok->Type == TT_ConflictEnd ||
2639  FormatTok->Type == TT_ConflictAlternative) {
2640  if (FormatTok->Type == TT_ConflictStart) {
2641  conditionalCompilationStart(/*Unreachable=*/false);
2642  } else if (FormatTok->Type == TT_ConflictAlternative) {
2643  conditionalCompilationAlternative();
2644  } else if (FormatTok->Type == TT_ConflictEnd) {
2645  conditionalCompilationEnd();
2646  }
2647  FormatTok = Tokens->getNextToken();
2648  FormatTok->MustBreakBefore = true;
2649  }
2650 
2651  if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
2652  !Line->InPPDirective) {
2653  continue;
2654  }
2655 
2656  if (!FormatTok->Tok.is(tok::comment)) {
2657  distributeComments(Comments, FormatTok);
2658  Comments.clear();
2659  return;
2660  }
2661 
2662  Comments.push_back(FormatTok);
2663  } while (!eof());
2664 
2665  distributeComments(Comments, nullptr);
2666  Comments.clear();
2667 }
2668 
2669 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2670  Line->Tokens.push_back(UnwrappedLineNode(Tok));
2671  if (MustBreakBeforeNextToken) {
2672  Line->Tokens.back().Tok->MustBreakBefore = true;
2673  MustBreakBeforeNextToken = false;
2674  }
2675 }
2676 
2677 } // end namespace format
2678 } // end namespace clang
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T -> getSizeExpr()))
SmallVector< UnwrappedLine, 0 > Children
static bool continuesLineCommentSection(const FormatToken &FormatTok, const UnwrappedLine &Line, llvm::Regex &CommentPragmasRegex)
UnwrappedLineParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, unsigned FirstStartColumn, ArrayRef< FormatToken *> Tokens, UnwrappedLineConsumer &Callback)
bool AfterUnion
Wrap union definitions.
Definition: Format.h:727
Indent in all namespaces.
Definition: Format.h:1318
__SIZE_TYPE__ size_t
The unsigned integer type of the result of the sizeof operator.
Definition: opencl-c.h:68
static bool isIIFE(const UnwrappedLine &Line, const AdditionalKeywords &Keywords)
Token Tok
The Token.
Definition: FormatToken.h:128
Should be used for Protocol Buffers (https://developers.google.com/protocol-buffers/).
Definition: Format.h:1231
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Definition: Token.h:95
bool IndentCaseLabels
Indent case labels one level from the switch statement.
Definition: Format.h:1079
unsigned OriginalColumn
The original 0-based column of this token, including expanded tabs.
Definition: FormatToken.h:215
static LLVM_ATTRIBUTE_UNUSED void printDebugInfo(const UnwrappedLine &Line, StringRef Prefix="")
StringRef P
Parser - This implements a parser for the C family of languages.
Definition: Parser.h:57
static bool mustBeJSIdent(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
bool AfterExternBlock
Wrap extern blocks.
Definition: Format.h:741
bool IsFirst
Indicates that this is the first token of the file.
Definition: FormatToken.h:160
Does not indent any directives.
Definition: Format.h:1091
bool isAnyIdentifier() const
Return true if this is a raw identifier (when lexing in raw mode) or a non-keyword identifier (when l...
Definition: Token.h:107
bool isBinaryOperator() const
Definition: FormatToken.h:413
PPDirectiveIndentStyle IndentPPDirectives
The preprocessor directive indenting style to use.
Definition: Format.h:1104
virtual void consumeUnwrappedLine(const UnwrappedLine &Line)=0
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:134
tok::TokenKind getKind() const
Definition: Token.h:90
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:113
unsigned Level
The indent level of the UnwrappedLine.
bool closesScope() const
Returns whether Tok is )]} or a closing > of a template or in protos.
Definition: FormatToken.h:381
bool AfterObjCDeclaration
Wrap ObjC definitions (interfaces, implementations...).
Definition: Format.h:699
bool IndentBraces
Indent the wrapped braces themselves.
Definition: Format.h:773
Should be used for Java.
Definition: Format.h:1224
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:290
static bool tokenCanStartNewLine(const clang::Token &Tok)
Token - This structure provides full information about a lexed token.
Definition: Token.h:35
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
Definition: Lexer.cpp:58
bool isNot(T Kind) const
Definition: FormatToken.h:323
static void hash_combine(std::size_t &seed, const T &v)
NamespaceIndentationKind NamespaceIndentation
The indentation used for namespaces.
Definition: Format.h:1322
const FormatToken & Tok
static bool isGoogScope(const UnwrappedLine &Line)
bool isCppStructuredBinding(const FormatStyle &Style) const
Returns whether the token is the left square bracket of a C++ structured binding declaration.
Definition: FormatToken.h:504
virtual FormatToken * getNextToken()=0
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...
CompoundStatementIndenter(UnwrappedLineParser *Parser, const FormatStyle &Style, unsigned &LineLevel)
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:316
ScopedLineState(UnwrappedLineParser &Parser, bool SwitchToPreprocessorLines=false)
std::list< UnwrappedLineNode > Tokens
The Tokens comprising this UnwrappedLine.
Should be used for JavaScript.
Definition: Format.h:1226
ContinuationIndenter * Indenter
const AnnotatedLine * Line
StateNode * Previous
static const size_t kInvalidIndex
bool InPPDirective
Whether this UnwrappedLine is part of a preprocessor directive.
BraceWrappingFlags BraceWrapping
Control of individual brace wrapping cases.
Definition: Format.h:822
bool AfterFunction
Wrap function definitions.
Definition: Format.h:679
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:124
SourceLocation getEnd() const
do v
Definition: arm_acle.h:78
#define false
Definition: stdbool.h:33
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:307
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:177
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:668
SourceRange WhitespaceRange
The range of the whitespace immediately preceding the Token.
Definition: FormatToken.h:141
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:172
static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
virtual FormatToken * setPosition(unsigned Position)=0
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
Definition: Lexer.cpp:67
bool isSimpleTypeSpecifier() const
Determine whether the token is a simple-type-specifier.
Definition: FormatToken.cpp:40
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:50
static bool ShouldBreakBeforeBrace(const FormatStyle &Style, const FormatToken &InitialToken)
Indent only in inner namespaces (nested in other namespaces).
Definition: Format.h:1308
LanguageKind Language
Language, this format style is targeted at.
Definition: Format.h:1241
ast_type_traits::DynTypedNode Node
bool isNot(tok::TokenKind K) const
Definition: Token.h:96
Dataflow Directional Tag Classes.
bool Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside)...
Definition: FormatToken.h:305
Should be used for TableGen code.
Definition: Format.h:1233
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const
Definition: Token.h:97
tok::PPKeywordKind getPPKeywordID() const
Return the preprocessor keyword ID for this identifier.
bool AfterStruct
Wrap struct definitions.
Definition: Format.h:713
virtual unsigned getPosition()=0
bool startsSequence(A K1, Ts... Tokens) const
true if this token starts a sequence with the given tokens in order, following the Next pointers...
Definition: FormatToken.h:336
bool AfterControlStatement
Wrap control statements (if/for/while/switch/..).
Definition: Format.h:651
Indents directives after the hash.
Definition: Format.h:1100
unsigned kind
All of the diagnostics that can be emitted by the frontend.
Definition: DiagnosticIDs.h:61
Represents a complete lambda introducer.
Definition: DeclSpec.h:2532
bool MustBreakBefore
Whether there must be a line break before this token.
Definition: FormatToken.h:166
bool AfterClass
Wrap class definitions.
Definition: Format.h:633
Should be used for Protocol Buffer messages in text format (https://developers.google.com/protocol-buffers/).
Definition: Format.h:1236
StringRef Text
Definition: Format.cpp:1630
bool ContinuesLineCommentSection
Does this line comment continue a line comment section?
Definition: FormatToken.h:284
bool isStringLiteral() const
Definition: FormatToken.h:347
SourceLocation getBegin() const
bool AfterNamespace
Wrap namespace definitions.
Definition: Format.h:695
bool HasUnescapedNewline
Whether there is at least one unescaped newline before the Token.
Definition: FormatToken.h:138
BraceBlockKind BlockKind
Contains the kind of block if this token is a brace.
Definition: FormatToken.h:178
const FormatStyle & Style