clang  8.0.0svn
UnwrappedLineParser.cpp
Go to the documentation of this file.
1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #include <algorithm>
22 
23 #define DEBUG_TYPE "format-parser"
24 
25 namespace clang {
26 namespace format {
27 
29 public:
30  virtual ~FormatTokenSource() {}
31  virtual FormatToken *getNextToken() = 0;
32 
33  virtual unsigned getPosition() = 0;
34  virtual FormatToken *setPosition(unsigned Position) = 0;
35 };
36 
37 namespace {
38 
39 class ScopedDeclarationState {
40 public:
41  ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
42  bool MustBeDeclaration)
43  : Line(Line), Stack(Stack) {
44  Line.MustBeDeclaration = MustBeDeclaration;
45  Stack.push_back(MustBeDeclaration);
46  }
47  ~ScopedDeclarationState() {
48  Stack.pop_back();
49  if (!Stack.empty())
50  Line.MustBeDeclaration = Stack.back();
51  else
52  Line.MustBeDeclaration = true;
53  }
54 
55 private:
57  std::vector<bool> &Stack;
58 };
59 
60 static bool isLineComment(const FormatToken &FormatTok) {
61  return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
62 }
63 
64 // Checks if \p FormatTok is a line comment that continues the line comment
65 // \p Previous. The original column of \p MinColumnToken is used to determine
66 // whether \p FormatTok is indented enough to the right to continue \p Previous.
67 static bool continuesLineComment(const FormatToken &FormatTok,
68  const FormatToken *Previous,
69  const FormatToken *MinColumnToken) {
70  if (!Previous || !MinColumnToken)
71  return false;
72  unsigned MinContinueColumn =
73  MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
74  return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
75  isLineComment(*Previous) &&
76  FormatTok.OriginalColumn >= MinContinueColumn;
77 }
78 
79 class ScopedMacroState : public FormatTokenSource {
80 public:
81  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
82  FormatToken *&ResetToken)
83  : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
84  PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
85  Token(nullptr), PreviousToken(nullptr) {
86  FakeEOF.Tok.startToken();
87  FakeEOF.Tok.setKind(tok::eof);
88  TokenSource = this;
89  Line.Level = 0;
90  Line.InPPDirective = true;
91  }
92 
93  ~ScopedMacroState() override {
94  TokenSource = PreviousTokenSource;
95  ResetToken = Token;
96  Line.InPPDirective = false;
97  Line.Level = PreviousLineLevel;
98  }
99 
100  FormatToken *getNextToken() override {
101  // The \c UnwrappedLineParser guards against this by never calling
102  // \c getNextToken() after it has encountered the first eof token.
103  assert(!eof());
104  PreviousToken = Token;
105  Token = PreviousTokenSource->getNextToken();
106  if (eof())
107  return &FakeEOF;
108  return Token;
109  }
110 
111  unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
112 
113  FormatToken *setPosition(unsigned Position) override {
114  PreviousToken = nullptr;
115  Token = PreviousTokenSource->setPosition(Position);
116  return Token;
117  }
118 
119 private:
120  bool eof() {
121  return Token && Token->HasUnescapedNewline &&
122  !continuesLineComment(*Token, PreviousToken,
123  /*MinColumnToken=*/PreviousToken);
124  }
125 
126  FormatToken FakeEOF;
128  FormatTokenSource *&TokenSource;
129  FormatToken *&ResetToken;
130  unsigned PreviousLineLevel;
131  FormatTokenSource *PreviousTokenSource;
132 
134  FormatToken *PreviousToken;
135 };
136 
137 } // end anonymous namespace
138 
140 public:
142  bool SwitchToPreprocessorLines = false)
143  : Parser(Parser), OriginalLines(Parser.CurrentLines) {
144  if (SwitchToPreprocessorLines)
145  Parser.CurrentLines = &Parser.PreprocessorDirectives;
146  else if (!Parser.Line->Tokens.empty())
147  Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
148  PreBlockLine = std::move(Parser.Line);
149  Parser.Line = llvm::make_unique<UnwrappedLine>();
150  Parser.Line->Level = PreBlockLine->Level;
151  Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
152  }
153 
155  if (!Parser.Line->Tokens.empty()) {
156  Parser.addUnwrappedLine();
157  }
158  assert(Parser.Line->Tokens.empty());
159  Parser.Line = std::move(PreBlockLine);
160  if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
161  Parser.MustBreakBeforeNextToken = true;
162  Parser.CurrentLines = OriginalLines;
163  }
164 
165 private:
167 
168  std::unique_ptr<UnwrappedLine> PreBlockLine;
169  SmallVectorImpl<UnwrappedLine> *OriginalLines;
170 };
171 
173 public:
175  const FormatStyle &Style, unsigned &LineLevel)
176  : LineLevel(LineLevel), OldLineLevel(LineLevel) {
178  Parser->addUnwrappedLine();
179  if (Style.BraceWrapping.IndentBraces)
180  ++LineLevel;
181  }
182  ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
183 
184 private:
185  unsigned &LineLevel;
186  unsigned OldLineLevel;
187 };
188 
189 namespace {
190 
191 class IndexedTokenSource : public FormatTokenSource {
192 public:
193  IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
194  : Tokens(Tokens), Position(-1) {}
195 
196  FormatToken *getNextToken() override {
197  ++Position;
198  return Tokens[Position];
199  }
200 
201  unsigned getPosition() override {
202  assert(Position >= 0);
203  return Position;
204  }
205 
206  FormatToken *setPosition(unsigned P) override {
207  Position = P;
208  return Tokens[Position];
209  }
210 
211  void reset() { Position = -1; }
212 
213 private:
215  int Position;
216 };
217 
218 } // end anonymous namespace
219 
221  const AdditionalKeywords &Keywords,
222  unsigned FirstStartColumn,
224  UnwrappedLineConsumer &Callback)
225  : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
226  CurrentLines(&Lines), Style(Style), Keywords(Keywords),
227  CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
228  Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
229  IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
230  ? IG_Rejected
231  : IG_Inited),
232  IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
233 
234 void UnwrappedLineParser::reset() {
235  PPBranchLevel = -1;
236  IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
237  ? IG_Rejected
238  : IG_Inited;
239  IncludeGuardToken = nullptr;
240  Line.reset(new UnwrappedLine);
241  CommentsBeforeNextToken.clear();
242  FormatTok = nullptr;
243  MustBreakBeforeNextToken = false;
244  PreprocessorDirectives.clear();
245  CurrentLines = &Lines;
246  DeclarationScopeStack.clear();
247  PPStack.clear();
248  Line->FirstStartColumn = FirstStartColumn;
249 }
250 
252  IndexedTokenSource TokenSource(AllTokens);
253  Line->FirstStartColumn = FirstStartColumn;
254  do {
255  LLVM_DEBUG(llvm::dbgs() << "----\n");
256  reset();
257  Tokens = &TokenSource;
258  TokenSource.reset();
259 
260  readToken();
261  parseFile();
262 
263  // If we found an include guard then all preprocessor directives (other than
264  // the guard) are over-indented by one.
265  if (IncludeGuard == IG_Found)
266  for (auto &Line : Lines)
267  if (Line.InPPDirective && Line.Level > 0)
268  --Line.Level;
269 
270  // Create line with eof token.
271  pushToken(FormatTok);
272  addUnwrappedLine();
273 
274  for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
275  E = Lines.end();
276  I != E; ++I) {
277  Callback.consumeUnwrappedLine(*I);
278  }
279  Callback.finishRun();
280  Lines.clear();
281  while (!PPLevelBranchIndex.empty() &&
282  PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
283  PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
284  PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
285  }
286  if (!PPLevelBranchIndex.empty()) {
287  ++PPLevelBranchIndex.back();
288  assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
289  assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
290  }
291  } while (!PPLevelBranchIndex.empty());
292 }
293 
294 void UnwrappedLineParser::parseFile() {
295  // The top-level context in a file always has declarations, except for pre-
296  // processor directives and JavaScript files.
297  bool MustBeDeclaration =
298  !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
299  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
300  MustBeDeclaration);
301  if (Style.Language == FormatStyle::LK_TextProto)
302  parseBracedList();
303  else
304  parseLevel(/*HasOpeningBrace=*/false);
305  // Make sure to format the remaining tokens.
306  //
307  // LK_TextProto is special since its top-level is parsed as the body of a
308  // braced list, which does not necessarily have natural line separators such
309  // as a semicolon. Comments after the last entry that have been determined to
310  // not belong to that line, as in:
311  // key: value
312  // // endfile comment
313  // do not have a chance to be put on a line of their own until this point.
314  // Here we add this newline before end-of-file comments.
315  if (Style.Language == FormatStyle::LK_TextProto &&
316  !CommentsBeforeNextToken.empty())
317  addUnwrappedLine();
318  flushComments(true);
319  addUnwrappedLine();
320 }
321 
322 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
323  bool SwitchLabelEncountered = false;
324  do {
325  tok::TokenKind kind = FormatTok->Tok.getKind();
326  if (FormatTok->Type == TT_MacroBlockBegin) {
327  kind = tok::l_brace;
328  } else if (FormatTok->Type == TT_MacroBlockEnd) {
329  kind = tok::r_brace;
330  }
331 
332  switch (kind) {
333  case tok::comment:
334  nextToken();
335  addUnwrappedLine();
336  break;
337  case tok::l_brace:
338  // FIXME: Add parameter whether this can happen - if this happens, we must
339  // be in a non-declaration context.
340  if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
341  continue;
342  parseBlock(/*MustBeDeclaration=*/false);
343  addUnwrappedLine();
344  break;
345  case tok::r_brace:
346  if (HasOpeningBrace)
347  return;
348  nextToken();
349  addUnwrappedLine();
350  break;
351  case tok::kw_default: {
352  unsigned StoredPosition = Tokens->getPosition();
353  FormatToken *Next;
354  do {
355  Next = Tokens->getNextToken();
356  } while (Next && Next->is(tok::comment));
357  FormatTok = Tokens->setPosition(StoredPosition);
358  if (Next && Next->isNot(tok::colon)) {
359  // default not followed by ':' is not a case label; treat it like
360  // an identifier.
361  parseStructuralElement();
362  break;
363  }
364  // Else, if it is 'default:', fall through to the case handling.
365  LLVM_FALLTHROUGH;
366  }
367  case tok::kw_case:
368  if (Style.Language == FormatStyle::LK_JavaScript &&
369  Line->MustBeDeclaration) {
370  // A 'case: string' style field declaration.
371  parseStructuralElement();
372  break;
373  }
374  if (!SwitchLabelEncountered &&
375  (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
376  ++Line->Level;
377  SwitchLabelEncountered = true;
378  parseStructuralElement();
379  break;
380  default:
381  parseStructuralElement();
382  break;
383  }
384  } while (!eof());
385 }
386 
387 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
388  // We'll parse forward through the tokens until we hit
389  // a closing brace or eof - note that getNextToken() will
390  // parse macros, so this will magically work inside macro
391  // definitions, too.
392  unsigned StoredPosition = Tokens->getPosition();
393  FormatToken *Tok = FormatTok;
394  const FormatToken *PrevTok = Tok->Previous;
395  // Keep a stack of positions of lbrace tokens. We will
396  // update information about whether an lbrace starts a
397  // braced init list or a different block during the loop.
398  SmallVector<FormatToken *, 8> LBraceStack;
399  assert(Tok->Tok.is(tok::l_brace));
400  do {
401  // Get next non-comment token.
402  FormatToken *NextTok;
403  unsigned ReadTokens = 0;
404  do {
405  NextTok = Tokens->getNextToken();
406  ++ReadTokens;
407  } while (NextTok->is(tok::comment));
408 
409  switch (Tok->Tok.getKind()) {
410  case tok::l_brace:
411  if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
412  if (PrevTok->isOneOf(tok::colon, tok::less))
413  // A ':' indicates this code is in a type, or a braced list
414  // following a label in an object literal ({a: {b: 1}}).
415  // A '<' could be an object used in a comparison, but that is nonsense
416  // code (can never return true), so more likely it is a generic type
417  // argument (`X<{a: string; b: number}>`).
418  // The code below could be confused by semicolons between the
419  // individual members in a type member list, which would normally
420  // trigger BK_Block. In both cases, this must be parsed as an inline
421  // braced init.
422  Tok->BlockKind = BK_BracedInit;
423  else if (PrevTok->is(tok::r_paren))
424  // `) { }` can only occur in function or method declarations in JS.
425  Tok->BlockKind = BK_Block;
426  } else {
427  Tok->BlockKind = BK_Unknown;
428  }
429  LBraceStack.push_back(Tok);
430  break;
431  case tok::r_brace:
432  if (LBraceStack.empty())
433  break;
434  if (LBraceStack.back()->BlockKind == BK_Unknown) {
435  bool ProbablyBracedList = false;
436  if (Style.Language == FormatStyle::LK_Proto) {
437  ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
438  } else {
439  // Using OriginalColumn to distinguish between ObjC methods and
440  // binary operators is a bit hacky.
441  bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
442  NextTok->OriginalColumn == 0;
443 
444  // If there is a comma, semicolon or right paren after the closing
445  // brace, we assume this is a braced initializer list. Note that
446  // regardless how we mark inner braces here, we will overwrite the
447  // BlockKind later if we parse a braced list (where all blocks
448  // inside are by default braced lists), or when we explicitly detect
449  // blocks (for example while parsing lambdas).
450  // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
451  // braced list in JS.
452  ProbablyBracedList =
454  NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
455  Keywords.kw_as)) ||
456  (Style.isCpp() && NextTok->is(tok::l_paren)) ||
457  NextTok->isOneOf(tok::comma, tok::period, tok::colon,
458  tok::r_paren, tok::r_square, tok::l_brace,
459  tok::ellipsis) ||
460  (NextTok->is(tok::identifier) &&
461  !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
462  (NextTok->is(tok::semi) &&
463  (!ExpectClassBody || LBraceStack.size() != 1)) ||
464  (NextTok->isBinaryOperator() && !NextIsObjCMethod);
465  if (NextTok->is(tok::l_square)) {
466  // We can have an array subscript after a braced init
467  // list, but C++11 attributes are expected after blocks.
468  NextTok = Tokens->getNextToken();
469  ++ReadTokens;
470  ProbablyBracedList = NextTok->isNot(tok::l_square);
471  }
472  }
473  if (ProbablyBracedList) {
474  Tok->BlockKind = BK_BracedInit;
475  LBraceStack.back()->BlockKind = BK_BracedInit;
476  } else {
477  Tok->BlockKind = BK_Block;
478  LBraceStack.back()->BlockKind = BK_Block;
479  }
480  }
481  LBraceStack.pop_back();
482  break;
483  case tok::identifier:
484  if (!Tok->is(TT_StatementMacro))
485  break;
486  LLVM_FALLTHROUGH;
487  case tok::at:
488  case tok::semi:
489  case tok::kw_if:
490  case tok::kw_while:
491  case tok::kw_for:
492  case tok::kw_switch:
493  case tok::kw_try:
494  case tok::kw___try:
495  if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
496  LBraceStack.back()->BlockKind = BK_Block;
497  break;
498  default:
499  break;
500  }
501  PrevTok = Tok;
502  Tok = NextTok;
503  } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
504 
505  // Assume other blocks for all unclosed opening braces.
506  for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
507  if (LBraceStack[i]->BlockKind == BK_Unknown)
508  LBraceStack[i]->BlockKind = BK_Block;
509  }
510 
511  FormatTok = Tokens->setPosition(StoredPosition);
512 }
513 
/// Folds the std::hash of \p v into \p seed, in place.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
519 
520 size_t UnwrappedLineParser::computePPHash() const {
521  size_t h = 0;
522  for (const auto &i : PPStack) {
523  hash_combine(h, size_t(i.Kind));
524  hash_combine(h, i.Line);
525  }
526  return h;
527 }
528 
529 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
530  bool MunchSemi) {
531  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
532  "'{' or macro block token expected");
533  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
534  FormatTok->BlockKind = BK_Block;
535 
536  size_t PPStartHash = computePPHash();
537 
538  unsigned InitialLevel = Line->Level;
539  nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
540 
541  if (MacroBlock && FormatTok->is(tok::l_paren))
542  parseParens();
543 
544  size_t NbPreprocessorDirectives =
545  CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
546  addUnwrappedLine();
547  size_t OpeningLineIndex =
548  CurrentLines->empty()
550  : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
551 
552  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
553  MustBeDeclaration);
554  if (AddLevel)
555  ++Line->Level;
556  parseLevel(/*HasOpeningBrace=*/true);
557 
558  if (eof())
559  return;
560 
561  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
562  : !FormatTok->is(tok::r_brace)) {
563  Line->Level = InitialLevel;
564  FormatTok->BlockKind = BK_Block;
565  return;
566  }
567 
568  size_t PPEndHash = computePPHash();
569 
570  // Munch the closing brace.
571  nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
572 
573  if (MacroBlock && FormatTok->is(tok::l_paren))
574  parseParens();
575 
576  if (MunchSemi && FormatTok->Tok.is(tok::semi))
577  nextToken();
578  Line->Level = InitialLevel;
579 
580  if (PPStartHash == PPEndHash) {
581  Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
582  if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
583  // Update the opening line to add the forward reference as well
584  (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
585  CurrentLines->size() - 1;
586  }
587  }
588 }
589 
590 static bool isGoogScope(const UnwrappedLine &Line) {
591  // FIXME: Closure-library specific stuff should not be hard-coded but be
592  // configurable.
593  if (Line.Tokens.size() < 4)
594  return false;
595  auto I = Line.Tokens.begin();
596  if (I->Tok->TokenText != "goog")
597  return false;
598  ++I;
599  if (I->Tok->isNot(tok::period))
600  return false;
601  ++I;
602  if (I->Tok->TokenText != "scope")
603  return false;
604  ++I;
605  return I->Tok->is(tok::l_paren);
606 }
607 
608 static bool isIIFE(const UnwrappedLine &Line,
609  const AdditionalKeywords &Keywords) {
610  // Look for the start of an immediately invoked anonymous function.
611  // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
612  // This is commonly done in JavaScript to create a new, anonymous scope.
613  // Example: (function() { ... })()
614  if (Line.Tokens.size() < 3)
615  return false;
616  auto I = Line.Tokens.begin();
617  if (I->Tok->isNot(tok::l_paren))
618  return false;
619  ++I;
620  if (I->Tok->isNot(Keywords.kw_function))
621  return false;
622  ++I;
623  return I->Tok->is(tok::l_paren);
624 }
625 
626 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
627  const FormatToken &InitialToken) {
628  if (InitialToken.is(tok::kw_namespace))
629  return Style.BraceWrapping.AfterNamespace;
630  if (InitialToken.is(tok::kw_class))
631  return Style.BraceWrapping.AfterClass;
632  if (InitialToken.is(tok::kw_union))
633  return Style.BraceWrapping.AfterUnion;
634  if (InitialToken.is(tok::kw_struct))
635  return Style.BraceWrapping.AfterStruct;
636  return false;
637 }
638 
639 void UnwrappedLineParser::parseChildBlock() {
640  FormatTok->BlockKind = BK_Block;
641  nextToken();
642  {
643  bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
644  (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
645  ScopedLineState LineState(*this);
646  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
647  /*MustBeDeclaration=*/false);
648  Line->Level += SkipIndent ? 0 : 1;
649  parseLevel(/*HasOpeningBrace=*/true);
650  flushComments(isOnNewLine(*FormatTok));
651  Line->Level -= SkipIndent ? 0 : 1;
652  }
653  nextToken();
654 }
655 
656 void UnwrappedLineParser::parsePPDirective() {
657  assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
658  ScopedMacroState MacroState(*Line, Tokens, FormatTok);
659  nextToken();
660 
661  if (!FormatTok->Tok.getIdentifierInfo()) {
662  parsePPUnknown();
663  return;
664  }
665 
666  switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
667  case tok::pp_define:
668  parsePPDefine();
669  return;
670  case tok::pp_if:
671  parsePPIf(/*IfDef=*/false);
672  break;
673  case tok::pp_ifdef:
674  case tok::pp_ifndef:
675  parsePPIf(/*IfDef=*/true);
676  break;
677  case tok::pp_else:
678  parsePPElse();
679  break;
680  case tok::pp_elif:
681  parsePPElIf();
682  break;
683  case tok::pp_endif:
684  parsePPEndIf();
685  break;
686  default:
687  parsePPUnknown();
688  break;
689  }
690 }
691 
692 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
693  size_t Line = CurrentLines->size();
694  if (CurrentLines == &PreprocessorDirectives)
695  Line += Lines.size();
696 
697  if (Unreachable ||
698  (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
699  PPStack.push_back({PP_Unreachable, Line});
700  else
701  PPStack.push_back({PP_Conditional, Line});
702 }
703 
704 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
705  ++PPBranchLevel;
706  assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
707  if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
708  PPLevelBranchIndex.push_back(0);
709  PPLevelBranchCount.push_back(0);
710  }
711  PPChainBranchIndex.push(0);
712  bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
713  conditionalCompilationCondition(Unreachable || Skip);
714 }
715 
716 void UnwrappedLineParser::conditionalCompilationAlternative() {
717  if (!PPStack.empty())
718  PPStack.pop_back();
719  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
720  if (!PPChainBranchIndex.empty())
721  ++PPChainBranchIndex.top();
722  conditionalCompilationCondition(
723  PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
724  PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
725 }
726 
727 void UnwrappedLineParser::conditionalCompilationEnd() {
728  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
729  if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
730  if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
731  PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
732  }
733  }
734  // Guard against #endif's without #if.
735  if (PPBranchLevel > -1)
736  --PPBranchLevel;
737  if (!PPChainBranchIndex.empty())
738  PPChainBranchIndex.pop();
739  if (!PPStack.empty())
740  PPStack.pop_back();
741 }
742 
743 void UnwrappedLineParser::parsePPIf(bool IfDef) {
744  bool IfNDef = FormatTok->is(tok::pp_ifndef);
745  nextToken();
746  bool Unreachable = false;
747  if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
748  Unreachable = true;
749  if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
750  Unreachable = true;
751  conditionalCompilationStart(Unreachable);
752  FormatToken *IfCondition = FormatTok;
753  // If there's a #ifndef on the first line, and the only lines before it are
754  // comments, it could be an include guard.
755  bool MaybeIncludeGuard = IfNDef;
756  if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
757  for (auto &Line : Lines) {
758  if (!Line.Tokens.front().Tok->is(tok::comment)) {
759  MaybeIncludeGuard = false;
760  IncludeGuard = IG_Rejected;
761  break;
762  }
763  }
764  --PPBranchLevel;
765  parsePPUnknown();
766  ++PPBranchLevel;
767  if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
768  IncludeGuard = IG_IfNdefed;
769  IncludeGuardToken = IfCondition;
770  }
771 }
772 
773 void UnwrappedLineParser::parsePPElse() {
774  // If a potential include guard has an #else, it's not an include guard.
775  if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
776  IncludeGuard = IG_Rejected;
777  conditionalCompilationAlternative();
778  if (PPBranchLevel > -1)
779  --PPBranchLevel;
780  parsePPUnknown();
781  ++PPBranchLevel;
782 }
783 
784 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
785 
786 void UnwrappedLineParser::parsePPEndIf() {
787  conditionalCompilationEnd();
788  parsePPUnknown();
789  // If the #endif of a potential include guard is the last thing in the file,
790  // then we found an include guard.
791  unsigned TokenPosition = Tokens->getPosition();
792  FormatToken *PeekNext = AllTokens[TokenPosition];
793  if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
794  PeekNext->is(tok::eof) &&
796  IncludeGuard = IG_Found;
797 }
798 
799 void UnwrappedLineParser::parsePPDefine() {
800  nextToken();
801 
802  if (FormatTok->Tok.getKind() != tok::identifier) {
803  IncludeGuard = IG_Rejected;
804  IncludeGuardToken = nullptr;
805  parsePPUnknown();
806  return;
807  }
808 
809  if (IncludeGuard == IG_IfNdefed &&
810  IncludeGuardToken->TokenText == FormatTok->TokenText) {
811  IncludeGuard = IG_Defined;
812  IncludeGuardToken = nullptr;
813  for (auto &Line : Lines) {
814  if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
815  IncludeGuard = IG_Rejected;
816  break;
817  }
818  }
819  }
820 
821  nextToken();
822  if (FormatTok->Tok.getKind() == tok::l_paren &&
823  FormatTok->WhitespaceRange.getBegin() ==
824  FormatTok->WhitespaceRange.getEnd()) {
825  parseParens();
826  }
828  Line->Level += PPBranchLevel + 1;
829  addUnwrappedLine();
830  ++Line->Level;
831 
832  // Errors during a preprocessor directive can only affect the layout of the
833  // preprocessor directive, and thus we ignore them. An alternative approach
834  // would be to use the same approach we use on the file level (no
835  // re-indentation if there was a structural error) within the macro
836  // definition.
837  parseFile();
838 }
839 
840 void UnwrappedLineParser::parsePPUnknown() {
841  do {
842  nextToken();
843  } while (!eof());
845  Line->Level += PPBranchLevel + 1;
846  addUnwrappedLine();
847 }
848 
849 // Here we blacklist certain tokens that are not usually the first token in an
850 // unwrapped line. This is used in attempt to distinguish macro calls without
851 // trailing semicolons from other constructs split to several lines.
852 static bool tokenCanStartNewLine(const clang::Token &Tok) {
853  // Semicolon can be a null-statement, l_square can be a start of a macro or
854  // a C++11 attribute, but this doesn't seem to be common.
855  return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
856  Tok.isNot(tok::l_square) &&
857  // Tokens that can only be used as binary operators and a part of
858  // overloaded operator names.
859  Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
860  Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
861  Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
862  Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
863  Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
864  Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
865  Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
866  Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
867  Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
868  Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
869  Tok.isNot(tok::lesslessequal) &&
870  // Colon is used in labels, base class lists, initializer lists,
871  // range-based for loops, ternary operator, but should never be the
872  // first token in an unwrapped line.
873  Tok.isNot(tok::colon) &&
874  // 'noexcept' is a trailing annotation.
875  Tok.isNot(tok::kw_noexcept);
876 }
877 
878 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
879  const FormatToken *FormatTok) {
880  // FIXME: This returns true for C/C++ keywords like 'struct'.
881  return FormatTok->is(tok::identifier) &&
882  (FormatTok->Tok.getIdentifierInfo() == nullptr ||
883  !FormatTok->isOneOf(
884  Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
885  Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
886  Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
887  Keywords.kw_let, Keywords.kw_var, tok::kw_const,
888  Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
889  Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
890  Keywords.kw_from));
891 }
892 
893 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
894  const FormatToken *FormatTok) {
895  return FormatTok->Tok.isLiteral() ||
896  FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
897  mustBeJSIdent(Keywords, FormatTok);
898 }
899 
900 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
901 // when encountered after a value (see mustBeJSIdentOrValue).
902 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
903  const FormatToken *FormatTok) {
904  return FormatTok->isOneOf(
905  tok::kw_return, Keywords.kw_yield,
906  // conditionals
907  tok::kw_if, tok::kw_else,
908  // loops
909  tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
910  // switch/case
911  tok::kw_switch, tok::kw_case,
912  // exceptions
913  tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
914  // declaration
915  tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
916  Keywords.kw_async, Keywords.kw_function,
917  // import/export
918  Keywords.kw_import, tok::kw_export);
919 }
920 
921 // readTokenWithJavaScriptASI reads the next token and terminates the current
922 // line if JavaScript Automatic Semicolon Insertion must
923 // happen between the current token and the next token.
924 //
925 // This method is conservative - it cannot cover all edge cases of JavaScript,
926 // but only aims to correctly handle certain well known cases. It *must not*
927 // return true in speculative cases.
928 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
929  FormatToken *Previous = FormatTok;
930  readToken();
931  FormatToken *Next = FormatTok;
932 
933  bool IsOnSameLine =
934  CommentsBeforeNextToken.empty()
935  ? Next->NewlinesBefore == 0
936  : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
937  if (IsOnSameLine)
938  return;
939 
940  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
941  bool PreviousStartsTemplateExpr =
942  Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
943  if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
944  // If the line contains an '@' sign, the previous token might be an
945  // annotation, which can precede another identifier/value.
946  bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
947  [](UnwrappedLineNode &LineNode) {
948  return LineNode.Tok->is(tok::at);
949  }) != Line->Tokens.end();
950  if (HasAt)
951  return;
952  }
953  if (Next->is(tok::exclaim) && PreviousMustBeValue)
954  return addUnwrappedLine();
955  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
956  bool NextEndsTemplateExpr =
957  Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
958  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
959  (PreviousMustBeValue ||
960  Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
961  tok::minusminus)))
962  return addUnwrappedLine();
963  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
964  isJSDeclOrStmt(Keywords, Next))
965  return addUnwrappedLine();
966 }
967 
968 void UnwrappedLineParser::parseStructuralElement() {
969  assert(!FormatTok->is(tok::l_brace));
970  if (Style.Language == FormatStyle::LK_TableGen &&
971  FormatTok->is(tok::pp_include)) {
972  nextToken();
973  if (FormatTok->is(tok::string_literal))
974  nextToken();
975  addUnwrappedLine();
976  return;
977  }
978  switch (FormatTok->Tok.getKind()) {
979  case tok::kw_asm:
980  nextToken();
981  if (FormatTok->is(tok::l_brace)) {
982  FormatTok->Type = TT_InlineASMBrace;
983  nextToken();
984  while (FormatTok && FormatTok->isNot(tok::eof)) {
985  if (FormatTok->is(tok::r_brace)) {
986  FormatTok->Type = TT_InlineASMBrace;
987  nextToken();
988  addUnwrappedLine();
989  break;
990  }
991  FormatTok->Finalized = true;
992  nextToken();
993  }
994  }
995  break;
996  case tok::kw_namespace:
997  parseNamespace();
998  return;
999  case tok::kw_public:
1000  case tok::kw_protected:
1001  case tok::kw_private:
1002  if (Style.Language == FormatStyle::LK_Java ||
1004  nextToken();
1005  else
1006  parseAccessSpecifier();
1007  return;
1008  case tok::kw_if:
1009  parseIfThenElse();
1010  return;
1011  case tok::kw_for:
1012  case tok::kw_while:
1013  parseForOrWhileLoop();
1014  return;
1015  case tok::kw_do:
1016  parseDoWhile();
1017  return;
1018  case tok::kw_switch:
1019  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1020  // 'switch: string' field declaration.
1021  break;
1022  parseSwitch();
1023  return;
1024  case tok::kw_default:
1025  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1026  // 'default: string' field declaration.
1027  break;
1028  nextToken();
1029  if (FormatTok->is(tok::colon)) {
1030  parseLabel();
1031  return;
1032  }
1033  // e.g. "default void f() {}" in a Java interface.
1034  break;
1035  case tok::kw_case:
1036  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1037  // 'case: string' field declaration.
1038  break;
1039  parseCaseLabel();
1040  return;
1041  case tok::kw_try:
1042  case tok::kw___try:
1043  parseTryCatch();
1044  return;
1045  case tok::kw_extern:
1046  nextToken();
1047  if (FormatTok->Tok.is(tok::string_literal)) {
1048  nextToken();
1049  if (FormatTok->Tok.is(tok::l_brace)) {
1050  if (Style.BraceWrapping.AfterExternBlock) {
1051  addUnwrappedLine();
1052  parseBlock(/*MustBeDeclaration=*/true);
1053  } else {
1054  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
1055  }
1056  addUnwrappedLine();
1057  return;
1058  }
1059  }
1060  break;
1061  case tok::kw_export:
1062  if (Style.Language == FormatStyle::LK_JavaScript) {
1063  parseJavaScriptEs6ImportExport();
1064  return;
1065  }
1066  if (!Style.isCpp())
1067  break;
1068  // Handle C++ "(inline|export) namespace".
1069  LLVM_FALLTHROUGH;
1070  case tok::kw_inline:
1071  nextToken();
1072  if (FormatTok->Tok.is(tok::kw_namespace)) {
1073  parseNamespace();
1074  return;
1075  }
1076  break;
1077  case tok::identifier:
1078  if (FormatTok->is(TT_ForEachMacro)) {
1079  parseForOrWhileLoop();
1080  return;
1081  }
1082  if (FormatTok->is(TT_MacroBlockBegin)) {
1083  parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
1084  /*MunchSemi=*/false);
1085  return;
1086  }
1087  if (FormatTok->is(Keywords.kw_import)) {
1088  if (Style.Language == FormatStyle::LK_JavaScript) {
1089  parseJavaScriptEs6ImportExport();
1090  return;
1091  }
1092  if (Style.Language == FormatStyle::LK_Proto) {
1093  nextToken();
1094  if (FormatTok->is(tok::kw_public))
1095  nextToken();
1096  if (!FormatTok->is(tok::string_literal))
1097  return;
1098  nextToken();
1099  if (FormatTok->is(tok::semi))
1100  nextToken();
1101  addUnwrappedLine();
1102  return;
1103  }
1104  }
1105  if (Style.isCpp() &&
1106  FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1107  Keywords.kw_slots, Keywords.kw_qslots)) {
1108  nextToken();
1109  if (FormatTok->is(tok::colon)) {
1110  nextToken();
1111  addUnwrappedLine();
1112  return;
1113  }
1114  }
1115  if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1116  parseStatementMacro();
1117  return;
1118  }
1119  // In all other cases, parse the declaration.
1120  break;
1121  default:
1122  break;
1123  }
1124  do {
1125  const FormatToken *Previous = FormatTok->Previous;
1126  switch (FormatTok->Tok.getKind()) {
1127  case tok::at:
1128  nextToken();
1129  if (FormatTok->Tok.is(tok::l_brace)) {
1130  nextToken();
1131  parseBracedList();
1132  break;
1133  }
1134  switch (FormatTok->Tok.getObjCKeywordID()) {
1135  case tok::objc_public:
1136  case tok::objc_protected:
1137  case tok::objc_package:
1138  case tok::objc_private:
1139  return parseAccessSpecifier();
1140  case tok::objc_interface:
1141  case tok::objc_implementation:
1142  return parseObjCInterfaceOrImplementation();
1143  case tok::objc_protocol:
1144  if (parseObjCProtocol())
1145  return;
1146  break;
1147  case tok::objc_end:
1148  return; // Handled by the caller.
1149  case tok::objc_optional:
1150  case tok::objc_required:
1151  nextToken();
1152  addUnwrappedLine();
1153  return;
1154  case tok::objc_autoreleasepool:
1155  nextToken();
1156  if (FormatTok->Tok.is(tok::l_brace)) {
1158  addUnwrappedLine();
1159  parseBlock(/*MustBeDeclaration=*/false);
1160  }
1161  addUnwrappedLine();
1162  return;
1163  case tok::objc_synchronized:
1164  nextToken();
1165  if (FormatTok->Tok.is(tok::l_paren))
1166  // Skip synchronization object
1167  parseParens();
1168  if (FormatTok->Tok.is(tok::l_brace)) {
1170  addUnwrappedLine();
1171  parseBlock(/*MustBeDeclaration=*/false);
1172  }
1173  addUnwrappedLine();
1174  return;
1175  case tok::objc_try:
1176  // This branch isn't strictly necessary (the kw_try case below would
1177  // do this too after the tok::at is parsed above). But be explicit.
1178  parseTryCatch();
1179  return;
1180  default:
1181  break;
1182  }
1183  break;
1184  case tok::kw_enum:
1185  // Ignore if this is part of "template <enum ...".
1186  if (Previous && Previous->is(tok::less)) {
1187  nextToken();
1188  break;
1189  }
1190 
1191  // parseEnum falls through and does not yet add an unwrapped line as an
1192  // enum definition can start a structural element.
1193  if (!parseEnum())
1194  break;
1195  // This only applies for C++.
1196  if (!Style.isCpp()) {
1197  addUnwrappedLine();
1198  return;
1199  }
1200  break;
1201  case tok::kw_typedef:
1202  nextToken();
1203  if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1204  Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
1205  parseEnum();
1206  break;
1207  case tok::kw_struct:
1208  case tok::kw_union:
1209  case tok::kw_class:
1210  // parseRecord falls through and does not yet add an unwrapped line as a
1211  // record declaration or definition can start a structural element.
1212  parseRecord();
1213  // This does not apply for Java and JavaScript.
1214  if (Style.Language == FormatStyle::LK_Java ||
1216  if (FormatTok->is(tok::semi))
1217  nextToken();
1218  addUnwrappedLine();
1219  return;
1220  }
1221  break;
1222  case tok::period:
1223  nextToken();
1224  // In Java, classes have an implicit static member "class".
1225  if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1226  FormatTok->is(tok::kw_class))
1227  nextToken();
1228  if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
1229  FormatTok->Tok.getIdentifierInfo())
1230  // JavaScript only has pseudo keywords, all keywords are allowed to
1231  // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1232  nextToken();
1233  break;
1234  case tok::semi:
1235  nextToken();
1236  addUnwrappedLine();
1237  return;
1238  case tok::r_brace:
1239  addUnwrappedLine();
1240  return;
1241  case tok::l_paren:
1242  parseParens();
1243  break;
1244  case tok::kw_operator:
1245  nextToken();
1246  if (FormatTok->isBinaryOperator())
1247  nextToken();
1248  break;
1249  case tok::caret:
1250  nextToken();
1251  if (FormatTok->Tok.isAnyIdentifier() ||
1252  FormatTok->isSimpleTypeSpecifier())
1253  nextToken();
1254  if (FormatTok->is(tok::l_paren))
1255  parseParens();
1256  if (FormatTok->is(tok::l_brace))
1257  parseChildBlock();
1258  break;
1259  case tok::l_brace:
1260  if (!tryToParseBracedList()) {
1261  // A block outside of parentheses must be the last part of a
1262  // structural element.
1263  // FIXME: Figure out cases where this is not true, and add projections
1264  // for them (the one we know is missing are lambdas).
1265  if (Style.BraceWrapping.AfterFunction)
1266  addUnwrappedLine();
1267  FormatTok->Type = TT_FunctionLBrace;
1268  parseBlock(/*MustBeDeclaration=*/false);
1269  addUnwrappedLine();
1270  return;
1271  }
1272  // Otherwise this was a braced init list, and the structural
1273  // element continues.
1274  break;
1275  case tok::kw_try:
1276  // We arrive here when parsing function-try blocks.
1277  if (Style.BraceWrapping.AfterFunction)
1278  addUnwrappedLine();
1279  parseTryCatch();
1280  return;
1281  case tok::identifier: {
1282  if (FormatTok->is(TT_MacroBlockEnd)) {
1283  addUnwrappedLine();
1284  return;
1285  }
1286 
1287  // Function declarations (as opposed to function expressions) are parsed
1288  // on their own unwrapped line by continuing this loop. Function
1289  // expressions (functions that are not on their own line) must not create
1290  // a new unwrapped line, so they are special cased below.
1291  size_t TokenCount = Line->Tokens.size();
1292  if (Style.Language == FormatStyle::LK_JavaScript &&
1293  FormatTok->is(Keywords.kw_function) &&
1294  (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1295  Keywords.kw_async)))) {
1296  tryToParseJSFunction();
1297  break;
1298  }
1299  if ((Style.Language == FormatStyle::LK_JavaScript ||
1300  Style.Language == FormatStyle::LK_Java) &&
1301  FormatTok->is(Keywords.kw_interface)) {
1302  if (Style.Language == FormatStyle::LK_JavaScript) {
1303  // In JavaScript/TypeScript, "interface" can be used as a standalone
1304  // identifier, e.g. in `var interface = 1;`. If "interface" is
1305  // followed by another identifier, it is very like to be an actual
1306  // interface declaration.
1307  unsigned StoredPosition = Tokens->getPosition();
1308  FormatToken *Next = Tokens->getNextToken();
1309  FormatTok = Tokens->setPosition(StoredPosition);
1310  if (Next && !mustBeJSIdent(Keywords, Next)) {
1311  nextToken();
1312  break;
1313  }
1314  }
1315  parseRecord();
1316  addUnwrappedLine();
1317  return;
1318  }
1319 
1320  if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1321  parseStatementMacro();
1322  return;
1323  }
1324 
1325  // See if the following token should start a new unwrapped line.
1326  StringRef Text = FormatTok->TokenText;
1327  nextToken();
1328  if (Line->Tokens.size() == 1 &&
1329  // JS doesn't have macros, and within classes colons indicate fields,
1330  // not labels.
1332  if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1333  Line->Tokens.begin()->Tok->MustBreakBefore = true;
1334  parseLabel();
1335  return;
1336  }
1337  // Recognize function-like macro usages without trailing semicolon as
1338  // well as free-standing macros like Q_OBJECT.
1339  bool FunctionLike = FormatTok->is(tok::l_paren);
1340  if (FunctionLike)
1341  parseParens();
1342 
1343  bool FollowedByNewline =
1344  CommentsBeforeNextToken.empty()
1345  ? FormatTok->NewlinesBefore > 0
1346  : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1347 
1348  if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1349  tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
1350  addUnwrappedLine();
1351  return;
1352  }
1353  }
1354  break;
1355  }
1356  case tok::equal:
1357  // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1358  // TT_JsFatArrow. The always start an expression or a child block if
1359  // followed by a curly.
1360  if (FormatTok->is(TT_JsFatArrow)) {
1361  nextToken();
1362  if (FormatTok->is(tok::l_brace))
1363  parseChildBlock();
1364  break;
1365  }
1366 
1367  nextToken();
1368  if (FormatTok->Tok.is(tok::l_brace)) {
1369  nextToken();
1370  parseBracedList();
1371  } else if (Style.Language == FormatStyle::LK_Proto &&
1372  FormatTok->Tok.is(tok::less)) {
1373  nextToken();
1374  parseBracedList(/*ContinueOnSemicolons=*/false,
1375  /*ClosingBraceKind=*/tok::greater);
1376  }
1377  break;
1378  case tok::l_square:
1379  parseSquare();
1380  break;
1381  case tok::kw_new:
1382  parseNew();
1383  break;
1384  default:
1385  nextToken();
1386  break;
1387  }
1388  } while (!eof());
1389 }
1390 
1391 bool UnwrappedLineParser::tryToParseLambda() {
1392  if (!Style.isCpp()) {
1393  nextToken();
1394  return false;
1395  }
1396  assert(FormatTok->is(tok::l_square));
1397  FormatToken &LSquare = *FormatTok;
1398  if (!tryToParseLambdaIntroducer())
1399  return false;
1400 
1401  while (FormatTok->isNot(tok::l_brace)) {
1402  if (FormatTok->isSimpleTypeSpecifier()) {
1403  nextToken();
1404  continue;
1405  }
1406  switch (FormatTok->Tok.getKind()) {
1407  case tok::l_brace:
1408  break;
1409  case tok::l_paren:
1410  parseParens();
1411  break;
1412  case tok::amp:
1413  case tok::star:
1414  case tok::kw_const:
1415  case tok::comma:
1416  case tok::less:
1417  case tok::greater:
1418  case tok::identifier:
1419  case tok::numeric_constant:
1420  case tok::coloncolon:
1421  case tok::kw_mutable:
1422  nextToken();
1423  break;
1424  case tok::arrow:
1425  FormatTok->Type = TT_LambdaArrow;
1426  nextToken();
1427  break;
1428  default:
1429  return true;
1430  }
1431  }
1432  LSquare.Type = TT_LambdaLSquare;
1433  parseChildBlock();
1434  return true;
1435 }
1436 
1437 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1438  const FormatToken *Previous = FormatTok->Previous;
1439  if (Previous &&
1440  (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1441  tok::kw_delete, tok::l_square) ||
1442  FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1443  Previous->isSimpleTypeSpecifier())) {
1444  nextToken();
1445  return false;
1446  }
1447  nextToken();
1448  if (FormatTok->is(tok::l_square)) {
1449  return false;
1450  }
1451  parseSquare(/*LambdaIntroducer=*/true);
1452  return true;
1453 }
1454 
1455 void UnwrappedLineParser::tryToParseJSFunction() {
1456  assert(FormatTok->is(Keywords.kw_function) ||
1457  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1458  if (FormatTok->is(Keywords.kw_async))
1459  nextToken();
1460  // Consume "function".
1461  nextToken();
1462 
1463  // Consume * (generator function). Treat it like C++'s overloaded operators.
1464  if (FormatTok->is(tok::star)) {
1465  FormatTok->Type = TT_OverloadedOperator;
1466  nextToken();
1467  }
1468 
1469  // Consume function name.
1470  if (FormatTok->is(tok::identifier))
1471  nextToken();
1472 
1473  if (FormatTok->isNot(tok::l_paren))
1474  return;
1475 
1476  // Parse formal parameter list.
1477  parseParens();
1478 
1479  if (FormatTok->is(tok::colon)) {
1480  // Parse a type definition.
1481  nextToken();
1482 
1483  // Eat the type declaration. For braced inline object types, balance braces,
1484  // otherwise just parse until finding an l_brace for the function body.
1485  if (FormatTok->is(tok::l_brace))
1486  tryToParseBracedList();
1487  else
1488  while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1489  nextToken();
1490  }
1491 
1492  if (FormatTok->is(tok::semi))
1493  return;
1494 
1495  parseChildBlock();
1496 }
1497 
1498 bool UnwrappedLineParser::tryToParseBracedList() {
1499  if (FormatTok->BlockKind == BK_Unknown)
1500  calculateBraceTypes();
1501  assert(FormatTok->BlockKind != BK_Unknown);
1502  if (FormatTok->BlockKind == BK_Block)
1503  return false;
1504  nextToken();
1505  parseBracedList();
1506  return true;
1507 }
1508 
1509 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1510  tok::TokenKind ClosingBraceKind) {
1511  bool HasError = false;
1512 
1513  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1514  // replace this by using parseAssigmentExpression() inside.
1515  do {
1516  if (Style.Language == FormatStyle::LK_JavaScript) {
1517  if (FormatTok->is(Keywords.kw_function) ||
1518  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1519  tryToParseJSFunction();
1520  continue;
1521  }
1522  if (FormatTok->is(TT_JsFatArrow)) {
1523  nextToken();
1524  // Fat arrows can be followed by simple expressions or by child blocks
1525  // in curly braces.
1526  if (FormatTok->is(tok::l_brace)) {
1527  parseChildBlock();
1528  continue;
1529  }
1530  }
1531  if (FormatTok->is(tok::l_brace)) {
1532  // Could be a method inside of a braced list `{a() { return 1; }}`.
1533  if (tryToParseBracedList())
1534  continue;
1535  parseChildBlock();
1536  }
1537  }
1538  if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1539  nextToken();
1540  return !HasError;
1541  }
1542  switch (FormatTok->Tok.getKind()) {
1543  case tok::caret:
1544  nextToken();
1545  if (FormatTok->is(tok::l_brace)) {
1546  parseChildBlock();
1547  }
1548  break;
1549  case tok::l_square:
1550  tryToParseLambda();
1551  break;
1552  case tok::l_paren:
1553  parseParens();
1554  // JavaScript can just have free standing methods and getters/setters in
1555  // object literals. Detect them by a "{" following ")".
1556  if (Style.Language == FormatStyle::LK_JavaScript) {
1557  if (FormatTok->is(tok::l_brace))
1558  parseChildBlock();
1559  break;
1560  }
1561  break;
1562  case tok::l_brace:
1563  // Assume there are no blocks inside a braced init list apart
1564  // from the ones we explicitly parse out (like lambdas).
1565  FormatTok->BlockKind = BK_BracedInit;
1566  nextToken();
1567  parseBracedList();
1568  break;
1569  case tok::less:
1570  if (Style.Language == FormatStyle::LK_Proto) {
1571  nextToken();
1572  parseBracedList(/*ContinueOnSemicolons=*/false,
1573  /*ClosingBraceKind=*/tok::greater);
1574  } else {
1575  nextToken();
1576  }
1577  break;
1578  case tok::semi:
1579  // JavaScript (or more precisely TypeScript) can have semicolons in braced
1580  // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1581  // used for error recovery if we have otherwise determined that this is
1582  // a braced list.
1583  if (Style.Language == FormatStyle::LK_JavaScript) {
1584  nextToken();
1585  break;
1586  }
1587  HasError = true;
1588  if (!ContinueOnSemicolons)
1589  return !HasError;
1590  nextToken();
1591  break;
1592  case tok::comma:
1593  nextToken();
1594  break;
1595  default:
1596  nextToken();
1597  break;
1598  }
1599  } while (!eof());
1600  return false;
1601 }
1602 
1603 void UnwrappedLineParser::parseParens() {
1604  assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1605  nextToken();
1606  do {
1607  switch (FormatTok->Tok.getKind()) {
1608  case tok::l_paren:
1609  parseParens();
1610  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1611  parseChildBlock();
1612  break;
1613  case tok::r_paren:
1614  nextToken();
1615  return;
1616  case tok::r_brace:
1617  // A "}" inside parenthesis is an error if there wasn't a matching "{".
1618  return;
1619  case tok::l_square:
1620  tryToParseLambda();
1621  break;
1622  case tok::l_brace:
1623  if (!tryToParseBracedList())
1624  parseChildBlock();
1625  break;
1626  case tok::at:
1627  nextToken();
1628  if (FormatTok->Tok.is(tok::l_brace)) {
1629  nextToken();
1630  parseBracedList();
1631  }
1632  break;
1633  case tok::kw_class:
1634  if (Style.Language == FormatStyle::LK_JavaScript)
1635  parseRecord(/*ParseAsExpr=*/true);
1636  else
1637  nextToken();
1638  break;
1639  case tok::identifier:
1640  if (Style.Language == FormatStyle::LK_JavaScript &&
1641  (FormatTok->is(Keywords.kw_function) ||
1642  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1643  tryToParseJSFunction();
1644  else
1645  nextToken();
1646  break;
1647  default:
1648  nextToken();
1649  break;
1650  }
1651  } while (!eof());
1652 }
1653 
1654 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1655  if (!LambdaIntroducer) {
1656  assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1657  if (tryToParseLambda())
1658  return;
1659  }
1660  do {
1661  switch (FormatTok->Tok.getKind()) {
1662  case tok::l_paren:
1663  parseParens();
1664  break;
1665  case tok::r_square:
1666  nextToken();
1667  return;
1668  case tok::r_brace:
1669  // A "}" inside parenthesis is an error if there wasn't a matching "{".
1670  return;
1671  case tok::l_square:
1672  parseSquare();
1673  break;
1674  case tok::l_brace: {
1675  if (!tryToParseBracedList())
1676  parseChildBlock();
1677  break;
1678  }
1679  case tok::at:
1680  nextToken();
1681  if (FormatTok->Tok.is(tok::l_brace)) {
1682  nextToken();
1683  parseBracedList();
1684  }
1685  break;
1686  default:
1687  nextToken();
1688  break;
1689  }
1690  } while (!eof());
1691 }
1692 
1693 void UnwrappedLineParser::parseIfThenElse() {
1694  assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1695  nextToken();
1696  if (FormatTok->Tok.is(tok::kw_constexpr))
1697  nextToken();
1698  if (FormatTok->Tok.is(tok::l_paren))
1699  parseParens();
1700  bool NeedsUnwrappedLine = false;
1701  if (FormatTok->Tok.is(tok::l_brace)) {
1702  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1703  parseBlock(/*MustBeDeclaration=*/false);
1704  if (Style.BraceWrapping.BeforeElse)
1705  addUnwrappedLine();
1706  else
1707  NeedsUnwrappedLine = true;
1708  } else {
1709  addUnwrappedLine();
1710  ++Line->Level;
1711  parseStructuralElement();
1712  --Line->Level;
1713  }
1714  if (FormatTok->Tok.is(tok::kw_else)) {
1715  nextToken();
1716  if (FormatTok->Tok.is(tok::l_brace)) {
1717  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1718  parseBlock(/*MustBeDeclaration=*/false);
1719  addUnwrappedLine();
1720  } else if (FormatTok->Tok.is(tok::kw_if)) {
1721  parseIfThenElse();
1722  } else {
1723  addUnwrappedLine();
1724  ++Line->Level;
1725  parseStructuralElement();
1726  if (FormatTok->is(tok::eof))
1727  addUnwrappedLine();
1728  --Line->Level;
1729  }
1730  } else if (NeedsUnwrappedLine) {
1731  addUnwrappedLine();
1732  }
1733 }
1734 
1735 void UnwrappedLineParser::parseTryCatch() {
1736  assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1737  nextToken();
1738  bool NeedsUnwrappedLine = false;
1739  if (FormatTok->is(tok::colon)) {
1740  // We are in a function try block, what comes is an initializer list.
1741  nextToken();
1742  while (FormatTok->is(tok::identifier)) {
1743  nextToken();
1744  if (FormatTok->is(tok::l_paren))
1745  parseParens();
1746  if (FormatTok->is(tok::comma))
1747  nextToken();
1748  }
1749  }
1750  // Parse try with resource.
1751  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1752  parseParens();
1753  }
1754  if (FormatTok->is(tok::l_brace)) {
1755  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1756  parseBlock(/*MustBeDeclaration=*/false);
1757  if (Style.BraceWrapping.BeforeCatch) {
1758  addUnwrappedLine();
1759  } else {
1760  NeedsUnwrappedLine = true;
1761  }
1762  } else if (!FormatTok->is(tok::kw_catch)) {
1763  // The C++ standard requires a compound-statement after a try.
1764  // If there's none, we try to assume there's a structuralElement
1765  // and try to continue.
1766  addUnwrappedLine();
1767  ++Line->Level;
1768  parseStructuralElement();
1769  --Line->Level;
1770  }
1771  while (1) {
1772  if (FormatTok->is(tok::at))
1773  nextToken();
1774  if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1775  tok::kw___finally) ||
1776  ((Style.Language == FormatStyle::LK_Java ||
1778  FormatTok->is(Keywords.kw_finally)) ||
1779  (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1780  FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1781  break;
1782  nextToken();
1783  while (FormatTok->isNot(tok::l_brace)) {
1784  if (FormatTok->is(tok::l_paren)) {
1785  parseParens();
1786  continue;
1787  }
1788  if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1789  return;
1790  nextToken();
1791  }
1792  NeedsUnwrappedLine = false;
1793  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1794  parseBlock(/*MustBeDeclaration=*/false);
1795  if (Style.BraceWrapping.BeforeCatch)
1796  addUnwrappedLine();
1797  else
1798  NeedsUnwrappedLine = true;
1799  }
1800  if (NeedsUnwrappedLine)
1801  addUnwrappedLine();
1802 }
1803 
1804 void UnwrappedLineParser::parseNamespace() {
1805  assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1806 
1807  const FormatToken &InitialToken = *FormatTok;
1808  nextToken();
1809  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1810  nextToken();
1811  if (FormatTok->Tok.is(tok::l_brace)) {
1812  if (ShouldBreakBeforeBrace(Style, InitialToken))
1813  addUnwrappedLine();
1814 
1815  bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1817  DeclarationScopeStack.size() > 1);
1818  parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1819  // Munch the semicolon after a namespace. This is more common than one would
1820  // think. Puttin the semicolon into its own line is very ugly.
1821  if (FormatTok->Tok.is(tok::semi))
1822  nextToken();
1823  addUnwrappedLine();
1824  }
1825  // FIXME: Add error handling.
1826 }
1827 
1828 void UnwrappedLineParser::parseNew() {
1829  assert(FormatTok->is(tok::kw_new) && "'new' expected");
1830  nextToken();
1831  if (Style.Language != FormatStyle::LK_Java)
1832  return;
1833 
1834  // In Java, we can parse everything up to the parens, which aren't optional.
1835  do {
1836  // There should not be a ;, { or } before the new's open paren.
1837  if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1838  return;
1839 
1840  // Consume the parens.
1841  if (FormatTok->is(tok::l_paren)) {
1842  parseParens();
1843 
1844  // If there is a class body of an anonymous class, consume that as child.
1845  if (FormatTok->is(tok::l_brace))
1846  parseChildBlock();
1847  return;
1848  }
1849  nextToken();
1850  } while (!eof());
1851 }
1852 
1853 void UnwrappedLineParser::parseForOrWhileLoop() {
1854  assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1855  "'for', 'while' or foreach macro expected");
1856  nextToken();
1857  // JS' for await ( ...
1858  if (Style.Language == FormatStyle::LK_JavaScript &&
1859  FormatTok->is(Keywords.kw_await))
1860  nextToken();
1861  if (FormatTok->Tok.is(tok::l_paren))
1862  parseParens();
1863  if (FormatTok->Tok.is(tok::l_brace)) {
1864  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1865  parseBlock(/*MustBeDeclaration=*/false);
1866  addUnwrappedLine();
1867  } else {
1868  addUnwrappedLine();
1869  ++Line->Level;
1870  parseStructuralElement();
1871  --Line->Level;
1872  }
1873 }
1874 
1875 void UnwrappedLineParser::parseDoWhile() {
1876  assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1877  nextToken();
1878  if (FormatTok->Tok.is(tok::l_brace)) {
1879  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1880  parseBlock(/*MustBeDeclaration=*/false);
1881  if (Style.BraceWrapping.IndentBraces)
1882  addUnwrappedLine();
1883  } else {
1884  addUnwrappedLine();
1885  ++Line->Level;
1886  parseStructuralElement();
1887  --Line->Level;
1888  }
1889 
1890  // FIXME: Add error handling.
1891  if (!FormatTok->Tok.is(tok::kw_while)) {
1892  addUnwrappedLine();
1893  return;
1894  }
1895 
1896  nextToken();
1897  parseStructuralElement();
1898 }
1899 
1900 void UnwrappedLineParser::parseLabel() {
1901  nextToken();
1902  unsigned OldLineLevel = Line->Level;
1903  if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1904  --Line->Level;
1905  if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1906  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1907  parseBlock(/*MustBeDeclaration=*/false);
1908  if (FormatTok->Tok.is(tok::kw_break)) {
1910  addUnwrappedLine();
1911  parseStructuralElement();
1912  }
1913  addUnwrappedLine();
1914  } else {
1915  if (FormatTok->is(tok::semi))
1916  nextToken();
1917  addUnwrappedLine();
1918  }
1919  Line->Level = OldLineLevel;
1920  if (FormatTok->isNot(tok::l_brace)) {
1921  parseStructuralElement();
1922  addUnwrappedLine();
1923  }
1924 }
1925 
1926 void UnwrappedLineParser::parseCaseLabel() {
1927  assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1928  // FIXME: fix handling of complex expressions here.
1929  do {
1930  nextToken();
1931  } while (!eof() && !FormatTok->Tok.is(tok::colon));
1932  parseLabel();
1933 }
1934 
1935 void UnwrappedLineParser::parseSwitch() {
1936  assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1937  nextToken();
1938  if (FormatTok->Tok.is(tok::l_paren))
1939  parseParens();
1940  if (FormatTok->Tok.is(tok::l_brace)) {
1941  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1942  parseBlock(/*MustBeDeclaration=*/false);
1943  addUnwrappedLine();
1944  } else {
1945  addUnwrappedLine();
1946  ++Line->Level;
1947  parseStructuralElement();
1948  --Line->Level;
1949  }
1950 }
1951 
1952 void UnwrappedLineParser::parseAccessSpecifier() {
1953  nextToken();
1954  // Understand Qt's slots.
1955  if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1956  nextToken();
1957  // Otherwise, we don't know what it is, and we'd better keep the next token.
1958  if (FormatTok->Tok.is(tok::colon))
1959  nextToken();
1960  addUnwrappedLine();
1961 }
1962 
1963 bool UnwrappedLineParser::parseEnum() {
1964  // Won't be 'enum' for NS_ENUMs.
1965  if (FormatTok->Tok.is(tok::kw_enum))
1966  nextToken();
1967 
1968  // In TypeScript, "enum" can also be used as property name, e.g. in interface
1969  // declarations. An "enum" keyword followed by a colon would be a syntax
1970  // error and thus assume it is just an identifier.
1971  if (Style.Language == FormatStyle::LK_JavaScript &&
1972  FormatTok->isOneOf(tok::colon, tok::question))
1973  return false;
1974 
1975  // Eat up enum class ...
1976  if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1977  nextToken();
1978 
1979  while (FormatTok->Tok.getIdentifierInfo() ||
1980  FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1981  tok::greater, tok::comma, tok::question)) {
1982  nextToken();
1983  // We can have macros or attributes in between 'enum' and the enum name.
1984  if (FormatTok->is(tok::l_paren))
1985  parseParens();
1986  if (FormatTok->is(tok::identifier)) {
1987  nextToken();
1988  // If there are two identifiers in a row, this is likely an elaborate
1989  // return type. In Java, this can be "implements", etc.
1990  if (Style.isCpp() && FormatTok->is(tok::identifier))
1991  return false;
1992  }
1993  }
1994 
1995  // Just a declaration or something is wrong.
1996  if (FormatTok->isNot(tok::l_brace))
1997  return true;
1998  FormatTok->BlockKind = BK_Block;
1999 
2000  if (Style.Language == FormatStyle::LK_Java) {
2001  // Java enums are different.
2002  parseJavaEnumBody();
2003  return true;
2004  }
2005  if (Style.Language == FormatStyle::LK_Proto) {
2006  parseBlock(/*MustBeDeclaration=*/true);
2007  return true;
2008  }
2009 
2010  // Parse enum body.
2011  nextToken();
2012  bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
2013  if (HasError) {
2014  if (FormatTok->is(tok::semi))
2015  nextToken();
2016  addUnwrappedLine();
2017  }
2018  return true;
2019 
2020  // There is no addUnwrappedLine() here so that we fall through to parsing a
2021  // structural element afterwards. Thus, in "enum A {} n, m;",
2022  // "} n, m;" will end up in one unwrapped line.
2023 }
2024 
2025 void UnwrappedLineParser::parseJavaEnumBody() {
2026  // Determine whether the enum is simple, i.e. does not have a semicolon or
2027  // constants with class bodies. Simple enums can be formatted like braced
2028  // lists, contracted to a single line, etc.
2029  unsigned StoredPosition = Tokens->getPosition();
2030  bool IsSimple = true;
2031  FormatToken *Tok = Tokens->getNextToken();
2032  while (Tok) {
2033  if (Tok->is(tok::r_brace))
2034  break;
2035  if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2036  IsSimple = false;
2037  break;
2038  }
2039  // FIXME: This will also mark enums with braces in the arguments to enum
2040  // constants as "not simple". This is probably fine in practice, though.
2041  Tok = Tokens->getNextToken();
2042  }
2043  FormatTok = Tokens->setPosition(StoredPosition);
2044 
2045  if (IsSimple) {
2046  nextToken();
2047  parseBracedList();
2048  addUnwrappedLine();
2049  return;
2050  }
2051 
2052  // Parse the body of a more complex enum.
2053  // First add a line for everything up to the "{".
2054  nextToken();
2055  addUnwrappedLine();
2056  ++Line->Level;
2057 
2058  // Parse the enum constants.
2059  while (FormatTok) {
2060  if (FormatTok->is(tok::l_brace)) {
2061  // Parse the constant's class body.
2062  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2063  /*MunchSemi=*/false);
2064  } else if (FormatTok->is(tok::l_paren)) {
2065  parseParens();
2066  } else if (FormatTok->is(tok::comma)) {
2067  nextToken();
2068  addUnwrappedLine();
2069  } else if (FormatTok->is(tok::semi)) {
2070  nextToken();
2071  addUnwrappedLine();
2072  break;
2073  } else if (FormatTok->is(tok::r_brace)) {
2074  addUnwrappedLine();
2075  break;
2076  } else {
2077  nextToken();
2078  }
2079  }
2080 
2081  // Parse the class body after the enum's ";" if any.
2082  parseLevel(/*HasOpeningBrace=*/true);
2083  nextToken();
2084  --Line->Level;
2085  addUnwrappedLine();
2086 }
2087 
2088 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2089  const FormatToken &InitialToken = *FormatTok;
2090  nextToken();
2091 
2092  // The actual identifier can be a nested name specifier, and in macros
2093  // it is often token-pasted.
2094  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2095  tok::kw___attribute, tok::kw___declspec,
2096  tok::kw_alignas) ||
2097  ((Style.Language == FormatStyle::LK_Java ||
2099  FormatTok->isOneOf(tok::period, tok::comma))) {
2100  if (Style.Language == FormatStyle::LK_JavaScript &&
2101  FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2102  // JavaScript/TypeScript supports inline object types in
2103  // extends/implements positions:
2104  // class Foo implements {bar: number} { }
2105  nextToken();
2106  if (FormatTok->is(tok::l_brace)) {
2107  tryToParseBracedList();
2108  continue;
2109  }
2110  }
2111  bool IsNonMacroIdentifier =
2112  FormatTok->is(tok::identifier) &&
2113  FormatTok->TokenText != FormatTok->TokenText.upper();
2114  nextToken();
2115  // We can have macros or attributes in between 'class' and the class name.
2116  if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
2117  parseParens();
2118  }
2119 
2120  // Note that parsing away template declarations here leads to incorrectly
2121  // accepting function declarations as record declarations.
2122  // In general, we cannot solve this problem. Consider:
2123  // class A<int> B() {}
2124  // which can be a function definition or a class definition when B() is a
2125  // macro. If we find enough real-world cases where this is a problem, we
2126  // can parse for the 'template' keyword in the beginning of the statement,
2127  // and thus rule out the record production in case there is no template
2128  // (this would still leave us with an ambiguity between template function
2129  // and class declarations).
2130  if (FormatTok->isOneOf(tok::colon, tok::less)) {
2131  while (!eof()) {
2132  if (FormatTok->is(tok::l_brace)) {
2133  calculateBraceTypes(/*ExpectClassBody=*/true);
2134  if (!tryToParseBracedList())
2135  break;
2136  }
2137  if (FormatTok->Tok.is(tok::semi))
2138  return;
2139  nextToken();
2140  }
2141  }
2142  if (FormatTok->Tok.is(tok::l_brace)) {
2143  if (ParseAsExpr) {
2144  parseChildBlock();
2145  } else {
2146  if (ShouldBreakBeforeBrace(Style, InitialToken))
2147  addUnwrappedLine();
2148 
2149  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2150  /*MunchSemi=*/false);
2151  }
2152  }
2153  // There is no addUnwrappedLine() here so that we fall through to parsing a
2154  // structural element afterwards. Thus, in "class A {} n, m;",
2155  // "} n, m;" will end up in one unwrapped line.
2156 }
2157 
2158 void UnwrappedLineParser::parseObjCMethod() {
2159  assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2160  "'(' or identifier expected.");
2161  do {
2162  if (FormatTok->Tok.is(tok::semi)) {
2163  nextToken();
2164  addUnwrappedLine();
2165  return;
2166  } else if (FormatTok->Tok.is(tok::l_brace)) {
2167  if (Style.BraceWrapping.AfterFunction)
2168  addUnwrappedLine();
2169  parseBlock(/*MustBeDeclaration=*/false);
2170  addUnwrappedLine();
2171  return;
2172  } else {
2173  nextToken();
2174  }
2175  } while (!eof());
2176 }
2177 
2178 void UnwrappedLineParser::parseObjCProtocolList() {
2179  assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2180  do {
2181  nextToken();
2182  // Early exit in case someone forgot a close angle.
2183  if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2184  FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2185  return;
2186  } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2187  nextToken(); // Skip '>'.
2188 }
2189 
2190 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2191  do {
2192  if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2193  nextToken();
2194  addUnwrappedLine();
2195  break;
2196  }
2197  if (FormatTok->is(tok::l_brace)) {
2198  parseBlock(/*MustBeDeclaration=*/false);
2199  // In ObjC interfaces, nothing should be following the "}".
2200  addUnwrappedLine();
2201  } else if (FormatTok->is(tok::r_brace)) {
2202  // Ignore stray "}". parseStructuralElement doesn't consume them.
2203  nextToken();
2204  addUnwrappedLine();
2205  } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2206  nextToken();
2207  parseObjCMethod();
2208  } else {
2209  parseStructuralElement();
2210  }
2211  } while (!eof());
2212 }
2213 
2214 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2215  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2216  FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2217  nextToken();
2218  nextToken(); // interface name
2219 
2220  // @interface can be followed by a lightweight generic
2221  // specialization list, then either a base class or a category.
2222  if (FormatTok->Tok.is(tok::less)) {
2223  // Unlike protocol lists, generic parameterizations support
2224  // nested angles:
2225  //
2226  // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2227  // NSObject <NSCopying, NSSecureCoding>
2228  //
2229  // so we need to count how many open angles we have left.
2230  unsigned NumOpenAngles = 1;
2231  do {
2232  nextToken();
2233  // Early exit in case someone forgot a close angle.
2234  if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2235  FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2236  break;
2237  if (FormatTok->Tok.is(tok::less))
2238  ++NumOpenAngles;
2239  else if (FormatTok->Tok.is(tok::greater)) {
2240  assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2241  --NumOpenAngles;
2242  }
2243  } while (!eof() && NumOpenAngles != 0);
2244  nextToken(); // Skip '>'.
2245  }
2246  if (FormatTok->Tok.is(tok::colon)) {
2247  nextToken();
2248  nextToken(); // base class name
2249  } else if (FormatTok->Tok.is(tok::l_paren))
2250  // Skip category, if present.
2251  parseParens();
2252 
2253  if (FormatTok->Tok.is(tok::less))
2254  parseObjCProtocolList();
2255 
2256  if (FormatTok->Tok.is(tok::l_brace)) {
2258  addUnwrappedLine();
2259  parseBlock(/*MustBeDeclaration=*/true);
2260  }
2261 
2262  // With instance variables, this puts '}' on its own line. Without instance
2263  // variables, this ends the @interface line.
2264  addUnwrappedLine();
2265 
2266  parseObjCUntilAtEnd();
2267 }
2268 
2269 // Returns true for the declaration/definition form of @protocol,
2270 // false for the expression form.
2271 bool UnwrappedLineParser::parseObjCProtocol() {
2272  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2273  nextToken();
2274 
2275  if (FormatTok->is(tok::l_paren))
2276  // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2277  return false;
2278 
2279  // The definition/declaration form,
2280  // @protocol Foo
2281  // - (int)someMethod;
2282  // @end
2283 
2284  nextToken(); // protocol name
2285 
2286  if (FormatTok->Tok.is(tok::less))
2287  parseObjCProtocolList();
2288 
2289  // Check for protocol declaration.
2290  if (FormatTok->Tok.is(tok::semi)) {
2291  nextToken();
2292  addUnwrappedLine();
2293  return true;
2294  }
2295 
2296  addUnwrappedLine();
2297  parseObjCUntilAtEnd();
2298  return true;
2299 }
2300 
2301 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2302  bool IsImport = FormatTok->is(Keywords.kw_import);
2303  assert(IsImport || FormatTok->is(tok::kw_export));
2304  nextToken();
2305 
2306  // Consume the "default" in "export default class/function".
2307  if (FormatTok->is(tok::kw_default))
2308  nextToken();
2309 
2310  // Consume "async function", "function" and "default function", so that these
2311  // get parsed as free-standing JS functions, i.e. do not require a trailing
2312  // semicolon.
2313  if (FormatTok->is(Keywords.kw_async))
2314  nextToken();
2315  if (FormatTok->is(Keywords.kw_function)) {
2316  nextToken();
2317  return;
2318  }
2319 
2320  // For imports, `export *`, `export {...}`, consume the rest of the line up
2321  // to the terminating `;`. For everything else, just return and continue
2322  // parsing the structural element, i.e. the declaration or expression for
2323  // `export default`.
2324  if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2325  !FormatTok->isStringLiteral())
2326  return;
2327 
2328  while (!eof()) {
2329  if (FormatTok->is(tok::semi))
2330  return;
2331  if (Line->Tokens.empty()) {
2332  // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2333  // import statement should terminate.
2334  return;
2335  }
2336  if (FormatTok->is(tok::l_brace)) {
2337  FormatTok->BlockKind = BK_Block;
2338  nextToken();
2339  parseBracedList();
2340  } else {
2341  nextToken();
2342  }
2343  }
2344 }
2345 
2346 void UnwrappedLineParser::parseStatementMacro()
2347 {
2348  nextToken();
2349  if (FormatTok->is(tok::l_paren))
2350  parseParens();
2351  if (FormatTok->is(tok::semi))
2352  nextToken();
2353  addUnwrappedLine();
2354 }
2355 
2356 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2357  StringRef Prefix = "") {
2358  llvm::dbgs() << Prefix << "Line(" << Line.Level
2359  << ", FSC=" << Line.FirstStartColumn << ")"
2360  << (Line.InPPDirective ? " MACRO" : "") << ": ";
2361  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2362  E = Line.Tokens.end();
2363  I != E; ++I) {
2364  llvm::dbgs() << I->Tok->Tok.getName() << "["
2365  << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn
2366  << "] ";
2367  }
2368  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2369  E = Line.Tokens.end();
2370  I != E; ++I) {
2371  const UnwrappedLineNode &Node = *I;
2373  I = Node.Children.begin(),
2374  E = Node.Children.end();
2375  I != E; ++I) {
2376  printDebugInfo(*I, "\nChild: ");
2377  }
2378  }
2379  llvm::dbgs() << "\n";
2380 }
2381 
2382 void UnwrappedLineParser::addUnwrappedLine() {
2383  if (Line->Tokens.empty())
2384  return;
2385  LLVM_DEBUG({
2386  if (CurrentLines == &Lines)
2387  printDebugInfo(*Line);
2388  });
2389  CurrentLines->push_back(std::move(*Line));
2390  Line->Tokens.clear();
2391  Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2392  Line->FirstStartColumn = 0;
2393  if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2394  CurrentLines->append(
2395  std::make_move_iterator(PreprocessorDirectives.begin()),
2396  std::make_move_iterator(PreprocessorDirectives.end()));
2397  PreprocessorDirectives.clear();
2398  }
2399  // Disconnect the current token from the last token on the previous line.
2400  FormatTok->Previous = nullptr;
2401 }
2402 
2403 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2404 
2405 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2406  return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2407  FormatTok.NewlinesBefore > 0;
2408 }
2409 
2410 // Checks if \p FormatTok is a line comment that continues the line comment
2411 // section on \p Line.
2412 static bool continuesLineCommentSection(const FormatToken &FormatTok,
2413  const UnwrappedLine &Line,
2414  llvm::Regex &CommentPragmasRegex) {
2415  if (Line.Tokens.empty())
2416  return false;
2417 
2418  StringRef IndentContent = FormatTok.TokenText;
2419  if (FormatTok.TokenText.startswith("//") ||
2420  FormatTok.TokenText.startswith("/*"))
2421  IndentContent = FormatTok.TokenText.substr(2);
2422  if (CommentPragmasRegex.match(IndentContent))
2423  return false;
2424 
2425  // If Line starts with a line comment, then FormatTok continues the comment
2426  // section if its original column is greater or equal to the original start
2427  // column of the line.
2428  //
2429  // Define the min column token of a line as follows: if a line ends in '{' or
2430  // contains a '{' followed by a line comment, then the min column token is
2431  // that '{'. Otherwise, the min column token of the line is the first token of
2432  // the line.
2433  //
2434  // If Line starts with a token other than a line comment, then FormatTok
2435  // continues the comment section if its original column is greater than the
2436  // original start column of the min column token of the line.
2437  //
2438  // For example, the second line comment continues the first in these cases:
2439  //
2440  // // first line
2441  // // second line
2442  //
2443  // and:
2444  //
2445  // // first line
2446  // // second line
2447  //
2448  // and:
2449  //
2450  // int i; // first line
2451  // // second line
2452  //
2453  // and:
2454  //
2455  // do { // first line
2456  // // second line
2457  // int i;
2458  // } while (true);
2459  //
2460  // and:
2461  //
2462  // enum {
2463  // a, // first line
2464  // // second line
2465  // b
2466  // };
2467  //
2468  // The second line comment doesn't continue the first in these cases:
2469  //
2470  // // first line
2471  // // second line
2472  //
2473  // and:
2474  //
2475  // int i; // first line
2476  // // second line
2477  //
2478  // and:
2479  //
2480  // do { // first line
2481  // // second line
2482  // int i;
2483  // } while (true);
2484  //
2485  // and:
2486  //
2487  // enum {
2488  // a, // first line
2489  // // second line
2490  // };
2491  const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2492 
2493  // Scan for '{//'. If found, use the column of '{' as a min column for line
2494  // comment section continuation.
2495  const FormatToken *PreviousToken = nullptr;
2496  for (const UnwrappedLineNode &Node : Line.Tokens) {
2497  if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2498  isLineComment(*Node.Tok)) {
2499  MinColumnToken = PreviousToken;
2500  break;
2501  }
2502  PreviousToken = Node.Tok;
2503 
2504  // Grab the last newline preceding a token in this unwrapped line.
2505  if (Node.Tok->NewlinesBefore > 0) {
2506  MinColumnToken = Node.Tok;
2507  }
2508  }
2509  if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2510  MinColumnToken = PreviousToken;
2511  }
2512 
2513  return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2514  MinColumnToken);
2515 }
2516 
2517 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2518  bool JustComments = Line->Tokens.empty();
2520  I = CommentsBeforeNextToken.begin(),
2521  E = CommentsBeforeNextToken.end();
2522  I != E; ++I) {
2523  // Line comments that belong to the same line comment section are put on the
2524  // same line since later we might want to reflow content between them.
2525  // Additional fine-grained breaking of line comment sections is controlled
2526  // by the class BreakableLineCommentSection in case it is desirable to keep
2527  // several line comment sections in the same unwrapped line.
2528  //
2529  // FIXME: Consider putting separate line comment sections as children to the
2530  // unwrapped line instead.
2532  continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2533  if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2534  addUnwrappedLine();
2535  pushToken(*I);
2536  }
2537  if (NewlineBeforeNext && JustComments)
2538  addUnwrappedLine();
2539  CommentsBeforeNextToken.clear();
2540 }
2541 
2542 void UnwrappedLineParser::nextToken(int LevelDifference) {
2543  if (eof())
2544  return;
2545  flushComments(isOnNewLine(*FormatTok));
2546  pushToken(FormatTok);
2547  FormatToken *Previous = FormatTok;
2548  if (Style.Language != FormatStyle::LK_JavaScript)
2549  readToken(LevelDifference);
2550  else
2551  readTokenWithJavaScriptASI();
2552  FormatTok->Previous = Previous;
2553 }
2554 
2555 void UnwrappedLineParser::distributeComments(
2556  const SmallVectorImpl<FormatToken *> &Comments,
2557  const FormatToken *NextTok) {
2558  // Whether or not a line comment token continues a line is controlled by
2559  // the method continuesLineCommentSection, with the following caveat:
2560  //
2561  // Define a trail of Comments to be a nonempty proper postfix of Comments such
2562  // that each comment line from the trail is aligned with the next token, if
2563  // the next token exists. If a trail exists, the beginning of the maximal
2564  // trail is marked as a start of a new comment section.
2565  //
2566  // For example in this code:
2567  //
2568  // int a; // line about a
2569  // // line 1 about b
2570  // // line 2 about b
2571  // int b;
2572  //
2573  // the two lines about b form a maximal trail, so there are two sections, the
2574  // first one consisting of the single comment "// line about a" and the
2575  // second one consisting of the next two comments.
2576  if (Comments.empty())
2577  return;
2578  bool ShouldPushCommentsInCurrentLine = true;
2579  bool HasTrailAlignedWithNextToken = false;
2580  unsigned StartOfTrailAlignedWithNextToken = 0;
2581  if (NextTok) {
2582  // We are skipping the first element intentionally.
2583  for (unsigned i = Comments.size() - 1; i > 0; --i) {
2584  if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2585  HasTrailAlignedWithNextToken = true;
2586  StartOfTrailAlignedWithNextToken = i;
2587  }
2588  }
2589  }
2590  for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2591  FormatToken *FormatTok = Comments[i];
2592  if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
2593  FormatTok->ContinuesLineCommentSection = false;
2594  } else {
2595  FormatTok->ContinuesLineCommentSection =
2596  continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2597  }
2598  if (!FormatTok->ContinuesLineCommentSection &&
2599  (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2600  ShouldPushCommentsInCurrentLine = false;
2601  }
2602  if (ShouldPushCommentsInCurrentLine) {
2603  pushToken(FormatTok);
2604  } else {
2605  CommentsBeforeNextToken.push_back(FormatTok);
2606  }
2607  }
2608 }
2609 
2610 void UnwrappedLineParser::readToken(int LevelDifference) {
2612  do {
2613  FormatTok = Tokens->getNextToken();
2614  assert(FormatTok);
2615  while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2616  (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2617  distributeComments(Comments, FormatTok);
2618  Comments.clear();
2619  // If there is an unfinished unwrapped line, we flush the preprocessor
2620  // directives only after that unwrapped line was finished later.
2621  bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2622  ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2623  assert((LevelDifference >= 0 ||
2624  static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
2625  "LevelDifference makes Line->Level negative");
2626  Line->Level += LevelDifference;
2627  // Comments stored before the preprocessor directive need to be output
2628  // before the preprocessor directive, at the same level as the
2629  // preprocessor directive, as we consider them to apply to the directive.
2630  flushComments(isOnNewLine(*FormatTok));
2631  parsePPDirective();
2632  }
2633  while (FormatTok->Type == TT_ConflictStart ||
2634  FormatTok->Type == TT_ConflictEnd ||
2635  FormatTok->Type == TT_ConflictAlternative) {
2636  if (FormatTok->Type == TT_ConflictStart) {
2637  conditionalCompilationStart(/*Unreachable=*/false);
2638  } else if (FormatTok->Type == TT_ConflictAlternative) {
2639  conditionalCompilationAlternative();
2640  } else if (FormatTok->Type == TT_ConflictEnd) {
2641  conditionalCompilationEnd();
2642  }
2643  FormatTok = Tokens->getNextToken();
2644  FormatTok->MustBreakBefore = true;
2645  }
2646 
2647  if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
2648  !Line->InPPDirective) {
2649  continue;
2650  }
2651 
2652  if (!FormatTok->Tok.is(tok::comment)) {
2653  distributeComments(Comments, FormatTok);
2654  Comments.clear();
2655  return;
2656  }
2657 
2658  Comments.push_back(FormatTok);
2659  } while (!eof());
2660 
2661  distributeComments(Comments, nullptr);
2662  Comments.clear();
2663 }
2664 
2665 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2666  Line->Tokens.push_back(UnwrappedLineNode(Tok));
2667  if (MustBreakBeforeNextToken) {
2668  Line->Tokens.back().Tok->MustBreakBefore = true;
2669  MustBreakBeforeNextToken = false;
2670  }
2671 }
2672 
2673 } // end namespace format
2674 } // end namespace clang
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T -> getSizeExpr()))
SmallVector< UnwrappedLine, 0 > Children
static bool continuesLineCommentSection(const FormatToken &FormatTok, const UnwrappedLine &Line, llvm::Regex &CommentPragmasRegex)
UnwrappedLineParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, unsigned FirstStartColumn, ArrayRef< FormatToken *> Tokens, UnwrappedLineConsumer &Callback)
bool AfterUnion
Wrap union definitions.
Definition: Format.h:730
Indent in all namespaces.
Definition: Format.h:1319
__SIZE_TYPE__ size_t
The unsigned integer type of the result of the sizeof operator.
Definition: opencl-c.h:60
static bool isIIFE(const UnwrappedLine &Line, const AdditionalKeywords &Keywords)
Token Tok
The Token.
Definition: FormatToken.h:128
Should be used for Protocol Buffers (https://developers.google.com/protocol-buffers/).
Definition: Format.h:1232
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Definition: Token.h:95
bool IndentCaseLabels
Indent case labels one level from the switch statement.
Definition: Format.h:1082
unsigned OriginalColumn
The original 0-based column of this token, including expanded tabs.
Definition: FormatToken.h:215
static LLVM_ATTRIBUTE_UNUSED void printDebugInfo(const UnwrappedLine &Line, StringRef Prefix="")
StringRef P
Parser - This implements a parser for the C family of languages.
Definition: Parser.h:57
static bool mustBeJSIdent(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
bool AfterExternBlock
Wrap extern blocks.
Definition: Format.h:744
bool IsFirst
Indicates that this is the first token of the file.
Definition: FormatToken.h:160
Does not indent any directives.
Definition: Format.h:1094
bool isAnyIdentifier() const
Return true if this is a raw identifier (when lexing in raw mode) or a non-keyword identifier (when l...
Definition: Token.h:107
bool isBinaryOperator() const
Definition: FormatToken.h:413
PPDirectiveIndentStyle IndentPPDirectives
The preprocessor directive indenting style to use.
Definition: Format.h:1107
virtual void consumeUnwrappedLine(const UnwrappedLine &Line)=0
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:134
tok::TokenKind getKind() const
Definition: Token.h:90
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:113
unsigned Level
The indent level of the UnwrappedLine.
bool closesScope() const
Returns whether Tok is )]} or a closing > of a template or in protos.
Definition: FormatToken.h:381
bool AfterObjCDeclaration
Wrap ObjC definitions (interfaces, implementations...).
Definition: Format.h:702
bool IndentBraces
Indent the wrapped braces themselves.
Definition: Format.h:776
Should be used for Java.
Definition: Format.h:1225
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:290
static bool tokenCanStartNewLine(const clang::Token &Tok)
Token - This structure provides full information about a lexed token.
Definition: Token.h:35
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
Definition: Lexer.cpp:58
bool isNot(T Kind) const
Definition: FormatToken.h:323
static void hash_combine(std::size_t &seed, const T &v)
NamespaceIndentationKind NamespaceIndentation
The indentation used for namespaces.
Definition: Format.h:1323
const FormatToken & Tok
static bool isGoogScope(const UnwrappedLine &Line)
bool isCppStructuredBinding(const FormatStyle &Style) const
Returns whether the token is the left square bracket of a C++ structured binding declaration.
Definition: FormatToken.h:504
virtual FormatToken * getNextToken()=0
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...
CompoundStatementIndenter(UnwrappedLineParser *Parser, const FormatStyle &Style, unsigned &LineLevel)
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:316
ScopedLineState(UnwrappedLineParser &Parser, bool SwitchToPreprocessorLines=false)
std::list< UnwrappedLineNode > Tokens
The Tokens comprising this UnwrappedLine.
Should be used for JavaScript.
Definition: Format.h:1227
ContinuationIndenter * Indenter
const AnnotatedLine * Line
StateNode * Previous
static const size_t kInvalidIndex
bool InPPDirective
Whether this UnwrappedLine is part of a preprocessor directive.
BraceWrappingFlags BraceWrapping
Control of individual brace wrapping cases.
Definition: Format.h:825
bool AfterFunction
Wrap function definitions.
Definition: Format.h:682
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:124
SourceLocation getEnd() const
do v
Definition: arm_acle.h:78
#define false
Definition: stdbool.h:33
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:307
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:177
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:660
SourceRange WhitespaceRange
The range of the whitespace immediately preceding the Token.
Definition: FormatToken.h:141
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:172
static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
virtual FormatToken * setPosition(unsigned Position)=0
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
Definition: Lexer.cpp:67
bool isSimpleTypeSpecifier() const
Determine whether the token is a simple-type-specifier.
Definition: FormatToken.cpp:40
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:50
static bool ShouldBreakBeforeBrace(const FormatStyle &Style, const FormatToken &InitialToken)
Indent only in inner namespaces (nested in other namespaces).
Definition: Format.h:1309
LanguageKind Language
Language, this format style is targeted at.
Definition: Format.h:1242
ast_type_traits::DynTypedNode Node
bool isNot(tok::TokenKind K) const
Definition: Token.h:96
Dataflow Directional Tag Classes.
bool Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside)...
Definition: FormatToken.h:305
Should be used for TableGen code.
Definition: Format.h:1234
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const
Definition: Token.h:97
tok::PPKeywordKind getPPKeywordID() const
Return the preprocessor keyword ID for this identifier.
bool AfterStruct
Wrap struct definitions.
Definition: Format.h:716
virtual unsigned getPosition()=0
bool startsSequence(A K1, Ts... Tokens) const
true if this token starts a sequence with the given tokens in order, following the Next pointers...
Definition: FormatToken.h:336
bool AfterControlStatement
Wrap control statements (if/for/while/switch/..).
Definition: Format.h:654
Indents directives after the hash.
Definition: Format.h:1103
unsigned kind
All of the diagnostics that can be emitted by the frontend.
Definition: DiagnosticIDs.h:61
Represents a complete lambda introducer.
Definition: DeclSpec.h:2532
bool MustBreakBefore
Whether there must be a line break before this token.
Definition: FormatToken.h:166
bool AfterClass
Wrap class definitions.
Definition: Format.h:636
Should be used for Protocol Buffer messages in text format (https://developers.google.com/protocol-buffers/).
Definition: Format.h:1237
StringRef Text
Definition: Format.cpp:1621
bool ContinuesLineCommentSection
Does this line comment continue a line comment section?
Definition: FormatToken.h:284
bool isStringLiteral() const
Definition: FormatToken.h:347
SourceLocation getBegin() const
bool AfterNamespace
Wrap namespace definitions.
Definition: Format.h:698
bool HasUnescapedNewline
Whether there is at least one unescaped newline before the Token.
Definition: FormatToken.h:138
BraceBlockKind BlockKind
Contains the kind of block if this token is a brace.
Definition: FormatToken.h:178
const FormatStyle & Style