clang  5.0.0svn
UnwrappedLineParser.cpp
Go to the documentation of this file.
1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "format-parser"
22 
23 namespace clang {
24 namespace format {
25 
27 public:
28  virtual ~FormatTokenSource() {}
29  virtual FormatToken *getNextToken() = 0;
30 
31  virtual unsigned getPosition() = 0;
32  virtual FormatToken *setPosition(unsigned Position) = 0;
33 };
34 
35 namespace {
36 
37 class ScopedDeclarationState {
38 public:
39  ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
40  bool MustBeDeclaration)
41  : Line(Line), Stack(Stack) {
42  Line.MustBeDeclaration = MustBeDeclaration;
43  Stack.push_back(MustBeDeclaration);
44  }
45  ~ScopedDeclarationState() {
46  Stack.pop_back();
47  if (!Stack.empty())
48  Line.MustBeDeclaration = Stack.back();
49  else
50  Line.MustBeDeclaration = true;
51  }
52 
53 private:
54  UnwrappedLine &Line;
55  std::vector<bool> &Stack;
56 };
57 
58 static bool isLineComment(const FormatToken &FormatTok) {
59  return FormatTok.is(tok::comment) &&
60  FormatTok.TokenText.startswith("//");
61 }
62 
63 // Checks if \p FormatTok is a line comment that continues the line comment
64 // \p Previous. The original column of \p MinColumnToken is used to determine
65 // whether \p FormatTok is indented enough to the right to continue \p Previous.
66 static bool continuesLineComment(const FormatToken &FormatTok,
67  const FormatToken *Previous,
68  const FormatToken *MinColumnToken) {
69  if (!Previous || !MinColumnToken)
70  return false;
71  unsigned MinContinueColumn =
72  MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
73  return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
74  isLineComment(*Previous) &&
75  FormatTok.OriginalColumn >= MinContinueColumn;
76 }
77 
78 class ScopedMacroState : public FormatTokenSource {
79 public:
80  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
81  FormatToken *&ResetToken)
82  : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
83  PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
84  Token(nullptr), PreviousToken(nullptr) {
85  TokenSource = this;
86  Line.Level = 0;
87  Line.InPPDirective = true;
88  }
89 
90  ~ScopedMacroState() override {
91  TokenSource = PreviousTokenSource;
92  ResetToken = Token;
93  Line.InPPDirective = false;
94  Line.Level = PreviousLineLevel;
95  }
96 
97  FormatToken *getNextToken() override {
98  // The \c UnwrappedLineParser guards against this by never calling
99  // \c getNextToken() after it has encountered the first eof token.
100  assert(!eof());
101  PreviousToken = Token;
102  Token = PreviousTokenSource->getNextToken();
103  if (eof())
104  return getFakeEOF();
105  return Token;
106  }
107 
108  unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
109 
110  FormatToken *setPosition(unsigned Position) override {
111  PreviousToken = nullptr;
112  Token = PreviousTokenSource->setPosition(Position);
113  return Token;
114  }
115 
116 private:
117  bool eof() {
118  return Token && Token->HasUnescapedNewline &&
119  !continuesLineComment(*Token, PreviousToken,
120  /*MinColumnToken=*/PreviousToken);
121  }
122 
123  FormatToken *getFakeEOF() {
124  static bool EOFInitialized = false;
125  static FormatToken FormatTok;
126  if (!EOFInitialized) {
127  FormatTok.Tok.startToken();
128  FormatTok.Tok.setKind(tok::eof);
129  EOFInitialized = true;
130  }
131  return &FormatTok;
132  }
133 
134  UnwrappedLine &Line;
135  FormatTokenSource *&TokenSource;
136  FormatToken *&ResetToken;
137  unsigned PreviousLineLevel;
138  FormatTokenSource *PreviousTokenSource;
139 
141  FormatToken *PreviousToken;
142 };
143 
144 } // end anonymous namespace
145 
// Scope guard that parks the parser's current line and gives the parser a
// fresh, empty line for the duration of a nested parse.
// NOTE(review): listing lines 146, 148, 161 and 173 are missing from this
// extract (presumably the class header, the first constructor line, the
// destructor header and the `Parser` member) - restore from the upstream file.
147 public:
 // Constructor tail: redirects Parser.CurrentLines either to the preprocessor
 // directive list or to the children of the last token on the current line,
 // then moves the current line aside and starts a new line that inherits its
 // Level and InPPDirective.
149  bool SwitchToPreprocessorLines = false)
150  : Parser(Parser), OriginalLines(Parser.CurrentLines) {
151  if (SwitchToPreprocessorLines)
152  Parser.CurrentLines = &Parser.PreprocessorDirectives;
153  else if (!Parser.Line->Tokens.empty())
154  Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
155  PreBlockLine = std::move(Parser.Line);
156  Parser.Line = llvm::make_unique<UnwrappedLine>();
157  Parser.Line->Level = PreBlockLine->Level;
158  Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
159  }
160 
 // Destructor body: flushes a pending nested line, puts the parked line back
 // and restores CurrentLines. If lines were being collected as preprocessor
 // directives, a break before the next token is requested.
162  if (!Parser.Line->Tokens.empty()) {
163  Parser.addUnwrappedLine();
164  }
165  assert(Parser.Line->Tokens.empty());
166  Parser.Line = std::move(PreBlockLine);
167  if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
168  Parser.MustBreakBeforeNextToken = true;
169  Parser.CurrentLines = OriginalLines;
170  }
171 
172 private:
174 
 // Line that was current when the guard was created.
175  std::unique_ptr<UnwrappedLine> PreBlockLine;
 // Value of Parser.CurrentLines at construction time.
176  SmallVectorImpl<UnwrappedLine> *OriginalLines;
177 };
178 
// Scope guard around a compound statement: may bump the referenced line level
// on construction and always restores the saved level on destruction.
// NOTE(review): listing lines 179, 181 and 184 are missing from this extract
// (presumably the class header, the first constructor line and a statement or
// condition preceding the addUnwrappedLine() call). Whether that call is
// conditional cannot be determined from what is visible here.
180 public:
182  const FormatStyle &Style, unsigned &LineLevel)
183  : LineLevel(LineLevel), OldLineLevel(LineLevel) {
185  Parser->addUnwrappedLine();
 // With BraceWrapping.IndentBraces the level is raised by one.
186  if (Style.BraceWrapping.IndentBraces)
187  ++LineLevel;
188  }
189  ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
190 
191 private:
 // Level being managed (refers to the caller's variable).
192  unsigned &LineLevel;
 // Level captured at construction, written back by the destructor.
193  unsigned OldLineLevel;
194 };
195 
196 namespace {
197 
198 class IndexedTokenSource : public FormatTokenSource {
199 public:
200  IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
201  : Tokens(Tokens), Position(-1) {}
202 
203  FormatToken *getNextToken() override {
204  ++Position;
205  return Tokens[Position];
206  }
207 
208  unsigned getPosition() override {
209  assert(Position >= 0);
210  return Position;
211  }
212 
213  FormatToken *setPosition(unsigned P) override {
214  Position = P;
215  return Tokens[Position];
216  }
217 
218  void reset() { Position = -1; }
219 
220 private:
222  int Position;
223 };
224 
225 } // end anonymous namespace
226 
// Constructor of UnwrappedLineParser (tail of the parameter list plus the
// member initialisers). Starts with a fresh empty line, collects into `Lines`,
// has no token source yet (Tokens is nullptr) and PPBranchLevel at -1.
// NOTE(review): listing lines 227 and 229 are missing from this extract
// (presumably the start of the signature and the token-array parameter that
// AllTokens is initialised from) - restore from the upstream file.
228  const AdditionalKeywords &Keywords,
230  UnwrappedLineConsumer &Callback)
231  : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
232  CurrentLines(&Lines), Style(Style), Keywords(Keywords),
233  CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
234  Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
235 
236 void UnwrappedLineParser::reset() {
237  PPBranchLevel = -1;
238  Line.reset(new UnwrappedLine);
239  CommentsBeforeNextToken.clear();
240  FormatTok = nullptr;
241  MustBreakBeforeNextToken = false;
242  PreprocessorDirectives.clear();
243  CurrentLines = &Lines;
244  DeclarationScopeStack.clear();
245  PPStack.clear();
246 }
247 
// Driver loop: parses the complete token array once per selected combination
// of preprocessor branches and reports the lines of every pass to Callback.
// NOTE(review): listing line 248 (presumably the function header) is missing
// from this extract - restore from the upstream file.
249  IndexedTokenSource TokenSource(AllTokens);
250  do {
251  DEBUG(llvm::dbgs() << "----\n");
 // Start every pass from a clean parser state and from the first token.
252  reset();
253  Tokens = &TokenSource;
254  TokenSource.reset();
255 
256  readToken();
257  parseFile();
258  // Create line with eof token.
259  pushToken(FormatTok);
260  addUnwrappedLine();
261 
 // Hand the lines of this pass to the consumer, then drop them.
262  for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
263  E = Lines.end();
264  I != E; ++I) {
265  Callback.consumeUnwrappedLine(*I);
266  }
267  Callback.finishRun();
268  Lines.clear();
 // Drop trailing levels whose branch index cannot be advanced any further,
 // then advance the innermost remaining level for the next pass.
269  while (!PPLevelBranchIndex.empty() &&
270  PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
271  PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
272  PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
273  }
274  if (!PPLevelBranchIndex.empty()) {
275  ++PPLevelBranchIndex.back();
276  assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
277  assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
278  }
 // Another pass is needed while some level still has a branch to select.
279  } while (!PPLevelBranchIndex.empty());
280 }
281 
282 void UnwrappedLineParser::parseFile() {
283  // The top-level context in a file always has declarations, except for pre-
284  // processor directives and JavaScript files.
285  bool MustBeDeclaration =
286  !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
287  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
288  MustBeDeclaration);
289  parseLevel(/*HasOpeningBrace=*/false);
290  // Make sure to format the remaining tokens.
291  flushComments(true);
292  addUnwrappedLine();
293 }
294 
295 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
296  bool SwitchLabelEncountered = false;
297  do {
298  tok::TokenKind kind = FormatTok->Tok.getKind();
299  if (FormatTok->Type == TT_MacroBlockBegin) {
300  kind = tok::l_brace;
301  } else if (FormatTok->Type == TT_MacroBlockEnd) {
302  kind = tok::r_brace;
303  }
304 
305  switch (kind) {
306  case tok::comment:
307  nextToken();
308  addUnwrappedLine();
309  break;
310  case tok::l_brace:
311  // FIXME: Add parameter whether this can happen - if this happens, we must
312  // be in a non-declaration context.
313  if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
314  continue;
315  parseBlock(/*MustBeDeclaration=*/false);
316  addUnwrappedLine();
317  break;
318  case tok::r_brace:
319  if (HasOpeningBrace)
320  return;
321  nextToken();
322  addUnwrappedLine();
323  break;
324  case tok::kw_default:
325  case tok::kw_case:
326  if (!SwitchLabelEncountered &&
327  (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
328  ++Line->Level;
329  SwitchLabelEncountered = true;
330  parseStructuralElement();
331  break;
332  default:
333  parseStructuralElement();
334  break;
335  }
336  } while (!eof());
337 }
338 
// Looks ahead from the current '{' and assigns a BlockKind (BK_Block or
// BK_BracedInit) to it and to the braces nested inside it, then rewinds the
// token source to where it started. \p ExpectClassBody changes how a ';'
// after the outermost closing brace is interpreted.
339 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
340  // We'll parse forward through the tokens until we hit
341  // a closing brace or eof - note that getNextToken() will
342  // parse macros, so this will magically work inside macro
343  // definitions, too.
344  unsigned StoredPosition = Tokens->getPosition();
345  FormatToken *Tok = FormatTok;
346  const FormatToken *PrevTok = getPreviousToken();
347  // Keep a stack of positions of lbrace tokens. We will
348  // update information about whether an lbrace starts a
349  // braced init list or a different block during the loop.
350  SmallVector<FormatToken *, 8> LBraceStack;
351  assert(Tok->Tok.is(tok::l_brace));
352  do {
353  // Get next non-comment token.
354  FormatToken *NextTok;
 // NOTE(review): ReadTokens is incremented but never read in this function.
355  unsigned ReadTokens = 0;
356  do {
357  NextTok = Tokens->getNextToken();
358  ++ReadTokens;
359  } while (NextTok->is(tok::comment));
360 
361  switch (Tok->Tok.getKind()) {
362  case tok::l_brace:
363  if (Style.Language == FormatStyle::LK_JavaScript && PrevTok &&
364  PrevTok->is(tok::colon))
365  // A colon indicates this code is in a type, or a braced list following
366  // a label in an object literal ({a: {b: 1}}).
367  // The code below could be confused by semicolons between the individual
368  // members in a type member list, which would normally trigger BK_Block.
369  // In both cases, this must be parsed as an inline braced init.
370  Tok->BlockKind = BK_BracedInit;
371  else
372  Tok->BlockKind = BK_Unknown;
373  LBraceStack.push_back(Tok);
374  break;
375  case tok::r_brace:
376  if (LBraceStack.empty())
377  break;
 // Only decide here if nothing inside the braces settled the kind already.
378  if (LBraceStack.back()->BlockKind == BK_Unknown) {
379  bool ProbablyBracedList = false;
380  if (Style.Language == FormatStyle::LK_Proto) {
381  ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
382  } else {
383  // Using OriginalColumn to distinguish between ObjC methods and
384  // binary operators is a bit hacky.
385  bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
386  NextTok->OriginalColumn == 0;
387 
388  // If there is a comma, semicolon or right paren after the closing
389  // brace, we assume this is a braced initializer list. Note that
390  // regardless how we mark inner braces here, we will overwrite the
391  // BlockKind later if we parse a braced list (where all blocks
392  // inside are by default braced lists), or when we explicitly detect
393  // blocks (for example while parsing lambdas).
394  ProbablyBracedList =
 // NOTE(review): listing line 395 is missing from this extract; the
 // unbalanced ')' on listing line 397 shows that the first operand of the
 // following "&&" group was dropped - restore from the upstream file.
396  NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
397  Keywords.kw_as)) ||
398  (Style.isCpp() && NextTok->is(tok::l_paren)) ||
399  NextTok->isOneOf(tok::comma, tok::period, tok::colon,
400  tok::r_paren, tok::r_square, tok::l_brace,
401  tok::l_square, tok::ellipsis) ||
402  (NextTok->is(tok::identifier) &&
403  !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
404  (NextTok->is(tok::semi) &&
405  (!ExpectClassBody || LBraceStack.size() != 1)) ||
406  (NextTok->isBinaryOperator() && !NextIsObjCMethod);
407  }
 // The opening and the closing brace always receive the same kind.
408  if (ProbablyBracedList) {
409  Tok->BlockKind = BK_BracedInit;
410  LBraceStack.back()->BlockKind = BK_BracedInit;
411  } else {
412  Tok->BlockKind = BK_Block;
413  LBraceStack.back()->BlockKind = BK_Block;
414  }
415  }
416  LBraceStack.pop_back();
417  break;
 // Any of these tokens inside still-undecided braces marks them as a block.
418  case tok::at:
419  case tok::semi:
420  case tok::kw_if:
421  case tok::kw_while:
422  case tok::kw_for:
423  case tok::kw_switch:
424  case tok::kw_try:
425  case tok::kw___try:
426  if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
427  LBraceStack.back()->BlockKind = BK_Block;
428  break;
429  default:
430  break;
431  }
432  PrevTok = Tok;
433  Tok = NextTok;
434  } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
435 
436  // Assume other blocks for all unclosed opening braces.
437  for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
438  if (LBraceStack[i]->BlockKind == BK_Unknown)
439  LBraceStack[i]->BlockKind = BK_Block;
440  }
441 
 // Undo the look-ahead: rewind the source and re-establish the current token.
442  FormatTok = Tokens->setPosition(StoredPosition);
443 }
444 
// Parses a block that starts at the current '{' or macro-block-begin token.
// \p MustBeDeclaration is pushed as the declaration state for the body,
// \p AddLevel raises the line level for the body, and \p MunchSemi makes a
// ';' directly after the closing token get consumed as well.
445 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
446  bool MunchSemi) {
447  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
448  "'{' or macro block token expected");
449  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
450  FormatTok->BlockKind = BK_Block;
451 
 // Saved so the level can be restored on every exit path that resets it.
452  unsigned InitialLevel = Line->Level;
453  nextToken();
454 
 // A macro block opener may be followed by a parenthesized argument list.
455  if (MacroBlock && FormatTok->is(tok::l_paren))
456  parseParens();
457 
458  addUnwrappedLine();
 // NOTE(review): listing line 460 is missing from this extract - it held the
 // value used when CurrentLines is empty; restore from the upstream file.
459  size_t OpeningLineIndex = CurrentLines->empty()
461  : (CurrentLines->size() - 1);
462 
463  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
464  MustBeDeclaration);
465  if (AddLevel)
466  ++Line->Level;
467  parseLevel(/*HasOpeningBrace=*/true);
468 
 // Input ended inside the block: return without restoring the level.
469  if (eof())
470  return;
471 
 // parseLevel() stopped on something other than the expected closing token.
472  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
473  : !FormatTok->is(tok::r_brace)) {
474  Line->Level = InitialLevel;
475  FormatTok->BlockKind = BK_Block;
476  return;
477  }
478 
479  nextToken(); // Munch the closing brace.
480 
481  if (MacroBlock && FormatTok->is(tok::l_paren))
482  parseParens();
483 
484  if (MunchSemi && FormatTok->Tok.is(tok::semi))
485  nextToken();
486  Line->Level = InitialLevel;
 // Record on the current line which line index was computed for the opener.
487  Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
488 }
489 
490 static bool isGoogScope(const UnwrappedLine &Line) {
491  // FIXME: Closure-library specific stuff should not be hard-coded but be
492  // configurable.
493  if (Line.Tokens.size() < 4)
494  return false;
495  auto I = Line.Tokens.begin();
496  if (I->Tok->TokenText != "goog")
497  return false;
498  ++I;
499  if (I->Tok->isNot(tok::period))
500  return false;
501  ++I;
502  if (I->Tok->TokenText != "scope")
503  return false;
504  ++I;
505  return I->Tok->is(tok::l_paren);
506 }
507 
508 static bool isIIFE(const UnwrappedLine &Line,
509  const AdditionalKeywords &Keywords) {
510  // Look for the start of an immediately invoked anonymous function.
511  // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
512  // This is commonly done in JavaScript to create a new, anonymous scope.
513  // Example: (function() { ... })()
514  if (Line.Tokens.size() < 3)
515  return false;
516  auto I = Line.Tokens.begin();
517  if (I->Tok->isNot(tok::l_paren))
518  return false;
519  ++I;
520  if (I->Tok->isNot(Keywords.kw_function))
521  return false;
522  ++I;
523  return I->Tok->is(tok::l_paren);
524 }
525 
526 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
527  const FormatToken &InitialToken) {
528  if (InitialToken.is(tok::kw_namespace))
529  return Style.BraceWrapping.AfterNamespace;
530  if (InitialToken.is(tok::kw_class))
531  return Style.BraceWrapping.AfterClass;
532  if (InitialToken.is(tok::kw_union))
533  return Style.BraceWrapping.AfterUnion;
534  if (InitialToken.is(tok::kw_struct))
535  return Style.BraceWrapping.AfterStruct;
536  return false;
537 }
538 
// Parses a brace-delimited block whose lines become children of the current
// line (see ScopedLineState) rather than top-level lines of their own.
539 void UnwrappedLineParser::parseChildBlock() {
540  FormatTok->BlockKind = BK_Block;
541  nextToken();
542  {
 // NOTE(review): listing line 544 is missing from this extract - the first
 // operand of this initializer was dropped; restore from the upstream file.
543  bool SkipIndent =
545  (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
 // SkipIndent is computed before LineState swaps in a fresh line.
546  ScopedLineState LineState(*this);
547  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
548  /*MustBeDeclaration=*/false);
549  Line->Level += SkipIndent ? 0 : 1;
550  parseLevel(/*HasOpeningBrace=*/true);
551  flushComments(isOnNewLine(*FormatTok));
552  Line->Level -= SkipIndent ? 0 : 1;
553  }
 // Step past the token parseLevel() stopped on.
554  nextToken();
555 }
556 
557 void UnwrappedLineParser::parsePPDirective() {
558  assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
559  ScopedMacroState MacroState(*Line, Tokens, FormatTok);
560  nextToken();
561 
562  if (!FormatTok->Tok.getIdentifierInfo()) {
563  parsePPUnknown();
564  return;
565  }
566 
567  switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
568  case tok::pp_define:
569  parsePPDefine();
570  return;
571  case tok::pp_if:
572  parsePPIf(/*IfDef=*/false);
573  break;
574  case tok::pp_ifdef:
575  case tok::pp_ifndef:
576  parsePPIf(/*IfDef=*/true);
577  break;
578  case tok::pp_else:
579  parsePPElse();
580  break;
581  case tok::pp_elif:
582  parsePPElIf();
583  break;
584  case tok::pp_endif:
585  parsePPEndIf();
586  break;
587  default:
588  parsePPUnknown();
589  break;
590  }
591 }
592 
593 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
594  if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))
595  PPStack.push_back(PP_Unreachable);
596  else
597  PPStack.push_back(PP_Conditional);
598 }
599 
600 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
601  ++PPBranchLevel;
602  assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
603  if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
604  PPLevelBranchIndex.push_back(0);
605  PPLevelBranchCount.push_back(0);
606  }
607  PPChainBranchIndex.push(0);
608  bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
609  conditionalCompilationCondition(Unreachable || Skip);
610 }
611 
612 void UnwrappedLineParser::conditionalCompilationAlternative() {
613  if (!PPStack.empty())
614  PPStack.pop_back();
615  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
616  if (!PPChainBranchIndex.empty())
617  ++PPChainBranchIndex.top();
618  conditionalCompilationCondition(
619  PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
620  PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
621 }
622 
623 void UnwrappedLineParser::conditionalCompilationEnd() {
624  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
625  if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
626  if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
627  PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
628  }
629  }
630  // Guard against #endif's without #if.
631  if (PPBranchLevel > 0)
632  --PPBranchLevel;
633  if (!PPChainBranchIndex.empty())
634  PPChainBranchIndex.pop();
635  if (!PPStack.empty())
636  PPStack.pop_back();
637 }
638 
639 void UnwrappedLineParser::parsePPIf(bool IfDef) {
640  bool IfNDef = FormatTok->is(tok::pp_ifndef);
641  nextToken();
642  bool Unreachable = false;
643  if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
644  Unreachable = true;
645  if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
646  Unreachable = true;
647  conditionalCompilationStart(Unreachable);
648  parsePPUnknown();
649 }
650 
651 void UnwrappedLineParser::parsePPElse() {
652  conditionalCompilationAlternative();
653  parsePPUnknown();
654 }
655 
656 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
657 
658 void UnwrappedLineParser::parsePPEndIf() {
659  conditionalCompilationEnd();
660  parsePPUnknown();
661 }
662 
663 void UnwrappedLineParser::parsePPDefine() {
664  nextToken();
665 
666  if (FormatTok->Tok.getKind() != tok::identifier) {
667  parsePPUnknown();
668  return;
669  }
670  nextToken();
671  if (FormatTok->Tok.getKind() == tok::l_paren &&
672  FormatTok->WhitespaceRange.getBegin() ==
673  FormatTok->WhitespaceRange.getEnd()) {
674  parseParens();
675  }
676  addUnwrappedLine();
677  Line->Level = 1;
678 
679  // Errors during a preprocessor directive can only affect the layout of the
680  // preprocessor directive, and thus we ignore them. An alternative approach
681  // would be to use the same approach we use on the file level (no
682  // re-indentation if there was a structural error) within the macro
683  // definition.
684  parseFile();
685 }
686 
687 void UnwrappedLineParser::parsePPUnknown() {
688  do {
689  nextToken();
690  } while (!eof());
691  addUnwrappedLine();
692 }
693 
694 // Here we blacklist certain tokens that are not usually the first token in an
695 // unwrapped line. This is used in attempt to distinguish macro calls without
696 // trailing semicolons from other constructs split to several lines.
697 static bool tokenCanStartNewLine(const clang::Token &Tok) {
698  // Semicolon can be a null-statement, l_square can be a start of a macro or
699  // a C++11 attribute, but this doesn't seem to be common.
700  return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
701  Tok.isNot(tok::l_square) &&
702  // Tokens that can only be used as binary operators and a part of
703  // overloaded operator names.
704  Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
705  Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
706  Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
707  Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
708  Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
709  Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
710  Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
711  Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
712  Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
713  Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
714  Tok.isNot(tok::lesslessequal) &&
715  // Colon is used in labels, base class lists, initializer lists,
716  // range-based for loops, ternary operator, but should never be the
717  // first token in an unwrapped line.
718  Tok.isNot(tok::colon) &&
719  // 'noexcept' is a trailing annotation.
720  Tok.isNot(tok::kw_noexcept);
721 }
722 
723 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
724  const FormatToken *FormatTok) {
725  // FIXME: This returns true for C/C++ keywords like 'struct'.
726  return FormatTok->is(tok::identifier) &&
727  (FormatTok->Tok.getIdentifierInfo() == nullptr ||
728  !FormatTok->isOneOf(
729  Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
730  Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
731  Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
732  Keywords.kw_let, Keywords.kw_var, tok::kw_const,
733  Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
734  Keywords.kw_instanceof, Keywords.kw_interface,
735  Keywords.kw_throws));
736 }
737 
738 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
739  const FormatToken *FormatTok) {
740  return FormatTok->Tok.isLiteral() ||
741  FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
742  mustBeJSIdent(Keywords, FormatTok);
743 }
744 
745 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
746 // when encountered after a value (see mustBeJSIdentOrValue).
747 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
748  const FormatToken *FormatTok) {
749  return FormatTok->isOneOf(
750  tok::kw_return, Keywords.kw_yield,
751  // conditionals
752  tok::kw_if, tok::kw_else,
753  // loops
754  tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
755  // switch/case
756  tok::kw_switch, tok::kw_case,
757  // exceptions
758  tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
759  // declaration
760  tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
761  Keywords.kw_async, Keywords.kw_function,
762  // import/export
763  Keywords.kw_import, tok::kw_export);
764 }
765 
766 // readTokenWithJavaScriptASI reads the next token and terminates the current
767 // line if JavaScript Automatic Semicolon Insertion must
768 // happen between the current token and the next token.
769 //
770 // This method is conservative - it cannot cover all edge cases of JavaScript,
771 // but only aims to correctly handle certain well known cases. It *must not*
772 // return true in speculative cases.
773 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
774  FormatToken *Previous = FormatTok;
775  readToken();
776  FormatToken *Next = FormatTok;
777 
778  bool IsOnSameLine =
779  CommentsBeforeNextToken.empty()
780  ? Next->NewlinesBefore == 0
781  : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
782  if (IsOnSameLine)
783  return;
784 
785  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
786  bool PreviousStartsTemplateExpr =
787  Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
788  if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) {
789  // If the token before the previous one is an '@', the previous token is an
790  // annotation and can precede another identifier/value.
791  const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok;
792  if (PrePrevious->is(tok::at))
793  return;
794  }
795  if (Next->is(tok::exclaim) && PreviousMustBeValue)
796  return addUnwrappedLine();
797  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
798  bool NextEndsTemplateExpr =
799  Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
800  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
801  (PreviousMustBeValue ||
802  Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
803  tok::minusminus)))
804  return addUnwrappedLine();
805  if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next))
806  return addUnwrappedLine();
807 }
808 
809 void UnwrappedLineParser::parseStructuralElement() {
810  assert(!FormatTok->is(tok::l_brace));
811  if (Style.Language == FormatStyle::LK_TableGen &&
812  FormatTok->is(tok::pp_include)) {
813  nextToken();
814  if (FormatTok->is(tok::string_literal))
815  nextToken();
816  addUnwrappedLine();
817  return;
818  }
819  switch (FormatTok->Tok.getKind()) {
820  case tok::at:
821  nextToken();
822  if (FormatTok->Tok.is(tok::l_brace)) {
823  parseBracedList();
824  break;
825  }
826  switch (FormatTok->Tok.getObjCKeywordID()) {
827  case tok::objc_public:
828  case tok::objc_protected:
829  case tok::objc_package:
830  case tok::objc_private:
831  return parseAccessSpecifier();
832  case tok::objc_interface:
833  case tok::objc_implementation:
834  return parseObjCInterfaceOrImplementation();
835  case tok::objc_protocol:
836  return parseObjCProtocol();
837  case tok::objc_end:
838  return; // Handled by the caller.
839  case tok::objc_optional:
840  case tok::objc_required:
841  nextToken();
842  addUnwrappedLine();
843  return;
844  case tok::objc_autoreleasepool:
845  nextToken();
846  if (FormatTok->Tok.is(tok::l_brace)) {
848  addUnwrappedLine();
849  parseBlock(/*MustBeDeclaration=*/false);
850  }
851  addUnwrappedLine();
852  return;
853  case tok::objc_try:
854  // This branch isn't strictly necessary (the kw_try case below would
855  // do this too after the tok::at is parsed above). But be explicit.
856  parseTryCatch();
857  return;
858  default:
859  break;
860  }
861  break;
862  case tok::kw_asm:
863  nextToken();
864  if (FormatTok->is(tok::l_brace)) {
865  FormatTok->Type = TT_InlineASMBrace;
866  nextToken();
867  while (FormatTok && FormatTok->isNot(tok::eof)) {
868  if (FormatTok->is(tok::r_brace)) {
869  FormatTok->Type = TT_InlineASMBrace;
870  nextToken();
871  addUnwrappedLine();
872  break;
873  }
874  FormatTok->Finalized = true;
875  nextToken();
876  }
877  }
878  break;
879  case tok::kw_namespace:
880  parseNamespace();
881  return;
882  case tok::kw_inline:
883  nextToken();
884  if (FormatTok->Tok.is(tok::kw_namespace)) {
885  parseNamespace();
886  return;
887  }
888  break;
889  case tok::kw_public:
890  case tok::kw_protected:
891  case tok::kw_private:
892  if (Style.Language == FormatStyle::LK_Java ||
894  nextToken();
895  else
896  parseAccessSpecifier();
897  return;
898  case tok::kw_if:
899  parseIfThenElse();
900  return;
901  case tok::kw_for:
902  case tok::kw_while:
903  parseForOrWhileLoop();
904  return;
905  case tok::kw_do:
906  parseDoWhile();
907  return;
908  case tok::kw_switch:
909  parseSwitch();
910  return;
911  case tok::kw_default:
912  nextToken();
913  parseLabel();
914  return;
915  case tok::kw_case:
916  parseCaseLabel();
917  return;
918  case tok::kw_try:
919  case tok::kw___try:
920  parseTryCatch();
921  return;
922  case tok::kw_extern:
923  nextToken();
924  if (FormatTok->Tok.is(tok::string_literal)) {
925  nextToken();
926  if (FormatTok->Tok.is(tok::l_brace)) {
927  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
928  addUnwrappedLine();
929  return;
930  }
931  }
932  break;
933  case tok::kw_export:
934  if (Style.Language == FormatStyle::LK_JavaScript) {
935  parseJavaScriptEs6ImportExport();
936  return;
937  }
938  break;
939  case tok::identifier:
940  if (FormatTok->is(TT_ForEachMacro)) {
941  parseForOrWhileLoop();
942  return;
943  }
944  if (FormatTok->is(TT_MacroBlockBegin)) {
945  parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
946  /*MunchSemi=*/false);
947  return;
948  }
949  if (FormatTok->is(Keywords.kw_import)) {
950  if (Style.Language == FormatStyle::LK_JavaScript) {
951  parseJavaScriptEs6ImportExport();
952  return;
953  }
954  if (Style.Language == FormatStyle::LK_Proto) {
955  nextToken();
956  if (FormatTok->is(tok::kw_public))
957  nextToken();
958  if (!FormatTok->is(tok::string_literal))
959  return;
960  nextToken();
961  if (FormatTok->is(tok::semi))
962  nextToken();
963  addUnwrappedLine();
964  return;
965  }
966  }
967  if (Style.isCpp() &&
968  FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
969  Keywords.kw_slots, Keywords.kw_qslots)) {
970  nextToken();
971  if (FormatTok->is(tok::colon)) {
972  nextToken();
973  addUnwrappedLine();
974  return;
975  }
976  }
977  // In all other cases, parse the declaration.
978  break;
979  default:
980  break;
981  }
982  do {
983  const FormatToken *Previous = getPreviousToken();
984  switch (FormatTok->Tok.getKind()) {
985  case tok::at:
986  nextToken();
987  if (FormatTok->Tok.is(tok::l_brace))
988  parseBracedList();
989  break;
990  case tok::kw_enum:
991  // Ignore if this is part of "template <enum ...".
992  if (Previous && Previous->is(tok::less)) {
993  nextToken();
994  break;
995  }
996 
997  // parseEnum falls through and does not yet add an unwrapped line as an
998  // enum definition can start a structural element.
999  if (!parseEnum())
1000  break;
1001  // This only applies for C++.
1002  if (!Style.isCpp()) {
1003  addUnwrappedLine();
1004  return;
1005  }
1006  break;
1007  case tok::kw_typedef:
1008  nextToken();
1009  if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1010  Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
1011  parseEnum();
1012  break;
1013  case tok::kw_struct:
1014  case tok::kw_union:
1015  case tok::kw_class:
1016  // parseRecord falls through and does not yet add an unwrapped line as a
1017  // record declaration or definition can start a structural element.
1018  parseRecord();
1019  // This does not apply for Java and JavaScript.
1020  if (Style.Language == FormatStyle::LK_Java ||
1022  if (FormatTok->is(tok::semi))
1023  nextToken();
1024  addUnwrappedLine();
1025  return;
1026  }
1027  break;
1028  case tok::period:
1029  nextToken();
1030  // In Java, classes have an implicit static member "class".
1031  if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1032  FormatTok->is(tok::kw_class))
1033  nextToken();
1034  if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
1035  FormatTok->Tok.getIdentifierInfo())
1036  // JavaScript only has pseudo keywords, all keywords are allowed to
1037  // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1038  nextToken();
1039  break;
1040  case tok::semi:
1041  nextToken();
1042  addUnwrappedLine();
1043  return;
1044  case tok::r_brace:
1045  addUnwrappedLine();
1046  return;
1047  case tok::l_paren:
1048  parseParens();
1049  break;
1050  case tok::kw_operator:
1051  nextToken();
1052  if (FormatTok->isBinaryOperator())
1053  nextToken();
1054  break;
1055  case tok::caret:
1056  nextToken();
1057  if (FormatTok->Tok.isAnyIdentifier() ||
1058  FormatTok->isSimpleTypeSpecifier())
1059  nextToken();
1060  if (FormatTok->is(tok::l_paren))
1061  parseParens();
1062  if (FormatTok->is(tok::l_brace))
1063  parseChildBlock();
1064  break;
1065  case tok::l_brace:
1066  if (!tryToParseBracedList()) {
1067  // A block outside of parentheses must be the last part of a
1068  // structural element.
1069  // FIXME: Figure out cases where this is not true, and add projections
1070  // for them (the one we know is missing are lambdas).
1071  if (Style.BraceWrapping.AfterFunction)
1072  addUnwrappedLine();
1073  FormatTok->Type = TT_FunctionLBrace;
1074  parseBlock(/*MustBeDeclaration=*/false);
1075  addUnwrappedLine();
1076  return;
1077  }
1078  // Otherwise this was a braced init list, and the structural
1079  // element continues.
1080  break;
1081  case tok::kw_try:
1082  // We arrive here when parsing function-try blocks.
1083  parseTryCatch();
1084  return;
1085  case tok::identifier: {
1086  if (FormatTok->is(TT_MacroBlockEnd)) {
1087  addUnwrappedLine();
1088  return;
1089  }
1090 
1091  // Function declarations (as opposed to function expressions) are parsed
1092  // on their own unwrapped line by continuing this loop. Function
1093  // expressions (functions that are not on their own line) must not create
1094  // a new unwrapped line, so they are special cased below.
1095  size_t TokenCount = Line->Tokens.size();
1096  if (Style.Language == FormatStyle::LK_JavaScript &&
1097  FormatTok->is(Keywords.kw_function) &&
1098  (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1099  Keywords.kw_async)))) {
1100  tryToParseJSFunction();
1101  break;
1102  }
1103  if ((Style.Language == FormatStyle::LK_JavaScript ||
1104  Style.Language == FormatStyle::LK_Java) &&
1105  FormatTok->is(Keywords.kw_interface)) {
1106  if (Style.Language == FormatStyle::LK_JavaScript) {
1107  // In JavaScript/TypeScript, "interface" can be used as a standalone
1108  // identifier, e.g. in `var interface = 1;`. If "interface" is
1109  // followed by another identifier, it is very like to be an actual
1110  // interface declaration.
1111  unsigned StoredPosition = Tokens->getPosition();
1112  FormatToken *Next = Tokens->getNextToken();
1113  FormatTok = Tokens->setPosition(StoredPosition);
1114  if (Next && !mustBeJSIdent(Keywords, Next)) {
1115  nextToken();
1116  break;
1117  }
1118  }
1119  parseRecord();
1120  addUnwrappedLine();
1121  return;
1122  }
1123 
1124  // See if the following token should start a new unwrapped line.
1125  StringRef Text = FormatTok->TokenText;
1126  nextToken();
1127  if (Line->Tokens.size() == 1 &&
1128  // JS doesn't have macros, and within classes colons indicate fields,
1129  // not labels.
1131  if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1132  Line->Tokens.begin()->Tok->MustBreakBefore = true;
1133  parseLabel();
1134  return;
1135  }
1136  // Recognize function-like macro usages without trailing semicolon as
1137  // well as free-standing macros like Q_OBJECT.
1138  bool FunctionLike = FormatTok->is(tok::l_paren);
1139  if (FunctionLike)
1140  parseParens();
1141 
1142  bool FollowedByNewline =
1143  CommentsBeforeNextToken.empty()
1144  ? FormatTok->NewlinesBefore > 0
1145  : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1146 
1147  if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1148  tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
1149  addUnwrappedLine();
1150  return;
1151  }
1152  }
1153  break;
1154  }
1155  case tok::equal:
1156  // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1157  // TT_JsFatArrow. The always start an expression or a child block if
1158  // followed by a curly.
1159  if (FormatTok->is(TT_JsFatArrow)) {
1160  nextToken();
1161  if (FormatTok->is(tok::l_brace))
1162  parseChildBlock();
1163  break;
1164  }
1165 
1166  nextToken();
1167  if (FormatTok->Tok.is(tok::l_brace)) {
1168  parseBracedList();
1169  }
1170  break;
1171  case tok::l_square:
1172  parseSquare();
1173  break;
1174  case tok::kw_new:
1175  parseNew();
1176  break;
1177  default:
1178  nextToken();
1179  break;
1180  }
1181  } while (!eof());
1182 }
1183 
1184 bool UnwrappedLineParser::tryToParseLambda() {
1185  if (!Style.isCpp()) {
1186  nextToken();
1187  return false;
1188  }
1189  const FormatToken* Previous = getPreviousToken();
1190  if (Previous &&
1191  (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1192  tok::kw_delete) ||
1193  Previous->closesScope() || Previous->isSimpleTypeSpecifier())) {
1194  nextToken();
1195  return false;
1196  }
1197  assert(FormatTok->is(tok::l_square));
1198  FormatToken &LSquare = *FormatTok;
1199  if (!tryToParseLambdaIntroducer())
1200  return false;
1201 
1202  while (FormatTok->isNot(tok::l_brace)) {
1203  if (FormatTok->isSimpleTypeSpecifier()) {
1204  nextToken();
1205  continue;
1206  }
1207  switch (FormatTok->Tok.getKind()) {
1208  case tok::l_brace:
1209  break;
1210  case tok::l_paren:
1211  parseParens();
1212  break;
1213  case tok::amp:
1214  case tok::star:
1215  case tok::kw_const:
1216  case tok::comma:
1217  case tok::less:
1218  case tok::greater:
1219  case tok::identifier:
1220  case tok::numeric_constant:
1221  case tok::coloncolon:
1222  case tok::kw_mutable:
1223  nextToken();
1224  break;
1225  case tok::arrow:
1226  FormatTok->Type = TT_LambdaArrow;
1227  nextToken();
1228  break;
1229  default:
1230  return true;
1231  }
1232  }
1233  LSquare.Type = TT_LambdaLSquare;
1234  parseChildBlock();
1235  return true;
1236 }
1237 
1238 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1239  nextToken();
1240  if (FormatTok->is(tok::equal)) {
1241  nextToken();
1242  if (FormatTok->is(tok::r_square)) {
1243  nextToken();
1244  return true;
1245  }
1246  if (FormatTok->isNot(tok::comma))
1247  return false;
1248  nextToken();
1249  } else if (FormatTok->is(tok::amp)) {
1250  nextToken();
1251  if (FormatTok->is(tok::r_square)) {
1252  nextToken();
1253  return true;
1254  }
1255  if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
1256  return false;
1257  }
1258  if (FormatTok->is(tok::comma))
1259  nextToken();
1260  } else if (FormatTok->is(tok::r_square)) {
1261  nextToken();
1262  return true;
1263  }
1264  do {
1265  if (FormatTok->is(tok::amp))
1266  nextToken();
1267  if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
1268  return false;
1269  nextToken();
1270  if (FormatTok->is(tok::ellipsis))
1271  nextToken();
1272  if (FormatTok->is(tok::comma)) {
1273  nextToken();
1274  } else if (FormatTok->is(tok::r_square)) {
1275  nextToken();
1276  return true;
1277  } else {
1278  return false;
1279  }
1280  } while (!eof());
1281  return false;
1282 }
1283 
1284 void UnwrappedLineParser::tryToParseJSFunction() {
1285  assert(FormatTok->is(Keywords.kw_function) ||
1286  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1287  if (FormatTok->is(Keywords.kw_async))
1288  nextToken();
1289  // Consume "function".
1290  nextToken();
1291 
1292  // Consume * (generator function). Treat it like C++'s overloaded operators.
1293  if (FormatTok->is(tok::star)) {
1294  FormatTok->Type = TT_OverloadedOperator;
1295  nextToken();
1296  }
1297 
1298  // Consume function name.
1299  if (FormatTok->is(tok::identifier))
1300  nextToken();
1301 
1302  if (FormatTok->isNot(tok::l_paren))
1303  return;
1304 
1305  // Parse formal parameter list.
1306  parseParens();
1307 
1308  if (FormatTok->is(tok::colon)) {
1309  // Parse a type definition.
1310  nextToken();
1311 
1312  // Eat the type declaration. For braced inline object types, balance braces,
1313  // otherwise just parse until finding an l_brace for the function body.
1314  if (FormatTok->is(tok::l_brace))
1315  tryToParseBracedList();
1316  else
1317  while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1318  nextToken();
1319  }
1320 
1321  if (FormatTok->is(tok::semi))
1322  return;
1323 
1324  parseChildBlock();
1325 }
1326 
1327 bool UnwrappedLineParser::tryToParseBracedList() {
1328  if (FormatTok->BlockKind == BK_Unknown)
1329  calculateBraceTypes();
1330  assert(FormatTok->BlockKind != BK_Unknown);
1331  if (FormatTok->BlockKind == BK_Block)
1332  return false;
1333  parseBracedList();
1334  return true;
1335 }
1336 
1337 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
1338  bool HasError = false;
1339  nextToken();
1340 
1341  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1342  // replace this by using parseAssigmentExpression() inside.
1343  do {
1344  if (Style.Language == FormatStyle::LK_JavaScript) {
1345  if (FormatTok->is(Keywords.kw_function) ||
1346  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1347  tryToParseJSFunction();
1348  continue;
1349  }
1350  if (FormatTok->is(TT_JsFatArrow)) {
1351  nextToken();
1352  // Fat arrows can be followed by simple expressions or by child blocks
1353  // in curly braces.
1354  if (FormatTok->is(tok::l_brace)) {
1355  parseChildBlock();
1356  continue;
1357  }
1358  }
1359  if (FormatTok->is(tok::l_brace)) {
1360  // Could be a method inside of a braced list `{a() { return 1; }}`.
1361  if (tryToParseBracedList())
1362  continue;
1363  parseChildBlock();
1364  }
1365  }
1366  switch (FormatTok->Tok.getKind()) {
1367  case tok::caret:
1368  nextToken();
1369  if (FormatTok->is(tok::l_brace)) {
1370  parseChildBlock();
1371  }
1372  break;
1373  case tok::l_square:
1374  tryToParseLambda();
1375  break;
1376  case tok::l_paren:
1377  parseParens();
1378  // JavaScript can just have free standing methods and getters/setters in
1379  // object literals. Detect them by a "{" following ")".
1380  if (Style.Language == FormatStyle::LK_JavaScript) {
1381  if (FormatTok->is(tok::l_brace))
1382  parseChildBlock();
1383  break;
1384  }
1385  break;
1386  case tok::l_brace:
1387  // Assume there are no blocks inside a braced init list apart
1388  // from the ones we explicitly parse out (like lambdas).
1389  FormatTok->BlockKind = BK_BracedInit;
1390  parseBracedList();
1391  break;
1392  case tok::r_brace:
1393  nextToken();
1394  return !HasError;
1395  case tok::semi:
1396  // JavaScript (or more precisely TypeScript) can have semicolons in braced
1397  // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1398  // used for error recovery if we have otherwise determined that this is
1399  // a braced list.
1400  if (Style.Language == FormatStyle::LK_JavaScript) {
1401  nextToken();
1402  break;
1403  }
1404  HasError = true;
1405  if (!ContinueOnSemicolons)
1406  return !HasError;
1407  nextToken();
1408  break;
1409  case tok::comma:
1410  nextToken();
1411  break;
1412  default:
1413  nextToken();
1414  break;
1415  }
1416  } while (!eof());
1417  return false;
1418 }
1419 
1420 void UnwrappedLineParser::parseParens() {
1421  assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1422  nextToken();
1423  do {
1424  switch (FormatTok->Tok.getKind()) {
1425  case tok::l_paren:
1426  parseParens();
1427  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1428  parseChildBlock();
1429  break;
1430  case tok::r_paren:
1431  nextToken();
1432  return;
1433  case tok::r_brace:
1434  // A "}" inside parenthesis is an error if there wasn't a matching "{".
1435  return;
1436  case tok::l_square:
1437  tryToParseLambda();
1438  break;
1439  case tok::l_brace:
1440  if (!tryToParseBracedList())
1441  parseChildBlock();
1442  break;
1443  case tok::at:
1444  nextToken();
1445  if (FormatTok->Tok.is(tok::l_brace))
1446  parseBracedList();
1447  break;
1448  case tok::kw_class:
1449  if (Style.Language == FormatStyle::LK_JavaScript)
1450  parseRecord(/*ParseAsExpr=*/true);
1451  else
1452  nextToken();
1453  break;
1454  case tok::identifier:
1455  if (Style.Language == FormatStyle::LK_JavaScript &&
1456  (FormatTok->is(Keywords.kw_function) ||
1457  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1458  tryToParseJSFunction();
1459  else
1460  nextToken();
1461  break;
1462  default:
1463  nextToken();
1464  break;
1465  }
1466  } while (!eof());
1467 }
1468 
1469 void UnwrappedLineParser::parseSquare() {
1470  assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1471  if (tryToParseLambda())
1472  return;
1473  do {
1474  switch (FormatTok->Tok.getKind()) {
1475  case tok::l_paren:
1476  parseParens();
1477  break;
1478  case tok::r_square:
1479  nextToken();
1480  return;
1481  case tok::r_brace:
1482  // A "}" inside parenthesis is an error if there wasn't a matching "{".
1483  return;
1484  case tok::l_square:
1485  parseSquare();
1486  break;
1487  case tok::l_brace: {
1488  if (!tryToParseBracedList())
1489  parseChildBlock();
1490  break;
1491  }
1492  case tok::at:
1493  nextToken();
1494  if (FormatTok->Tok.is(tok::l_brace))
1495  parseBracedList();
1496  break;
1497  default:
1498  nextToken();
1499  break;
1500  }
1501  } while (!eof());
1502 }
1503 
1504 void UnwrappedLineParser::parseIfThenElse() {
1505  assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1506  nextToken();
1507  if (FormatTok->Tok.is(tok::l_paren))
1508  parseParens();
1509  bool NeedsUnwrappedLine = false;
1510  if (FormatTok->Tok.is(tok::l_brace)) {
1511  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1512  parseBlock(/*MustBeDeclaration=*/false);
1513  if (Style.BraceWrapping.BeforeElse)
1514  addUnwrappedLine();
1515  else
1516  NeedsUnwrappedLine = true;
1517  } else {
1518  addUnwrappedLine();
1519  ++Line->Level;
1520  parseStructuralElement();
1521  --Line->Level;
1522  }
1523  if (FormatTok->Tok.is(tok::kw_else)) {
1524  nextToken();
1525  if (FormatTok->Tok.is(tok::l_brace)) {
1526  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1527  parseBlock(/*MustBeDeclaration=*/false);
1528  addUnwrappedLine();
1529  } else if (FormatTok->Tok.is(tok::kw_if)) {
1530  parseIfThenElse();
1531  } else {
1532  addUnwrappedLine();
1533  ++Line->Level;
1534  parseStructuralElement();
1535  if (FormatTok->is(tok::eof))
1536  addUnwrappedLine();
1537  --Line->Level;
1538  }
1539  } else if (NeedsUnwrappedLine) {
1540  addUnwrappedLine();
1541  }
1542 }
1543 
1544 void UnwrappedLineParser::parseTryCatch() {
1545  assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1546  nextToken();
1547  bool NeedsUnwrappedLine = false;
1548  if (FormatTok->is(tok::colon)) {
1549  // We are in a function try block, what comes is an initializer list.
1550  nextToken();
1551  while (FormatTok->is(tok::identifier)) {
1552  nextToken();
1553  if (FormatTok->is(tok::l_paren))
1554  parseParens();
1555  if (FormatTok->is(tok::comma))
1556  nextToken();
1557  }
1558  }
1559  // Parse try with resource.
1560  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1561  parseParens();
1562  }
1563  if (FormatTok->is(tok::l_brace)) {
1564  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1565  parseBlock(/*MustBeDeclaration=*/false);
1566  if (Style.BraceWrapping.BeforeCatch) {
1567  addUnwrappedLine();
1568  } else {
1569  NeedsUnwrappedLine = true;
1570  }
1571  } else if (!FormatTok->is(tok::kw_catch)) {
1572  // The C++ standard requires a compound-statement after a try.
1573  // If there's none, we try to assume there's a structuralElement
1574  // and try to continue.
1575  addUnwrappedLine();
1576  ++Line->Level;
1577  parseStructuralElement();
1578  --Line->Level;
1579  }
1580  while (1) {
1581  if (FormatTok->is(tok::at))
1582  nextToken();
1583  if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1584  tok::kw___finally) ||
1585  ((Style.Language == FormatStyle::LK_Java ||
1587  FormatTok->is(Keywords.kw_finally)) ||
1588  (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1589  FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1590  break;
1591  nextToken();
1592  while (FormatTok->isNot(tok::l_brace)) {
1593  if (FormatTok->is(tok::l_paren)) {
1594  parseParens();
1595  continue;
1596  }
1597  if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1598  return;
1599  nextToken();
1600  }
1601  NeedsUnwrappedLine = false;
1602  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1603  parseBlock(/*MustBeDeclaration=*/false);
1604  if (Style.BraceWrapping.BeforeCatch)
1605  addUnwrappedLine();
1606  else
1607  NeedsUnwrappedLine = true;
1608  }
1609  if (NeedsUnwrappedLine)
1610  addUnwrappedLine();
1611 }
1612 
1613 void UnwrappedLineParser::parseNamespace() {
1614  assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1615 
1616  const FormatToken &InitialToken = *FormatTok;
1617  nextToken();
1618  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1619  nextToken();
1620  if (FormatTok->Tok.is(tok::l_brace)) {
1621  if (ShouldBreakBeforeBrace(Style, InitialToken))
1622  addUnwrappedLine();
1623 
1624  bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1626  DeclarationScopeStack.size() > 1);
1627  parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1628  // Munch the semicolon after a namespace. This is more common than one would
1629  // think. Puttin the semicolon into its own line is very ugly.
1630  if (FormatTok->Tok.is(tok::semi))
1631  nextToken();
1632  addUnwrappedLine();
1633  }
1634  // FIXME: Add error handling.
1635 }
1636 
1637 void UnwrappedLineParser::parseNew() {
1638  assert(FormatTok->is(tok::kw_new) && "'new' expected");
1639  nextToken();
1640  if (Style.Language != FormatStyle::LK_Java)
1641  return;
1642 
1643  // In Java, we can parse everything up to the parens, which aren't optional.
1644  do {
1645  // There should not be a ;, { or } before the new's open paren.
1646  if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1647  return;
1648 
1649  // Consume the parens.
1650  if (FormatTok->is(tok::l_paren)) {
1651  parseParens();
1652 
1653  // If there is a class body of an anonymous class, consume that as child.
1654  if (FormatTok->is(tok::l_brace))
1655  parseChildBlock();
1656  return;
1657  }
1658  nextToken();
1659  } while (!eof());
1660 }
1661 
1662 void UnwrappedLineParser::parseForOrWhileLoop() {
1663  assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1664  "'for', 'while' or foreach macro expected");
1665  nextToken();
1666  // JS' for await ( ...
1667  if (Style.Language == FormatStyle::LK_JavaScript &&
1668  FormatTok->is(Keywords.kw_await))
1669  nextToken();
1670  if (FormatTok->Tok.is(tok::l_paren))
1671  parseParens();
1672  if (FormatTok->Tok.is(tok::l_brace)) {
1673  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1674  parseBlock(/*MustBeDeclaration=*/false);
1675  addUnwrappedLine();
1676  } else {
1677  addUnwrappedLine();
1678  ++Line->Level;
1679  parseStructuralElement();
1680  --Line->Level;
1681  }
1682 }
1683 
1684 void UnwrappedLineParser::parseDoWhile() {
1685  assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1686  nextToken();
1687  if (FormatTok->Tok.is(tok::l_brace)) {
1688  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1689  parseBlock(/*MustBeDeclaration=*/false);
1690  if (Style.BraceWrapping.IndentBraces)
1691  addUnwrappedLine();
1692  } else {
1693  addUnwrappedLine();
1694  ++Line->Level;
1695  parseStructuralElement();
1696  --Line->Level;
1697  }
1698 
1699  // FIXME: Add error handling.
1700  if (!FormatTok->Tok.is(tok::kw_while)) {
1701  addUnwrappedLine();
1702  return;
1703  }
1704 
1705  nextToken();
1706  parseStructuralElement();
1707 }
1708 
1709 void UnwrappedLineParser::parseLabel() {
1710  nextToken();
1711  unsigned OldLineLevel = Line->Level;
1712  if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1713  --Line->Level;
1714  if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1715  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1716  parseBlock(/*MustBeDeclaration=*/false);
1717  if (FormatTok->Tok.is(tok::kw_break)) {
1719  addUnwrappedLine();
1720  parseStructuralElement();
1721  }
1722  addUnwrappedLine();
1723  } else {
1724  if (FormatTok->is(tok::semi))
1725  nextToken();
1726  addUnwrappedLine();
1727  }
1728  Line->Level = OldLineLevel;
1729  if (FormatTok->isNot(tok::l_brace)) {
1730  parseStructuralElement();
1731  addUnwrappedLine();
1732  }
1733 }
1734 
1735 void UnwrappedLineParser::parseCaseLabel() {
1736  assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1737  // FIXME: fix handling of complex expressions here.
1738  do {
1739  nextToken();
1740  } while (!eof() && !FormatTok->Tok.is(tok::colon));
1741  parseLabel();
1742 }
1743 
1744 void UnwrappedLineParser::parseSwitch() {
1745  assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1746  nextToken();
1747  if (FormatTok->Tok.is(tok::l_paren))
1748  parseParens();
1749  if (FormatTok->Tok.is(tok::l_brace)) {
1750  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1751  parseBlock(/*MustBeDeclaration=*/false);
1752  addUnwrappedLine();
1753  } else {
1754  addUnwrappedLine();
1755  ++Line->Level;
1756  parseStructuralElement();
1757  --Line->Level;
1758  }
1759 }
1760 
1761 void UnwrappedLineParser::parseAccessSpecifier() {
1762  nextToken();
1763  // Understand Qt's slots.
1764  if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1765  nextToken();
1766  // Otherwise, we don't know what it is, and we'd better keep the next token.
1767  if (FormatTok->Tok.is(tok::colon))
1768  nextToken();
1769  addUnwrappedLine();
1770 }
1771 
1772 bool UnwrappedLineParser::parseEnum() {
1773  // Won't be 'enum' for NS_ENUMs.
1774  if (FormatTok->Tok.is(tok::kw_enum))
1775  nextToken();
1776 
1777  // In TypeScript, "enum" can also be used as property name, e.g. in interface
1778  // declarations. An "enum" keyword followed by a colon would be a syntax
1779  // error and thus assume it is just an identifier.
1780  if (Style.Language == FormatStyle::LK_JavaScript &&
1781  FormatTok->isOneOf(tok::colon, tok::question))
1782  return false;
1783 
1784  // Eat up enum class ...
1785  if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1786  nextToken();
1787 
1788  while (FormatTok->Tok.getIdentifierInfo() ||
1789  FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1790  tok::greater, tok::comma, tok::question)) {
1791  nextToken();
1792  // We can have macros or attributes in between 'enum' and the enum name.
1793  if (FormatTok->is(tok::l_paren))
1794  parseParens();
1795  if (FormatTok->is(tok::identifier)) {
1796  nextToken();
1797  // If there are two identifiers in a row, this is likely an elaborate
1798  // return type. In Java, this can be "implements", etc.
1799  if (Style.isCpp() && FormatTok->is(tok::identifier))
1800  return false;
1801  }
1802  }
1803 
1804  // Just a declaration or something is wrong.
1805  if (FormatTok->isNot(tok::l_brace))
1806  return true;
1807  FormatTok->BlockKind = BK_Block;
1808 
1809  if (Style.Language == FormatStyle::LK_Java) {
1810  // Java enums are different.
1811  parseJavaEnumBody();
1812  return true;
1813  }
1814  if (Style.Language == FormatStyle::LK_Proto) {
1815  parseBlock(/*MustBeDeclaration=*/true);
1816  return true;
1817  }
1818 
1819  // Parse enum body.
1820  bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1821  if (HasError) {
1822  if (FormatTok->is(tok::semi))
1823  nextToken();
1824  addUnwrappedLine();
1825  }
1826  return true;
1827 
1828  // There is no addUnwrappedLine() here so that we fall through to parsing a
1829  // structural element afterwards. Thus, in "enum A {} n, m;",
1830  // "} n, m;" will end up in one unwrapped line.
1831 }
1832 
1833 void UnwrappedLineParser::parseJavaEnumBody() {
1834  // Determine whether the enum is simple, i.e. does not have a semicolon or
1835  // constants with class bodies. Simple enums can be formatted like braced
1836  // lists, contracted to a single line, etc.
1837  unsigned StoredPosition = Tokens->getPosition();
1838  bool IsSimple = true;
1839  FormatToken *Tok = Tokens->getNextToken();
1840  while (Tok) {
1841  if (Tok->is(tok::r_brace))
1842  break;
1843  if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1844  IsSimple = false;
1845  break;
1846  }
1847  // FIXME: This will also mark enums with braces in the arguments to enum
1848  // constants as "not simple". This is probably fine in practice, though.
1849  Tok = Tokens->getNextToken();
1850  }
1851  FormatTok = Tokens->setPosition(StoredPosition);
1852 
1853  if (IsSimple) {
1854  parseBracedList();
1855  addUnwrappedLine();
1856  return;
1857  }
1858 
1859  // Parse the body of a more complex enum.
1860  // First add a line for everything up to the "{".
1861  nextToken();
1862  addUnwrappedLine();
1863  ++Line->Level;
1864 
1865  // Parse the enum constants.
1866  while (FormatTok) {
1867  if (FormatTok->is(tok::l_brace)) {
1868  // Parse the constant's class body.
1869  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1870  /*MunchSemi=*/false);
1871  } else if (FormatTok->is(tok::l_paren)) {
1872  parseParens();
1873  } else if (FormatTok->is(tok::comma)) {
1874  nextToken();
1875  addUnwrappedLine();
1876  } else if (FormatTok->is(tok::semi)) {
1877  nextToken();
1878  addUnwrappedLine();
1879  break;
1880  } else if (FormatTok->is(tok::r_brace)) {
1881  addUnwrappedLine();
1882  break;
1883  } else {
1884  nextToken();
1885  }
1886  }
1887 
1888  // Parse the class body after the enum's ";" if any.
1889  parseLevel(/*HasOpeningBrace=*/true);
1890  nextToken();
1891  --Line->Level;
1892  addUnwrappedLine();
1893 }
1894 
1895 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
1896  const FormatToken &InitialToken = *FormatTok;
1897  nextToken();
1898 
1899  // The actual identifier can be a nested name specifier, and in macros
1900  // it is often token-pasted.
1901  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
1902  tok::kw___attribute, tok::kw___declspec,
1903  tok::kw_alignas) ||
1904  ((Style.Language == FormatStyle::LK_Java ||
1906  FormatTok->isOneOf(tok::period, tok::comma))) {
1907  bool IsNonMacroIdentifier =
1908  FormatTok->is(tok::identifier) &&
1909  FormatTok->TokenText != FormatTok->TokenText.upper();
1910  nextToken();
1911  // We can have macros or attributes in between 'class' and the class name.
1912  if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
1913  parseParens();
1914  }
1915 
1916  // Note that parsing away template declarations here leads to incorrectly
1917  // accepting function declarations as record declarations.
1918  // In general, we cannot solve this problem. Consider:
1919  // class A<int> B() {}
1920  // which can be a function definition or a class definition when B() is a
1921  // macro. If we find enough real-world cases where this is a problem, we
1922  // can parse for the 'template' keyword in the beginning of the statement,
1923  // and thus rule out the record production in case there is no template
1924  // (this would still leave us with an ambiguity between template function
1925  // and class declarations).
1926  if (FormatTok->isOneOf(tok::colon, tok::less)) {
1927  while (!eof()) {
1928  if (FormatTok->is(tok::l_brace)) {
1929  calculateBraceTypes(/*ExpectClassBody=*/true);
1930  if (!tryToParseBracedList())
1931  break;
1932  }
1933  if (FormatTok->Tok.is(tok::semi))
1934  return;
1935  nextToken();
1936  }
1937  }
1938  if (FormatTok->Tok.is(tok::l_brace)) {
1939  if (ParseAsExpr) {
1940  parseChildBlock();
1941  } else {
1942  if (ShouldBreakBeforeBrace(Style, InitialToken))
1943  addUnwrappedLine();
1944 
1945  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1946  /*MunchSemi=*/false);
1947  }
1948  }
1949  // There is no addUnwrappedLine() here so that we fall through to parsing a
1950  // structural element afterwards. Thus, in "class A {} n, m;",
1951  // "} n, m;" will end up in one unwrapped line.
1952 }
1953 
1954 void UnwrappedLineParser::parseObjCProtocolList() {
1955  assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1956  do
1957  nextToken();
1958  while (!eof() && FormatTok->Tok.isNot(tok::greater));
1959  nextToken(); // Skip '>'.
1960 }
1961 
1962 void UnwrappedLineParser::parseObjCUntilAtEnd() {
1963  do {
1964  if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
1965  nextToken();
1966  addUnwrappedLine();
1967  break;
1968  }
1969  if (FormatTok->is(tok::l_brace)) {
1970  parseBlock(/*MustBeDeclaration=*/false);
1971  // In ObjC interfaces, nothing should be following the "}".
1972  addUnwrappedLine();
1973  } else if (FormatTok->is(tok::r_brace)) {
1974  // Ignore stray "}". parseStructuralElement doesn't consume them.
1975  nextToken();
1976  addUnwrappedLine();
1977  } else {
1978  parseStructuralElement();
1979  }
1980  } while (!eof());
1981 }
1982 
1983 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
1984  nextToken();
1985  nextToken(); // interface name
1986 
1987  // @interface can be followed by either a base class, or a category.
1988  if (FormatTok->Tok.is(tok::colon)) {
1989  nextToken();
1990  nextToken(); // base class name
1991  } else if (FormatTok->Tok.is(tok::l_paren))
1992  // Skip category, if present.
1993  parseParens();
1994 
1995  if (FormatTok->Tok.is(tok::less))
1996  parseObjCProtocolList();
1997 
1998  if (FormatTok->Tok.is(tok::l_brace)) {
2000  addUnwrappedLine();
2001  parseBlock(/*MustBeDeclaration=*/true);
2002  }
2003 
2004  // With instance variables, this puts '}' on its own line. Without instance
2005  // variables, this ends the @interface line.
2006  addUnwrappedLine();
2007 
2008  parseObjCUntilAtEnd();
2009 }
2010 
2011 void UnwrappedLineParser::parseObjCProtocol() {
2012  nextToken();
2013  nextToken(); // protocol name
2014 
2015  if (FormatTok->Tok.is(tok::less))
2016  parseObjCProtocolList();
2017 
2018  // Check for protocol declaration.
2019  if (FormatTok->Tok.is(tok::semi)) {
2020  nextToken();
2021  return addUnwrappedLine();
2022  }
2023 
2024  addUnwrappedLine();
2025  parseObjCUntilAtEnd();
2026 }
2027 
2028 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2029  bool IsImport = FormatTok->is(Keywords.kw_import);
2030  assert(IsImport || FormatTok->is(tok::kw_export));
2031  nextToken();
2032 
2033  // Consume the "default" in "export default class/function".
2034  if (FormatTok->is(tok::kw_default))
2035  nextToken();
2036 
2037  // Consume "async function", "function" and "default function", so that these
2038  // get parsed as free-standing JS functions, i.e. do not require a trailing
2039  // semicolon.
2040  if (FormatTok->is(Keywords.kw_async))
2041  nextToken();
2042  if (FormatTok->is(Keywords.kw_function)) {
2043  nextToken();
2044  return;
2045  }
2046 
2047  // For imports, `export *`, `export {...}`, consume the rest of the line up
2048  // to the terminating `;`. For everything else, just return and continue
2049  // parsing the structural element, i.e. the declaration or expression for
2050  // `export default`.
2051  if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2052  !FormatTok->isStringLiteral())
2053  return;
2054 
2055  while (!eof()) {
2056  if (FormatTok->is(tok::semi))
2057  return;
2058  if (Line->Tokens.size() == 0) {
2059  // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2060  // import statement should terminate.
2061  return;
2062  }
2063  if (FormatTok->is(tok::l_brace)) {
2064  FormatTok->BlockKind = BK_Block;
2065  parseBracedList();
2066  } else {
2067  nextToken();
2068  }
2069  }
2070 }
2071 
2072 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2073  StringRef Prefix = "") {
2074  llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
2075  << (Line.InPPDirective ? " MACRO" : "") << ": ";
2076  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2077  E = Line.Tokens.end();
2078  I != E; ++I) {
2079  llvm::dbgs() << I->Tok->Tok.getName() << "["
2080  << "T=" << I->Tok->Type
2081  << ", OC=" << I->Tok->OriginalColumn << "] ";
2082  }
2083  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2084  E = Line.Tokens.end();
2085  I != E; ++I) {
2086  const UnwrappedLineNode &Node = *I;
2088  I = Node.Children.begin(),
2089  E = Node.Children.end();
2090  I != E; ++I) {
2091  printDebugInfo(*I, "\nChild: ");
2092  }
2093  }
2094  llvm::dbgs() << "\n";
2095 }
2096 
2097 void UnwrappedLineParser::addUnwrappedLine() {
2098  if (Line->Tokens.empty())
2099  return;
2100  DEBUG({
2101  if (CurrentLines == &Lines)
2102  printDebugInfo(*Line);
2103  });
2104  CurrentLines->push_back(std::move(*Line));
2105  Line->Tokens.clear();
2106  Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2107  if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2108  CurrentLines->append(
2109  std::make_move_iterator(PreprocessorDirectives.begin()),
2110  std::make_move_iterator(PreprocessorDirectives.end()));
2111  PreprocessorDirectives.clear();
2112  }
2113 }
2114 
2115 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2116 
2117 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2118  return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2119  FormatTok.NewlinesBefore > 0;
2120 }
2121 
2122 // Checks if \p FormatTok is a line comment that continues the line comment
2123 // section on \p Line.
2124 static bool continuesLineCommentSection(const FormatToken &FormatTok,
2125  const UnwrappedLine &Line,
2126  llvm::Regex &CommentPragmasRegex) {
2127  if (Line.Tokens.empty())
2128  return false;
2129 
2130  StringRef IndentContent = FormatTok.TokenText;
2131  if (FormatTok.TokenText.startswith("//") ||
2132  FormatTok.TokenText.startswith("/*"))
2133  IndentContent = FormatTok.TokenText.substr(2);
2134  if (CommentPragmasRegex.match(IndentContent))
2135  return false;
2136 
2137  // If Line starts with a line comment, then FormatTok continues the comment
2138  // section if its original column is greater or equal to the original start
2139  // column of the line.
2140  //
2141  // Define the min column token of a line as follows: if a line ends in '{' or
2142  // contains a '{' followed by a line comment, then the min column token is
2143  // that '{'. Otherwise, the min column token of the line is the first token of
2144  // the line.
2145  //
2146  // If Line starts with a token other than a line comment, then FormatTok
2147  // continues the comment section if its original column is greater than the
2148  // original start column of the min column token of the line.
2149  //
2150  // For example, the second line comment continues the first in these cases:
2151  //
2152  // // first line
2153  // // second line
2154  //
2155  // and:
2156  //
2157  // // first line
2158  // // second line
2159  //
2160  // and:
2161  //
2162  // int i; // first line
2163  // // second line
2164  //
2165  // and:
2166  //
2167  // do { // first line
2168  // // second line
2169  // int i;
2170  // } while (true);
2171  //
2172  // and:
2173  //
2174  // enum {
2175  // a, // first line
2176  // // second line
2177  // b
2178  // };
2179  //
2180  // The second line comment doesn't continue the first in these cases:
2181  //
2182  // // first line
2183  // // second line
2184  //
2185  // and:
2186  //
2187  // int i; // first line
2188  // // second line
2189  //
2190  // and:
2191  //
2192  // do { // first line
2193  // // second line
2194  // int i;
2195  // } while (true);
2196  //
2197  // and:
2198  //
2199  // enum {
2200  // a, // first line
2201  // // second line
2202  // };
2203  const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2204 
2205  // Scan for '{//'. If found, use the column of '{' as a min column for line
2206  // comment section continuation.
2207  const FormatToken *PreviousToken = nullptr;
2208  for (const UnwrappedLineNode &Node : Line.Tokens) {
2209  if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2210  isLineComment(*Node.Tok)) {
2211  MinColumnToken = PreviousToken;
2212  break;
2213  }
2214  PreviousToken = Node.Tok;
2215 
2216  // Grab the last newline preceding a token in this unwrapped line.
2217  if (Node.Tok->NewlinesBefore > 0) {
2218  MinColumnToken = Node.Tok;
2219  }
2220  }
2221  if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2222  MinColumnToken = PreviousToken;
2223  }
2224 
2225  return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2226  MinColumnToken);
2227 }
2228 
2229 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2230  bool JustComments = Line->Tokens.empty();
2232  I = CommentsBeforeNextToken.begin(),
2233  E = CommentsBeforeNextToken.end();
2234  I != E; ++I) {
2235  // Line comments that belong to the same line comment section are put on the
2236  // same line since later we might want to reflow content between them.
2237  // Additional fine-grained breaking of line comment sections is controlled
2238  // by the class BreakableLineCommentSection in case it is desirable to keep
2239  // several line comment sections in the same unwrapped line.
2240  //
2241  // FIXME: Consider putting separate line comment sections as children to the
2242  // unwrapped line instead.
2243  (*I)->ContinuesLineCommentSection =
2244  continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2245  if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2246  addUnwrappedLine();
2247  pushToken(*I);
2248  }
2249  if (NewlineBeforeNext && JustComments)
2250  addUnwrappedLine();
2251  CommentsBeforeNextToken.clear();
2252 }
2253 
2254 void UnwrappedLineParser::nextToken() {
2255  if (eof())
2256  return;
2257  flushComments(isOnNewLine(*FormatTok));
2258  pushToken(FormatTok);
2259  if (Style.Language != FormatStyle::LK_JavaScript)
2260  readToken();
2261  else
2262  readTokenWithJavaScriptASI();
2263 }
2264 
2265 const FormatToken *UnwrappedLineParser::getPreviousToken() {
2266  // FIXME: This is a dirty way to access the previous token. Find a better
2267  // solution.
2268  if (!Line || Line->Tokens.empty())
2269  return nullptr;
2270  return Line->Tokens.back().Tok;
2271 }
2272 
2273 void UnwrappedLineParser::distributeComments(
2274  const SmallVectorImpl<FormatToken *> &Comments,
2275  const FormatToken *NextTok) {
2276  // Whether or not a line comment token continues a line is controlled by
2277  // the method continuesLineCommentSection, with the following caveat:
2278  //
2279  // Define a trail of Comments to be a nonempty proper postfix of Comments such
2280  // that each comment line from the trail is aligned with the next token, if
2281  // the next token exists. If a trail exists, the beginning of the maximal
2282  // trail is marked as a start of a new comment section.
2283  //
2284  // For example in this code:
2285  //
2286  // int a; // line about a
2287  // // line 1 about b
2288  // // line 2 about b
2289  // int b;
2290  //
2291  // the two lines about b form a maximal trail, so there are two sections, the
2292  // first one consisting of the single comment "// line about a" and the
2293  // second one consisting of the next two comments.
2294  if (Comments.empty())
2295  return;
2296  bool ShouldPushCommentsInCurrentLine = true;
2297  bool HasTrailAlignedWithNextToken = false;
2298  unsigned StartOfTrailAlignedWithNextToken = 0;
2299  if (NextTok) {
2300  // We are skipping the first element intentionally.
2301  for (unsigned i = Comments.size() - 1; i > 0; --i) {
2302  if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2303  HasTrailAlignedWithNextToken = true;
2304  StartOfTrailAlignedWithNextToken = i;
2305  }
2306  }
2307  }
2308  for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2309  FormatToken *FormatTok = Comments[i];
2310  if (HasTrailAlignedWithNextToken &&
2311  i == StartOfTrailAlignedWithNextToken) {
2312  FormatTok->ContinuesLineCommentSection = false;
2313  } else {
2314  FormatTok->ContinuesLineCommentSection =
2315  continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2316  }
2317  if (!FormatTok->ContinuesLineCommentSection &&
2318  (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2319  ShouldPushCommentsInCurrentLine = false;
2320  }
2321  if (ShouldPushCommentsInCurrentLine) {
2322  pushToken(FormatTok);
2323  } else {
2324  CommentsBeforeNextToken.push_back(FormatTok);
2325  }
2326  }
2327 }
2328 
2329 void UnwrappedLineParser::readToken() {
2331  do {
2332  FormatTok = Tokens->getNextToken();
2333  assert(FormatTok);
2334  while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2335  (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2336  distributeComments(Comments, FormatTok);
2337  Comments.clear();
2338  // If there is an unfinished unwrapped line, we flush the preprocessor
2339  // directives only after that unwrapped line was finished later.
2340  bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2341  ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2342  // Comments stored before the preprocessor directive need to be output
2343  // before the preprocessor directive, at the same level as the
2344  // preprocessor directive, as we consider them to apply to the directive.
2345  flushComments(isOnNewLine(*FormatTok));
2346  parsePPDirective();
2347  }
2348  while (FormatTok->Type == TT_ConflictStart ||
2349  FormatTok->Type == TT_ConflictEnd ||
2350  FormatTok->Type == TT_ConflictAlternative) {
2351  if (FormatTok->Type == TT_ConflictStart) {
2352  conditionalCompilationStart(/*Unreachable=*/false);
2353  } else if (FormatTok->Type == TT_ConflictAlternative) {
2354  conditionalCompilationAlternative();
2355  } else if (FormatTok->Type == TT_ConflictEnd) {
2356  conditionalCompilationEnd();
2357  }
2358  FormatTok = Tokens->getNextToken();
2359  FormatTok->MustBreakBefore = true;
2360  }
2361 
2362  if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
2363  !Line->InPPDirective) {
2364  continue;
2365  }
2366 
2367  if (!FormatTok->Tok.is(tok::comment)) {
2368  distributeComments(Comments, FormatTok);
2369  Comments.clear();
2370  return;
2371  }
2372 
2373  Comments.push_back(FormatTok);
2374  } while (!eof());
2375 
2376  distributeComments(Comments, nullptr);
2377  Comments.clear();
2378 }
2379 
2380 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2381  Line->Tokens.push_back(UnwrappedLineNode(Tok));
2382  if (MustBreakBeforeNextToken) {
2383  Line->Tokens.back().Tok->MustBreakBefore = true;
2384  MustBreakBeforeNextToken = false;
2385  }
2386 }
2387 
2388 } // end namespace format
2389 } // end namespace clang
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T -> getSizeExpr()))
SmallVector< UnwrappedLine, 0 > Children
static bool continuesLineCommentSection(const FormatToken &FormatTok, const UnwrappedLine &Line, llvm::Regex &CommentPragmasRegex)
bool AfterUnion
Wrap union definitions.
Definition: Format.h:658
Indent in all namespaces.
Definition: Format.h:1131
static bool isIIFE(const UnwrappedLine &Line, const AdditionalKeywords &Keywords)
Token Tok
The Token.
Definition: FormatToken.h:121
Should be used for Protocol Buffers (https://developers.google.com/protocol-buffers/).
Definition: Format.h:1047
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Definition: Token.h:95
bool IndentCaseLabels
Indent case labels one level from the switch statement.
Definition: Format.h:951
unsigned OriginalColumn
The original 0-based column of this token, including expanded tabs.
Definition: FormatToken.h:212
static LLVM_ATTRIBUTE_UNUSED void printDebugInfo(const UnwrappedLine &Line, StringRef Prefix="")
StringRef P
Parser - This implements a parser for the C family of languages.
Definition: Parser.h:57
static bool mustBeJSIdent(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
bool IsFirst
Indicates that this is the first token of the file.
Definition: FormatToken.h:153
bool isAnyIdentifier() const
Return true if this is a raw identifier (when lexing in raw mode) or a non-keyword identifier (when l...
Definition: Token.h:107
bool isBinaryOperator() const
Definition: FormatToken.h:385
virtual void consumeUnwrappedLine(const UnwrappedLine &Line)=0
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:127
tok::TokenKind getKind() const
Definition: Token.h:90
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:113
unsigned Level
The indent level of the UnwrappedLine.
bool closesScope() const
Returns whether Tok is )]} or a template closing >.
Definition: FormatToken.h:355
bool AfterObjCDeclaration
Wrap ObjC definitions (@autoreleasepool, interfaces, ..).
Definition: Format.h:630
bool IndentBraces
Indent the wrapped braces themselves.
Definition: Format.h:690
Should be used for Java.
Definition: Format.h:1040
static bool tokenCanStartNewLine(const clang::Token &Tok)
Token - This structure provides full information about a lexed token.
Definition: Token.h:35
void setKind(tok::TokenKind K)
Definition: Token.h:91
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
Definition: Lexer.cpp:45
bool isNot(T Kind) const
Definition: FormatToken.h:310
NamespaceIndentationKind NamespaceIndentation
The indentation used for namespaces.
Definition: Format.h:1135
static bool isGoogScope(const UnwrappedLine &Line)
virtual FormatToken * getNextToken()=0
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...
CompoundStatementIndenter(UnwrappedLineParser *Parser, const FormatStyle &Style, unsigned &LineLevel)
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:303
ScopedLineState(UnwrappedLineParser &Parser, bool SwitchToPreprocessorLines=false)
std::list< UnwrappedLineNode > Tokens
The Tokens comprising this UnwrappedLine.
Should be used for JavaScript.
Definition: Format.h:1042
ContinuationIndenter * Indenter
StateNode * Previous
static const size_t kInvalidIndex
bool InPPDirective
Whether this UnwrappedLine is part of a preprocessor directive.
BraceWrappingFlags BraceWrapping
Control of individual brace wrapping cases.
Definition: Format.h:697
UnwrappedLineParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, ArrayRef< FormatToken *> Tokens, UnwrappedLineConsumer &Callback)
bool AfterFunction
Wrap function definitions.
Definition: Format.h:612
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:117
SourceLocation getEnd() const
#define false
Definition: stdbool.h:33
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:294
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:177
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:604
SourceRange WhitespaceRange
The range of the whitespace immediately preceding the Token.
Definition: FormatToken.h:134
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:165
static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
virtual FormatToken * setPosition(unsigned Position)=0
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
Definition: Lexer.cpp:54
bool isSimpleTypeSpecifier() const
Determine whether the token is a simple-type-specifier.
Definition: FormatToken.cpp:41
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:46
static bool ShouldBreakBeforeBrace(const FormatStyle &Style, const FormatToken &InitialToken)
Indent only in inner namespaces (nested in other namespaces).
Definition: Format.h:1121
LanguageKind Language
Language, this format style is targeted at.
Definition: Format.h:1054
ast_type_traits::DynTypedNode Node
bool isNot(tok::TokenKind K) const
Definition: Token.h:96
/file This file defines classes for searching and analyzing source code clones.
bool Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside)...
Definition: FormatToken.h:292
Should be used for TableGen code.
Definition: Format.h:1049
tok::PPKeywordKind getPPKeywordID() const
Return the preprocessor keyword ID for this identifier.
bool AfterStruct
Wrap struct definitions.
Definition: Format.h:644
virtual unsigned getPosition()=0
bool startsSequence(A K1, Ts... Tokens) const
true if this token starts a sequence with the given tokens in order, following the Next pointers...
Definition: FormatToken.h:315
bool AfterControlStatement
Wrap control statements (if/for/while/switch/..).
Definition: Format.h:584
unsigned kind
All of the diagnostics that can be emitted by the frontend.
Definition: DiagnosticIDs.h:43
bool MustBreakBefore
Whether there must be a line break before this token.
Definition: FormatToken.h:159
bool AfterClass
Wrap class definitions.
Definition: Format.h:566
StringRef Text
Definition: Format.cpp:1281
bool ContinuesLineCommentSection
Does this line comment continue a line comment section?
Definition: FormatToken.h:271
bool isStringLiteral() const
Definition: FormatToken.h:326
SourceLocation getBegin() const
bool AfterNamespace
Wrap namespace definitions.
Definition: Format.h:628
bool HasUnescapedNewline
Whether there is at least one unescaped newline before the Token.
Definition: FormatToken.h:131
BraceBlockKind BlockKind
Contains the kind of block if this token is a brace.
Definition: FormatToken.h:171
void startToken()
Reset all flags to cleared.
Definition: Token.h:169