clang  6.0.0svn
UnwrappedLineParser.cpp
Go to the documentation of this file.
1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "format-parser"
22 
23 namespace clang {
24 namespace format {
25 
27 public:
28  virtual ~FormatTokenSource() {}
29  virtual FormatToken *getNextToken() = 0;
30 
31  virtual unsigned getPosition() = 0;
32  virtual FormatToken *setPosition(unsigned Position) = 0;
33 };
34 
35 namespace {
36 
37 class ScopedDeclarationState {
38 public:
39  ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
40  bool MustBeDeclaration)
41  : Line(Line), Stack(Stack) {
42  Line.MustBeDeclaration = MustBeDeclaration;
43  Stack.push_back(MustBeDeclaration);
44  }
45  ~ScopedDeclarationState() {
46  Stack.pop_back();
47  if (!Stack.empty())
48  Line.MustBeDeclaration = Stack.back();
49  else
50  Line.MustBeDeclaration = true;
51  }
52 
53 private:
55  std::vector<bool> &Stack;
56 };
57 
58 static bool isLineComment(const FormatToken &FormatTok) {
59  return FormatTok.is(tok::comment) &&
60  FormatTok.TokenText.startswith("//");
61 }
62 
63 // Checks if \p FormatTok is a line comment that continues the line comment
64 // \p Previous. The original column of \p MinColumnToken is used to determine
65 // whether \p FormatTok is indented enough to the right to continue \p Previous.
66 static bool continuesLineComment(const FormatToken &FormatTok,
67  const FormatToken *Previous,
68  const FormatToken *MinColumnToken) {
69  if (!Previous || !MinColumnToken)
70  return false;
71  unsigned MinContinueColumn =
72  MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
73  return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
74  isLineComment(*Previous) &&
75  FormatTok.OriginalColumn >= MinContinueColumn;
76 }
77 
78 class ScopedMacroState : public FormatTokenSource {
79 public:
80  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
81  FormatToken *&ResetToken)
82  : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
83  PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
84  Token(nullptr), PreviousToken(nullptr) {
85  TokenSource = this;
86  Line.Level = 0;
87  Line.InPPDirective = true;
88  }
89 
90  ~ScopedMacroState() override {
91  TokenSource = PreviousTokenSource;
92  ResetToken = Token;
93  Line.InPPDirective = false;
94  Line.Level = PreviousLineLevel;
95  }
96 
97  FormatToken *getNextToken() override {
98  // The \c UnwrappedLineParser guards against this by never calling
99  // \c getNextToken() after it has encountered the first eof token.
100  assert(!eof());
101  PreviousToken = Token;
102  Token = PreviousTokenSource->getNextToken();
103  if (eof())
104  return getFakeEOF();
105  return Token;
106  }
107 
108  unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
109 
110  FormatToken *setPosition(unsigned Position) override {
111  PreviousToken = nullptr;
112  Token = PreviousTokenSource->setPosition(Position);
113  return Token;
114  }
115 
116 private:
117  bool eof() {
118  return Token && Token->HasUnescapedNewline &&
119  !continuesLineComment(*Token, PreviousToken,
120  /*MinColumnToken=*/PreviousToken);
121  }
122 
123  FormatToken *getFakeEOF() {
124  static bool EOFInitialized = false;
125  static FormatToken FormatTok;
126  if (!EOFInitialized) {
127  FormatTok.Tok.startToken();
128  FormatTok.Tok.setKind(tok::eof);
129  EOFInitialized = true;
130  }
131  return &FormatTok;
132  }
133 
135  FormatTokenSource *&TokenSource;
136  FormatToken *&ResetToken;
137  unsigned PreviousLineLevel;
138  FormatTokenSource *PreviousTokenSource;
139 
141  FormatToken *PreviousToken;
142 };
143 
144 } // end anonymous namespace
145 
147 public:
149  bool SwitchToPreprocessorLines = false)
150  : Parser(Parser), OriginalLines(Parser.CurrentLines) {
151  if (SwitchToPreprocessorLines)
152  Parser.CurrentLines = &Parser.PreprocessorDirectives;
153  else if (!Parser.Line->Tokens.empty())
154  Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
155  PreBlockLine = std::move(Parser.Line);
156  Parser.Line = llvm::make_unique<UnwrappedLine>();
157  Parser.Line->Level = PreBlockLine->Level;
158  Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
159  }
160 
162  if (!Parser.Line->Tokens.empty()) {
163  Parser.addUnwrappedLine();
164  }
165  assert(Parser.Line->Tokens.empty());
166  Parser.Line = std::move(PreBlockLine);
167  if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
168  Parser.MustBreakBeforeNextToken = true;
169  Parser.CurrentLines = OriginalLines;
170  }
171 
172 private:
174 
175  std::unique_ptr<UnwrappedLine> PreBlockLine;
176  SmallVectorImpl<UnwrappedLine> *OriginalLines;
177 };
178 
180 public:
182  const FormatStyle &Style, unsigned &LineLevel)
183  : LineLevel(LineLevel), OldLineLevel(LineLevel) {
185  Parser->addUnwrappedLine();
186  if (Style.BraceWrapping.IndentBraces)
187  ++LineLevel;
188  }
189  ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
190 
191 private:
192  unsigned &LineLevel;
193  unsigned OldLineLevel;
194 };
195 
196 namespace {
197 
198 class IndexedTokenSource : public FormatTokenSource {
199 public:
200  IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
201  : Tokens(Tokens), Position(-1) {}
202 
203  FormatToken *getNextToken() override {
204  ++Position;
205  return Tokens[Position];
206  }
207 
208  unsigned getPosition() override {
209  assert(Position >= 0);
210  return Position;
211  }
212 
213  FormatToken *setPosition(unsigned P) override {
214  Position = P;
215  return Tokens[Position];
216  }
217 
218  void reset() { Position = -1; }
219 
220 private:
222  int Position;
223 };
224 
225 } // end anonymous namespace
226 
228  const AdditionalKeywords &Keywords,
230  UnwrappedLineConsumer &Callback)
231  : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
232  CurrentLines(&Lines), Style(Style), Keywords(Keywords),
233  CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
234  Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
235 
236 void UnwrappedLineParser::reset() {
237  PPBranchLevel = -1;
238  Line.reset(new UnwrappedLine);
239  CommentsBeforeNextToken.clear();
240  FormatTok = nullptr;
241  MustBreakBeforeNextToken = false;
242  PreprocessorDirectives.clear();
243  CurrentLines = &Lines;
244  DeclarationScopeStack.clear();
245  PPStack.clear();
246 }
247 
249  IndexedTokenSource TokenSource(AllTokens);
250  do {
251  DEBUG(llvm::dbgs() << "----\n");
252  reset();
253  Tokens = &TokenSource;
254  TokenSource.reset();
255 
256  readToken();
257  parseFile();
258  // Create line with eof token.
259  pushToken(FormatTok);
260  addUnwrappedLine();
261 
262  for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
263  E = Lines.end();
264  I != E; ++I) {
265  Callback.consumeUnwrappedLine(*I);
266  }
267  Callback.finishRun();
268  Lines.clear();
269  while (!PPLevelBranchIndex.empty() &&
270  PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
271  PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
272  PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
273  }
274  if (!PPLevelBranchIndex.empty()) {
275  ++PPLevelBranchIndex.back();
276  assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
277  assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
278  }
279  } while (!PPLevelBranchIndex.empty());
280 }
281 
282 void UnwrappedLineParser::parseFile() {
283  // The top-level context in a file always has declarations, except for pre-
284  // processor directives and JavaScript files.
285  bool MustBeDeclaration =
286  !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
287  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
288  MustBeDeclaration);
289  if (Style.Language == FormatStyle::LK_TextProto)
290  parseBracedList();
291  else
292  parseLevel(/*HasOpeningBrace=*/false);
293  // Make sure to format the remaining tokens.
294  flushComments(true);
295  addUnwrappedLine();
296 }
297 
298 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
299  bool SwitchLabelEncountered = false;
300  do {
301  tok::TokenKind kind = FormatTok->Tok.getKind();
302  if (FormatTok->Type == TT_MacroBlockBegin) {
303  kind = tok::l_brace;
304  } else if (FormatTok->Type == TT_MacroBlockEnd) {
305  kind = tok::r_brace;
306  }
307 
308  switch (kind) {
309  case tok::comment:
310  nextToken();
311  addUnwrappedLine();
312  break;
313  case tok::l_brace:
314  // FIXME: Add parameter whether this can happen - if this happens, we must
315  // be in a non-declaration context.
316  if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
317  continue;
318  parseBlock(/*MustBeDeclaration=*/false);
319  addUnwrappedLine();
320  break;
321  case tok::r_brace:
322  if (HasOpeningBrace)
323  return;
324  nextToken();
325  addUnwrappedLine();
326  break;
327  case tok::kw_default:
328  case tok::kw_case:
329  if (!SwitchLabelEncountered &&
330  (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
331  ++Line->Level;
332  SwitchLabelEncountered = true;
333  parseStructuralElement();
334  break;
335  default:
336  parseStructuralElement();
337  break;
338  }
339  } while (!eof());
340 }
341 
342 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
343  // We'll parse forward through the tokens until we hit
344  // a closing brace or eof - note that getNextToken() will
345  // parse macros, so this will magically work inside macro
346  // definitions, too.
347  unsigned StoredPosition = Tokens->getPosition();
348  FormatToken *Tok = FormatTok;
349  const FormatToken *PrevTok = getPreviousToken();
350  // Keep a stack of positions of lbrace tokens. We will
351  // update information about whether an lbrace starts a
352  // braced init list or a different block during the loop.
353  SmallVector<FormatToken *, 8> LBraceStack;
354  assert(Tok->Tok.is(tok::l_brace));
355  do {
356  // Get next non-comment token.
357  FormatToken *NextTok;
358  unsigned ReadTokens = 0;
359  do {
360  NextTok = Tokens->getNextToken();
361  ++ReadTokens;
362  } while (NextTok->is(tok::comment));
363 
364  switch (Tok->Tok.getKind()) {
365  case tok::l_brace:
366  if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
367  if (PrevTok->is(tok::colon))
368  // A colon indicates this code is in a type, or a braced list
369  // following a label in an object literal ({a: {b: 1}}). The code
370  // below could be confused by semicolons between the individual
371  // members in a type member list, which would normally trigger
372  // BK_Block. In both cases, this must be parsed as an inline braced
373  // init.
374  Tok->BlockKind = BK_BracedInit;
375  else if (PrevTok->is(tok::r_paren))
376  // `) { }` can only occur in function or method declarations in JS.
377  Tok->BlockKind = BK_Block;
378  } else {
379  Tok->BlockKind = BK_Unknown;
380  }
381  LBraceStack.push_back(Tok);
382  break;
383  case tok::r_brace:
384  if (LBraceStack.empty())
385  break;
386  if (LBraceStack.back()->BlockKind == BK_Unknown) {
387  bool ProbablyBracedList = false;
388  if (Style.Language == FormatStyle::LK_Proto) {
389  ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
390  } else {
391  // Using OriginalColumn to distinguish between ObjC methods and
392  // binary operators is a bit hacky.
393  bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
394  NextTok->OriginalColumn == 0;
395 
396  // If there is a comma, semicolon or right paren after the closing
397  // brace, we assume this is a braced initializer list. Note that
398  // regardless how we mark inner braces here, we will overwrite the
399  // BlockKind later if we parse a braced list (where all blocks
400  // inside are by default braced lists), or when we explicitly detect
401  // blocks (for example while parsing lambdas).
402  // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
403  // braced list in JS.
404  ProbablyBracedList =
406  NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
407  Keywords.kw_as)) ||
408  (Style.isCpp() && NextTok->is(tok::l_paren)) ||
409  NextTok->isOneOf(tok::comma, tok::period, tok::colon,
410  tok::r_paren, tok::r_square, tok::l_brace,
411  tok::l_square, tok::ellipsis) ||
412  (NextTok->is(tok::identifier) &&
413  !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
414  (NextTok->is(tok::semi) &&
415  (!ExpectClassBody || LBraceStack.size() != 1)) ||
416  (NextTok->isBinaryOperator() && !NextIsObjCMethod);
417  }
418  if (ProbablyBracedList) {
419  Tok->BlockKind = BK_BracedInit;
420  LBraceStack.back()->BlockKind = BK_BracedInit;
421  } else {
422  Tok->BlockKind = BK_Block;
423  LBraceStack.back()->BlockKind = BK_Block;
424  }
425  }
426  LBraceStack.pop_back();
427  break;
428  case tok::at:
429  case tok::semi:
430  case tok::kw_if:
431  case tok::kw_while:
432  case tok::kw_for:
433  case tok::kw_switch:
434  case tok::kw_try:
435  case tok::kw___try:
436  if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
437  LBraceStack.back()->BlockKind = BK_Block;
438  break;
439  default:
440  break;
441  }
442  PrevTok = Tok;
443  Tok = NextTok;
444  } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
445 
446  // Assume other blocks for all unclosed opening braces.
447  for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
448  if (LBraceStack[i]->BlockKind == BK_Unknown)
449  LBraceStack[i]->BlockKind = BK_Block;
450  }
451 
452  FormatTok = Tokens->setPosition(StoredPosition);
453 }
454 
455 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
456  bool MunchSemi) {
457  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
458  "'{' or macro block token expected");
459  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
460  FormatTok->BlockKind = BK_Block;
461 
462  unsigned InitialLevel = Line->Level;
463  nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
464 
465  if (MacroBlock && FormatTok->is(tok::l_paren))
466  parseParens();
467 
468  addUnwrappedLine();
469  size_t OpeningLineIndex = CurrentLines->empty()
471  : (CurrentLines->size() - 1);
472 
473  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
474  MustBeDeclaration);
475  if (AddLevel)
476  ++Line->Level;
477  parseLevel(/*HasOpeningBrace=*/true);
478 
479  if (eof())
480  return;
481 
482  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
483  : !FormatTok->is(tok::r_brace)) {
484  Line->Level = InitialLevel;
485  FormatTok->BlockKind = BK_Block;
486  return;
487  }
488 
489  // Munch the closing brace.
490  nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
491 
492  if (MacroBlock && FormatTok->is(tok::l_paren))
493  parseParens();
494 
495  if (MunchSemi && FormatTok->Tok.is(tok::semi))
496  nextToken();
497  Line->Level = InitialLevel;
498  Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
499  if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
500  // Update the opening line to add the forward reference as well
501  (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex =
502  CurrentLines->size() - 1;
503  }
504 }
505 
506 static bool isGoogScope(const UnwrappedLine &Line) {
507  // FIXME: Closure-library specific stuff should not be hard-coded but be
508  // configurable.
509  if (Line.Tokens.size() < 4)
510  return false;
511  auto I = Line.Tokens.begin();
512  if (I->Tok->TokenText != "goog")
513  return false;
514  ++I;
515  if (I->Tok->isNot(tok::period))
516  return false;
517  ++I;
518  if (I->Tok->TokenText != "scope")
519  return false;
520  ++I;
521  return I->Tok->is(tok::l_paren);
522 }
523 
524 static bool isIIFE(const UnwrappedLine &Line,
525  const AdditionalKeywords &Keywords) {
526  // Look for the start of an immediately invoked anonymous function.
527  // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
528  // This is commonly done in JavaScript to create a new, anonymous scope.
529  // Example: (function() { ... })()
530  if (Line.Tokens.size() < 3)
531  return false;
532  auto I = Line.Tokens.begin();
533  if (I->Tok->isNot(tok::l_paren))
534  return false;
535  ++I;
536  if (I->Tok->isNot(Keywords.kw_function))
537  return false;
538  ++I;
539  return I->Tok->is(tok::l_paren);
540 }
541 
542 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
543  const FormatToken &InitialToken) {
544  if (InitialToken.is(tok::kw_namespace))
545  return Style.BraceWrapping.AfterNamespace;
546  if (InitialToken.is(tok::kw_class))
547  return Style.BraceWrapping.AfterClass;
548  if (InitialToken.is(tok::kw_union))
549  return Style.BraceWrapping.AfterUnion;
550  if (InitialToken.is(tok::kw_struct))
551  return Style.BraceWrapping.AfterStruct;
552  return false;
553 }
554 
555 void UnwrappedLineParser::parseChildBlock() {
556  FormatTok->BlockKind = BK_Block;
557  nextToken();
558  {
559  bool SkipIndent =
561  (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
562  ScopedLineState LineState(*this);
563  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
564  /*MustBeDeclaration=*/false);
565  Line->Level += SkipIndent ? 0 : 1;
566  parseLevel(/*HasOpeningBrace=*/true);
567  flushComments(isOnNewLine(*FormatTok));
568  Line->Level -= SkipIndent ? 0 : 1;
569  }
570  nextToken();
571 }
572 
573 void UnwrappedLineParser::parsePPDirective() {
574  assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
575  ScopedMacroState MacroState(*Line, Tokens, FormatTok);
576  nextToken();
577 
578  if (!FormatTok->Tok.getIdentifierInfo()) {
579  parsePPUnknown();
580  return;
581  }
582 
583  switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
584  case tok::pp_define:
585  parsePPDefine();
586  return;
587  case tok::pp_if:
588  parsePPIf(/*IfDef=*/false);
589  break;
590  case tok::pp_ifdef:
591  case tok::pp_ifndef:
592  parsePPIf(/*IfDef=*/true);
593  break;
594  case tok::pp_else:
595  parsePPElse();
596  break;
597  case tok::pp_elif:
598  parsePPElIf();
599  break;
600  case tok::pp_endif:
601  parsePPEndIf();
602  break;
603  default:
604  parsePPUnknown();
605  break;
606  }
607 }
608 
609 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
610  if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))
611  PPStack.push_back(PP_Unreachable);
612  else
613  PPStack.push_back(PP_Conditional);
614 }
615 
616 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
617  ++PPBranchLevel;
618  assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
619  if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
620  PPLevelBranchIndex.push_back(0);
621  PPLevelBranchCount.push_back(0);
622  }
623  PPChainBranchIndex.push(0);
624  bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
625  conditionalCompilationCondition(Unreachable || Skip);
626 }
627 
628 void UnwrappedLineParser::conditionalCompilationAlternative() {
629  if (!PPStack.empty())
630  PPStack.pop_back();
631  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
632  if (!PPChainBranchIndex.empty())
633  ++PPChainBranchIndex.top();
634  conditionalCompilationCondition(
635  PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
636  PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
637 }
638 
639 void UnwrappedLineParser::conditionalCompilationEnd() {
640  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
641  if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
642  if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
643  PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
644  }
645  }
646  // Guard against #endif's without #if.
647  if (PPBranchLevel > 0)
648  --PPBranchLevel;
649  if (!PPChainBranchIndex.empty())
650  PPChainBranchIndex.pop();
651  if (!PPStack.empty())
652  PPStack.pop_back();
653 }
654 
655 void UnwrappedLineParser::parsePPIf(bool IfDef) {
656  bool IfNDef = FormatTok->is(tok::pp_ifndef);
657  nextToken();
658  bool Unreachable = false;
659  if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
660  Unreachable = true;
661  if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
662  Unreachable = true;
663  conditionalCompilationStart(Unreachable);
664  parsePPUnknown();
665 }
666 
667 void UnwrappedLineParser::parsePPElse() {
668  conditionalCompilationAlternative();
669  parsePPUnknown();
670 }
671 
672 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
673 
674 void UnwrappedLineParser::parsePPEndIf() {
675  conditionalCompilationEnd();
676  parsePPUnknown();
677 }
678 
679 void UnwrappedLineParser::parsePPDefine() {
680  nextToken();
681 
682  if (FormatTok->Tok.getKind() != tok::identifier) {
683  parsePPUnknown();
684  return;
685  }
686  nextToken();
687  if (FormatTok->Tok.getKind() == tok::l_paren &&
688  FormatTok->WhitespaceRange.getBegin() ==
689  FormatTok->WhitespaceRange.getEnd()) {
690  parseParens();
691  }
692  addUnwrappedLine();
693  Line->Level = 1;
694 
695  // Errors during a preprocessor directive can only affect the layout of the
696  // preprocessor directive, and thus we ignore them. An alternative approach
697  // would be to use the same approach we use on the file level (no
698  // re-indentation if there was a structural error) within the macro
699  // definition.
700  parseFile();
701 }
702 
703 void UnwrappedLineParser::parsePPUnknown() {
704  do {
705  nextToken();
706  } while (!eof());
707  addUnwrappedLine();
708 }
709 
710 // Here we blacklist certain tokens that are not usually the first token in an
711 // unwrapped line. This is used in attempt to distinguish macro calls without
712 // trailing semicolons from other constructs split to several lines.
713 static bool tokenCanStartNewLine(const clang::Token &Tok) {
714  // Semicolon can be a null-statement, l_square can be a start of a macro or
715  // a C++11 attribute, but this doesn't seem to be common.
716  return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
717  Tok.isNot(tok::l_square) &&
718  // Tokens that can only be used as binary operators and a part of
719  // overloaded operator names.
720  Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
721  Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
722  Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
723  Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
724  Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
725  Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
726  Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
727  Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
728  Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
729  Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
730  Tok.isNot(tok::lesslessequal) &&
731  // Colon is used in labels, base class lists, initializer lists,
732  // range-based for loops, ternary operator, but should never be the
733  // first token in an unwrapped line.
734  Tok.isNot(tok::colon) &&
735  // 'noexcept' is a trailing annotation.
736  Tok.isNot(tok::kw_noexcept);
737 }
738 
739 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
740  const FormatToken *FormatTok) {
741  // FIXME: This returns true for C/C++ keywords like 'struct'.
742  return FormatTok->is(tok::identifier) &&
743  (FormatTok->Tok.getIdentifierInfo() == nullptr ||
744  !FormatTok->isOneOf(
745  Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
746  Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
747  Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
748  Keywords.kw_let, Keywords.kw_var, tok::kw_const,
749  Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
750  Keywords.kw_instanceof, Keywords.kw_interface,
751  Keywords.kw_throws, Keywords.kw_from));
752 }
753 
754 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
755  const FormatToken *FormatTok) {
756  return FormatTok->Tok.isLiteral() ||
757  FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
758  mustBeJSIdent(Keywords, FormatTok);
759 }
760 
761 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
762 // when encountered after a value (see mustBeJSIdentOrValue).
763 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
764  const FormatToken *FormatTok) {
765  return FormatTok->isOneOf(
766  tok::kw_return, Keywords.kw_yield,
767  // conditionals
768  tok::kw_if, tok::kw_else,
769  // loops
770  tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
771  // switch/case
772  tok::kw_switch, tok::kw_case,
773  // exceptions
774  tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
775  // declaration
776  tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
777  Keywords.kw_async, Keywords.kw_function,
778  // import/export
779  Keywords.kw_import, tok::kw_export);
780 }
781 
782 // readTokenWithJavaScriptASI reads the next token and terminates the current
783 // line if JavaScript Automatic Semicolon Insertion must
784 // happen between the current token and the next token.
785 //
786 // This method is conservative - it cannot cover all edge cases of JavaScript,
787 // but only aims to correctly handle certain well known cases. It *must not*
788 // return true in speculative cases.
789 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
790  FormatToken *Previous = FormatTok;
791  readToken();
792  FormatToken *Next = FormatTok;
793 
794  bool IsOnSameLine =
795  CommentsBeforeNextToken.empty()
796  ? Next->NewlinesBefore == 0
797  : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
798  if (IsOnSameLine)
799  return;
800 
801  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
802  bool PreviousStartsTemplateExpr =
803  Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
804  if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) {
805  // If the token before the previous one is an '@', the previous token is an
806  // annotation and can precede another identifier/value.
807  const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok;
808  if (PrePrevious->is(tok::at))
809  return;
810  }
811  if (Next->is(tok::exclaim) && PreviousMustBeValue)
812  return addUnwrappedLine();
813  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
814  bool NextEndsTemplateExpr =
815  Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
816  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
817  (PreviousMustBeValue ||
818  Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
819  tok::minusminus)))
820  return addUnwrappedLine();
821  if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next))
822  return addUnwrappedLine();
823 }
824 
825 void UnwrappedLineParser::parseStructuralElement() {
826  assert(!FormatTok->is(tok::l_brace));
827  if (Style.Language == FormatStyle::LK_TableGen &&
828  FormatTok->is(tok::pp_include)) {
829  nextToken();
830  if (FormatTok->is(tok::string_literal))
831  nextToken();
832  addUnwrappedLine();
833  return;
834  }
835  switch (FormatTok->Tok.getKind()) {
836  case tok::at:
837  nextToken();
838  if (FormatTok->Tok.is(tok::l_brace)) {
839  nextToken();
840  parseBracedList();
841  break;
842  }
843  switch (FormatTok->Tok.getObjCKeywordID()) {
844  case tok::objc_public:
845  case tok::objc_protected:
846  case tok::objc_package:
847  case tok::objc_private:
848  return parseAccessSpecifier();
849  case tok::objc_interface:
850  case tok::objc_implementation:
851  return parseObjCInterfaceOrImplementation();
852  case tok::objc_protocol:
853  return parseObjCProtocol();
854  case tok::objc_end:
855  return; // Handled by the caller.
856  case tok::objc_optional:
857  case tok::objc_required:
858  nextToken();
859  addUnwrappedLine();
860  return;
861  case tok::objc_autoreleasepool:
862  nextToken();
863  if (FormatTok->Tok.is(tok::l_brace)) {
865  addUnwrappedLine();
866  parseBlock(/*MustBeDeclaration=*/false);
867  }
868  addUnwrappedLine();
869  return;
870  case tok::objc_try:
871  // This branch isn't strictly necessary (the kw_try case below would
872  // do this too after the tok::at is parsed above). But be explicit.
873  parseTryCatch();
874  return;
875  default:
876  break;
877  }
878  break;
879  case tok::kw_asm:
880  nextToken();
881  if (FormatTok->is(tok::l_brace)) {
882  FormatTok->Type = TT_InlineASMBrace;
883  nextToken();
884  while (FormatTok && FormatTok->isNot(tok::eof)) {
885  if (FormatTok->is(tok::r_brace)) {
886  FormatTok->Type = TT_InlineASMBrace;
887  nextToken();
888  addUnwrappedLine();
889  break;
890  }
891  FormatTok->Finalized = true;
892  nextToken();
893  }
894  }
895  break;
896  case tok::kw_namespace:
897  parseNamespace();
898  return;
899  case tok::kw_inline:
900  nextToken();
901  if (FormatTok->Tok.is(tok::kw_namespace)) {
902  parseNamespace();
903  return;
904  }
905  break;
906  case tok::kw_public:
907  case tok::kw_protected:
908  case tok::kw_private:
909  if (Style.Language == FormatStyle::LK_Java ||
911  nextToken();
912  else
913  parseAccessSpecifier();
914  return;
915  case tok::kw_if:
916  parseIfThenElse();
917  return;
918  case tok::kw_for:
919  case tok::kw_while:
920  parseForOrWhileLoop();
921  return;
922  case tok::kw_do:
923  parseDoWhile();
924  return;
925  case tok::kw_switch:
926  parseSwitch();
927  return;
928  case tok::kw_default:
929  nextToken();
930  parseLabel();
931  return;
932  case tok::kw_case:
933  parseCaseLabel();
934  return;
935  case tok::kw_try:
936  case tok::kw___try:
937  parseTryCatch();
938  return;
939  case tok::kw_extern:
940  nextToken();
941  if (FormatTok->Tok.is(tok::string_literal)) {
942  nextToken();
943  if (FormatTok->Tok.is(tok::l_brace)) {
944  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
945  addUnwrappedLine();
946  return;
947  }
948  }
949  break;
950  case tok::kw_export:
951  if (Style.Language == FormatStyle::LK_JavaScript) {
952  parseJavaScriptEs6ImportExport();
953  return;
954  }
955  break;
956  case tok::identifier:
957  if (FormatTok->is(TT_ForEachMacro)) {
958  parseForOrWhileLoop();
959  return;
960  }
961  if (FormatTok->is(TT_MacroBlockBegin)) {
962  parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
963  /*MunchSemi=*/false);
964  return;
965  }
966  if (FormatTok->is(Keywords.kw_import)) {
967  if (Style.Language == FormatStyle::LK_JavaScript) {
968  parseJavaScriptEs6ImportExport();
969  return;
970  }
971  if (Style.Language == FormatStyle::LK_Proto) {
972  nextToken();
973  if (FormatTok->is(tok::kw_public))
974  nextToken();
975  if (!FormatTok->is(tok::string_literal))
976  return;
977  nextToken();
978  if (FormatTok->is(tok::semi))
979  nextToken();
980  addUnwrappedLine();
981  return;
982  }
983  }
984  if (Style.isCpp() &&
985  FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
986  Keywords.kw_slots, Keywords.kw_qslots)) {
987  nextToken();
988  if (FormatTok->is(tok::colon)) {
989  nextToken();
990  addUnwrappedLine();
991  return;
992  }
993  }
994  // In all other cases, parse the declaration.
995  break;
996  default:
997  break;
998  }
999  do {
1000  const FormatToken *Previous = getPreviousToken();
1001  switch (FormatTok->Tok.getKind()) {
1002  case tok::at:
1003  nextToken();
1004  if (FormatTok->Tok.is(tok::l_brace)) {
1005  nextToken();
1006  parseBracedList();
1007  }
1008  break;
1009  case tok::kw_enum:
1010  // Ignore if this is part of "template <enum ...".
1011  if (Previous && Previous->is(tok::less)) {
1012  nextToken();
1013  break;
1014  }
1015 
1016  // parseEnum falls through and does not yet add an unwrapped line as an
1017  // enum definition can start a structural element.
1018  if (!parseEnum())
1019  break;
1020  // This only applies for C++.
1021  if (!Style.isCpp()) {
1022  addUnwrappedLine();
1023  return;
1024  }
1025  break;
1026  case tok::kw_typedef:
1027  nextToken();
1028  if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1029  Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
1030  parseEnum();
1031  break;
1032  case tok::kw_struct:
1033  case tok::kw_union:
1034  case tok::kw_class:
1035  // parseRecord falls through and does not yet add an unwrapped line as a
1036  // record declaration or definition can start a structural element.
1037  parseRecord();
1038  // This does not apply for Java and JavaScript.
1039  if (Style.Language == FormatStyle::LK_Java ||
1041  if (FormatTok->is(tok::semi))
1042  nextToken();
1043  addUnwrappedLine();
1044  return;
1045  }
1046  break;
1047  case tok::period:
1048  nextToken();
1049  // In Java, classes have an implicit static member "class".
1050  if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1051  FormatTok->is(tok::kw_class))
1052  nextToken();
1053  if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
1054  FormatTok->Tok.getIdentifierInfo())
1055  // JavaScript only has pseudo keywords, all keywords are allowed to
1056  // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1057  nextToken();
1058  break;
1059  case tok::semi:
1060  nextToken();
1061  addUnwrappedLine();
1062  return;
1063  case tok::r_brace:
1064  addUnwrappedLine();
1065  return;
1066  case tok::l_paren:
1067  parseParens();
1068  break;
1069  case tok::kw_operator:
1070  nextToken();
1071  if (FormatTok->isBinaryOperator())
1072  nextToken();
1073  break;
1074  case tok::caret:
1075  nextToken();
1076  if (FormatTok->Tok.isAnyIdentifier() ||
1077  FormatTok->isSimpleTypeSpecifier())
1078  nextToken();
1079  if (FormatTok->is(tok::l_paren))
1080  parseParens();
1081  if (FormatTok->is(tok::l_brace))
1082  parseChildBlock();
1083  break;
1084  case tok::l_brace:
1085  if (!tryToParseBracedList()) {
1086  // A block outside of parentheses must be the last part of a
1087  // structural element.
1088  // FIXME: Figure out cases where this is not true, and add projections
1089  // for them (the one we know is missing are lambdas).
1090  if (Style.BraceWrapping.AfterFunction)
1091  addUnwrappedLine();
1092  FormatTok->Type = TT_FunctionLBrace;
1093  parseBlock(/*MustBeDeclaration=*/false);
1094  addUnwrappedLine();
1095  return;
1096  }
1097  // Otherwise this was a braced init list, and the structural
1098  // element continues.
1099  break;
1100  case tok::kw_try:
1101  // We arrive here when parsing function-try blocks.
1102  parseTryCatch();
1103  return;
1104  case tok::identifier: {
1105  if (FormatTok->is(TT_MacroBlockEnd)) {
1106  addUnwrappedLine();
1107  return;
1108  }
1109 
1110  // Function declarations (as opposed to function expressions) are parsed
1111  // on their own unwrapped line by continuing this loop. Function
1112  // expressions (functions that are not on their own line) must not create
1113  // a new unwrapped line, so they are special cased below.
1114  size_t TokenCount = Line->Tokens.size();
1115  if (Style.Language == FormatStyle::LK_JavaScript &&
1116  FormatTok->is(Keywords.kw_function) &&
1117  (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1118  Keywords.kw_async)))) {
1119  tryToParseJSFunction();
1120  break;
1121  }
1122  if ((Style.Language == FormatStyle::LK_JavaScript ||
1123  Style.Language == FormatStyle::LK_Java) &&
1124  FormatTok->is(Keywords.kw_interface)) {
1125  if (Style.Language == FormatStyle::LK_JavaScript) {
1126  // In JavaScript/TypeScript, "interface" can be used as a standalone
1127  // identifier, e.g. in `var interface = 1;`. If "interface" is
1128  // followed by another identifier, it is very likely to be an actual
1129  // interface declaration.
1130  unsigned StoredPosition = Tokens->getPosition();
1131  FormatToken *Next = Tokens->getNextToken();
1132  FormatTok = Tokens->setPosition(StoredPosition);
1133  if (Next && !mustBeJSIdent(Keywords, Next)) {
1134  nextToken();
1135  break;
1136  }
1137  }
1138  parseRecord();
1139  addUnwrappedLine();
1140  return;
1141  }
1142 
1143  // See if the following token should start a new unwrapped line.
1144  StringRef Text = FormatTok->TokenText;
1145  nextToken();
1146  if (Line->Tokens.size() == 1 &&
1147  // JS doesn't have macros, and within classes colons indicate fields,
1148  // not labels.
1150  if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1151  Line->Tokens.begin()->Tok->MustBreakBefore = true;
1152  parseLabel();
1153  return;
1154  }
1155  // Recognize function-like macro usages without trailing semicolon as
1156  // well as free-standing macros like Q_OBJECT.
1157  bool FunctionLike = FormatTok->is(tok::l_paren);
1158  if (FunctionLike)
1159  parseParens();
1160 
1161  bool FollowedByNewline =
1162  CommentsBeforeNextToken.empty()
1163  ? FormatTok->NewlinesBefore > 0
1164  : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1165 
1166  if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1167  tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
1168  addUnwrappedLine();
1169  return;
1170  }
1171  }
1172  break;
1173  }
1174  case tok::equal:
1175  // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1176  // TT_JsFatArrow. They always start an expression or a child block if
1177  // followed by a curly.
1178  if (FormatTok->is(TT_JsFatArrow)) {
1179  nextToken();
1180  if (FormatTok->is(tok::l_brace))
1181  parseChildBlock();
1182  break;
1183  }
1184 
1185  nextToken();
1186  if (FormatTok->Tok.is(tok::l_brace)) {
1187  nextToken();
1188  parseBracedList();
1189  } else if (Style.Language == FormatStyle::LK_Proto &&
1190  FormatTok->Tok.is(tok::less)) {
1191  nextToken();
1192  parseBracedList(/*ContinueOnSemicolons=*/false,
1193  /*ClosingBraceKind=*/tok::greater);
1194  }
1195  break;
1196  case tok::l_square:
1197  parseSquare();
1198  break;
1199  case tok::kw_new:
1200  parseNew();
1201  break;
1202  default:
1203  nextToken();
1204  break;
1205  }
1206  } while (!eof());
1207 }
1208 
1209 bool UnwrappedLineParser::tryToParseLambda() {
1210  if (!Style.isCpp()) {
1211  nextToken();
1212  return false;
1213  }
1214  const FormatToken* Previous = getPreviousToken();
1215  if (Previous &&
1216  (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1217  tok::kw_delete) ||
1218  Previous->closesScope() || Previous->isSimpleTypeSpecifier())) {
1219  nextToken();
1220  return false;
1221  }
1222  assert(FormatTok->is(tok::l_square));
1223  FormatToken &LSquare = *FormatTok;
1224  if (!tryToParseLambdaIntroducer())
1225  return false;
1226 
1227  while (FormatTok->isNot(tok::l_brace)) {
1228  if (FormatTok->isSimpleTypeSpecifier()) {
1229  nextToken();
1230  continue;
1231  }
1232  switch (FormatTok->Tok.getKind()) {
1233  case tok::l_brace:
1234  break;
1235  case tok::l_paren:
1236  parseParens();
1237  break;
1238  case tok::amp:
1239  case tok::star:
1240  case tok::kw_const:
1241  case tok::comma:
1242  case tok::less:
1243  case tok::greater:
1244  case tok::identifier:
1245  case tok::numeric_constant:
1246  case tok::coloncolon:
1247  case tok::kw_mutable:
1248  nextToken();
1249  break;
1250  case tok::arrow:
1251  FormatTok->Type = TT_LambdaArrow;
1252  nextToken();
1253  break;
1254  default:
1255  return true;
1256  }
1257  }
1258  LSquare.Type = TT_LambdaLSquare;
1259  parseChildBlock();
1260  return true;
1261 }
1262 
1263 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1264  nextToken();
1265  if (FormatTok->is(tok::equal)) {
1266  nextToken();
1267  if (FormatTok->is(tok::r_square)) {
1268  nextToken();
1269  return true;
1270  }
1271  if (FormatTok->isNot(tok::comma))
1272  return false;
1273  nextToken();
1274  } else if (FormatTok->is(tok::amp)) {
1275  nextToken();
1276  if (FormatTok->is(tok::r_square)) {
1277  nextToken();
1278  return true;
1279  }
1280  if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
1281  return false;
1282  }
1283  if (FormatTok->is(tok::comma))
1284  nextToken();
1285  } else if (FormatTok->is(tok::r_square)) {
1286  nextToken();
1287  return true;
1288  }
1289  do {
1290  if (FormatTok->is(tok::amp))
1291  nextToken();
1292  if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
1293  return false;
1294  nextToken();
1295  if (FormatTok->is(tok::ellipsis))
1296  nextToken();
1297  if (FormatTok->is(tok::comma)) {
1298  nextToken();
1299  } else if (FormatTok->is(tok::r_square)) {
1300  nextToken();
1301  return true;
1302  } else {
1303  return false;
1304  }
1305  } while (!eof());
1306  return false;
1307 }
1308 
1309 void UnwrappedLineParser::tryToParseJSFunction() {
1310  assert(FormatTok->is(Keywords.kw_function) ||
1311  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1312  if (FormatTok->is(Keywords.kw_async))
1313  nextToken();
1314  // Consume "function".
1315  nextToken();
1316 
1317  // Consume * (generator function). Treat it like C++'s overloaded operators.
1318  if (FormatTok->is(tok::star)) {
1319  FormatTok->Type = TT_OverloadedOperator;
1320  nextToken();
1321  }
1322 
1323  // Consume function name.
1324  if (FormatTok->is(tok::identifier))
1325  nextToken();
1326 
1327  if (FormatTok->isNot(tok::l_paren))
1328  return;
1329 
1330  // Parse formal parameter list.
1331  parseParens();
1332 
1333  if (FormatTok->is(tok::colon)) {
1334  // Parse a type definition.
1335  nextToken();
1336 
1337  // Eat the type declaration. For braced inline object types, balance braces,
1338  // otherwise just parse until finding an l_brace for the function body.
1339  if (FormatTok->is(tok::l_brace))
1340  tryToParseBracedList();
1341  else
1342  while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1343  nextToken();
1344  }
1345 
1346  if (FormatTok->is(tok::semi))
1347  return;
1348 
1349  parseChildBlock();
1350 }
1351 
1352 bool UnwrappedLineParser::tryToParseBracedList() {
1353  if (FormatTok->BlockKind == BK_Unknown)
1354  calculateBraceTypes();
1355  assert(FormatTok->BlockKind != BK_Unknown);
1356  if (FormatTok->BlockKind == BK_Block)
1357  return false;
1358  nextToken();
1359  parseBracedList();
1360  return true;
1361 }
1362 
1363 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1364  tok::TokenKind ClosingBraceKind) {
1365  bool HasError = false;
1366 
1367  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1368  // replace this by using parseAssigmentExpression() inside.
1369  do {
1370  if (Style.Language == FormatStyle::LK_JavaScript) {
1371  if (FormatTok->is(Keywords.kw_function) ||
1372  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1373  tryToParseJSFunction();
1374  continue;
1375  }
1376  if (FormatTok->is(TT_JsFatArrow)) {
1377  nextToken();
1378  // Fat arrows can be followed by simple expressions or by child blocks
1379  // in curly braces.
1380  if (FormatTok->is(tok::l_brace)) {
1381  parseChildBlock();
1382  continue;
1383  }
1384  }
1385  if (FormatTok->is(tok::l_brace)) {
1386  // Could be a method inside of a braced list `{a() { return 1; }}`.
1387  if (tryToParseBracedList())
1388  continue;
1389  parseChildBlock();
1390  }
1391  }
1392  if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1393  nextToken();
1394  return !HasError;
1395  }
1396  switch (FormatTok->Tok.getKind()) {
1397  case tok::caret:
1398  nextToken();
1399  if (FormatTok->is(tok::l_brace)) {
1400  parseChildBlock();
1401  }
1402  break;
1403  case tok::l_square:
1404  tryToParseLambda();
1405  break;
1406  case tok::l_paren:
1407  parseParens();
1408  // JavaScript can just have free standing methods and getters/setters in
1409  // object literals. Detect them by a "{" following ")".
1410  if (Style.Language == FormatStyle::LK_JavaScript) {
1411  if (FormatTok->is(tok::l_brace))
1412  parseChildBlock();
1413  break;
1414  }
1415  break;
1416  case tok::l_brace:
1417  // Assume there are no blocks inside a braced init list apart
1418  // from the ones we explicitly parse out (like lambdas).
1419  FormatTok->BlockKind = BK_BracedInit;
1420  nextToken();
1421  parseBracedList();
1422  break;
1423  case tok::semi:
1424  // JavaScript (or more precisely TypeScript) can have semicolons in braced
1425  // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1426  // used for error recovery if we have otherwise determined that this is
1427  // a braced list.
1428  if (Style.Language == FormatStyle::LK_JavaScript) {
1429  nextToken();
1430  break;
1431  }
1432  HasError = true;
1433  if (!ContinueOnSemicolons)
1434  return !HasError;
1435  nextToken();
1436  break;
1437  case tok::comma:
1438  nextToken();
1439  break;
1440  default:
1441  nextToken();
1442  break;
1443  }
1444  } while (!eof());
1445  return false;
1446 }
1447 
1448 void UnwrappedLineParser::parseParens() {
1449  assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1450  nextToken();
1451  do {
1452  switch (FormatTok->Tok.getKind()) {
1453  case tok::l_paren:
1454  parseParens();
1455  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1456  parseChildBlock();
1457  break;
1458  case tok::r_paren:
1459  nextToken();
1460  return;
1461  case tok::r_brace:
1462  // A "}" inside parenthesis is an error if there wasn't a matching "{".
1463  return;
1464  case tok::l_square:
1465  tryToParseLambda();
1466  break;
1467  case tok::l_brace:
1468  if (!tryToParseBracedList())
1469  parseChildBlock();
1470  break;
1471  case tok::at:
1472  nextToken();
1473  if (FormatTok->Tok.is(tok::l_brace)) {
1474  nextToken();
1475  parseBracedList();
1476  }
1477  break;
1478  case tok::kw_class:
1479  if (Style.Language == FormatStyle::LK_JavaScript)
1480  parseRecord(/*ParseAsExpr=*/true);
1481  else
1482  nextToken();
1483  break;
1484  case tok::identifier:
1485  if (Style.Language == FormatStyle::LK_JavaScript &&
1486  (FormatTok->is(Keywords.kw_function) ||
1487  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1488  tryToParseJSFunction();
1489  else
1490  nextToken();
1491  break;
1492  default:
1493  nextToken();
1494  break;
1495  }
1496  } while (!eof());
1497 }
1498 
1499 void UnwrappedLineParser::parseSquare() {
1500  assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1501  if (tryToParseLambda())
1502  return;
1503  do {
1504  switch (FormatTok->Tok.getKind()) {
1505  case tok::l_paren:
1506  parseParens();
1507  break;
1508  case tok::r_square:
1509  nextToken();
1510  return;
1511  case tok::r_brace:
1512  // A "}" inside parenthesis is an error if there wasn't a matching "{".
1513  return;
1514  case tok::l_square:
1515  parseSquare();
1516  break;
1517  case tok::l_brace: {
1518  if (!tryToParseBracedList())
1519  parseChildBlock();
1520  break;
1521  }
1522  case tok::at:
1523  nextToken();
1524  if (FormatTok->Tok.is(tok::l_brace)) {
1525  nextToken();
1526  parseBracedList();
1527  }
1528  break;
1529  default:
1530  nextToken();
1531  break;
1532  }
1533  } while (!eof());
1534 }
1535 
1536 void UnwrappedLineParser::parseIfThenElse() {
1537  assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1538  nextToken();
1539  if (FormatTok->Tok.is(tok::kw_constexpr))
1540  nextToken();
1541  if (FormatTok->Tok.is(tok::l_paren))
1542  parseParens();
1543  bool NeedsUnwrappedLine = false;
1544  if (FormatTok->Tok.is(tok::l_brace)) {
1545  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1546  parseBlock(/*MustBeDeclaration=*/false);
1547  if (Style.BraceWrapping.BeforeElse)
1548  addUnwrappedLine();
1549  else
1550  NeedsUnwrappedLine = true;
1551  } else {
1552  addUnwrappedLine();
1553  ++Line->Level;
1554  parseStructuralElement();
1555  --Line->Level;
1556  }
1557  if (FormatTok->Tok.is(tok::kw_else)) {
1558  nextToken();
1559  if (FormatTok->Tok.is(tok::l_brace)) {
1560  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1561  parseBlock(/*MustBeDeclaration=*/false);
1562  addUnwrappedLine();
1563  } else if (FormatTok->Tok.is(tok::kw_if)) {
1564  parseIfThenElse();
1565  } else {
1566  addUnwrappedLine();
1567  ++Line->Level;
1568  parseStructuralElement();
1569  if (FormatTok->is(tok::eof))
1570  addUnwrappedLine();
1571  --Line->Level;
1572  }
1573  } else if (NeedsUnwrappedLine) {
1574  addUnwrappedLine();
1575  }
1576 }
1577 
1578 void UnwrappedLineParser::parseTryCatch() {
1579  assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1580  nextToken();
1581  bool NeedsUnwrappedLine = false;
1582  if (FormatTok->is(tok::colon)) {
1583  // We are in a function try block, what comes is an initializer list.
1584  nextToken();
1585  while (FormatTok->is(tok::identifier)) {
1586  nextToken();
1587  if (FormatTok->is(tok::l_paren))
1588  parseParens();
1589  if (FormatTok->is(tok::comma))
1590  nextToken();
1591  }
1592  }
1593  // Parse try with resource.
1594  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1595  parseParens();
1596  }
1597  if (FormatTok->is(tok::l_brace)) {
1598  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1599  parseBlock(/*MustBeDeclaration=*/false);
1600  if (Style.BraceWrapping.BeforeCatch) {
1601  addUnwrappedLine();
1602  } else {
1603  NeedsUnwrappedLine = true;
1604  }
1605  } else if (!FormatTok->is(tok::kw_catch)) {
1606  // The C++ standard requires a compound-statement after a try.
1607  // If there's none, we try to assume there's a structuralElement
1608  // and try to continue.
1609  addUnwrappedLine();
1610  ++Line->Level;
1611  parseStructuralElement();
1612  --Line->Level;
1613  }
1614  while (1) {
1615  if (FormatTok->is(tok::at))
1616  nextToken();
1617  if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1618  tok::kw___finally) ||
1619  ((Style.Language == FormatStyle::LK_Java ||
1621  FormatTok->is(Keywords.kw_finally)) ||
1622  (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1623  FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1624  break;
1625  nextToken();
1626  while (FormatTok->isNot(tok::l_brace)) {
1627  if (FormatTok->is(tok::l_paren)) {
1628  parseParens();
1629  continue;
1630  }
1631  if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1632  return;
1633  nextToken();
1634  }
1635  NeedsUnwrappedLine = false;
1636  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1637  parseBlock(/*MustBeDeclaration=*/false);
1638  if (Style.BraceWrapping.BeforeCatch)
1639  addUnwrappedLine();
1640  else
1641  NeedsUnwrappedLine = true;
1642  }
1643  if (NeedsUnwrappedLine)
1644  addUnwrappedLine();
1645 }
1646 
1647 void UnwrappedLineParser::parseNamespace() {
1648  assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1649 
1650  const FormatToken &InitialToken = *FormatTok;
1651  nextToken();
1652  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1653  nextToken();
1654  if (FormatTok->Tok.is(tok::l_brace)) {
1655  if (ShouldBreakBeforeBrace(Style, InitialToken))
1656  addUnwrappedLine();
1657 
1658  bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1660  DeclarationScopeStack.size() > 1);
1661  parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1662  // Munch the semicolon after a namespace. This is more common than one would
1663  // think. Puttin the semicolon into its own line is very ugly.
1664  if (FormatTok->Tok.is(tok::semi))
1665  nextToken();
1666  addUnwrappedLine();
1667  }
1668  // FIXME: Add error handling.
1669 }
1670 
1671 void UnwrappedLineParser::parseNew() {
1672  assert(FormatTok->is(tok::kw_new) && "'new' expected");
1673  nextToken();
1674  if (Style.Language != FormatStyle::LK_Java)
1675  return;
1676 
1677  // In Java, we can parse everything up to the parens, which aren't optional.
1678  do {
1679  // There should not be a ;, { or } before the new's open paren.
1680  if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1681  return;
1682 
1683  // Consume the parens.
1684  if (FormatTok->is(tok::l_paren)) {
1685  parseParens();
1686 
1687  // If there is a class body of an anonymous class, consume that as child.
1688  if (FormatTok->is(tok::l_brace))
1689  parseChildBlock();
1690  return;
1691  }
1692  nextToken();
1693  } while (!eof());
1694 }
1695 
1696 void UnwrappedLineParser::parseForOrWhileLoop() {
1697  assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1698  "'for', 'while' or foreach macro expected");
1699  nextToken();
1700  // JS' for await ( ...
1701  if (Style.Language == FormatStyle::LK_JavaScript &&
1702  FormatTok->is(Keywords.kw_await))
1703  nextToken();
1704  if (FormatTok->Tok.is(tok::l_paren))
1705  parseParens();
1706  if (FormatTok->Tok.is(tok::l_brace)) {
1707  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1708  parseBlock(/*MustBeDeclaration=*/false);
1709  addUnwrappedLine();
1710  } else {
1711  addUnwrappedLine();
1712  ++Line->Level;
1713  parseStructuralElement();
1714  --Line->Level;
1715  }
1716 }
1717 
1718 void UnwrappedLineParser::parseDoWhile() {
1719  assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1720  nextToken();
1721  if (FormatTok->Tok.is(tok::l_brace)) {
1722  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1723  parseBlock(/*MustBeDeclaration=*/false);
1724  if (Style.BraceWrapping.IndentBraces)
1725  addUnwrappedLine();
1726  } else {
1727  addUnwrappedLine();
1728  ++Line->Level;
1729  parseStructuralElement();
1730  --Line->Level;
1731  }
1732 
1733  // FIXME: Add error handling.
1734  if (!FormatTok->Tok.is(tok::kw_while)) {
1735  addUnwrappedLine();
1736  return;
1737  }
1738 
1739  nextToken();
1740  parseStructuralElement();
1741 }
1742 
1743 void UnwrappedLineParser::parseLabel() {
1744  nextToken();
1745  unsigned OldLineLevel = Line->Level;
1746  if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1747  --Line->Level;
1748  if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1749  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1750  parseBlock(/*MustBeDeclaration=*/false);
1751  if (FormatTok->Tok.is(tok::kw_break)) {
1753  addUnwrappedLine();
1754  parseStructuralElement();
1755  }
1756  addUnwrappedLine();
1757  } else {
1758  if (FormatTok->is(tok::semi))
1759  nextToken();
1760  addUnwrappedLine();
1761  }
1762  Line->Level = OldLineLevel;
1763  if (FormatTok->isNot(tok::l_brace)) {
1764  parseStructuralElement();
1765  addUnwrappedLine();
1766  }
1767 }
1768 
1769 void UnwrappedLineParser::parseCaseLabel() {
1770  assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1771  // FIXME: fix handling of complex expressions here.
1772  do {
1773  nextToken();
1774  } while (!eof() && !FormatTok->Tok.is(tok::colon));
1775  parseLabel();
1776 }
1777 
1778 void UnwrappedLineParser::parseSwitch() {
1779  assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1780  nextToken();
1781  if (FormatTok->Tok.is(tok::l_paren))
1782  parseParens();
1783  if (FormatTok->Tok.is(tok::l_brace)) {
1784  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1785  parseBlock(/*MustBeDeclaration=*/false);
1786  addUnwrappedLine();
1787  } else {
1788  addUnwrappedLine();
1789  ++Line->Level;
1790  parseStructuralElement();
1791  --Line->Level;
1792  }
1793 }
1794 
1795 void UnwrappedLineParser::parseAccessSpecifier() {
1796  nextToken();
1797  // Understand Qt's slots.
1798  if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1799  nextToken();
1800  // Otherwise, we don't know what it is, and we'd better keep the next token.
1801  if (FormatTok->Tok.is(tok::colon))
1802  nextToken();
1803  addUnwrappedLine();
1804 }
1805 
1806 bool UnwrappedLineParser::parseEnum() {
1807  // Won't be 'enum' for NS_ENUMs.
1808  if (FormatTok->Tok.is(tok::kw_enum))
1809  nextToken();
1810 
1811  // In TypeScript, "enum" can also be used as property name, e.g. in interface
1812  // declarations. An "enum" keyword followed by a colon would be a syntax
1813  // error and thus assume it is just an identifier.
1814  if (Style.Language == FormatStyle::LK_JavaScript &&
1815  FormatTok->isOneOf(tok::colon, tok::question))
1816  return false;
1817 
1818  // Eat up enum class ...
1819  if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1820  nextToken();
1821 
1822  while (FormatTok->Tok.getIdentifierInfo() ||
1823  FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1824  tok::greater, tok::comma, tok::question)) {
1825  nextToken();
1826  // We can have macros or attributes in between 'enum' and the enum name.
1827  if (FormatTok->is(tok::l_paren))
1828  parseParens();
1829  if (FormatTok->is(tok::identifier)) {
1830  nextToken();
1831  // If there are two identifiers in a row, this is likely an elaborate
1832  // return type. In Java, this can be "implements", etc.
1833  if (Style.isCpp() && FormatTok->is(tok::identifier))
1834  return false;
1835  }
1836  }
1837 
1838  // Just a declaration or something is wrong.
1839  if (FormatTok->isNot(tok::l_brace))
1840  return true;
1841  FormatTok->BlockKind = BK_Block;
1842 
1843  if (Style.Language == FormatStyle::LK_Java) {
1844  // Java enums are different.
1845  parseJavaEnumBody();
1846  return true;
1847  }
1848  if (Style.Language == FormatStyle::LK_Proto) {
1849  parseBlock(/*MustBeDeclaration=*/true);
1850  return true;
1851  }
1852 
1853  // Parse enum body.
1854  nextToken();
1855  bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1856  if (HasError) {
1857  if (FormatTok->is(tok::semi))
1858  nextToken();
1859  addUnwrappedLine();
1860  }
1861  return true;
1862 
1863  // There is no addUnwrappedLine() here so that we fall through to parsing a
1864  // structural element afterwards. Thus, in "enum A {} n, m;",
1865  // "} n, m;" will end up in one unwrapped line.
1866 }
1867 
1868 void UnwrappedLineParser::parseJavaEnumBody() {
1869  // Determine whether the enum is simple, i.e. does not have a semicolon or
1870  // constants with class bodies. Simple enums can be formatted like braced
1871  // lists, contracted to a single line, etc.
1872  unsigned StoredPosition = Tokens->getPosition();
1873  bool IsSimple = true;
1874  FormatToken *Tok = Tokens->getNextToken();
1875  while (Tok) {
1876  if (Tok->is(tok::r_brace))
1877  break;
1878  if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1879  IsSimple = false;
1880  break;
1881  }
1882  // FIXME: This will also mark enums with braces in the arguments to enum
1883  // constants as "not simple". This is probably fine in practice, though.
1884  Tok = Tokens->getNextToken();
1885  }
1886  FormatTok = Tokens->setPosition(StoredPosition);
1887 
1888  if (IsSimple) {
1889  nextToken();
1890  parseBracedList();
1891  addUnwrappedLine();
1892  return;
1893  }
1894 
1895  // Parse the body of a more complex enum.
1896  // First add a line for everything up to the "{".
1897  nextToken();
1898  addUnwrappedLine();
1899  ++Line->Level;
1900 
1901  // Parse the enum constants.
1902  while (FormatTok) {
1903  if (FormatTok->is(tok::l_brace)) {
1904  // Parse the constant's class body.
1905  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1906  /*MunchSemi=*/false);
1907  } else if (FormatTok->is(tok::l_paren)) {
1908  parseParens();
1909  } else if (FormatTok->is(tok::comma)) {
1910  nextToken();
1911  addUnwrappedLine();
1912  } else if (FormatTok->is(tok::semi)) {
1913  nextToken();
1914  addUnwrappedLine();
1915  break;
1916  } else if (FormatTok->is(tok::r_brace)) {
1917  addUnwrappedLine();
1918  break;
1919  } else {
1920  nextToken();
1921  }
1922  }
1923 
1924  // Parse the class body after the enum's ";" if any.
1925  parseLevel(/*HasOpeningBrace=*/true);
1926  nextToken();
1927  --Line->Level;
1928  addUnwrappedLine();
1929 }
1930 
1931 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
1932  const FormatToken &InitialToken = *FormatTok;
1933  nextToken();
1934 
1935  // The actual identifier can be a nested name specifier, and in macros
1936  // it is often token-pasted.
1937  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
1938  tok::kw___attribute, tok::kw___declspec,
1939  tok::kw_alignas) ||
1940  ((Style.Language == FormatStyle::LK_Java ||
1942  FormatTok->isOneOf(tok::period, tok::comma))) {
1943  bool IsNonMacroIdentifier =
1944  FormatTok->is(tok::identifier) &&
1945  FormatTok->TokenText != FormatTok->TokenText.upper();
1946  nextToken();
1947  // We can have macros or attributes in between 'class' and the class name.
1948  if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
1949  parseParens();
1950  }
1951 
1952  // Note that parsing away template declarations here leads to incorrectly
1953  // accepting function declarations as record declarations.
1954  // In general, we cannot solve this problem. Consider:
1955  // class A<int> B() {}
1956  // which can be a function definition or a class definition when B() is a
1957  // macro. If we find enough real-world cases where this is a problem, we
1958  // can parse for the 'template' keyword in the beginning of the statement,
1959  // and thus rule out the record production in case there is no template
1960  // (this would still leave us with an ambiguity between template function
1961  // and class declarations).
1962  if (FormatTok->isOneOf(tok::colon, tok::less)) {
1963  while (!eof()) {
1964  if (FormatTok->is(tok::l_brace)) {
1965  calculateBraceTypes(/*ExpectClassBody=*/true);
1966  if (!tryToParseBracedList())
1967  break;
1968  }
1969  if (FormatTok->Tok.is(tok::semi))
1970  return;
1971  nextToken();
1972  }
1973  }
1974  if (FormatTok->Tok.is(tok::l_brace)) {
1975  if (ParseAsExpr) {
1976  parseChildBlock();
1977  } else {
1978  if (ShouldBreakBeforeBrace(Style, InitialToken))
1979  addUnwrappedLine();
1980 
1981  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1982  /*MunchSemi=*/false);
1983  }
1984  }
1985  // There is no addUnwrappedLine() here so that we fall through to parsing a
1986  // structural element afterwards. Thus, in "class A {} n, m;",
1987  // "} n, m;" will end up in one unwrapped line.
1988 }
1989 
1990 void UnwrappedLineParser::parseObjCProtocolList() {
1991  assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1992  do
1993  nextToken();
1994  while (!eof() && FormatTok->Tok.isNot(tok::greater));
1995  nextToken(); // Skip '>'.
1996 }
1997 
1998 void UnwrappedLineParser::parseObjCUntilAtEnd() {
1999  do {
2000  if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2001  nextToken();
2002  addUnwrappedLine();
2003  break;
2004  }
2005  if (FormatTok->is(tok::l_brace)) {
2006  parseBlock(/*MustBeDeclaration=*/false);
2007  // In ObjC interfaces, nothing should be following the "}".
2008  addUnwrappedLine();
2009  } else if (FormatTok->is(tok::r_brace)) {
2010  // Ignore stray "}". parseStructuralElement doesn't consume them.
2011  nextToken();
2012  addUnwrappedLine();
2013  } else {
2014  parseStructuralElement();
2015  }
2016  } while (!eof());
2017 }
2018 
2019 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2020  nextToken();
2021  nextToken(); // interface name
2022 
2023  // @interface can be followed by either a base class, or a category.
2024  if (FormatTok->Tok.is(tok::colon)) {
2025  nextToken();
2026  nextToken(); // base class name
2027  } else if (FormatTok->Tok.is(tok::l_paren))
2028  // Skip category, if present.
2029  parseParens();
2030 
2031  if (FormatTok->Tok.is(tok::less))
2032  parseObjCProtocolList();
2033 
2034  if (FormatTok->Tok.is(tok::l_brace)) {
2036  addUnwrappedLine();
2037  parseBlock(/*MustBeDeclaration=*/true);
2038  }
2039 
2040  // With instance variables, this puts '}' on its own line. Without instance
2041  // variables, this ends the @interface line.
2042  addUnwrappedLine();
2043 
2044  parseObjCUntilAtEnd();
2045 }
2046 
2047 void UnwrappedLineParser::parseObjCProtocol() {
2048  nextToken();
2049  nextToken(); // protocol name
2050 
2051  if (FormatTok->Tok.is(tok::less))
2052  parseObjCProtocolList();
2053 
2054  // Check for protocol declaration.
2055  if (FormatTok->Tok.is(tok::semi)) {
2056  nextToken();
2057  return addUnwrappedLine();
2058  }
2059 
2060  addUnwrappedLine();
2061  parseObjCUntilAtEnd();
2062 }
2063 
2064 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2065  bool IsImport = FormatTok->is(Keywords.kw_import);
2066  assert(IsImport || FormatTok->is(tok::kw_export));
2067  nextToken();
2068 
2069  // Consume the "default" in "export default class/function".
2070  if (FormatTok->is(tok::kw_default))
2071  nextToken();
2072 
2073  // Consume "async function", "function" and "default function", so that these
2074  // get parsed as free-standing JS functions, i.e. do not require a trailing
2075  // semicolon.
2076  if (FormatTok->is(Keywords.kw_async))
2077  nextToken();
2078  if (FormatTok->is(Keywords.kw_function)) {
2079  nextToken();
2080  return;
2081  }
2082 
2083  // For imports, `export *`, `export {...}`, consume the rest of the line up
2084  // to the terminating `;`. For everything else, just return and continue
2085  // parsing the structural element, i.e. the declaration or expression for
2086  // `export default`.
2087  if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2088  !FormatTok->isStringLiteral())
2089  return;
2090 
2091  while (!eof()) {
2092  if (FormatTok->is(tok::semi))
2093  return;
2094  if (Line->Tokens.size() == 0) {
2095  // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2096  // import statement should terminate.
2097  return;
2098  }
2099  if (FormatTok->is(tok::l_brace)) {
2100  FormatTok->BlockKind = BK_Block;
2101  nextToken();
2102  parseBracedList();
2103  } else {
2104  nextToken();
2105  }
2106  }
2107 }
2108 
2109 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2110  StringRef Prefix = "") {
2111  llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
2112  << (Line.InPPDirective ? " MACRO" : "") << ": ";
2113  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2114  E = Line.Tokens.end();
2115  I != E; ++I) {
2116  llvm::dbgs() << I->Tok->Tok.getName() << "["
2117  << "T=" << I->Tok->Type
2118  << ", OC=" << I->Tok->OriginalColumn << "] ";
2119  }
2120  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2121  E = Line.Tokens.end();
2122  I != E; ++I) {
2123  const UnwrappedLineNode &Node = *I;
2125  I = Node.Children.begin(),
2126  E = Node.Children.end();
2127  I != E; ++I) {
2128  printDebugInfo(*I, "\nChild: ");
2129  }
2130  }
2131  llvm::dbgs() << "\n";
2132 }
2133 
2134 void UnwrappedLineParser::addUnwrappedLine() {
2135  if (Line->Tokens.empty())
2136  return;
2137  DEBUG({
2138  if (CurrentLines == &Lines)
2139  printDebugInfo(*Line);
2140  });
2141  CurrentLines->push_back(std::move(*Line));
2142  Line->Tokens.clear();
2143  Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2144  if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2145  CurrentLines->append(
2146  std::make_move_iterator(PreprocessorDirectives.begin()),
2147  std::make_move_iterator(PreprocessorDirectives.end()));
2148  PreprocessorDirectives.clear();
2149  }
2150 }
2151 
2152 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2153 
2154 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2155  return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2156  FormatTok.NewlinesBefore > 0;
2157 }
2158 
2159 // Checks if \p FormatTok is a line comment that continues the line comment
2160 // section on \p Line.
2161 static bool continuesLineCommentSection(const FormatToken &FormatTok,
2162  const UnwrappedLine &Line,
2163  llvm::Regex &CommentPragmasRegex) {
2164  if (Line.Tokens.empty())
2165  return false;
2166 
2167  StringRef IndentContent = FormatTok.TokenText;
2168  if (FormatTok.TokenText.startswith("//") ||
2169  FormatTok.TokenText.startswith("/*"))
2170  IndentContent = FormatTok.TokenText.substr(2);
2171  if (CommentPragmasRegex.match(IndentContent))
2172  return false;
2173 
2174  // If Line starts with a line comment, then FormatTok continues the comment
2175  // section if its original column is greater or equal to the original start
2176  // column of the line.
2177  //
2178  // Define the min column token of a line as follows: if a line ends in '{' or
2179  // contains a '{' followed by a line comment, then the min column token is
2180  // that '{'. Otherwise, the min column token of the line is the first token of
2181  // the line.
2182  //
2183  // If Line starts with a token other than a line comment, then FormatTok
2184  // continues the comment section if its original column is greater than the
2185  // original start column of the min column token of the line.
2186  //
2187  // For example, the second line comment continues the first in these cases:
2188  //
2189  // // first line
2190  // // second line
2191  //
2192  // and:
2193  //
2194  // // first line
2195  // // second line
2196  //
2197  // and:
2198  //
2199  // int i; // first line
2200  // // second line
2201  //
2202  // and:
2203  //
2204  // do { // first line
2205  // // second line
2206  // int i;
2207  // } while (true);
2208  //
2209  // and:
2210  //
2211  // enum {
2212  // a, // first line
2213  // // second line
2214  // b
2215  // };
2216  //
2217  // The second line comment doesn't continue the first in these cases:
2218  //
2219  // // first line
2220  // // second line
2221  //
2222  // and:
2223  //
2224  // int i; // first line
2225  // // second line
2226  //
2227  // and:
2228  //
2229  // do { // first line
2230  // // second line
2231  // int i;
2232  // } while (true);
2233  //
2234  // and:
2235  //
2236  // enum {
2237  // a, // first line
2238  // // second line
2239  // };
2240  const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2241 
2242  // Scan for '{//'. If found, use the column of '{' as a min column for line
2243  // comment section continuation.
2244  const FormatToken *PreviousToken = nullptr;
2245  for (const UnwrappedLineNode &Node : Line.Tokens) {
2246  if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2247  isLineComment(*Node.Tok)) {
2248  MinColumnToken = PreviousToken;
2249  break;
2250  }
2251  PreviousToken = Node.Tok;
2252 
2253  // Grab the last newline preceding a token in this unwrapped line.
2254  if (Node.Tok->NewlinesBefore > 0) {
2255  MinColumnToken = Node.Tok;
2256  }
2257  }
2258  if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2259  MinColumnToken = PreviousToken;
2260  }
2261 
2262  return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2263  MinColumnToken);
2264 }
2265 
2266 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2267  bool JustComments = Line->Tokens.empty();
2269  I = CommentsBeforeNextToken.begin(),
2270  E = CommentsBeforeNextToken.end();
2271  I != E; ++I) {
2272  // Line comments that belong to the same line comment section are put on the
2273  // same line since later we might want to reflow content between them.
2274  // Additional fine-grained breaking of line comment sections is controlled
2275  // by the class BreakableLineCommentSection in case it is desirable to keep
2276  // several line comment sections in the same unwrapped line.
2277  //
2278  // FIXME: Consider putting separate line comment sections as children to the
2279  // unwrapped line instead.
2280  (*I)->ContinuesLineCommentSection =
2281  continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2282  if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2283  addUnwrappedLine();
2284  pushToken(*I);
2285  }
2286  if (NewlineBeforeNext && JustComments)
2287  addUnwrappedLine();
2288  CommentsBeforeNextToken.clear();
2289 }
2290 
2291 void UnwrappedLineParser::nextToken(int LevelDifference) {
2292  if (eof())
2293  return;
2294  flushComments(isOnNewLine(*FormatTok));
2295  pushToken(FormatTok);
2296  if (Style.Language != FormatStyle::LK_JavaScript)
2297  readToken(LevelDifference);
2298  else
2299  readTokenWithJavaScriptASI();
2300 }
2301 
2302 const FormatToken *UnwrappedLineParser::getPreviousToken() {
2303  // FIXME: This is a dirty way to access the previous token. Find a better
2304  // solution.
2305  if (!Line || Line->Tokens.empty())
2306  return nullptr;
2307  return Line->Tokens.back().Tok;
2308 }
2309 
2310 void UnwrappedLineParser::distributeComments(
2311  const SmallVectorImpl<FormatToken *> &Comments,
2312  const FormatToken *NextTok) {
2313  // Whether or not a line comment token continues a line is controlled by
2314  // the method continuesLineCommentSection, with the following caveat:
2315  //
2316  // Define a trail of Comments to be a nonempty proper postfix of Comments such
2317  // that each comment line from the trail is aligned with the next token, if
2318  // the next token exists. If a trail exists, the beginning of the maximal
2319  // trail is marked as a start of a new comment section.
2320  //
2321  // For example in this code:
2322  //
2323  // int a; // line about a
2324  // // line 1 about b
2325  // // line 2 about b
2326  // int b;
2327  //
2328  // the two lines about b form a maximal trail, so there are two sections, the
2329  // first one consisting of the single comment "// line about a" and the
2330  // second one consisting of the next two comments.
2331  if (Comments.empty())
2332  return;
2333  bool ShouldPushCommentsInCurrentLine = true;
2334  bool HasTrailAlignedWithNextToken = false;
2335  unsigned StartOfTrailAlignedWithNextToken = 0;
2336  if (NextTok) {
2337  // We are skipping the first element intentionally.
2338  for (unsigned i = Comments.size() - 1; i > 0; --i) {
2339  if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2340  HasTrailAlignedWithNextToken = true;
2341  StartOfTrailAlignedWithNextToken = i;
2342  }
2343  }
2344  }
2345  for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2346  FormatToken *FormatTok = Comments[i];
2347  if (HasTrailAlignedWithNextToken &&
2348  i == StartOfTrailAlignedWithNextToken) {
2349  FormatTok->ContinuesLineCommentSection = false;
2350  } else {
2351  FormatTok->ContinuesLineCommentSection =
2352  continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2353  }
2354  if (!FormatTok->ContinuesLineCommentSection &&
2355  (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2356  ShouldPushCommentsInCurrentLine = false;
2357  }
2358  if (ShouldPushCommentsInCurrentLine) {
2359  pushToken(FormatTok);
2360  } else {
2361  CommentsBeforeNextToken.push_back(FormatTok);
2362  }
2363  }
2364 }
2365 
2366 void UnwrappedLineParser::readToken(int LevelDifference) {
2368  do {
2369  FormatTok = Tokens->getNextToken();
2370  assert(FormatTok);
2371  while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2372  (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2373  distributeComments(Comments, FormatTok);
2374  Comments.clear();
2375  // If there is an unfinished unwrapped line, we flush the preprocessor
2376  // directives only after that unwrapped line was finished later.
2377  bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2378  ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2379  assert((LevelDifference >= 0 ||
2380  static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
2381  "LevelDifference makes Line->Level negative");
2382  Line->Level += LevelDifference;
2383  // Comments stored before the preprocessor directive need to be output
2384  // before the preprocessor directive, at the same level as the
2385  // preprocessor directive, as we consider them to apply to the directive.
2386  flushComments(isOnNewLine(*FormatTok));
2387  parsePPDirective();
2388  }
2389  while (FormatTok->Type == TT_ConflictStart ||
2390  FormatTok->Type == TT_ConflictEnd ||
2391  FormatTok->Type == TT_ConflictAlternative) {
2392  if (FormatTok->Type == TT_ConflictStart) {
2393  conditionalCompilationStart(/*Unreachable=*/false);
2394  } else if (FormatTok->Type == TT_ConflictAlternative) {
2395  conditionalCompilationAlternative();
2396  } else if (FormatTok->Type == TT_ConflictEnd) {
2397  conditionalCompilationEnd();
2398  }
2399  FormatTok = Tokens->getNextToken();
2400  FormatTok->MustBreakBefore = true;
2401  }
2402 
2403  if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
2404  !Line->InPPDirective) {
2405  continue;
2406  }
2407 
2408  if (!FormatTok->Tok.is(tok::comment)) {
2409  distributeComments(Comments, FormatTok);
2410  Comments.clear();
2411  return;
2412  }
2413 
2414  Comments.push_back(FormatTok);
2415  } while (!eof());
2416 
2417  distributeComments(Comments, nullptr);
2418  Comments.clear();
2419 }
2420 
2421 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2422  Line->Tokens.push_back(UnwrappedLineNode(Tok));
2423  if (MustBreakBeforeNextToken) {
2424  Line->Tokens.back().Tok->MustBreakBefore = true;
2425  MustBreakBeforeNextToken = false;
2426  }
2427 }
2428 
2429 } // end namespace format
2430 } // end namespace clang
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T -> getSizeExpr()))
SmallVector< UnwrappedLine, 0 > Children
static bool continuesLineCommentSection(const FormatToken &FormatTok, const UnwrappedLine &Line, llvm::Regex &CommentPragmasRegex)
bool AfterUnion
Wrap union definitions.
Definition: Format.h:676
Indent in all namespaces.
Definition: Format.h:1208
static bool isIIFE(const UnwrappedLine &Line, const AdditionalKeywords &Keywords)
Token Tok
The Token.
Definition: FormatToken.h:123
Should be used for Protocol Buffers (https://developers.google.com/protocol-buffers/).
Definition: Format.h:1121
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Definition: Token.h:95
bool IndentCaseLabels
Indent case labels one level from the switch statement.
Definition: Format.h:1025
unsigned OriginalColumn
The original 0-based column of this token, including expanded tabs.
Definition: FormatToken.h:214
static LLVM_ATTRIBUTE_UNUSED void printDebugInfo(const UnwrappedLine &Line, StringRef Prefix="")
StringRef P
Parser - This implements a parser for the C family of languages.
Definition: Parser.h:57
static bool mustBeJSIdent(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
bool IsFirst
Indicates that this is the first token of the file.
Definition: FormatToken.h:155
bool isAnyIdentifier() const
Return true if this is a raw identifier (when lexing in raw mode) or a non-keyword identifier (when l...
Definition: Token.h:107
bool isBinaryOperator() const
Definition: FormatToken.h:387
virtual void consumeUnwrappedLine(const UnwrappedLine &Line)=0
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:129
tok::TokenKind getKind() const
Definition: Token.h:90
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:113
unsigned Level
The indent level of the UnwrappedLine.
bool closesScope() const
Returns whether Tok is )]} or a template closing >.
Definition: FormatToken.h:357
bool AfterObjCDeclaration
Wrap ObjC definitions (@autoreleasepool, interfaces, ..).
Definition: Format.h:648
bool IndentBraces
Indent the wrapped braces themselves.
Definition: Format.h:708
Should be used for Java.
Definition: Format.h:1114
static bool tokenCanStartNewLine(const clang::Token &Tok)
Token - This structure provides full information about a lexed token.
Definition: Token.h:35
void setKind(tok::TokenKind K)
Definition: Token.h:91
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
Definition: Lexer.cpp:46
bool isNot(T Kind) const
Definition: FormatToken.h:312
NamespaceIndentationKind NamespaceIndentation
The indentation used for namespaces.
Definition: Format.h:1212
const FormatToken & Tok
static bool isGoogScope(const UnwrappedLine &Line)
virtual FormatToken * getNextToken()=0
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...
CompoundStatementIndenter(UnwrappedLineParser *Parser, const FormatStyle &Style, unsigned &LineLevel)
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:305
ScopedLineState(UnwrappedLineParser &Parser, bool SwitchToPreprocessorLines=false)
std::list< UnwrappedLineNode > Tokens
The Tokens comprising this UnwrappedLine.
Should be used for JavaScript.
Definition: Format.h:1116
ContinuationIndenter * Indenter
const AnnotatedLine * Line
StateNode * Previous
static const size_t kInvalidIndex
bool InPPDirective
Whether this UnwrappedLine is part of a preprocessor directive.
BraceWrappingFlags BraceWrapping
Control of individual brace wrapping cases.
Definition: Format.h:749
UnwrappedLineParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, ArrayRef< FormatToken *> Tokens, UnwrappedLineConsumer &Callback)
bool AfterFunction
Wrap function definitions.
Definition: Format.h:630
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:119
SourceLocation getEnd() const
#define false
Definition: stdbool.h:33
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:296
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:177
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:621
SourceRange WhitespaceRange
The range of the whitespace immediately preceding the Token.
Definition: FormatToken.h:136
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:167
static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
virtual FormatToken * setPosition(unsigned Position)=0
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
Definition: Lexer.cpp:55
bool isSimpleTypeSpecifier() const
Determine whether the token is a simple-type-specifier.
Definition: FormatToken.cpp:41
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:46
static bool ShouldBreakBeforeBrace(const FormatStyle &Style, const FormatToken &InitialToken)
Indent only in inner namespaces (nested in other namespaces).
Definition: Format.h:1198
LanguageKind Language
Language, this format style is targeted at.
Definition: Format.h:1131
ast_type_traits::DynTypedNode Node
bool isNot(tok::TokenKind K) const
Definition: Token.h:96
/file This file defines classes for searching and analyzing source code clones.
bool Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside)...
Definition: FormatToken.h:294
Should be used for TableGen code.
Definition: Format.h:1123
tok::PPKeywordKind getPPKeywordID() const
Return the preprocessor keyword ID for this identifier.
bool AfterStruct
Wrap struct definitions.
Definition: Format.h:662
virtual unsigned getPosition()=0
bool startsSequence(A K1, Ts... Tokens) const
true if this token starts a sequence with the given tokens in order, following the Next pointers...
Definition: FormatToken.h:317
bool AfterControlStatement
Wrap control statements (if/for/while/switch/..).
Definition: Format.h:602
unsigned kind
All of the diagnostics that can be emitted by the frontend.
Definition: DiagnosticIDs.h:44
bool MustBreakBefore
Whether there must be a line break before this token.
Definition: FormatToken.h:161
bool AfterClass
Wrap class definitions.
Definition: Format.h:584
Should be used for Protocol Buffer messages in text format (https://developers.google.com/protocol-buffers/).
Definition: Format.h:1126
StringRef Text
Definition: Format.cpp:1302
bool ContinuesLineCommentSection
Does this line comment continue a line comment section?
Definition: FormatToken.h:273
bool isStringLiteral() const
Definition: FormatToken.h:328
SourceLocation getBegin() const
bool AfterNamespace
Wrap namespace definitions.
Definition: Format.h:646
bool HasUnescapedNewline
Whether there is at least one unescaped newline before the Token.
Definition: FormatToken.h:133
BraceBlockKind BlockKind
Contains the kind of block if this token is a brace.
Definition: FormatToken.h:173
void startToken()
Reset all flags to cleared.
Definition: Token.h:169
const FormatStyle & Style