clang  5.0.0svn
UnwrappedLineParser.cpp
Go to the documentation of this file.
1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "format-parser"
22 
23 namespace clang {
24 namespace format {
25 
27 public:
28  virtual ~FormatTokenSource() {}
29  virtual FormatToken *getNextToken() = 0;
30 
31  virtual unsigned getPosition() = 0;
32  virtual FormatToken *setPosition(unsigned Position) = 0;
33 };
34 
35 namespace {
36 
37 class ScopedDeclarationState {
38 public:
39  ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
40  bool MustBeDeclaration)
41  : Line(Line), Stack(Stack) {
42  Line.MustBeDeclaration = MustBeDeclaration;
43  Stack.push_back(MustBeDeclaration);
44  }
45  ~ScopedDeclarationState() {
46  Stack.pop_back();
47  if (!Stack.empty())
48  Line.MustBeDeclaration = Stack.back();
49  else
50  Line.MustBeDeclaration = true;
51  }
52 
53 private:
54  UnwrappedLine &Line;
55  std::vector<bool> &Stack;
56 };
57 
58 class ScopedMacroState : public FormatTokenSource {
59 public:
60  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
61  FormatToken *&ResetToken)
62  : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
63  PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
64  Token(nullptr) {
65  TokenSource = this;
66  Line.Level = 0;
67  Line.InPPDirective = true;
68  }
69 
70  ~ScopedMacroState() override {
71  TokenSource = PreviousTokenSource;
72  ResetToken = Token;
73  Line.InPPDirective = false;
74  Line.Level = PreviousLineLevel;
75  }
76 
77  FormatToken *getNextToken() override {
78  // The \c UnwrappedLineParser guards against this by never calling
79  // \c getNextToken() after it has encountered the first eof token.
80  assert(!eof());
81  Token = PreviousTokenSource->getNextToken();
82  if (eof())
83  return getFakeEOF();
84  return Token;
85  }
86 
87  unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
88 
89  FormatToken *setPosition(unsigned Position) override {
90  Token = PreviousTokenSource->setPosition(Position);
91  return Token;
92  }
93 
94 private:
95  bool eof() { return Token && Token->HasUnescapedNewline; }
96 
97  FormatToken *getFakeEOF() {
98  static bool EOFInitialized = false;
99  static FormatToken FormatTok;
100  if (!EOFInitialized) {
101  FormatTok.Tok.startToken();
102  FormatTok.Tok.setKind(tok::eof);
103  EOFInitialized = true;
104  }
105  return &FormatTok;
106  }
107 
108  UnwrappedLine &Line;
109  FormatTokenSource *&TokenSource;
110  FormatToken *&ResetToken;
111  unsigned PreviousLineLevel;
112  FormatTokenSource *PreviousTokenSource;
113 
115 };
116 
117 } // end anonymous namespace
118 
120 public:
122  bool SwitchToPreprocessorLines = false)
123  : Parser(Parser), OriginalLines(Parser.CurrentLines) {
124  if (SwitchToPreprocessorLines)
125  Parser.CurrentLines = &Parser.PreprocessorDirectives;
126  else if (!Parser.Line->Tokens.empty())
127  Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
128  PreBlockLine = std::move(Parser.Line);
129  Parser.Line = llvm::make_unique<UnwrappedLine>();
130  Parser.Line->Level = PreBlockLine->Level;
131  Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
132  }
133 
135  if (!Parser.Line->Tokens.empty()) {
136  Parser.addUnwrappedLine();
137  }
138  assert(Parser.Line->Tokens.empty());
139  Parser.Line = std::move(PreBlockLine);
140  if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
141  Parser.MustBreakBeforeNextToken = true;
142  Parser.CurrentLines = OriginalLines;
143  }
144 
145 private:
147 
148  std::unique_ptr<UnwrappedLine> PreBlockLine;
149  SmallVectorImpl<UnwrappedLine> *OriginalLines;
150 };
151 
153 public:
155  const FormatStyle &Style, unsigned &LineLevel)
156  : LineLevel(LineLevel), OldLineLevel(LineLevel) {
158  Parser->addUnwrappedLine();
159  if (Style.BraceWrapping.IndentBraces)
160  ++LineLevel;
161  }
162  ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
163 
164 private:
165  unsigned &LineLevel;
166  unsigned OldLineLevel;
167 };
168 
169 namespace {
170 
171 class IndexedTokenSource : public FormatTokenSource {
172 public:
173  IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
174  : Tokens(Tokens), Position(-1) {}
175 
176  FormatToken *getNextToken() override {
177  ++Position;
178  return Tokens[Position];
179  }
180 
181  unsigned getPosition() override {
182  assert(Position >= 0);
183  return Position;
184  }
185 
186  FormatToken *setPosition(unsigned P) override {
187  Position = P;
188  return Tokens[Position];
189  }
190 
191  void reset() { Position = -1; }
192 
193 private:
195  int Position;
196 };
197 
198 } // end anonymous namespace
199 
201  const AdditionalKeywords &Keywords,
203  UnwrappedLineConsumer &Callback)
204  : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
205  CurrentLines(&Lines), Style(Style), Keywords(Keywords),
206  CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
207  Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
208 
209 void UnwrappedLineParser::reset() {
210  PPBranchLevel = -1;
211  Line.reset(new UnwrappedLine);
212  CommentsBeforeNextToken.clear();
213  FormatTok = nullptr;
214  MustBreakBeforeNextToken = false;
215  PreprocessorDirectives.clear();
216  CurrentLines = &Lines;
217  DeclarationScopeStack.clear();
218  PPStack.clear();
219 }
220 
222  IndexedTokenSource TokenSource(AllTokens);
223  do {
224  DEBUG(llvm::dbgs() << "----\n");
225  reset();
226  Tokens = &TokenSource;
227  TokenSource.reset();
228 
229  readToken();
230  parseFile();
231  // Create line with eof token.
232  pushToken(FormatTok);
233  addUnwrappedLine();
234 
235  for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
236  E = Lines.end();
237  I != E; ++I) {
238  Callback.consumeUnwrappedLine(*I);
239  }
240  Callback.finishRun();
241  Lines.clear();
242  while (!PPLevelBranchIndex.empty() &&
243  PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
244  PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
245  PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
246  }
247  if (!PPLevelBranchIndex.empty()) {
248  ++PPLevelBranchIndex.back();
249  assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
250  assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
251  }
252  } while (!PPLevelBranchIndex.empty());
253 }
254 
255 void UnwrappedLineParser::parseFile() {
256  // The top-level context in a file always has declarations, except for pre-
257  // processor directives and JavaScript files.
258  bool MustBeDeclaration =
259  !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
260  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
261  MustBeDeclaration);
262  parseLevel(/*HasOpeningBrace=*/false);
263  // Make sure to format the remaining tokens.
264  flushComments(true);
265  addUnwrappedLine();
266 }
267 
268 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
269  bool SwitchLabelEncountered = false;
270  do {
271  tok::TokenKind kind = FormatTok->Tok.getKind();
272  if (FormatTok->Type == TT_MacroBlockBegin) {
273  kind = tok::l_brace;
274  } else if (FormatTok->Type == TT_MacroBlockEnd) {
275  kind = tok::r_brace;
276  }
277 
278  switch (kind) {
279  case tok::comment:
280  nextToken();
281  addUnwrappedLine();
282  break;
283  case tok::l_brace:
284  // FIXME: Add parameter whether this can happen - if this happens, we must
285  // be in a non-declaration context.
286  if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
287  continue;
288  parseBlock(/*MustBeDeclaration=*/false);
289  addUnwrappedLine();
290  break;
291  case tok::r_brace:
292  if (HasOpeningBrace)
293  return;
294  nextToken();
295  addUnwrappedLine();
296  break;
297  case tok::kw_default:
298  case tok::kw_case:
299  if (!SwitchLabelEncountered &&
300  (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
301  ++Line->Level;
302  SwitchLabelEncountered = true;
303  parseStructuralElement();
304  break;
305  default:
306  parseStructuralElement();
307  break;
308  }
309  } while (!eof());
310 }
311 
312 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
313  // We'll parse forward through the tokens until we hit
314  // a closing brace or eof - note that getNextToken() will
315  // parse macros, so this will magically work inside macro
316  // definitions, too.
317  unsigned StoredPosition = Tokens->getPosition();
318  FormatToken *Tok = FormatTok;
319  const FormatToken *PrevTok = getPreviousToken();
320  // Keep a stack of positions of lbrace tokens. We will
321  // update information about whether an lbrace starts a
322  // braced init list or a different block during the loop.
323  SmallVector<FormatToken *, 8> LBraceStack;
324  assert(Tok->Tok.is(tok::l_brace));
325  do {
326  // Get next non-comment token.
327  FormatToken *NextTok;
328  unsigned ReadTokens = 0;
329  do {
330  NextTok = Tokens->getNextToken();
331  ++ReadTokens;
332  } while (NextTok->is(tok::comment));
333 
334  switch (Tok->Tok.getKind()) {
335  case tok::l_brace:
336  if (Style.Language == FormatStyle::LK_JavaScript && PrevTok &&
337  PrevTok->is(tok::colon))
338  // A colon indicates this code is in a type, or a braced list following
339  // a label in an object literal ({a: {b: 1}}).
340  // The code below could be confused by semicolons between the individual
341  // members in a type member list, which would normally trigger BK_Block.
342  // In both cases, this must be parsed as an inline braced init.
343  Tok->BlockKind = BK_BracedInit;
344  else
345  Tok->BlockKind = BK_Unknown;
346  LBraceStack.push_back(Tok);
347  break;
348  case tok::r_brace:
349  if (LBraceStack.empty())
350  break;
351  if (LBraceStack.back()->BlockKind == BK_Unknown) {
352  bool ProbablyBracedList = false;
353  if (Style.Language == FormatStyle::LK_Proto) {
354  ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
355  } else {
356  // Using OriginalColumn to distinguish between ObjC methods and
357  // binary operators is a bit hacky.
358  bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
359  NextTok->OriginalColumn == 0;
360 
361  // If there is a comma, semicolon or right paren after the closing
362  // brace, we assume this is a braced initializer list. Note that
363  // regardless how we mark inner braces here, we will overwrite the
364  // BlockKind later if we parse a braced list (where all blocks
365  // inside are by default braced lists), or when we explicitly detect
366  // blocks (for example while parsing lambdas).
367  ProbablyBracedList =
369  NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
370  Keywords.kw_as)) ||
371  NextTok->isOneOf(tok::comma, tok::period, tok::colon,
372  tok::r_paren, tok::r_square, tok::l_brace,
373  tok::l_square, tok::l_paren, tok::ellipsis) ||
374  (NextTok->is(tok::identifier) &&
375  !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
376  (NextTok->is(tok::semi) &&
377  (!ExpectClassBody || LBraceStack.size() != 1)) ||
378  (NextTok->isBinaryOperator() && !NextIsObjCMethod);
379  }
380  if (ProbablyBracedList) {
381  Tok->BlockKind = BK_BracedInit;
382  LBraceStack.back()->BlockKind = BK_BracedInit;
383  } else {
384  Tok->BlockKind = BK_Block;
385  LBraceStack.back()->BlockKind = BK_Block;
386  }
387  }
388  LBraceStack.pop_back();
389  break;
390  case tok::at:
391  case tok::semi:
392  case tok::kw_if:
393  case tok::kw_while:
394  case tok::kw_for:
395  case tok::kw_switch:
396  case tok::kw_try:
397  case tok::kw___try:
398  if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
399  LBraceStack.back()->BlockKind = BK_Block;
400  break;
401  default:
402  break;
403  }
404  PrevTok = Tok;
405  Tok = NextTok;
406  } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
407 
408  // Assume other blocks for all unclosed opening braces.
409  for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
410  if (LBraceStack[i]->BlockKind == BK_Unknown)
411  LBraceStack[i]->BlockKind = BK_Block;
412  }
413 
414  FormatTok = Tokens->setPosition(StoredPosition);
415 }
416 
417 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
418  bool MunchSemi) {
419  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
420  "'{' or macro block token expected");
421  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
422  FormatTok->BlockKind = BK_Block;
423 
424  unsigned InitialLevel = Line->Level;
425  nextToken();
426 
427  if (MacroBlock && FormatTok->is(tok::l_paren))
428  parseParens();
429 
430  addUnwrappedLine();
431  size_t OpeningLineIndex =
432  Lines.empty() ? (UnwrappedLine::kInvalidIndex) : (Lines.size() - 1);
433 
434  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
435  MustBeDeclaration);
436  if (AddLevel)
437  ++Line->Level;
438  parseLevel(/*HasOpeningBrace=*/true);
439 
440  if (eof())
441  return;
442 
443  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
444  : !FormatTok->is(tok::r_brace)) {
445  Line->Level = InitialLevel;
446  FormatTok->BlockKind = BK_Block;
447  return;
448  }
449 
450  nextToken(); // Munch the closing brace.
451 
452  if (MacroBlock && FormatTok->is(tok::l_paren))
453  parseParens();
454 
455  if (MunchSemi && FormatTok->Tok.is(tok::semi))
456  nextToken();
457  Line->Level = InitialLevel;
458  Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
459 }
460 
461 static bool isGoogScope(const UnwrappedLine &Line) {
462  // FIXME: Closure-library specific stuff should not be hard-coded but be
463  // configurable.
464  if (Line.Tokens.size() < 4)
465  return false;
466  auto I = Line.Tokens.begin();
467  if (I->Tok->TokenText != "goog")
468  return false;
469  ++I;
470  if (I->Tok->isNot(tok::period))
471  return false;
472  ++I;
473  if (I->Tok->TokenText != "scope")
474  return false;
475  ++I;
476  return I->Tok->is(tok::l_paren);
477 }
478 
479 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
480  const FormatToken &InitialToken) {
481  if (InitialToken.is(tok::kw_namespace))
482  return Style.BraceWrapping.AfterNamespace;
483  if (InitialToken.is(tok::kw_class))
484  return Style.BraceWrapping.AfterClass;
485  if (InitialToken.is(tok::kw_union))
486  return Style.BraceWrapping.AfterUnion;
487  if (InitialToken.is(tok::kw_struct))
488  return Style.BraceWrapping.AfterStruct;
489  return false;
490 }
491 
492 void UnwrappedLineParser::parseChildBlock() {
493  FormatTok->BlockKind = BK_Block;
494  nextToken();
495  {
496  bool GoogScope =
498  ScopedLineState LineState(*this);
499  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
500  /*MustBeDeclaration=*/false);
501  Line->Level += GoogScope ? 0 : 1;
502  parseLevel(/*HasOpeningBrace=*/true);
503  flushComments(isOnNewLine(*FormatTok));
504  Line->Level -= GoogScope ? 0 : 1;
505  }
506  nextToken();
507 }
508 
509 void UnwrappedLineParser::parsePPDirective() {
510  assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
511  ScopedMacroState MacroState(*Line, Tokens, FormatTok);
512  nextToken();
513 
514  if (!FormatTok->Tok.getIdentifierInfo()) {
515  parsePPUnknown();
516  return;
517  }
518 
519  switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
520  case tok::pp_define:
521  parsePPDefine();
522  return;
523  case tok::pp_if:
524  parsePPIf(/*IfDef=*/false);
525  break;
526  case tok::pp_ifdef:
527  case tok::pp_ifndef:
528  parsePPIf(/*IfDef=*/true);
529  break;
530  case tok::pp_else:
531  parsePPElse();
532  break;
533  case tok::pp_elif:
534  parsePPElIf();
535  break;
536  case tok::pp_endif:
537  parsePPEndIf();
538  break;
539  default:
540  parsePPUnknown();
541  break;
542  }
543 }
544 
545 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
546  if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))
547  PPStack.push_back(PP_Unreachable);
548  else
549  PPStack.push_back(PP_Conditional);
550 }
551 
552 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
553  ++PPBranchLevel;
554  assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
555  if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
556  PPLevelBranchIndex.push_back(0);
557  PPLevelBranchCount.push_back(0);
558  }
559  PPChainBranchIndex.push(0);
560  bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
561  conditionalCompilationCondition(Unreachable || Skip);
562 }
563 
564 void UnwrappedLineParser::conditionalCompilationAlternative() {
565  if (!PPStack.empty())
566  PPStack.pop_back();
567  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
568  if (!PPChainBranchIndex.empty())
569  ++PPChainBranchIndex.top();
570  conditionalCompilationCondition(
571  PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
572  PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
573 }
574 
575 void UnwrappedLineParser::conditionalCompilationEnd() {
576  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
577  if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
578  if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
579  PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
580  }
581  }
582  // Guard against #endif's without #if.
583  if (PPBranchLevel > 0)
584  --PPBranchLevel;
585  if (!PPChainBranchIndex.empty())
586  PPChainBranchIndex.pop();
587  if (!PPStack.empty())
588  PPStack.pop_back();
589 }
590 
591 void UnwrappedLineParser::parsePPIf(bool IfDef) {
592  bool IfNDef = FormatTok->is(tok::pp_ifndef);
593  nextToken();
594  bool Unreachable = false;
595  if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
596  Unreachable = true;
597  if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
598  Unreachable = true;
599  conditionalCompilationStart(Unreachable);
600  parsePPUnknown();
601 }
602 
603 void UnwrappedLineParser::parsePPElse() {
604  conditionalCompilationAlternative();
605  parsePPUnknown();
606 }
607 
608 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
609 
610 void UnwrappedLineParser::parsePPEndIf() {
611  conditionalCompilationEnd();
612  parsePPUnknown();
613 }
614 
615 void UnwrappedLineParser::parsePPDefine() {
616  nextToken();
617 
618  if (FormatTok->Tok.getKind() != tok::identifier) {
619  parsePPUnknown();
620  return;
621  }
622  nextToken();
623  if (FormatTok->Tok.getKind() == tok::l_paren &&
624  FormatTok->WhitespaceRange.getBegin() ==
625  FormatTok->WhitespaceRange.getEnd()) {
626  parseParens();
627  }
628  addUnwrappedLine();
629  Line->Level = 1;
630 
631  // Errors during a preprocessor directive can only affect the layout of the
632  // preprocessor directive, and thus we ignore them. An alternative approach
633  // would be to use the same approach we use on the file level (no
634  // re-indentation if there was a structural error) within the macro
635  // definition.
636  parseFile();
637 }
638 
639 void UnwrappedLineParser::parsePPUnknown() {
640  do {
641  nextToken();
642  } while (!eof());
643  addUnwrappedLine();
644 }
645 
646 // Here we blacklist certain tokens that are not usually the first token in an
647 // unwrapped line. This is used in attempt to distinguish macro calls without
648 // trailing semicolons from other constructs split to several lines.
649 static bool tokenCanStartNewLine(const clang::Token &Tok) {
650  // Semicolon can be a null-statement, l_square can be a start of a macro or
651  // a C++11 attribute, but this doesn't seem to be common.
652  return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
653  Tok.isNot(tok::l_square) &&
654  // Tokens that can only be used as binary operators and a part of
655  // overloaded operator names.
656  Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
657  Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
658  Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
659  Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
660  Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
661  Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
662  Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
663  Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
664  Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
665  Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
666  Tok.isNot(tok::lesslessequal) &&
667  // Colon is used in labels, base class lists, initializer lists,
668  // range-based for loops, ternary operator, but should never be the
669  // first token in an unwrapped line.
670  Tok.isNot(tok::colon) &&
671  // 'noexcept' is a trailing annotation.
672  Tok.isNot(tok::kw_noexcept);
673 }
674 
675 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
676  const FormatToken *FormatTok) {
677  // FIXME: This returns true for C/C++ keywords like 'struct'.
678  return FormatTok->is(tok::identifier) &&
679  (FormatTok->Tok.getIdentifierInfo() == nullptr ||
680  !FormatTok->isOneOf(
681  Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
682  Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
683  Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
684  Keywords.kw_let, Keywords.kw_var, tok::kw_const,
685  Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
686  Keywords.kw_instanceof, Keywords.kw_interface,
687  Keywords.kw_throws));
688 }
689 
690 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
691  const FormatToken *FormatTok) {
692  return FormatTok->Tok.isLiteral() ||
693  FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
694  mustBeJSIdent(Keywords, FormatTok);
695 }
696 
697 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
698 // when encountered after a value (see mustBeJSIdentOrValue).
699 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
700  const FormatToken *FormatTok) {
701  return FormatTok->isOneOf(
702  tok::kw_return, Keywords.kw_yield,
703  // conditionals
704  tok::kw_if, tok::kw_else,
705  // loops
706  tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
707  // switch/case
708  tok::kw_switch, tok::kw_case,
709  // exceptions
710  tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
711  // declaration
712  tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
713  Keywords.kw_async, Keywords.kw_function,
714  // import/export
715  Keywords.kw_import, tok::kw_export);
716 }
717 
718 // readTokenWithJavaScriptASI reads the next token and terminates the current
719 // line if JavaScript Automatic Semicolon Insertion must
720 // happen between the current token and the next token.
721 //
722 // This method is conservative - it cannot cover all edge cases of JavaScript,
723 // but only aims to correctly handle certain well known cases. It *must not*
724 // return true in speculative cases.
725 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
726  FormatToken *Previous = FormatTok;
727  readToken();
728  FormatToken *Next = FormatTok;
729 
730  bool IsOnSameLine =
731  CommentsBeforeNextToken.empty()
732  ? Next->NewlinesBefore == 0
733  : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
734  if (IsOnSameLine)
735  return;
736 
737  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
738  bool PreviousStartsTemplateExpr =
739  Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
740  if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) {
741  // If the token before the previous one is an '@', the previous token is an
742  // annotation and can precede another identifier/value.
743  const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok;
744  if (PrePrevious->is(tok::at))
745  return;
746  }
747  if (Next->is(tok::exclaim) && PreviousMustBeValue)
748  return addUnwrappedLine();
749  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
750  bool NextEndsTemplateExpr =
751  Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
752  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
753  (PreviousMustBeValue ||
754  Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
755  tok::minusminus)))
756  return addUnwrappedLine();
757  if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next))
758  return addUnwrappedLine();
759 }
760 
761 void UnwrappedLineParser::parseStructuralElement() {
762  assert(!FormatTok->is(tok::l_brace));
763  if (Style.Language == FormatStyle::LK_TableGen &&
764  FormatTok->is(tok::pp_include)) {
765  nextToken();
766  if (FormatTok->is(tok::string_literal))
767  nextToken();
768  addUnwrappedLine();
769  return;
770  }
771  switch (FormatTok->Tok.getKind()) {
772  case tok::at:
773  nextToken();
774  if (FormatTok->Tok.is(tok::l_brace)) {
775  parseBracedList();
776  break;
777  }
778  switch (FormatTok->Tok.getObjCKeywordID()) {
779  case tok::objc_public:
780  case tok::objc_protected:
781  case tok::objc_package:
782  case tok::objc_private:
783  return parseAccessSpecifier();
784  case tok::objc_interface:
785  case tok::objc_implementation:
786  return parseObjCInterfaceOrImplementation();
787  case tok::objc_protocol:
788  return parseObjCProtocol();
789  case tok::objc_end:
790  return; // Handled by the caller.
791  case tok::objc_optional:
792  case tok::objc_required:
793  nextToken();
794  addUnwrappedLine();
795  return;
796  case tok::objc_autoreleasepool:
797  nextToken();
798  if (FormatTok->Tok.is(tok::l_brace)) {
800  addUnwrappedLine();
801  parseBlock(/*MustBeDeclaration=*/false);
802  }
803  addUnwrappedLine();
804  return;
805  case tok::objc_try:
806  // This branch isn't strictly necessary (the kw_try case below would
807  // do this too after the tok::at is parsed above). But be explicit.
808  parseTryCatch();
809  return;
810  default:
811  break;
812  }
813  break;
814  case tok::kw_asm:
815  nextToken();
816  if (FormatTok->is(tok::l_brace)) {
817  FormatTok->Type = TT_InlineASMBrace;
818  nextToken();
819  while (FormatTok && FormatTok->isNot(tok::eof)) {
820  if (FormatTok->is(tok::r_brace)) {
821  FormatTok->Type = TT_InlineASMBrace;
822  nextToken();
823  addUnwrappedLine();
824  break;
825  }
826  FormatTok->Finalized = true;
827  nextToken();
828  }
829  }
830  break;
831  case tok::kw_namespace:
832  parseNamespace();
833  return;
834  case tok::kw_inline:
835  nextToken();
836  if (FormatTok->Tok.is(tok::kw_namespace)) {
837  parseNamespace();
838  return;
839  }
840  break;
841  case tok::kw_public:
842  case tok::kw_protected:
843  case tok::kw_private:
844  if (Style.Language == FormatStyle::LK_Java ||
846  nextToken();
847  else
848  parseAccessSpecifier();
849  return;
850  case tok::kw_if:
851  parseIfThenElse();
852  return;
853  case tok::kw_for:
854  case tok::kw_while:
855  parseForOrWhileLoop();
856  return;
857  case tok::kw_do:
858  parseDoWhile();
859  return;
860  case tok::kw_switch:
861  parseSwitch();
862  return;
863  case tok::kw_default:
864  nextToken();
865  parseLabel();
866  return;
867  case tok::kw_case:
868  parseCaseLabel();
869  return;
870  case tok::kw_try:
871  case tok::kw___try:
872  parseTryCatch();
873  return;
874  case tok::kw_extern:
875  nextToken();
876  if (FormatTok->Tok.is(tok::string_literal)) {
877  nextToken();
878  if (FormatTok->Tok.is(tok::l_brace)) {
879  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
880  addUnwrappedLine();
881  return;
882  }
883  }
884  break;
885  case tok::kw_export:
886  if (Style.Language == FormatStyle::LK_JavaScript) {
887  parseJavaScriptEs6ImportExport();
888  return;
889  }
890  break;
891  case tok::identifier:
892  if (FormatTok->is(TT_ForEachMacro)) {
893  parseForOrWhileLoop();
894  return;
895  }
896  if (FormatTok->is(TT_MacroBlockBegin)) {
897  parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
898  /*MunchSemi=*/false);
899  return;
900  }
901  if (FormatTok->is(Keywords.kw_import)) {
902  if (Style.Language == FormatStyle::LK_JavaScript) {
903  parseJavaScriptEs6ImportExport();
904  return;
905  }
906  if (Style.Language == FormatStyle::LK_Proto) {
907  nextToken();
908  if (FormatTok->is(tok::kw_public))
909  nextToken();
910  if (!FormatTok->is(tok::string_literal))
911  return;
912  nextToken();
913  if (FormatTok->is(tok::semi))
914  nextToken();
915  addUnwrappedLine();
916  return;
917  }
918  }
919  if (FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
920  Keywords.kw_slots, Keywords.kw_qslots)) {
921  nextToken();
922  if (FormatTok->is(tok::colon)) {
923  nextToken();
924  addUnwrappedLine();
925  return;
926  }
927  }
928  // In all other cases, parse the declaration.
929  break;
930  default:
931  break;
932  }
933  do {
934  const FormatToken *Previous = getPreviousToken();
935  switch (FormatTok->Tok.getKind()) {
936  case tok::at:
937  nextToken();
938  if (FormatTok->Tok.is(tok::l_brace))
939  parseBracedList();
940  break;
941  case tok::kw_enum:
942  // Ignore if this is part of "template <enum ...".
943  if (Previous && Previous->is(tok::less)) {
944  nextToken();
945  break;
946  }
947 
948  // parseEnum falls through and does not yet add an unwrapped line as an
949  // enum definition can start a structural element.
950  if (!parseEnum())
951  break;
952  // This only applies for C++.
953  if (!Style.IsCpp()) {
954  addUnwrappedLine();
955  return;
956  }
957  break;
958  case tok::kw_typedef:
959  nextToken();
960  if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
961  Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
962  parseEnum();
963  break;
964  case tok::kw_struct:
965  case tok::kw_union:
966  case tok::kw_class:
967  // parseRecord falls through and does not yet add an unwrapped line as a
968  // record declaration or definition can start a structural element.
969  parseRecord();
970  // This does not apply for Java and JavaScript.
971  if (Style.Language == FormatStyle::LK_Java ||
973  if (FormatTok->is(tok::semi))
974  nextToken();
975  addUnwrappedLine();
976  return;
977  }
978  break;
979  case tok::period:
980  nextToken();
981  // In Java, classes have an implicit static member "class".
982  if (Style.Language == FormatStyle::LK_Java && FormatTok &&
983  FormatTok->is(tok::kw_class))
984  nextToken();
985  if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
986  FormatTok->Tok.getIdentifierInfo())
987  // JavaScript only has pseudo keywords, all keywords are allowed to
988  // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
989  nextToken();
990  break;
991  case tok::semi:
992  nextToken();
993  addUnwrappedLine();
994  return;
995  case tok::r_brace:
996  addUnwrappedLine();
997  return;
998  case tok::l_paren:
999  parseParens();
1000  break;
1001  case tok::kw_operator:
1002  nextToken();
1003  if (FormatTok->isBinaryOperator())
1004  nextToken();
1005  break;
1006  case tok::caret:
1007  nextToken();
1008  if (FormatTok->Tok.isAnyIdentifier() ||
1009  FormatTok->isSimpleTypeSpecifier())
1010  nextToken();
1011  if (FormatTok->is(tok::l_paren))
1012  parseParens();
1013  if (FormatTok->is(tok::l_brace))
1014  parseChildBlock();
1015  break;
1016  case tok::l_brace:
1017  if (!tryToParseBracedList()) {
1018  // A block outside of parentheses must be the last part of a
1019  // structural element.
1020  // FIXME: Figure out cases where this is not true, and add projections
1021  // for them (the one we know is missing are lambdas).
1022  if (Style.BraceWrapping.AfterFunction)
1023  addUnwrappedLine();
1024  FormatTok->Type = TT_FunctionLBrace;
1025  parseBlock(/*MustBeDeclaration=*/false);
1026  addUnwrappedLine();
1027  return;
1028  }
1029  // Otherwise this was a braced init list, and the structural
1030  // element continues.
1031  break;
1032  case tok::kw_try:
1033  // We arrive here when parsing function-try blocks.
1034  parseTryCatch();
1035  return;
1036  case tok::identifier: {
1037  if (FormatTok->is(TT_MacroBlockEnd)) {
1038  addUnwrappedLine();
1039  return;
1040  }
1041 
1042  // Parse function literal unless 'function' is the first token in a line
1043  // in which case this should be treated as a free-standing function.
1044  if (Style.Language == FormatStyle::LK_JavaScript &&
1045  (FormatTok->is(Keywords.kw_function) ||
1046  FormatTok->startsSequence(Keywords.kw_async,
1047  Keywords.kw_function)) &&
1048  Line->Tokens.size() > 0) {
1049  tryToParseJSFunction();
1050  break;
1051  }
1052  if ((Style.Language == FormatStyle::LK_JavaScript ||
1053  Style.Language == FormatStyle::LK_Java) &&
1054  FormatTok->is(Keywords.kw_interface)) {
1055  if (Style.Language == FormatStyle::LK_JavaScript) {
1056  // In JavaScript/TypeScript, "interface" can be used as a standalone
1057  // identifier, e.g. in `var interface = 1;`. If "interface" is
1058  // followed by another identifier, it is very like to be an actual
1059  // interface declaration.
1060  unsigned StoredPosition = Tokens->getPosition();
1061  FormatToken *Next = Tokens->getNextToken();
1062  FormatTok = Tokens->setPosition(StoredPosition);
1063  if (Next && !mustBeJSIdent(Keywords, Next)) {
1064  nextToken();
1065  break;
1066  }
1067  }
1068  parseRecord();
1069  addUnwrappedLine();
1070  return;
1071  }
1072 
1073  // See if the following token should start a new unwrapped line.
1074  StringRef Text = FormatTok->TokenText;
1075  nextToken();
1076  if (Line->Tokens.size() == 1 &&
1077  // JS doesn't have macros, and within classes colons indicate fields,
1078  // not labels.
1080  if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1081  Line->Tokens.begin()->Tok->MustBreakBefore = true;
1082  parseLabel();
1083  return;
1084  }
1085  // Recognize function-like macro usages without trailing semicolon as
1086  // well as free-standing macros like Q_OBJECT.
1087  bool FunctionLike = FormatTok->is(tok::l_paren);
1088  if (FunctionLike)
1089  parseParens();
1090 
1091  bool FollowedByNewline =
1092  CommentsBeforeNextToken.empty()
1093  ? FormatTok->NewlinesBefore > 0
1094  : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1095 
1096  if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1097  tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
1098  addUnwrappedLine();
1099  return;
1100  }
1101  }
1102  break;
1103  }
1104  case tok::equal:
1105  // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1106  // TT_JsFatArrow. The always start an expression or a child block if
1107  // followed by a curly.
1108  if (FormatTok->is(TT_JsFatArrow)) {
1109  nextToken();
1110  if (FormatTok->is(tok::l_brace))
1111  parseChildBlock();
1112  break;
1113  }
1114 
1115  nextToken();
1116  if (FormatTok->Tok.is(tok::l_brace)) {
1117  parseBracedList();
1118  }
1119  break;
1120  case tok::l_square:
1121  parseSquare();
1122  break;
1123  case tok::kw_new:
1124  parseNew();
1125  break;
1126  default:
1127  nextToken();
1128  break;
1129  }
1130  } while (!eof());
1131 }
1132 
1133 bool UnwrappedLineParser::tryToParseLambda() {
1134  if (!Style.IsCpp()) {
1135  nextToken();
1136  return false;
1137  }
1138  const FormatToken* Previous = getPreviousToken();
1139  if (Previous &&
1140  (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1141  tok::kw_delete) ||
1142  Previous->closesScope() || Previous->isSimpleTypeSpecifier())) {
1143  nextToken();
1144  return false;
1145  }
1146  assert(FormatTok->is(tok::l_square));
1147  FormatToken &LSquare = *FormatTok;
1148  if (!tryToParseLambdaIntroducer())
1149  return false;
1150 
1151  while (FormatTok->isNot(tok::l_brace)) {
1152  if (FormatTok->isSimpleTypeSpecifier()) {
1153  nextToken();
1154  continue;
1155  }
1156  switch (FormatTok->Tok.getKind()) {
1157  case tok::l_brace:
1158  break;
1159  case tok::l_paren:
1160  parseParens();
1161  break;
1162  case tok::amp:
1163  case tok::star:
1164  case tok::kw_const:
1165  case tok::comma:
1166  case tok::less:
1167  case tok::greater:
1168  case tok::identifier:
1169  case tok::numeric_constant:
1170  case tok::coloncolon:
1171  case tok::kw_mutable:
1172  nextToken();
1173  break;
1174  case tok::arrow:
1175  FormatTok->Type = TT_LambdaArrow;
1176  nextToken();
1177  break;
1178  default:
1179  return true;
1180  }
1181  }
1182  LSquare.Type = TT_LambdaLSquare;
1183  parseChildBlock();
1184  return true;
1185 }
1186 
1187 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1188  nextToken();
1189  if (FormatTok->is(tok::equal)) {
1190  nextToken();
1191  if (FormatTok->is(tok::r_square)) {
1192  nextToken();
1193  return true;
1194  }
1195  if (FormatTok->isNot(tok::comma))
1196  return false;
1197  nextToken();
1198  } else if (FormatTok->is(tok::amp)) {
1199  nextToken();
1200  if (FormatTok->is(tok::r_square)) {
1201  nextToken();
1202  return true;
1203  }
1204  if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
1205  return false;
1206  }
1207  if (FormatTok->is(tok::comma))
1208  nextToken();
1209  } else if (FormatTok->is(tok::r_square)) {
1210  nextToken();
1211  return true;
1212  }
1213  do {
1214  if (FormatTok->is(tok::amp))
1215  nextToken();
1216  if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
1217  return false;
1218  nextToken();
1219  if (FormatTok->is(tok::ellipsis))
1220  nextToken();
1221  if (FormatTok->is(tok::comma)) {
1222  nextToken();
1223  } else if (FormatTok->is(tok::r_square)) {
1224  nextToken();
1225  return true;
1226  } else {
1227  return false;
1228  }
1229  } while (!eof());
1230  return false;
1231 }
1232 
1233 void UnwrappedLineParser::tryToParseJSFunction() {
1234  assert(FormatTok->is(Keywords.kw_function) ||
1235  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1236  if (FormatTok->is(Keywords.kw_async))
1237  nextToken();
1238  // Consume "function".
1239  nextToken();
1240 
1241  // Consume * (generator function). Treat it like C++'s overloaded operators.
1242  if (FormatTok->is(tok::star)) {
1243  FormatTok->Type = TT_OverloadedOperator;
1244  nextToken();
1245  }
1246 
1247  // Consume function name.
1248  if (FormatTok->is(tok::identifier))
1249  nextToken();
1250 
1251  if (FormatTok->isNot(tok::l_paren))
1252  return;
1253 
1254  // Parse formal parameter list.
1255  parseParens();
1256 
1257  if (FormatTok->is(tok::colon)) {
1258  // Parse a type definition.
1259  nextToken();
1260 
1261  // Eat the type declaration. For braced inline object types, balance braces,
1262  // otherwise just parse until finding an l_brace for the function body.
1263  if (FormatTok->is(tok::l_brace))
1264  tryToParseBracedList();
1265  else
1266  while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1267  nextToken();
1268  }
1269 
1270  if (FormatTok->is(tok::semi))
1271  return;
1272 
1273  parseChildBlock();
1274 }
1275 
1276 bool UnwrappedLineParser::tryToParseBracedList() {
1277  if (FormatTok->BlockKind == BK_Unknown)
1278  calculateBraceTypes();
1279  assert(FormatTok->BlockKind != BK_Unknown);
1280  if (FormatTok->BlockKind == BK_Block)
1281  return false;
1282  parseBracedList();
1283  return true;
1284 }
1285 
1286 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
1287  bool HasError = false;
1288  nextToken();
1289 
1290  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1291  // replace this by using parseAssigmentExpression() inside.
1292  do {
1293  if (Style.Language == FormatStyle::LK_JavaScript) {
1294  if (FormatTok->is(Keywords.kw_function) ||
1295  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1296  tryToParseJSFunction();
1297  continue;
1298  }
1299  if (FormatTok->is(TT_JsFatArrow)) {
1300  nextToken();
1301  // Fat arrows can be followed by simple expressions or by child blocks
1302  // in curly braces.
1303  if (FormatTok->is(tok::l_brace)) {
1304  parseChildBlock();
1305  continue;
1306  }
1307  }
1308  if (FormatTok->is(tok::l_brace)) {
1309  // Could be a method inside of a braced list `{a() { return 1; }}`.
1310  if (tryToParseBracedList())
1311  continue;
1312  parseChildBlock();
1313  }
1314  }
1315  switch (FormatTok->Tok.getKind()) {
1316  case tok::caret:
1317  nextToken();
1318  if (FormatTok->is(tok::l_brace)) {
1319  parseChildBlock();
1320  }
1321  break;
1322  case tok::l_square:
1323  tryToParseLambda();
1324  break;
1325  case tok::l_paren:
1326  parseParens();
1327  // JavaScript can just have free standing methods and getters/setters in
1328  // object literals. Detect them by a "{" following ")".
1329  if (Style.Language == FormatStyle::LK_JavaScript) {
1330  if (FormatTok->is(tok::l_brace))
1331  parseChildBlock();
1332  break;
1333  }
1334  break;
1335  case tok::l_brace:
1336  // Assume there are no blocks inside a braced init list apart
1337  // from the ones we explicitly parse out (like lambdas).
1338  FormatTok->BlockKind = BK_BracedInit;
1339  parseBracedList();
1340  break;
1341  case tok::r_brace:
1342  nextToken();
1343  return !HasError;
1344  case tok::semi:
1345  // JavaScript (or more precisely TypeScript) can have semicolons in braced
1346  // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1347  // used for error recovery if we have otherwise determined that this is
1348  // a braced list.
1349  if (Style.Language == FormatStyle::LK_JavaScript) {
1350  nextToken();
1351  break;
1352  }
1353  HasError = true;
1354  if (!ContinueOnSemicolons)
1355  return !HasError;
1356  nextToken();
1357  break;
1358  case tok::comma:
1359  nextToken();
1360  break;
1361  default:
1362  nextToken();
1363  break;
1364  }
1365  } while (!eof());
1366  return false;
1367 }
1368 
1369 void UnwrappedLineParser::parseParens() {
1370  assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1371  nextToken();
1372  do {
1373  switch (FormatTok->Tok.getKind()) {
1374  case tok::l_paren:
1375  parseParens();
1376  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1377  parseChildBlock();
1378  break;
1379  case tok::r_paren:
1380  nextToken();
1381  return;
1382  case tok::r_brace:
1383  // A "}" inside parenthesis is an error if there wasn't a matching "{".
1384  return;
1385  case tok::l_square:
1386  tryToParseLambda();
1387  break;
1388  case tok::l_brace:
1389  if (!tryToParseBracedList())
1390  parseChildBlock();
1391  break;
1392  case tok::at:
1393  nextToken();
1394  if (FormatTok->Tok.is(tok::l_brace))
1395  parseBracedList();
1396  break;
1397  case tok::kw_class:
1398  if (Style.Language == FormatStyle::LK_JavaScript)
1399  parseRecord(/*ParseAsExpr=*/true);
1400  else
1401  nextToken();
1402  break;
1403  case tok::identifier:
1404  if (Style.Language == FormatStyle::LK_JavaScript &&
1405  (FormatTok->is(Keywords.kw_function) ||
1406  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1407  tryToParseJSFunction();
1408  else
1409  nextToken();
1410  break;
1411  default:
1412  nextToken();
1413  break;
1414  }
1415  } while (!eof());
1416 }
1417 
1418 void UnwrappedLineParser::parseSquare() {
1419  assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1420  if (tryToParseLambda())
1421  return;
1422  do {
1423  switch (FormatTok->Tok.getKind()) {
1424  case tok::l_paren:
1425  parseParens();
1426  break;
1427  case tok::r_square:
1428  nextToken();
1429  return;
1430  case tok::r_brace:
1431  // A "}" inside parenthesis is an error if there wasn't a matching "{".
1432  return;
1433  case tok::l_square:
1434  parseSquare();
1435  break;
1436  case tok::l_brace: {
1437  if (!tryToParseBracedList())
1438  parseChildBlock();
1439  break;
1440  }
1441  case tok::at:
1442  nextToken();
1443  if (FormatTok->Tok.is(tok::l_brace))
1444  parseBracedList();
1445  break;
1446  default:
1447  nextToken();
1448  break;
1449  }
1450  } while (!eof());
1451 }
1452 
1453 void UnwrappedLineParser::parseIfThenElse() {
1454  assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1455  nextToken();
1456  if (FormatTok->Tok.is(tok::l_paren))
1457  parseParens();
1458  bool NeedsUnwrappedLine = false;
1459  if (FormatTok->Tok.is(tok::l_brace)) {
1460  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1461  parseBlock(/*MustBeDeclaration=*/false);
1462  if (Style.BraceWrapping.BeforeElse)
1463  addUnwrappedLine();
1464  else
1465  NeedsUnwrappedLine = true;
1466  } else {
1467  addUnwrappedLine();
1468  ++Line->Level;
1469  parseStructuralElement();
1470  --Line->Level;
1471  }
1472  if (FormatTok->Tok.is(tok::kw_else)) {
1473  nextToken();
1474  if (FormatTok->Tok.is(tok::l_brace)) {
1475  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1476  parseBlock(/*MustBeDeclaration=*/false);
1477  addUnwrappedLine();
1478  } else if (FormatTok->Tok.is(tok::kw_if)) {
1479  parseIfThenElse();
1480  } else {
1481  addUnwrappedLine();
1482  ++Line->Level;
1483  parseStructuralElement();
1484  if (FormatTok->is(tok::eof))
1485  addUnwrappedLine();
1486  --Line->Level;
1487  }
1488  } else if (NeedsUnwrappedLine) {
1489  addUnwrappedLine();
1490  }
1491 }
1492 
1493 void UnwrappedLineParser::parseTryCatch() {
1494  assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1495  nextToken();
1496  bool NeedsUnwrappedLine = false;
1497  if (FormatTok->is(tok::colon)) {
1498  // We are in a function try block, what comes is an initializer list.
1499  nextToken();
1500  while (FormatTok->is(tok::identifier)) {
1501  nextToken();
1502  if (FormatTok->is(tok::l_paren))
1503  parseParens();
1504  if (FormatTok->is(tok::comma))
1505  nextToken();
1506  }
1507  }
1508  // Parse try with resource.
1509  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1510  parseParens();
1511  }
1512  if (FormatTok->is(tok::l_brace)) {
1513  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1514  parseBlock(/*MustBeDeclaration=*/false);
1515  if (Style.BraceWrapping.BeforeCatch) {
1516  addUnwrappedLine();
1517  } else {
1518  NeedsUnwrappedLine = true;
1519  }
1520  } else if (!FormatTok->is(tok::kw_catch)) {
1521  // The C++ standard requires a compound-statement after a try.
1522  // If there's none, we try to assume there's a structuralElement
1523  // and try to continue.
1524  addUnwrappedLine();
1525  ++Line->Level;
1526  parseStructuralElement();
1527  --Line->Level;
1528  }
1529  while (1) {
1530  if (FormatTok->is(tok::at))
1531  nextToken();
1532  if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1533  tok::kw___finally) ||
1534  ((Style.Language == FormatStyle::LK_Java ||
1536  FormatTok->is(Keywords.kw_finally)) ||
1537  (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1538  FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1539  break;
1540  nextToken();
1541  while (FormatTok->isNot(tok::l_brace)) {
1542  if (FormatTok->is(tok::l_paren)) {
1543  parseParens();
1544  continue;
1545  }
1546  if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1547  return;
1548  nextToken();
1549  }
1550  NeedsUnwrappedLine = false;
1551  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1552  parseBlock(/*MustBeDeclaration=*/false);
1553  if (Style.BraceWrapping.BeforeCatch)
1554  addUnwrappedLine();
1555  else
1556  NeedsUnwrappedLine = true;
1557  }
1558  if (NeedsUnwrappedLine)
1559  addUnwrappedLine();
1560 }
1561 
1562 void UnwrappedLineParser::parseNamespace() {
1563  assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1564 
1565  const FormatToken &InitialToken = *FormatTok;
1566  nextToken();
1567  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1568  nextToken();
1569  if (FormatTok->Tok.is(tok::l_brace)) {
1570  if (ShouldBreakBeforeBrace(Style, InitialToken))
1571  addUnwrappedLine();
1572 
1573  bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1575  DeclarationScopeStack.size() > 1);
1576  parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1577  // Munch the semicolon after a namespace. This is more common than one would
1578  // think. Puttin the semicolon into its own line is very ugly.
1579  if (FormatTok->Tok.is(tok::semi))
1580  nextToken();
1581  addUnwrappedLine();
1582  }
1583  // FIXME: Add error handling.
1584 }
1585 
1586 void UnwrappedLineParser::parseNew() {
1587  assert(FormatTok->is(tok::kw_new) && "'new' expected");
1588  nextToken();
1589  if (Style.Language != FormatStyle::LK_Java)
1590  return;
1591 
1592  // In Java, we can parse everything up to the parens, which aren't optional.
1593  do {
1594  // There should not be a ;, { or } before the new's open paren.
1595  if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1596  return;
1597 
1598  // Consume the parens.
1599  if (FormatTok->is(tok::l_paren)) {
1600  parseParens();
1601 
1602  // If there is a class body of an anonymous class, consume that as child.
1603  if (FormatTok->is(tok::l_brace))
1604  parseChildBlock();
1605  return;
1606  }
1607  nextToken();
1608  } while (!eof());
1609 }
1610 
1611 void UnwrappedLineParser::parseForOrWhileLoop() {
1612  assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1613  "'for', 'while' or foreach macro expected");
1614  nextToken();
1615  if (FormatTok->Tok.is(tok::l_paren))
1616  parseParens();
1617  if (FormatTok->Tok.is(tok::l_brace)) {
1618  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1619  parseBlock(/*MustBeDeclaration=*/false);
1620  addUnwrappedLine();
1621  } else {
1622  addUnwrappedLine();
1623  ++Line->Level;
1624  parseStructuralElement();
1625  --Line->Level;
1626  }
1627 }
1628 
1629 void UnwrappedLineParser::parseDoWhile() {
1630  assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1631  nextToken();
1632  if (FormatTok->Tok.is(tok::l_brace)) {
1633  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1634  parseBlock(/*MustBeDeclaration=*/false);
1635  if (Style.BraceWrapping.IndentBraces)
1636  addUnwrappedLine();
1637  } else {
1638  addUnwrappedLine();
1639  ++Line->Level;
1640  parseStructuralElement();
1641  --Line->Level;
1642  }
1643 
1644  // FIXME: Add error handling.
1645  if (!FormatTok->Tok.is(tok::kw_while)) {
1646  addUnwrappedLine();
1647  return;
1648  }
1649 
1650  nextToken();
1651  parseStructuralElement();
1652 }
1653 
1654 void UnwrappedLineParser::parseLabel() {
1655  nextToken();
1656  unsigned OldLineLevel = Line->Level;
1657  if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1658  --Line->Level;
1659  if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1660  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1661  parseBlock(/*MustBeDeclaration=*/false);
1662  if (FormatTok->Tok.is(tok::kw_break)) {
1664  addUnwrappedLine();
1665  parseStructuralElement();
1666  }
1667  addUnwrappedLine();
1668  } else {
1669  if (FormatTok->is(tok::semi))
1670  nextToken();
1671  addUnwrappedLine();
1672  }
1673  Line->Level = OldLineLevel;
1674  if (FormatTok->isNot(tok::l_brace)) {
1675  parseStructuralElement();
1676  addUnwrappedLine();
1677  }
1678 }
1679 
1680 void UnwrappedLineParser::parseCaseLabel() {
1681  assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1682  // FIXME: fix handling of complex expressions here.
1683  do {
1684  nextToken();
1685  } while (!eof() && !FormatTok->Tok.is(tok::colon));
1686  parseLabel();
1687 }
1688 
1689 void UnwrappedLineParser::parseSwitch() {
1690  assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1691  nextToken();
1692  if (FormatTok->Tok.is(tok::l_paren))
1693  parseParens();
1694  if (FormatTok->Tok.is(tok::l_brace)) {
1695  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1696  parseBlock(/*MustBeDeclaration=*/false);
1697  addUnwrappedLine();
1698  } else {
1699  addUnwrappedLine();
1700  ++Line->Level;
1701  parseStructuralElement();
1702  --Line->Level;
1703  }
1704 }
1705 
1706 void UnwrappedLineParser::parseAccessSpecifier() {
1707  nextToken();
1708  // Understand Qt's slots.
1709  if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1710  nextToken();
1711  // Otherwise, we don't know what it is, and we'd better keep the next token.
1712  if (FormatTok->Tok.is(tok::colon))
1713  nextToken();
1714  addUnwrappedLine();
1715 }
1716 
1717 bool UnwrappedLineParser::parseEnum() {
1718  // Won't be 'enum' for NS_ENUMs.
1719  if (FormatTok->Tok.is(tok::kw_enum))
1720  nextToken();
1721 
1722  // In TypeScript, "enum" can also be used as property name, e.g. in interface
1723  // declarations. An "enum" keyword followed by a colon would be a syntax
1724  // error and thus assume it is just an identifier.
1725  if (Style.Language == FormatStyle::LK_JavaScript &&
1726  FormatTok->isOneOf(tok::colon, tok::question))
1727  return false;
1728 
1729  // Eat up enum class ...
1730  if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1731  nextToken();
1732 
1733  while (FormatTok->Tok.getIdentifierInfo() ||
1734  FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1735  tok::greater, tok::comma, tok::question)) {
1736  nextToken();
1737  // We can have macros or attributes in between 'enum' and the enum name.
1738  if (FormatTok->is(tok::l_paren))
1739  parseParens();
1740  if (FormatTok->is(tok::identifier)) {
1741  nextToken();
1742  // If there are two identifiers in a row, this is likely an elaborate
1743  // return type. In Java, this can be "implements", etc.
1744  if (Style.IsCpp() && FormatTok->is(tok::identifier))
1745  return false;
1746  }
1747  }
1748 
1749  // Just a declaration or something is wrong.
1750  if (FormatTok->isNot(tok::l_brace))
1751  return true;
1752  FormatTok->BlockKind = BK_Block;
1753 
1754  if (Style.Language == FormatStyle::LK_Java) {
1755  // Java enums are different.
1756  parseJavaEnumBody();
1757  return true;
1758  }
1759  if (Style.Language == FormatStyle::LK_Proto) {
1760  parseBlock(/*MustBeDeclaration=*/true);
1761  return true;
1762  }
1763 
1764  // Parse enum body.
1765  bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1766  if (HasError) {
1767  if (FormatTok->is(tok::semi))
1768  nextToken();
1769  addUnwrappedLine();
1770  }
1771  return true;
1772 
1773  // There is no addUnwrappedLine() here so that we fall through to parsing a
1774  // structural element afterwards. Thus, in "enum A {} n, m;",
1775  // "} n, m;" will end up in one unwrapped line.
1776 }
1777 
1778 void UnwrappedLineParser::parseJavaEnumBody() {
1779  // Determine whether the enum is simple, i.e. does not have a semicolon or
1780  // constants with class bodies. Simple enums can be formatted like braced
1781  // lists, contracted to a single line, etc.
1782  unsigned StoredPosition = Tokens->getPosition();
1783  bool IsSimple = true;
1784  FormatToken *Tok = Tokens->getNextToken();
1785  while (Tok) {
1786  if (Tok->is(tok::r_brace))
1787  break;
1788  if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1789  IsSimple = false;
1790  break;
1791  }
1792  // FIXME: This will also mark enums with braces in the arguments to enum
1793  // constants as "not simple". This is probably fine in practice, though.
1794  Tok = Tokens->getNextToken();
1795  }
1796  FormatTok = Tokens->setPosition(StoredPosition);
1797 
1798  if (IsSimple) {
1799  parseBracedList();
1800  addUnwrappedLine();
1801  return;
1802  }
1803 
1804  // Parse the body of a more complex enum.
1805  // First add a line for everything up to the "{".
1806  nextToken();
1807  addUnwrappedLine();
1808  ++Line->Level;
1809 
1810  // Parse the enum constants.
1811  while (FormatTok) {
1812  if (FormatTok->is(tok::l_brace)) {
1813  // Parse the constant's class body.
1814  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1815  /*MunchSemi=*/false);
1816  } else if (FormatTok->is(tok::l_paren)) {
1817  parseParens();
1818  } else if (FormatTok->is(tok::comma)) {
1819  nextToken();
1820  addUnwrappedLine();
1821  } else if (FormatTok->is(tok::semi)) {
1822  nextToken();
1823  addUnwrappedLine();
1824  break;
1825  } else if (FormatTok->is(tok::r_brace)) {
1826  addUnwrappedLine();
1827  break;
1828  } else {
1829  nextToken();
1830  }
1831  }
1832 
1833  // Parse the class body after the enum's ";" if any.
1834  parseLevel(/*HasOpeningBrace=*/true);
1835  nextToken();
1836  --Line->Level;
1837  addUnwrappedLine();
1838 }
1839 
1840 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
1841  const FormatToken &InitialToken = *FormatTok;
1842  nextToken();
1843 
1844  // The actual identifier can be a nested name specifier, and in macros
1845  // it is often token-pasted.
1846  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
1847  tok::kw___attribute, tok::kw___declspec,
1848  tok::kw_alignas) ||
1849  ((Style.Language == FormatStyle::LK_Java ||
1851  FormatTok->isOneOf(tok::period, tok::comma))) {
1852  bool IsNonMacroIdentifier =
1853  FormatTok->is(tok::identifier) &&
1854  FormatTok->TokenText != FormatTok->TokenText.upper();
1855  nextToken();
1856  // We can have macros or attributes in between 'class' and the class name.
1857  if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
1858  parseParens();
1859  }
1860 
1861  // Note that parsing away template declarations here leads to incorrectly
1862  // accepting function declarations as record declarations.
1863  // In general, we cannot solve this problem. Consider:
1864  // class A<int> B() {}
1865  // which can be a function definition or a class definition when B() is a
1866  // macro. If we find enough real-world cases where this is a problem, we
1867  // can parse for the 'template' keyword in the beginning of the statement,
1868  // and thus rule out the record production in case there is no template
1869  // (this would still leave us with an ambiguity between template function
1870  // and class declarations).
1871  if (FormatTok->isOneOf(tok::colon, tok::less)) {
1872  while (!eof()) {
1873  if (FormatTok->is(tok::l_brace)) {
1874  calculateBraceTypes(/*ExpectClassBody=*/true);
1875  if (!tryToParseBracedList())
1876  break;
1877  }
1878  if (FormatTok->Tok.is(tok::semi))
1879  return;
1880  nextToken();
1881  }
1882  }
1883  if (FormatTok->Tok.is(tok::l_brace)) {
1884  if (ParseAsExpr) {
1885  parseChildBlock();
1886  } else {
1887  if (ShouldBreakBeforeBrace(Style, InitialToken))
1888  addUnwrappedLine();
1889 
1890  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1891  /*MunchSemi=*/false);
1892  }
1893  }
1894  // There is no addUnwrappedLine() here so that we fall through to parsing a
1895  // structural element afterwards. Thus, in "class A {} n, m;",
1896  // "} n, m;" will end up in one unwrapped line.
1897 }
1898 
1899 void UnwrappedLineParser::parseObjCProtocolList() {
1900  assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1901  do
1902  nextToken();
1903  while (!eof() && FormatTok->Tok.isNot(tok::greater));
1904  nextToken(); // Skip '>'.
1905 }
1906 
1907 void UnwrappedLineParser::parseObjCUntilAtEnd() {
1908  do {
1909  if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
1910  nextToken();
1911  addUnwrappedLine();
1912  break;
1913  }
1914  if (FormatTok->is(tok::l_brace)) {
1915  parseBlock(/*MustBeDeclaration=*/false);
1916  // In ObjC interfaces, nothing should be following the "}".
1917  addUnwrappedLine();
1918  } else if (FormatTok->is(tok::r_brace)) {
1919  // Ignore stray "}". parseStructuralElement doesn't consume them.
1920  nextToken();
1921  addUnwrappedLine();
1922  } else {
1923  parseStructuralElement();
1924  }
1925  } while (!eof());
1926 }
1927 
1928 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
1929  nextToken();
1930  nextToken(); // interface name
1931 
1932  // @interface can be followed by either a base class, or a category.
1933  if (FormatTok->Tok.is(tok::colon)) {
1934  nextToken();
1935  nextToken(); // base class name
1936  } else if (FormatTok->Tok.is(tok::l_paren))
1937  // Skip category, if present.
1938  parseParens();
1939 
1940  if (FormatTok->Tok.is(tok::less))
1941  parseObjCProtocolList();
1942 
1943  if (FormatTok->Tok.is(tok::l_brace)) {
1945  addUnwrappedLine();
1946  parseBlock(/*MustBeDeclaration=*/true);
1947  }
1948 
1949  // With instance variables, this puts '}' on its own line. Without instance
1950  // variables, this ends the @interface line.
1951  addUnwrappedLine();
1952 
1953  parseObjCUntilAtEnd();
1954 }
1955 
1956 void UnwrappedLineParser::parseObjCProtocol() {
1957  nextToken();
1958  nextToken(); // protocol name
1959 
1960  if (FormatTok->Tok.is(tok::less))
1961  parseObjCProtocolList();
1962 
1963  // Check for protocol declaration.
1964  if (FormatTok->Tok.is(tok::semi)) {
1965  nextToken();
1966  return addUnwrappedLine();
1967  }
1968 
1969  addUnwrappedLine();
1970  parseObjCUntilAtEnd();
1971 }
1972 
1973 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
1974  bool IsImport = FormatTok->is(Keywords.kw_import);
1975  assert(IsImport || FormatTok->is(tok::kw_export));
1976  nextToken();
1977 
1978  // Consume the "default" in "export default class/function".
1979  if (FormatTok->is(tok::kw_default))
1980  nextToken();
1981 
1982  // Consume "async function", "function" and "default function", so that these
1983  // get parsed as free-standing JS functions, i.e. do not require a trailing
1984  // semicolon.
1985  if (FormatTok->is(Keywords.kw_async))
1986  nextToken();
1987  if (FormatTok->is(Keywords.kw_function)) {
1988  nextToken();
1989  return;
1990  }
1991 
1992  // For imports, `export *`, `export {...}`, consume the rest of the line up
1993  // to the terminating `;`. For everything else, just return and continue
1994  // parsing the structural element, i.e. the declaration or expression for
1995  // `export default`.
1996  if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
1997  !FormatTok->isStringLiteral())
1998  return;
1999 
2000  while (!eof()) {
2001  if (FormatTok->is(tok::semi))
2002  return;
2003  if (Line->Tokens.size() == 0) {
2004  // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2005  // import statement should terminate.
2006  return;
2007  }
2008  if (FormatTok->is(tok::l_brace)) {
2009  FormatTok->BlockKind = BK_Block;
2010  parseBracedList();
2011  } else {
2012  nextToken();
2013  }
2014  }
2015 }
2016 
2017 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2018  StringRef Prefix = "") {
2019  llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
2020  << (Line.InPPDirective ? " MACRO" : "") << ": ";
2021  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2022  E = Line.Tokens.end();
2023  I != E; ++I) {
2024  llvm::dbgs() << I->Tok->Tok.getName() << "["
2025  << "T=" << I->Tok->Type
2026  << ", OC=" << I->Tok->OriginalColumn << "] ";
2027  }
2028  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2029  E = Line.Tokens.end();
2030  I != E; ++I) {
2031  const UnwrappedLineNode &Node = *I;
2033  I = Node.Children.begin(),
2034  E = Node.Children.end();
2035  I != E; ++I) {
2036  printDebugInfo(*I, "\nChild: ");
2037  }
2038  }
2039  llvm::dbgs() << "\n";
2040 }
2041 
2042 void UnwrappedLineParser::addUnwrappedLine() {
2043  if (Line->Tokens.empty())
2044  return;
2045  DEBUG({
2046  if (CurrentLines == &Lines)
2047  printDebugInfo(*Line);
2048  });
2049  CurrentLines->push_back(std::move(*Line));
2050  Line->Tokens.clear();
2051  Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2052  if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2053  CurrentLines->append(
2054  std::make_move_iterator(PreprocessorDirectives.begin()),
2055  std::make_move_iterator(PreprocessorDirectives.end()));
2056  PreprocessorDirectives.clear();
2057  }
2058 }
2059 
2060 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2061 
2062 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2063  return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2064  FormatTok.NewlinesBefore > 0;
2065 }
2066 
2067 static bool isLineComment(const FormatToken &FormatTok) {
2068  return FormatTok.is(tok::comment) &&
2069  FormatTok.TokenText.startswith("//");
2070 }
2071 
2072 // Checks if \p FormatTok is a line comment that continues the line comment
2073 // section on \p Line.
2074 static bool continuesLineComment(const FormatToken &FormatTok,
2075  const UnwrappedLine &Line,
2076  llvm::Regex &CommentPragmasRegex) {
2077  if (Line.Tokens.empty())
2078  return false;
2079 
2080  StringRef IndentContent = FormatTok.TokenText;
2081  if (FormatTok.TokenText.startswith("//") ||
2082  FormatTok.TokenText.startswith("/*"))
2083  IndentContent = FormatTok.TokenText.substr(2);
2084  if (CommentPragmasRegex.match(IndentContent))
2085  return false;
2086 
2087  // If Line starts with a line comment, then FormatTok continues the comment
2088  // section if its original column is greater or equal to the original start
2089  // column of the line.
2090  //
2091  // Define the min column token of a line as follows: if a line ends in '{' or
2092  // contains a '{' followed by a line comment, then the min column token is
2093  // that '{'. Otherwise, the min column token of the line is the first token of
2094  // the line.
2095  //
2096  // If Line starts with a token other than a line comment, then FormatTok
2097  // continues the comment section if its original column is greater than the
2098  // original start column of the min column token of the line.
2099  //
2100  // For example, the second line comment continues the first in these cases:
2101  //
2102  // // first line
2103  // // second line
2104  //
2105  // and:
2106  //
2107  // // first line
2108  // // second line
2109  //
2110  // and:
2111  //
2112  // int i; // first line
2113  // // second line
2114  //
2115  // and:
2116  //
2117  // do { // first line
2118  // // second line
2119  // int i;
2120  // } while (true);
2121  //
2122  // and:
2123  //
2124  // enum {
2125  // a, // first line
2126  // // second line
2127  // b
2128  // };
2129  //
2130  // The second line comment doesn't continue the first in these cases:
2131  //
2132  // // first line
2133  // // second line
2134  //
2135  // and:
2136  //
2137  // int i; // first line
2138  // // second line
2139  //
2140  // and:
2141  //
2142  // do { // first line
2143  // // second line
2144  // int i;
2145  // } while (true);
2146  //
2147  // and:
2148  //
2149  // enum {
2150  // a, // first line
2151  // // second line
2152  // };
2153  const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2154 
2155  // Scan for '{//'. If found, use the column of '{' as a min column for line
2156  // comment section continuation.
2157  const FormatToken *PreviousToken = nullptr;
2158  for (const UnwrappedLineNode &Node : Line.Tokens) {
2159  if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2160  isLineComment(*Node.Tok)) {
2161  MinColumnToken = PreviousToken;
2162  break;
2163  }
2164  PreviousToken = Node.Tok;
2165 
2166  // Grab the last newline preceding a token in this unwrapped line.
2167  if (Node.Tok->NewlinesBefore > 0) {
2168  MinColumnToken = Node.Tok;
2169  }
2170  }
2171  if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2172  MinColumnToken = PreviousToken;
2173  }
2174 
2175  unsigned MinContinueColumn =
2176  MinColumnToken->OriginalColumn +
2177  (isLineComment(*MinColumnToken) ? 0 : 1);
2178  return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
2179  isLineComment(*(Line.Tokens.back().Tok)) &&
2180  FormatTok.OriginalColumn >= MinContinueColumn;
2181 }
2182 
2183 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2184  bool JustComments = Line->Tokens.empty();
2186  I = CommentsBeforeNextToken.begin(),
2187  E = CommentsBeforeNextToken.end();
2188  I != E; ++I) {
2189  // Line comments that belong to the same line comment section are put on the
2190  // same line since later we might want to reflow content between them.
2191  // Additional fine-grained breaking of line comment sections is controlled
2192  // by the class BreakableLineCommentSection in case it is desirable to keep
2193  // several line comment sections in the same unwrapped line.
2194  //
2195  // FIXME: Consider putting separate line comment sections as children to the
2196  // unwrapped line instead.
2197  (*I)->ContinuesLineCommentSection =
2198  continuesLineComment(**I, *Line, CommentPragmasRegex);
2199  if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2200  addUnwrappedLine();
2201  pushToken(*I);
2202  }
2203  if (NewlineBeforeNext && JustComments)
2204  addUnwrappedLine();
2205  CommentsBeforeNextToken.clear();
2206 }
2207 
2208 void UnwrappedLineParser::nextToken() {
2209  if (eof())
2210  return;
2211  flushComments(isOnNewLine(*FormatTok));
2212  pushToken(FormatTok);
2213  if (Style.Language != FormatStyle::LK_JavaScript)
2214  readToken();
2215  else
2216  readTokenWithJavaScriptASI();
2217 }
2218 
2219 const FormatToken *UnwrappedLineParser::getPreviousToken() {
2220  // FIXME: This is a dirty way to access the previous token. Find a better
2221  // solution.
2222  if (!Line || Line->Tokens.empty())
2223  return nullptr;
2224  return Line->Tokens.back().Tok;
2225 }
2226 
2227 void UnwrappedLineParser::distributeComments(
2228  const SmallVectorImpl<FormatToken *> &Comments,
2229  const FormatToken *NextTok) {
2230  // Whether or not a line comment token continues a line is controlled by
2231  // the method continuesLineComment, with the following caveat:
2232  //
2233  // Define a trail of Comments to be a nonempty proper postfix of Comments such
2234  // that each comment line from the trail is aligned with the next token, if
2235  // the next token exists. If a trail exists, the beginning of the maximal
2236  // trail is marked as a start of a new comment section.
2237  //
2238  // For example in this code:
2239  //
2240  // int a; // line about a
2241  // // line 1 about b
2242  // // line 2 about b
2243  // int b;
2244  //
2245  // the two lines about b form a maximal trail, so there are two sections, the
2246  // first one consisting of the single comment "// line about a" and the
2247  // second one consisting of the next two comments.
2248  if (Comments.empty())
2249  return;
2250  bool ShouldPushCommentsInCurrentLine = true;
2251  bool HasTrailAlignedWithNextToken = false;
2252  unsigned StartOfTrailAlignedWithNextToken = 0;
2253  if (NextTok) {
2254  // We are skipping the first element intentionally.
2255  for (unsigned i = Comments.size() - 1; i > 0; --i) {
2256  if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2257  HasTrailAlignedWithNextToken = true;
2258  StartOfTrailAlignedWithNextToken = i;
2259  }
2260  }
2261  }
2262  for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2263  FormatToken *FormatTok = Comments[i];
2264  if (HasTrailAlignedWithNextToken &&
2265  i == StartOfTrailAlignedWithNextToken) {
2266  FormatTok->ContinuesLineCommentSection = false;
2267  } else {
2268  FormatTok->ContinuesLineCommentSection =
2269  continuesLineComment(*FormatTok, *Line, CommentPragmasRegex);
2270  }
2271  if (!FormatTok->ContinuesLineCommentSection &&
2272  (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2273  ShouldPushCommentsInCurrentLine = false;
2274  }
2275  if (ShouldPushCommentsInCurrentLine) {
2276  pushToken(FormatTok);
2277  } else {
2278  CommentsBeforeNextToken.push_back(FormatTok);
2279  }
2280  }
2281 }
2282 
2283 void UnwrappedLineParser::readToken() {
2285  do {
2286  FormatTok = Tokens->getNextToken();
2287  assert(FormatTok);
2288  while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2289  (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2290  distributeComments(Comments, FormatTok);
2291  Comments.clear();
2292  // If there is an unfinished unwrapped line, we flush the preprocessor
2293  // directives only after that unwrapped line was finished later.
2294  bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2295  ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2296  // Comments stored before the preprocessor directive need to be output
2297  // before the preprocessor directive, at the same level as the
2298  // preprocessor directive, as we consider them to apply to the directive.
2299  flushComments(isOnNewLine(*FormatTok));
2300  parsePPDirective();
2301  }
2302  while (FormatTok->Type == TT_ConflictStart ||
2303  FormatTok->Type == TT_ConflictEnd ||
2304  FormatTok->Type == TT_ConflictAlternative) {
2305  if (FormatTok->Type == TT_ConflictStart) {
2306  conditionalCompilationStart(/*Unreachable=*/false);
2307  } else if (FormatTok->Type == TT_ConflictAlternative) {
2308  conditionalCompilationAlternative();
2309  } else if (FormatTok->Type == TT_ConflictEnd) {
2310  conditionalCompilationEnd();
2311  }
2312  FormatTok = Tokens->getNextToken();
2313  FormatTok->MustBreakBefore = true;
2314  }
2315 
2316  if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
2317  !Line->InPPDirective) {
2318  continue;
2319  }
2320 
2321  if (!FormatTok->Tok.is(tok::comment)) {
2322  distributeComments(Comments, FormatTok);
2323  Comments.clear();
2324  return;
2325  }
2326 
2327  Comments.push_back(FormatTok);
2328  } while (!eof());
2329 
2330  distributeComments(Comments, nullptr);
2331  Comments.clear();
2332 }
2333 
2334 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2335  Line->Tokens.push_back(UnwrappedLineNode(Tok));
2336  if (MustBreakBeforeNextToken) {
2337  Line->Tokens.back().Tok->MustBreakBefore = true;
2338  MustBreakBeforeNextToken = false;
2339  }
2340 }
2341 
2342 } // end namespace format
2343 } // end namespace clang
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T -> getSizeExpr()))
SmallVector< UnwrappedLine, 0 > Children
bool AfterUnion
Wrap union definitions.
Definition: Format.h:641
Indent in all namespaces.
Definition: Format.h:1046
Token Tok
The Token.
Definition: FormatToken.h:119
Should be used for Protocol Buffers (https://developers.google.com/protocol-buffers/).
Definition: Format.h:962
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Definition: Token.h:94
bool IndentCaseLabels
Indent case labels one level from the switch statement.
Definition: Format.h:883
unsigned OriginalColumn
The original 0-based column of this token, including expanded tabs.
Definition: FormatToken.h:210
static LLVM_ATTRIBUTE_UNUSED void printDebugInfo(const UnwrappedLine &Line, StringRef Prefix="")
StringRef P
Parser - This implements a parser for the C family of languages.
Definition: Parse/Parser.h:57
static bool mustBeJSIdent(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
bool IsFirst
Indicates that this is the first token of the file.
Definition: FormatToken.h:151
bool isAnyIdentifier() const
Return true if this is a raw identifier (when lexing in raw mode) or a non-keyword identifier (when l...
Definition: Token.h:106
bool isBinaryOperator() const
Definition: FormatToken.h:383
virtual void consumeUnwrappedLine(const UnwrappedLine &Line)=0
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:125
tok::TokenKind getKind() const
Definition: Token.h:89
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:112
unsigned Level
The indent level of the UnwrappedLine.
bool closesScope() const
Returns whether Tok is )]} or a template closing >.
Definition: FormatToken.h:353
bool AfterObjCDeclaration
Wrap ObjC definitions (@autoreleasepool, interfaces, ..).
Definition: Format.h:613
bool IndentBraces
Indent the wrapped braces themselves.
Definition: Format.h:673
Should be used for Java.
Definition: Format.h:955
static bool tokenCanStartNewLine(const clang::Token &Tok)
Token - This structure provides full information about a lexed token.
Definition: Token.h:35
void setKind(tok::TokenKind K)
Definition: Token.h:90
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
Definition: Lexer.cpp:45
bool isNot(T Kind) const
Definition: FormatToken.h:308
NamespaceIndentationKind NamespaceIndentation
The indentation used for namespaces.
Definition: Format.h:1050
static bool isGoogScope(const UnwrappedLine &Line)
virtual FormatToken * getNextToken()=0
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...
CompoundStatementIndenter(UnwrappedLineParser *Parser, const FormatStyle &Style, unsigned &LineLevel)
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:301
ScopedLineState(UnwrappedLineParser &Parser, bool SwitchToPreprocessorLines=false)
std::list< UnwrappedLineNode > Tokens
The Tokens comprising this UnwrappedLine.
Should be used for JavaScript.
Definition: Format.h:957
ContinuationIndenter * Indenter
StateNode * Previous
static const size_t kInvalidIndex
bool InPPDirective
Whether this UnwrappedLine is part of a preprocessor directive.
BraceWrappingFlags BraceWrapping
Control of individual brace wrapping cases.
Definition: Format.h:680
UnwrappedLineParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, ArrayRef< FormatToken *> Tokens, UnwrappedLineConsumer &Callback)
bool AfterFunction
Wrap function definitions.
Definition: Format.h:595
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:115
SourceLocation getEnd() const
#define false
Definition: stdbool.h:33
static bool continuesLineComment(const FormatToken &FormatTok, const UnwrappedLine &Line, llvm::Regex &CommentPragmasRegex)
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:292
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:176
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:602
SourceRange WhitespaceRange
The range of the whitespace immediately preceding the Token.
Definition: FormatToken.h:132
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:163
static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
virtual FormatToken * setPosition(unsigned Position)=0
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
Definition: Lexer.cpp:52
bool isSimpleTypeSpecifier() const
Determine whether the token is a simple-type-specifier.
Definition: FormatToken.cpp:41
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:46
static bool ShouldBreakBeforeBrace(const FormatStyle &Style, const FormatToken &InitialToken)
Indent only in inner namespaces (nested in other namespaces).
Definition: Format.h:1036
LanguageKind Language
Language, this format style is targeted at.
Definition: Format.h:969
ast_type_traits::DynTypedNode Node
bool isNot(tok::TokenKind K) const
Definition: Token.h:95
/file This file defines classes for searching and analyzing source code clones.
bool IsCpp() const
Definition: Format.h:966
static bool isLineComment(const FormatToken &FormatTok)
bool Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside)...
Definition: FormatToken.h:290
Should be used for TableGen code.
Definition: Format.h:964
tok::PPKeywordKind getPPKeywordID() const
Return the preprocessor keyword ID for this identifier.
bool AfterStruct
Wrap struct definitions.
Definition: Format.h:627
virtual unsigned getPosition()=0
bool startsSequence(A K1, Ts... Tokens) const
true if this token starts a sequence with the given tokens in order, following the Next pointers...
Definition: FormatToken.h:313
bool AfterControlStatement
Wrap control statements (if/for/while/switch/..).
Definition: Format.h:567
unsigned kind
All of the diagnostics that can be emitted by the frontend.
Definition: DiagnosticIDs.h:43
bool MustBreakBefore
Whether there must be a line break before this token.
Definition: FormatToken.h:157
bool AfterClass
Wrap class definitions.
Definition: Format.h:549
StringRef Text
Definition: Format.cpp:1245
bool ContinuesLineCommentSection
Does this line comment continue a line comment section?
Definition: FormatToken.h:269
bool isStringLiteral() const
Definition: FormatToken.h:324
SourceLocation getBegin() const
bool AfterNamespace
Wrap namespace definitions.
Definition: Format.h:611
bool HasUnescapedNewline
Whether there is at least one unescaped newline before the Token.
Definition: FormatToken.h:129
BraceBlockKind BlockKind
Contains the kind of block if this token is a brace.
Definition: FormatToken.h:169
void startToken()
Reset all flags to cleared.
Definition: Token.h:168